Home
Name Modified Size InfoDownloads / Week
README 2015-05-04 4.8 kB
ExampleGraphicalOutput.pdf 2015-05-04 5.5 kB
BoCluSt.txt 2015-05-04 17.5 kB
Totals: 3 Items   27.8 kB 2
 
# _____________________________________________________________________________
#|                                   BoCluSt                                   |
#|                                                                             |
#| BOotstrap CLUster STability                                                 |
#|                                                                             |
#| R function for the determination of cluster structure (or community         |
#| detection) in sets of correlated variables by comparing the stability       |
#| under bootstrap resampling of different cluster allocations of these        |
#| variables                                                                   |
#|_____________________________________________________________________________|
# _____________________________________________________________________________
#|                                                                             |
#|   Carlos Garcia                                                             |
#|   CIBUS Campus Sur                                                          |
#|   Universidade de Santiago de Compostela                                    |
#|   15782 A Corunha                                                           |
#|   Galiza - Spain                                                            |
#|                                                                             |
#|   carlos.garcia.suarez@usc.es                                               |
#|_____________________________________________________________________________|
                                                                             
# ******************************************************************************
# USAGE 

# INPUT:  Numeric array of two dimensions, observations in rows, variables in
#          columns

# OUTPUT: 
#         1) "result.out": object including: 
#         - "NumVar": Number of variables
#         - "VarCriterion": Variance criterion in the observed sample, for 1 to
#             n clusters 
#         - "0.025NulQuantile": 0.025 Quantile of the variance criterion in 
#             the null, randomized distribution (for 1 to n clusters; NA for 1
#             and n)
#         - "NumClus": Number of clusters found
#         - "VarAllocSel": Allocation of variables (for 1 to n variables, as 
#             ordered in the input DATA matrix) to clusters in the minimum
#              variance partition
#         - "VarAllocAll": Allocation of variables (for 1 to n variables, as 
#             ordered in the input DATA matrix) to clusters in all partitions

#         2) Screen outputs:
#         - Plot of variance criterion in the analysed sample (small circles),
#           and 0.025 quantile for the null case (simple line) for 1 to
#           n clusters (NA for 1 and n)
#         - variance criteria and their 0.025, 0.5 and 0.975 quantiles  
#           in the null, randomized cases (NA for 1 and n clusters, i.e. the
#           extreme points plotted correspond to 2 and n-1 clusters)

# NOTE: BoCluSt requires R package "cluster" if Partition Around Medoids is 
#       chosen ("PAM=TRUE") as clustering procedure


# call: "result.out<-BoCluSt(DATA=DATA,N_BOO=100,NR_BOO=100,N_SAMPLES=100,
#                            N_THIN=0,PAM=TRUE,print_D=TRUE,print_Dr=TRUE)"

  


# ******************************************************************************
# ******************************************************************************
# Function  
# ..............................................................................
# ..............................................................................

BoCluSt <- function(DATA,N_BOO=100,NR_BOO=100,N_SAMPLES=100,N_THIN=0,PAM=TRUE,
                    print_D=TRUE,print_Dr=TRUE){
     # DATA: data file, observations in rows, variables in columns
     # N_BOO: number of bootstrap replicates for the observed case:
     # 100 could be OK
     # NR_BOO: number of bootstrap replicates for each of the randomized
     # null cases: 100 could be OK
     # N_SAMPLES: number of randomized null cases, 100 could be OK
     # N_THIN: Sample size for thinning; 0: no thinning-uses actual sample size
     # PAM: TRUE: Partition Around Medoids Analysis done. Distances among
                  # variables are: 1 - absolute Pearson correlations
     # PAM: FALSE: k-means analysis
     # print_D=TRUE: print.default() of observed sample bootstrap progress?
     # print_Dr=TRUE: print.default() of the randomization progress?
 






Source: README, updated 2015-05-04