# _____________________________________________________________________________
#| BoCluSt |
#| |
#| BOotstrap CLUster STability |
#| |
#| R function for the determination of cluster structure (or community |
#| detection) in sets of correlated variables by comparing the stability |
#| under bootstrap resampling of different cluster allocations of these |
#| variables | |
#|_____________________________________________________________________________|
# _____________________________________________________________________________
#| |
#| Carlos Garcia |
#| CIBUS Campus Sur |
#| Universidade de Santiago de Compostela | |
#| 15782 A Corunha |
#| Galiza - Spain |
#| |
#| carlos.garcia.suarez@usc.es |
#|_____________________________________________________________________________|
# ******************************************************************************
# USAGE
# INPUT: Numeric array of two dimensions, observations in rows, variables in
# columns
# OUTPUT:
# 1) "result.out": object including:
# - "NumVar": Number of variables
# - "VarCriterion": Variance criterion in the observed sample, for 1 to
# n clusters
# - "0.025NulQuantile": 0.025 Quantile of the variance criterion in
# the null, randomized distribution (for 1 to n clusters; NA for 1
# and n)
# - "NumClus": Number of clusters found
# - "VarAllocSel": Allocation of variables (for 1 to n variables, as
# ordered in the input DATA matrix) to clusters in the minimum
# variance partition
# - "VarAllocAll": Allocation of variables (for 1 to n variables, as
# ordered in the input DATA matrix) to clusters in all partitions
# 2) Screen outputs:
# - Plot of variance criterion in the analysed sample (small circles),
# and 0.025 quantile for the null case (simple line) for 1 to
# n clusters (NA for 1 and n)
# - variance criteria and their 0.025, 0.5 and 0.975 quantiles
# in the null, randomized cases (NA for 1 and n clusters, i.e. the
# extreme points plotted correspond to 2 and n-1 clusters)
# NOTE: BoCluSt requires R package "cluster" if Partition Around Medoids is
# chosen ("PAM=TRUE") as clustering procedure
# call: "result.out<-BoCluSt(DATA=DATA,N_BOO=100,NR_BOO=100,N_SAMPLES=100,
# N_THIN=0,PAM=TRUE,print_D=TRUE,print_Dr=TRUE)"
# ******************************************************************************
# ******************************************************************************
# Function
# ..............................................................................
# ..............................................................................
BoCluSt <- function(DATA,N_BOO=100,NR_BOO=100,N_SAMPLES=100,N_THIN=0,PAM=TRUE,
print_D=TRUE,print_Dr=TRUE){
# DATA: data file, observations in rows, variables in columns
# N_BOO: number of bootstrap replicates for the observed case:
# 100 could be OK
# NR_BOO: number of bootstrap replicates for each of the randomized
# null cases: 100 could be OK
# N_SAMPLES: number of randomized null cases, 100 could be OK
# N_THIN: Sample size for thinning; 0: no thinning-uses actual sample size
# PAM: TRUE: Partition Around Medoids analysis done. Distances among
# variables are: 1 - absolute Pearson correlations
# PAM: FALSE: k-means analysis
# print_D=TRUE: print.default() of observed sample bootstrap progress?
# print_Dr=TRUE: print.default() of the randomization progress?