[Genex-cvs-commit] genex-server/rcluster-1.0/lib/r ChangeLog,1.1,1.2 permcluster.r,1.5,1.6

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/genex/genex-server/rcluster-1.0/lib/r
In directory sc8-pr-cvs1:/tmp/cvs-serv16754

Modified Files:
	ChangeLog permcluster.r 
Log Message:
* permcluster.r (Repository): 
Added new version from kschlauch

Index: ChangeLog
===================================================================
RCS file: /cvsroot/genex/genex-server/rcluster-1.0/lib/r/ChangeLog,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** ChangeLog	18 Nov 2002 04:42:58 -0000	1.1
--- ChangeLog	8 Apr 2003 20:21:44 -0000	1.2
***************
*** 1,2 ****
--- 1,7 ----
+ 2003-04-08  Jason E. Stewart  <ja...@op...>
+ 
+ 	* permcluster.r (Repository): 
+ 	Added new version from kschlauch
+ 
  2002-11-17 Harry Mangalam <hj...@ta...>
    * committed Karen's changes to permcluster.r

Index: permcluster.r
===================================================================
RCS file: /cvsroot/genex/genex-server/rcluster-1.0/lib/r/permcluster.r,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** permcluster.r	18 Nov 2002 04:42:58 -0000	1.5
--- permcluster.r	8 Apr 2003 20:21:45 -0000	1.6
***************
*** 1,14 ****
! #All rights reserved                      #
! 
! ##########################################
! ## Main program call
! ## This program does a breadth-first search of the clustering tree generated by Murtagh's
! ## hclust procedure to store the clusters for use of the permutation routine called in index.
! ## INPUT: data.txt is the user's tab-delimited "txt" data file
! ##        dist.method is the user-defined distance method 
! ##        stop.level  is the user-defined level at which the clustering should stop
! ## OUTPUT: Two files written to the user directory in "txt" format
! ##         as "User.Data" and "Graph.Data".
! #######################################################################################
  perm.program<-function(data.txt,log.flag,dist.method,agglom.method,stop.level,num.perms,thresh)
  {options(digits=10)
--- 1,19 ----
! ###############################################################
! ##            Copyright 2000-2003                            ##
! ##            National Center for Genome Resources           ##
! ##            George Mason University                        ##
! ##            All rights reserved                            ##
! ##            permcluster.2003.r, written by Karen Schlauch  ##
! ###############################################################
! ################################################################################################
! ## Main program call                                                                          ##
! ## This program does a breadth-first search of the clustering tree generated by Murtagh's     ##
! ## hclust procedure to store the clusters for use of the permutation routine called in index. ##
! ## INPUT: data.txt is the user's tab-delimited "txt" data file                                ##
! ##        dist.method is the user-defined distance method                                     ##
! ##        stop.level  is the user-defined level at which the clustering should stop           ##
! ## OUTPUT: Two files written to the user directory in "txt" format                            ##
! ##         as "User.Data" and "Graph.Data".                                                   ##
! ################################################################################################
  perm.program<-function(data.txt,log.flag,dist.method,agglom.method,stop.level,num.perms,thresh)
  {options(digits=10)
***************
*** 22,43 ****
   stuff<-index(data.file,gene.names,dist.method,agglom.method,stop.level,num.perms,thresh)
  return(stuff)}
! 
! #############################################################################################
! ## The scan.data function reads in tab-delimited "txt" file,
! ## which is in the following form:
! ##
! ## genename	ratio1	ratio2	ratio3	ratio4 ....
! ## gene1	.1	.2	.3	.4
! ##
! ## Only the FIRST row should contain column labels or other text. The program
! ## thus skips only the FIRST row, regardless of what it contains. To change this,
! ## simply change the "skip" variable which is now set to 1.
! ## INPUT: data.txt is the user's tab-delimited "txt" data file
! ## OUTPUT: Data: the data file in matrix form with the gene name replaced by gene number,
! ##         where the gene number is the position of the gene in the data file
! ##         num.exp: the number of experiments (or replicates) of the data file
! ##         num.genes: the number of genes in the data file
! ##         gene.names: a vector of the gene names in the data file
! ############################################################################################# 
  scan.data<-function(data.txt,log.flag)
   {num.exp<-length(scan(data.txt,sep="\t",what="",skip=2,nlines=1))-1
--- 27,47 ----
   stuff<-index(data.file,gene.names,dist.method,agglom.method,stop.level,num.perms,thresh)
  return(stuff)}
! ###############################################################################################
! ## The scan.data function reads in tab-delimited "txt" file,                                 ##
! ## which is in the following form:                                                           ##
! ##                                                                                           ##
! ## genename	ratio1	ratio2	ratio3	ratio4 ....                                          ##
! ## gene1	.1	.2	.3	.4                                                   ##   
! ##                                                                                           ##
! ## Only the FIRST row should contain column labels or other text. The program                ##
! ## thus skips only the FIRST row, regardless of what it contains. To change this,            ##  
! ## simply change the "skip" variable which is now set to 1.                                  ##
! ## INPUT: data.txt is the user's tab-delimited "txt" data file                               ##  
! ## OUTPUT: Data: the data file in matrix form with the gene name replaced by gene number,    ##
! ##         where the gene number is the position of the gene in the data file                ##
! ##         num.exp: the number of experiments (or replicates) of the data file               ##
! ##         num.genes: the number of genes in the data file                                   ## 
! ##         gene.names: a vector of the gene names in the data file                           ## 
! ############################################################################################### 
  scan.data<-function(data.txt,log.flag)
   {num.exp<-length(scan(data.txt,sep="\t",what="",skip=2,nlines=1))-1
***************
*** 56,94 ****
    if(log.flag=="F") return(Data,num.exp,num.genes,gene.names)}

! ##############################################################################
! ## The get.clust.list function first calls the hclust function to generate the
! ## clustering info and clustering tree.
! ## The clustering info given by the hclust function is parsed.
! ## The first loop counts the total number of clusters, the second loop stores
! ## necessary information to build the View.Matrix in the following function.
! ## (See details below in comments)
! ## INPUT:  data.file: the data file output of scan.data
! ##         dist.method: the user-defined distance method
! ## OUTPUT: View.Matrix: a somewhat empty shell of View.Matrix to be completed
! ##                      in the index function here it contains the gene 
! ##                      expression profiles and gene numbers of each cluster
! ##                      and the cluster dimension
! ##         I.list:      a list storing indeces of the clusters of View.Matrix, height, dimension
! ##         m:           the merge matrix produced by the hclust function 
! ##         d:           the number of rows of the merge matrix
! ##         num.exp:     the number of experiments
! ##         dim.sum:     the sum of the dimensions of all clusters
! ##         Left:        the Left child of the cluster being considered
! ##         Right:       the right child of the cluster being considered
! ##         Orig:        the cluster being considered
! ##         num.clusters:the number of total clusters considered
! #####################################################################

  get.clust.list<- function(data.file,dist.method,agglom.method)
  {library(mva)
   num.exp<-dim(data.file)[2]-1
! 
!  if((dist.method =="cos") || (dist.method=="pcos") || (dist.method=="cor") || (dist.method=="eucna"))
    dist.matrix<-other.dist.metrics(data.file,dist.method)$A
!  if((dist.method !="cos") && (dist.method!="pcos") && (dist.method!="cor") && (dist.method!="eucna"))
    dist.matrix<-dist(data.file[,2:(num.exp+1)],method=dist.method)

   clust.info <- hclust(dist.matrix,agglom.method) 
-   
   m          <- clust.info$merge
   h          <- clust.info$height
--- 60,100 ----
    if(log.flag=="F") return(Data,num.exp,num.genes,gene.names)}

! ################################################################################################# 
! ## The get.clust.list function first calls the hclust function to generate the                 ##
! ## clustering info and clustering tree.                                                        ## 
! ## The clustering info given by the hclust function is parsed.                                 ##
! ## The first loop counts the total number of clusters, the second loop stores                  ##  
! ## necessary information to build the View.Matrix in the following function.                   ##
! ## (See details below in comments)                                                             ##
! ## INPUT:  data.file: the data file output of scan.data                                        ##
! ##         dist.method: the user-defined distance method                                       ##
! ##          "euc": euclidean distance, in which NA's are excluded and no scaling occurs      ##
! ##          "cor":   correlation coefficient                                                   ##
! ##          "cos":   cosine of the angle between two gene profile vectors                      ##
! ##          "pcos":  Munneke's penalized cosine distance metric
! ## OUTPUT: View.Matrix: a somewhat empty shell of View.Matrix to be completed                  ##
! ##                      in the index function here it contains the gene                        ##
! ##                      expression profiles and gene numbers of each cluster                   ##
! ##                      and the cluster dimension                                              ##
! ##         I.list:      a list storing indeces of the clusters of View.Matrix,height,dimension ##
! ##         m:           the merge matrix produced by the hclust function                       ##
! ##         d:           the number of rows of the merge matrix                                 ##
! ##         num.exp:     the number of experiments                                              ##
! ##         dim.sum:     the sum of the dimensions of all clusters                              ##
! ##         Left:        the Left child of the cluster being considered                         ##
! ##         Right:       the right child of the cluster being considered                        ##
! ##         Orig:        the cluster being considered                                           ##
! ##         num.clusters:the number of total clusters considered                                ##
! #################################################################################################

  get.clust.list<- function(data.file,dist.method,agglom.method)
  {library(mva)
   num.exp<-dim(data.file)[2]-1
!  if((dist.method =="cos") || (dist.method=="pcos") || (dist.method=="cor") || (dist.method=="euc"))
    dist.matrix<-other.dist.metrics(data.file,dist.method)$A
!  if((dist.method !="cos") && (dist.method!="pcos") && (dist.method!="cor") && (dist.method!="euc"))
    dist.matrix<-dist(data.file[,2:(num.exp+1)],method=dist.method)

   clust.info <- hclust(dist.matrix,agglom.method) 
   m          <- clust.info$merge
   h          <- clust.info$height
***************
*** 98,120 ****
   either<-0
   neither<-0
! 
!  ## The loop below counts the total number clusters appearing in the clustering tree as follows.
!  ## The merge matrix m generated by hclust consists of d rows.
!  ## Each row i represents a parent cluster; each of the two column values of row i
!  ## represents one of the two child clusters of cluster i. Therefore, the number of
!  ## total clusters *should be* 2*d. There are two obstructions to this deduction.
!  ## First, clusters of dimension two (and thus having singletons as both child clusters)
!  ## are counted twice: once as a whole cluster and once as two singleton clusters.
!  ## As we are not clustering any clusters of dimension two, it is enough to count these
!  ## clusters once, as an entire cluster of dimension 2.
!  ## Secondly, the original cluster (consisting of the entire data
!  ## set) is not counted. Thus the total number of clusters is 2*(d-both)+1, where both
!  ## is the number of clusters consisting of two singleton subclusters.
!  ## D is the vector which counts the dimension of each cluster.
!  ## dim.sum is the sum of dimensions of all clusters we consider:
!  ## D is the dimension of all clusters but the original,
!  ## d+1 is the dimension of the original,
!  ## 2*both represents all pairs of singleton subclusters counted twice.
!  
   for(i in 1:d)
    {if(m[i,1]<0 && m[i,2]<0)
--- 104,126 ----
   either<-0
   neither<-0
!  ###################################################################################################
!  ## The loop below counts the total number clusters appearing in the clustering tree as follows.  ##
!  ## The merge matrix m generated by hclust consists of d rows.                                    ##
!  ## Each row i represents a parent cluster; each of the two column values of row i                ##
!  ## represents one of the two child clusters of cluster i. Therefore, the number of               ##
!  ## total clusters *should be* 2*d. There are two obstructions to this deduction.                 ##
!  ## First, clusters of dimension two (and thus having singletons as both child clusters)          ##
!  ## are counted twice: once as a whole cluster and once as two singleton clusters.                ##
!  ## As we are not clustering any clusters of dimension two, it is enough to count these           ##
!  ## clusters once, as an entire cluster of dimension 2.                                           ##
!  ## Secondly, the original cluster (consisting of the entire data                                 ##
!  ## set) is not counted. Thus the total number of clusters is 2*(d-both)+1, where both            ##
!  ## is the number of clusters consisting of two singleton subclusters.                            ##
!  ## D is the vector which counts the dimension of each cluster.                                   ##
!  ## dim.sum is the sum of dimensions of all clusters we consider:                                 ##
!  ## D is the dimension of all clusters but the original,                                          ##
!  ## d+1 is the dimension of the original,                                                         ##
!  ## 2*both represents all pairs of singleton subclusters counted twice.                           ##
!  ###################################################################################################
   for(i in 1:d)
    {if(m[i,1]<0 && m[i,2]<0)
***************
*** 136,149 ****
   a<-dim.sum+1 

!  ## The loop below determines which genes belong to each cluster by unraveling
!  ## the hclust merge matrix m. View.Matrix is being set up to store the 
!  ## cluster name, gene number of each gene within the cluster and the dimension
!  ## of the cluster. Genes are identified by their numerical placement within
!  ## the data file.
!  ## I.list[[i]][1] stores the starting row number of View.Matrix for cluster i
!  ## I.list[[i]][2] stores the ending   row number of View.Matrix for cluster i
!  ## I.list[[i]][3] stores the dimension of cluster i
!  ## I.list[[i]][4] stores the height of cluster i
!  
   for(i in 1:d)
    {I.list[[i]]<-vector(mode="numeric",length=4)
--- 142,156 ----
   a<-dim.sum+1 

!  #################################################################################
!  ## The loop below determines which genes belong to each cluster by unraveling  ##
!  ## the hclust merge matrix m. View.Matrix is being set up to store the         ##
!  ## cluster name, gene number of each gene within the cluster and the dimension ##
!  ## of the cluster. Genes are identified by their numerical placement within    ##
!  ## the data file.                                                              ##
!  ## I.list[[i]][1] stores the starting row number of View.Matrix for cluster i  ##
!  ## I.list[[i]][2] stores the ending   row number of View.Matrix for cluster i  ##
!  ## I.list[[i]][3] stores the dimension of cluster i                            ##
!  ## I.list[[i]][4] stores the height of cluster i                               ##
!  #################################################################################
   for(i in 1:d)
    {I.list[[i]]<-vector(mode="numeric",length=4)
***************
*** 169,173 ****

   View.Matrix[1:(d+1),4:(4+num.exp)]<-data.file
- 
   Orig <-View.Matrix[I.list[[d]][1]:I.list[[d]][2],4:(4+num.exp)]

--- 176,179 ----
***************
*** 191,219 ****
  return(View.Matrix,I.list,m,d,h,num.exp,dim.sum,Left,Right,Orig,num.clusters)}

! ###################################################################################################
! ## This function sets up the rest of View.Matrix, which stores and returns the CLUSTERLABEL, 
! ## GENENUM, and CLUSTERDIMENSION. View.Matrix stores the clusters in the order generated
! ## by traversing the clustering tree from top down, one level at a time, left to right.
! ## This traversal is accomplished by setting up and using v.stack, 
! ## which acts as a top-down breadth-first searching stack. 
! ## The columns of v.stack contain the level of the clustering tree, the node number, the number
! ## of the parent of that node, the cluster name associated to the node, the cluster number (with respect to
! ## the merge matrix m found in the function get.clust.list, the dimension of the cluster, the height of the cluster,
! ## and how many times the cluster has been visited. [PLEASE NOTE: Cluster Name corresponds to the variables
! ## z and k, and an old numbering system I use to check whether the clustering is correct. These can be ignored.]
! ## The algorithm works as follows. Beginning with the top cluster (entire data set) of the tree, each
! ## cluster is visited. If the cluster has dimension
! ## larger than two, its two subclusters will be determined first. Then the left child cluster will be visited
! ## first. If it has dimension greater than two, we continue to follow the chain of left subclusters until
! ## a subcluster has dimension two or less. At this point, we begin the traversal up to the parent node of the 
! ## cluster just visited, then to its right subcluster. If the dimension of this right subcluster is more than
! ## two, we begin to traverse the chain of left subclusters until we reach one with dimension less than or
! ## equal to two. ETC. A node can only be visited twice. 
! ## INPUT:  data.file, gene.names generated by scan.data
! ##         dist.method, stop.level are user inputs
! ##         num.perms: the number of permutations defined by the user
! ##         thresh: the confidence threshold defined by the user as a percentage
! ## OUTPUT: View.Matrix, User.Matrix, which are written out in txt file format as Graph.Data, User.Data         
! ###############################################################################################################

  index<-function(data.file,gene.names,dist.metric,agglom.method,stop.level,num.perms,thresh) 
--- 197,227 ----
  return(View.Matrix,I.list,m,d,h,num.exp,dim.sum,Left,Right,Orig,num.clusters)}

! ##########################################################################################################
! ## This function sets up the rest of View.Matrix, which stores and returns the CLUSTERLABEL,            ##
! ## GENENUM, and CLUSTERDIMENSION. View.Matrix stores the clusters in the order generated                ##
! ## by traversing the clustering tree from top down, one level at a time, left to right.                 ##
! ## This traversal is accomplished by setting up and using v.stack, which acts as a top-down             ##
! ## breadth-first searching stack. The columns of v.stack contain the level of the clustering            ## 
! ## tree, the node number, the number of the parent of that node, the cluster name associated            ##
! ## to the node, the cluster number (with respect to the merge matrix m found in the function            ##
! ## get.clust.list, the dimension of the cluster, the height of the cluster, and how many times          ##
! ## the cluster has been visited. [PLEASE NOTE: Cluster Name corresponds to the variables                ##
! ## z and k, and an old numbering system I use to check whether the clustering is correct.               ##
! ## These can be ignored.]                                                                               ##
! ## The algorithm works as follows. Beginning with the top cluster (entire data set) of the tree,        ## 
! ## each cluster is visited. If the cluster has dimension larger than two, its two subclusters           ##
! ## will be determined first. Then the left child cluster will be visited first. If it has               ##
! ## dimension greater than two, we continue to follow the chain of left subclusters until                ##
! ## a subcluster has dimension two or less. At this point, we begin the traversal up to the              ## 
! ## parent node of the cluster just visited, then to its right subcluster. If the dimension of           ##
! ## this right subcluster is more than two, we begin to traverse the chain of left subclusters           ##
! ## until we reach one with dimension less than or equal to two. ETC.                                    ##
! ## A node can only be visited twice.                                                                    ## 
! ## INPUT:  data.file, gene.names generated by scan.data                                                 ##
! ##         dist.method, stop.level are user inputs                                                      ##
! ##         num.perms: the number of permutations defined by the user                                    ##
! ##         thresh: the confidence threshold defined by the user as a percentage                         ##
! ## OUTPUT: View.Matrix, User.Matrix, which are written out in txt file format as Graph.Data, User.Data  ##       
! ##########################################################################################################

  index<-function(data.file,gene.names,dist.metric,agglom.method,stop.level,num.perms,thresh) 
***************
*** 318,323 ****
    ret.pos<-ret.pos+1                          ## go to next cluster on stack
    tmpT<-v.stack[ret.pos,6]
!   if(v.stack[ret.pos,9]==-1) tmp<-0           ## if the cluster hasn't been visited by now, it's the last (stop) line on v.stack
!   if(v.stack[ret.pos,9]!=-1)                  ## if the cluster has been visited, continue loop 
     {if(tmpT<0)  tmp<-data.file[abs(tmpT),]
      if(tmpT>0)  tmp<-View.Matrix[I.list[[tmpT]][1]:I.list[[tmpT]][2],4:(num.exp+4)]}} ##no more clstrs on stack
--- 326,331 ----
    ret.pos<-ret.pos+1                          ## go to next cluster on stack
    tmpT<-v.stack[ret.pos,6]
!   if(v.stack[ret.pos,9]==-1) tmp<-0       ## if the cluster hasn't been visited by now, it's the last (stop) line on v.stack
!   if(v.stack[ret.pos,9]!=-1)              ## if the cluster has been visited, continue loop 
     {if(tmpT<0)  tmp<-data.file[abs(tmpT),]
      if(tmpT>0)  tmp<-View.Matrix[I.list[[tmpT]][1]:I.list[[tmpT]][2],4:(num.exp+4)]}} ##no more clstrs on stack
***************
*** 325,329 ****

   rm(I.list)
!  v.stack<-v.stack[!is.na(v.stack[,1]),]       ## get rid of dummy (stop) row at bottom ## 
   ## !is.na(View.Matrix[,1])] is the first subclustering which is listed twice but only counted once 
   ## We are deleting the second listing here
--- 333,337 ----

   rm(I.list)
!  v.stack<-v.stack[!is.na(v.stack[,1]),]   ## delete dummy (stop) row at bottom ## 
   ## !is.na(View.Matrix[,1])] is the first subclustering which is listed twice but only counted once 
   ## We are deleting the second listing here
***************
*** 361,365 ****
   GRAPH.LABEL[1:(d+1)]<-rep(paste(0,0,"ORIG"),d+1) 
   last.level<-v.stack[dim(v.stack)[1],1]
!  print(paste("The Last TREE LEVEL is: ",last.level))

   acc.vec<-c(rep(1,d+1),rep(-1,dim(View.Matrix)[1]-(d+1)))  ## this is the ACC column of View.Matrix
--- 369,373 ----
   GRAPH.LABEL[1:(d+1)]<-rep(paste(0,0,"ORIG"),d+1) 
   last.level<-v.stack[dim(v.stack)[1],1]
!  print(paste("The Clustering Tree has ",last.level, " levels"))

   acc.vec<-c(rep(1,d+1),rep(-1,dim(View.Matrix)[1]-(d+1)))  ## this is the ACC column of View.Matrix
***************
*** 372,379 ****
   ## OLD V.dimnames<-list(NULL,c(      "CLUSTER","NAME","DIM","GeneNum",c(1:num.exp))
   ## NEW V.dimnames<-list(NULL,c("ACC","CLUSTER","NAME","DIM","GeneNum",c(1:num.exp))) ## column names of View.Matrix
- 
   ## In the following loop, v.stack is traversed from top to bottom.
   ## Note: v.stack[i,1]==1 means the cluster has been perused
!  ##       View.Matrix[i,1]==1 means the cluster's subclusters have been accepted
   b<-0
   for(i in 1:length(v.stack[,8]))
--- 380,386 ----
   ## OLD V.dimnames<-list(NULL,c(      "CLUSTER","NAME","DIM","GeneNum",c(1:num.exp))
   ## NEW V.dimnames<-list(NULL,c("ACC","CLUSTER","NAME","DIM","GeneNum",c(1:num.exp))) ## column names of View.Matrix
   ## In the following loop, v.stack is traversed from top to bottom.
   ## Note: v.stack[i,1]==1 means the cluster has been perused
!  ## View.Matrix[i,1]==1 means the cluster's subclusters have been accepted
   b<-0
   for(i in 1:length(v.stack[,8]))
***************
*** 445,452 ****
          {D1<-H0
           single.flag<-1
!          tstat.matrix[3*clust.count,(p+2)]<-abs(T1)
!          ##write.table(tmp.data,file=paste(v.stack[i,5],v.stack[i,6],p,
!           ##          ".perm.txt",sep=""),quote=F,sep="\t",col.names=F,row.names=F)
! }
        rm(tmp.data)
        if(T2<0) D2<-H0
--- 452,456 ----
          {D1<-H0
           single.flag<-1
!          tstat.matrix[3*clust.count,(p+2)]<-abs(T1)}
        rm(tmp.data)
        if(T2<0) D2<-H0
***************
*** 454,464 ****
           {D1<-H0-tmp.h[T1]
            single.flag<-0 
!           tstat.matrix[3*clust.count,(p+2)]<-0}
!       if(T2>0) D2<-H0-tmp.h[T2]
!       t.stat<-D1+D2
!       t.seq[p]<-t.stat
!       single.flag.seq[p]<-single.flag}  ##end of permutation loop
!       rm(C0)
!       t.seq[1:num.perms]<-t.seq[order(t.seq[1:num.perms])] ## order the num.perms test statistics
      tstat.matrix[(3*clust.count-2),3:(num.perms+3)]<-round(t.seq,2)   
      tstat.matrix[(3*clust.count-1),3:(num.perms+3)]<-single.flag.seq
--- 458,468 ----
           {D1<-H0-tmp.h[T1]
            single.flag<-0 
!          tstat.matrix[3*clust.count,(p+2)]<-0}
!     if(T2>0) D2<-H0-tmp.h[T2]
!     t.stat<-D1+D2
!     t.seq[p]<-t.stat
!     single.flag.seq[p]<-single.flag}  ##end of permutation loop
!     rm(C0)
!     t.seq[1:num.perms]<-t.seq[order(t.seq[1:num.perms])] ## order the num.perms test statistics
      tstat.matrix[(3*clust.count-2),3:(num.perms+3)]<-round(t.seq,2)   
      tstat.matrix[(3*clust.count-1),3:(num.perms+3)]<-single.flag.seq
***************
*** 480,487 ****

      if(accept)
!      {print("accepting the clusters of")
!       #print(View.Matrix[a,2:3]) 
!       #print("GRAPH LABEL")
!       #print(GRAPH.LABEL[a]) 
        v.stack[!is.na(M),1]<-1                                     
        ## the children are accepted, so set their acc var to 1
--- 484,489 ----

      if(accept)
!      {print("accepting the subclusters of")
!       print(GRAPH.LABEL[a])
        v.stack[!is.na(M),1]<-1                                     
        ## the children are accepted, so set their acc var to 1
***************
*** 489,509 ****

      if(!accept)
!      {print("not accepting the subclusters of cluster")
!       #print(View.Matrix[a,2:3])
!       #print("GRAPH LABEL") 
!       #print(GRAPH.LABEL[a])
        v.stack[!is.na(M),1]<-0                                     
        ## the children are not accepted, so set their acc var to 0
        View.Matrix[!is.na(N),1]<-0}}}}
!  
   ## v.stack has been completely traversed 
-  ##return(View.Matrix)
   View.Matrix<-View.Matrix[,-3]                                    
!  ## deleting archaic numbering scheme and putting in labels
!  
   View.Matrix[,2]<-GRAPH.LABEL
   View.Matrix[1:(d+1),2]<-rep(paste(0,0,"ORIG", sep=","),d+1)

-  
   Graph.Matrix<-data.frame(ACC=View.Matrix[!duplicated(View.Matrix[,2]),1],
                            GRAPHLABEL=View.Matrix[!duplicated(View.Matrix[,2]),2],
--- 491,506 ----

      if(!accept)
!      {print("not accepting the subclusters of cluster") 
!       print(GRAPH.LABEL[a])
        v.stack[!is.na(M),1]<-0                                     
        ## the children are not accepted, so set their acc var to 0
        View.Matrix[!is.na(N),1]<-0}}}}
! 
   ## v.stack has been completely traversed 
   View.Matrix<-View.Matrix[,-3]                                    
!  ## deleting archaic numbering scheme and putting in labels 
   View.Matrix[,2]<-GRAPH.LABEL
   View.Matrix[1:(d+1),2]<-rep(paste(0,0,"ORIG", sep=","),d+1)

   Graph.Matrix<-data.frame(ACC=View.Matrix[!duplicated(View.Matrix[,2]),1],
                            GRAPHLABEL=View.Matrix[!duplicated(View.Matrix[,2]),2],
***************
*** 513,530 ****
   write.table(Graph.Matrix,file="Graph.Data",quote=F,sep="\t",col.names=TRUE,row.names=F)

- 
   View.Matrix<-data.frame(ACC=View.Matrix[,1],
                           CLUSTERLABEL=view.single.LABEL,
-                          #GENENAME=gene.names[as.numeric(View.Matrix[,4])],
                           CLUSTERDIMENSION=View.Matrix[,3],
                           GENENUM=as.numeric(View.Matrix[,4]))
   write.table(View.Matrix, file="User.Data", quote=F,sep="\t",col.names=TRUE,row.names=F)
! 
!  rm(View.Matrix,Graph.Matrix) 
! #return(v.stack,View.Matrix,Graph.Matrix)
! tmp.tstat.matrix<-tstat.matrix[!is.na(tstat.matrix[,1]),]
! return(tmp.tstat.matrix)}
! 
! 
  ####################################################################################
  ## The permute function permutes the data file generated by scan.data by permuting
--- 510,519 ----
   write.table(Graph.Matrix,file="Graph.Data",quote=F,sep="\t",col.names=TRUE,row.names=F)

   View.Matrix<-data.frame(ACC=View.Matrix[,1],
                           CLUSTERLABEL=view.single.LABEL,
                           CLUSTERDIMENSION=View.Matrix[,3],
                           GENENUM=as.numeric(View.Matrix[,4]))
   write.table(View.Matrix, file="User.Data", quote=F,sep="\t",col.names=TRUE,row.names=F)
! rm(View.Matrix,Graph.Matrix)}
  ####################################################################################
  ## The permute function permutes the data file generated by scan.data by permuting
***************
*** 546,624 ****
   check.vector<-vector(mode="character",length=(num.cols-1))
   for(c in 2:num.cols)
!   {check.vector[c]<-round(mean(perm.data[,c]),5)==round(mean(data.file[,c]),5)
     if(check.vector[c]!="TRUE")
       {print("permute isn't working right")
        return(c)}} 
!  return(perm.data,check.vector)}  
! 
! ## This function creates a "dist" matrix using the given
! ## gene expression matrix and code for distance metric 
! ## "cos"  ==> 1-cosine of the angle between the two vectors
! ## "pcos" ==> penalized cosine metric (Brian's)
! ## "cor"  ==> abs(cor-1) Pearson's correlation coefficient

  other.dist.metrics<-function(expression.matrix,dist.metric)
   {library(mva)
    EM<-expression.matrix
!   ##EM does not have a label row!##
!   EM<-EM[-1,]
    EM<-EM[,-1] ##delete gene.name column
-   print(dim(EM))
    r<-dim(EM)[1]
    c<-dim(EM)[2]
    DM<-matrix(ncol=r,nrow=r)

!   if(dist.metric=="eucna") 
!    {for(j in 1:r)
!      {for(i in j:r)
!       {##print(paste(j,i))
!        x<-EM[j,]
!        y<-EM[i,]
!        na.posn<-vector(mode="numeric") ##records posn of na's##
!        na.posn<-NULL
!        for(k in 1:length(x))
!         {if(is.na(x[k]) || is.na(y[k]))
!            na.posn<-c(na.posn,k)}
!        if(length(na.posn)==0)
!           {tmp<-rbind(x,y)
!            distxy<-dist(tmp,method="euclidean")}
!        if(length(na.posn)>0)
!          {tmp<-rbind(x,y)
!           nona.tmp<-tmp[,-(na.posn)]
!           distxy<-dist(nona.tmp,method="euclidean")}
!           ##col.dist<-(tmp.dist^2)/(dim(tmp)[2]-length(na.posn)) ## num of non-NA cols ##
!           ##distxy<-sqrt(tmp.dist^2+(col.dist*length(na.posn)))
!         DM[i,j]<-distxy}}}
! 
!   
!   if(dist.metric=="pcos") 
!     {for(j in 1:r)
!       {for(i in j:r)
!         {x<-EM[j,]
!          y<-EM[i,]
!          distxy<-munneke.dist(x,y)
!          DM[i,j]<-distxy}}}
!   
!   if(dist.metric=="cos")
!     {for(j in 1:r)
!       {for(i in j:r)
!         {#print(paste("j=",j,"i=",i))
!          x<-EM[j,]
!          y<-EM[i,]
!          distxy<-cos.dist(x,y)
!          DM[i,j]<-distxy}}}
!     
! 
!   if(dist.metric=="cor")
!     {for(j in 1:r)
!       {for(i in j:r)
!         {print(paste("j=",j,"i=",i))
!          x<-EM[j,]
!          y<-EM[i,]
!          distxy<-abs(1-cor(x,y,use="pairwise.complete.obs"))
!          DM[i,j]<-distxy}}}

  A<-as.dist(DM)
- #print("finished getting distance matrix")  
  return(DM,A)}

--- 535,587 ----
   check.vector<-vector(mode="character",length=(num.cols-1))
   for(c in 2:num.cols)
!   {check.vector[c]<-round(mean(perm.data[,c],na.rm=T),5)==round(mean(data.file[,c],na.rm=T),5)
     if(check.vector[c]!="TRUE")
       {print("permute isn't working right")
        return(c)}} 
!   return(perm.data,check.vector)}  

+ ##########################################################################
+ ## The other.dist.metrics  function creates a "dist" matrix             ##  
+ ## using the given gene expression matrix and code for distance metric  ##
+ ## "cos"  ==> 1-cosine of the angle between the two vectors             ##
+ ## "pcos" ==> penalized cosine metric (Brian Munneke)                   ##
+ ## "cor"  ==> abs(cor-1) Pearson's correlation coefficient              ##
+ ##########################################################################
  other.dist.metrics<-function(expression.matrix,dist.metric)
   {library(mva)
    EM<-expression.matrix
!   ##labels<-key[EM[,1],2]
!   ##labels<-EM[,1]
    EM<-EM[,-1] ##delete gene.name column
    r<-dim(EM)[1]
    c<-dim(EM)[2]
    DM<-matrix(ncol=r,nrow=r)

!   for(j in 1:r)
!    {for(i in j:r)
!      {x<-EM[j,]
!       y<-EM[i,]
!       na.posn<-vector(mode="numeric")  ##record positions of NA's##
!       na.posn<-NULL
!       for(k in 1:length(x))
!        {if(is.na(x[k]) || is.na(y[k]))
!           na.posn<-c(na.posn,k)}
!       if(length(na.posn)==0)
!         tmpd<-rbind(x,y)
!       if(length(na.posn)>0)
!         {tmp1<-rbind(x,y)
!          nona.tmp<-tmp1[,-(na.posn)]  ##do not include NA's##
!          tmpd<-nona.tmp}
!       if(dist.metric=="euc") 
!         distxy<-dist(tmpd,method="euclidean")
!       if(dist.metric=="pcos")  
!         distxy<-munneke.dist(tmpd[1,],tmpd[2,])
!       if(dist.metric=="cos")
!         distxy<-cos.dist(tmpd[1,],tmpd[2,])
!       if(dist.metric=="cor")
!         distxy<-abs(1-cor(x,y,use="pairwise.complete.obs"))
!       DM[i,j]<-distxy}}

  A<-as.dist(DM)
  return(DM,A)}

***************
*** 627,644 ****
  ########################################################################
  munneke.dist<-function(x,y)
! {sumxy<-0
!  sumx<-0
!  sumy<-0
!  diffxy<-0
!  for(i in 1:length(x))
!   {sumxy<-sumxy+(x[i]*y[i])
!    sumx<-sumx+x[i]^2
!    sumy<-sumy+y[i]^2
!    diffxy<-diffxy+abs(x[i]-y[i])}
!    cosxy<-sumxy/(sqrt(sumx)*sqrt(sumy))
!    p<-diffxy/length(x)
!    p<-2*(p/(1+p))
!    distxy<-1-cosxy+p
!    return(distxy)}

  ########################################################################
--- 590,599 ----
  ########################################################################
  munneke.dist<-function(x,y)
!  {cosxy<-sum(x*y)/sqrt(sum(x^2)*sum(y^2)) 
!   diffxy<-sum(abs(x-y))
!   p<-diffxy/length(x)
!   p<-2*(p/(1+p))
!   pcosxy<-1-cosxy+p
!   return(pcosxy)}

  ########################################################################
***************
*** 646,672 ****
  ########################################################################
  cos.dist<-function(x,y)
! {sumxy<-0
!  sumx<-0
!  sumy<-0
!  diffxy<-0
!  for(i in 1:length(x))
!   {sumxy<-sumxy+(x[i]*y[i])
!    sumx<-sumx+x[i]^2
!    sumy<-sumy+y[i]^2
!    diffxy<-diffxy+abs(x[i]-y[i])}
!    tmpcosxy<-sumxy/(sqrt(sumx)*sqrt(sumy))
!    cosxy<-1-tmpcosxy
!    return(cosxy)}
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
! 
--- 601,604 ----
  ########################################################################
  cos.dist<-function(x,y)
! {cosxy<-sum(x*y)/sqrt(sum(x^2)*sum(y^2)) 
!  return(1-cosxy)}