Dear CDK-Developer,

This module calculates the
similarity/distance/dissimilarity between two molecules.

I have used this in the "Pathway Hunter Tool" and I would like to contribute it to CDK.

Please feel free to get back to me.

Best regards,

Asad





import java.io.*;
import java.util.*;

/*  $RCSfile: BinaryCoefficient.java,v $
 *  $Author: $
 *  $Date:  $
 *  $Revision: $
 *
 *  Copyright (C) 2005  The Chemistry Development Kit (CDK) project
 *
 *  Contact: cdk-devel@lists.sourceforge.net
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 */

/**
 * Module to calculate similarity/distance/dis-similarity between two molecules
 *
 * @author         asad.rahman@uni-koeln.de
 * @cdk.created    2005-05-09
 * @cdk.module     
 */

/*
 * Constructor arguments of BinaryCoefficient:
 *
 * @param  A     BitSet fingerprint of molecule A
 * @param  B     BitSet fingerprint of molecule B
 * @param  size  size of the bitsets (number of bits in each fingerprint)
 * @exception IOException declared by the constructor signature
 */

public class BinaryCoefficient {
  
    private int OnA = 0;
    private int OnAorOnB = 0;
    private int OnB = 0;
    private int OnAandOnB = 0;
    private int Sum = 0;
    private int N = 0;


    public BinaryCoefficient (BitSet A, BitSet B, int size) throws IOException {

        int counter = 0;
        N = size;

      /**
       * Count the the number of Bits "on" on A
       */

        for (int i = 0; i <= A.size(); i++) {
            if (A.get(i))
                ++counter;
        }
        OnA = counter;


      /**
       * Count the the number of Bits "on" on B
       */
        counter = 0;
        for (int i = 0; i <= B.size(); i++) {
            if (B.get(i))
                ++counter;
        }

        OnB = counter;

        /**
         *  AND operation for A AND B
         */

        BitSet AANDB = (BitSet) A.clone();
        
        AANDB.and(B);
        counter = 0;

        /**
         * Count the bits True in both the molecules after performing AND operation. Number of Bits "on" on A and B
         */

        for (int i = 0; i <= AANDB.size(); i++) {
            boolean state = AANDB.get(i);
            if (AANDB.get(i))
                ++counter;
        }

        OnAandOnB = counter;

        /**
         *  OR operation for A U B
         */

        BitSet AORB = (BitSet)A.clone();
                
        AORB.or(B);       
        
        counter=0;
       
       /**
         * Count the bits True in both the molecules after performing OR operation
         */
      
       for (int i=0; i<= AORB.size(); i++) {
            
            if (AORB.get(i))
               ++counter;
        
        }
        
        /**
         *  Number of Bits "off" on A and B. Here this means OnAorOnB = OnA + OnB - OnAandOnB  and using set theory d= n - |a + b -c|
         */

         OnAorOnB = counter; 
       
         Sum = size - OnAorOnB;     
        
        //    System.out.println("A " + A + " len " + A.length() + "\n B " + B + " len " + B.length() + " \n on A " + OnA + " \n on B " +  OnB +  "  \n AANDB " + AANDB + "\n  OnAandOnB " + OnAandOnB  + " \n OnAorOnB " + OnAorOnB );
    }


/**
*  Number of bits True in Molecule A
*/
 
 public int getA() throws IOException {
        return OnA;
    }

/**
*  Number of bits True in Molecule B
*/

 public int getB() throws IOException {
        return OnB;
    }


/**
*  Number of bits True in Molecule A AND Molecule B
*/
    public int getAB() throws IOException {
        return OnAandOnB;
    }

/**
 *  Number of bits True in Molecule A OR Molecule B
 */
    public int getAUB() throws IOException {
        return OnAorOnB;
    }


  /**
   *  Returns inter-molecular dissimilarity Cosine coefficient between Molecule A and B
   */
    public double getCosine() throws IOException {
        return (Math.sqrt((double) OnAorOnB / ((double) OnA + (double) OnB)));
    }

   /**
    *  Returns Dice similarity coefficient  between Molecule A and B
    */

    public double getDice() throws IOException {
        return (2 * (double) OnAorOnB / ((double) OnA + (double) OnB));
    }

    /**
     *  Returns  Euclidean distance between Molecule A and B
     */

    public double getEuclidean() throws IOException {
        return (Math.sqrt((double) OnAorOnB - (double) OnAandOnB));
    }
   
   /**
    *  Returns Hamming distance between Molecule A and B
    */

    public double getHamming() throws IOException {
        return (((double) OnAorOnB - (double) OnAandOnB)) / N; // Normalized the score by dividing it with size = N
    }

   /**
    *  Returns Soergel distance between Molecule A and B
    */
    public double getSoergel() throws IOException {
        return (((double) OnAorOnB - (double) OnAandOnB) / (double) OnAorOnB);
    }
    
    /**
     *  Returns Tanimoto similarity between Molecule A and B
     */

    public double getTanimoto() throws IOException {
        return ((double) OnAandOnB / (double) OnAorOnB);
    }

  /**
    *  Returns number of bits "off" in Molecule A U and Molecule B such that d= n - |a + b -c|
    */
    public int getSum() throws IOException {
        return Sum;
    }

}