From: Christoph S. <ste...@us...> - 2002-10-04 13:40:32
|
Update of /cvsroot/cdk/cdk/src/org/openscience/cdk/isomorphism/mcss In directory usw-pr-cvs1:/tmp/cvs-serv25572/mcss Added Files: RGraph.java RMap.java RNode.java RTools.java Log Message: New UniversalIsomorphismTester class for Isomorphism testing, subgraph isomorphism testing and maximum common subgraph isomorphism detection. Thanks to Thierry Hanser and Stephane Werner from the IXELIS company. --- NEW FILE: RGraph.java --- /* $RCSfile: RGraph.java,v $ * $Author: steinbeck $ * $Date: 2002/10/04 13:40:28 $ * $Revision: 1.1 $ * * Copyright (C) 1997-2002 The Chemistry Development Kit (CDK) project * * This code has been kindly provided by Stephane Werner * and Thierry Hanser from IXELIS ma...@ix... * * IXELIS sarl - Semantic Information Systems * 17 rue des Cèdres 67200 Strasbourg, France * Tel/Fax : +33(0)3 88 27 81 39 Email: ma...@ix... * * CDK Contact: cdk...@li... * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ package org.openscience.cdk.isomorphism.mcss; import java.util.List; import java.util.ArrayList; import java.util.Iterator; import java.util.ListIterator; import java.util.BitSet; /** * This class implements the Resolution Graph (RGraph). * The RGraph is a graph based representation of the search problem. * An RGraph is constructred from the two compared graphs (G1 and G2). * Each vertex (node) in the RGraph represents a possible association * from an edge in G1 with an edge in G2. Thus two compatible bonds * in two molecular graphs are represented by a vertex in the RGraph. * Each edge in the RGraph corresponds to a common adjacency relationship * between the 2 couple of compatible edges associated to the 2 RGraph nodes * forming this edge. * * Exeample: G1 : C-C=O and G2 : C-C-C=0 * 1 2 3 1 2 3 4 * * The resulting RGraph(G1,G2) will contain 3 nodes: * * Node A : association between bond C-C : 1-2 in G1 and 1-2 in G2 * Node B : association between bond C-C : 1-2 in G1 and 2-3 in G2 * Node C : association between bond C=0 : 2-3 in G1 and 3-4 in G2 * The RGraph will also contain one edge representing the * adjacency between node B and C that is : bonds 1-2 and 2-3 in G1 * and bonds 2-3 and 3-4 in G2. * * Once the RGraph has been built from the two compared graphs * it becomes a very interesting tool to perform all kinds of * structural search (isomorphism, substructure search, maximal common * substructure,....). * * The search may be constrained by mandatory elements (e.g. bonds that * have to be present in the mapped common substructures). * * Performing a query on an RGraph requires simply to set the constrains * (if any) and to invoke the parsing method (parse()) * * The RGraph has been designed to be a generic tool. It may be constructed * from any kind of source graphs, thus it is not restricted to a chemical * context. * * The RGraph model is indendant from the CDK model and the link between * both model is performed by the RTools class. In this way the RGraph * class may be reused in other graph context (conceptual graphs,....) * * /!\ Important note: This implementation of the algorithm has not been * optimized for speed at this stage. It has been * written with the goal to clearly retrace the * principle of the underlined search method. There is * room for optimization in many ways including the * the algorithm itself. * * The general algoritm derives from "Machine Learning of * of generic Reactions : 3. An efficient Algorithm for Maximal Common * Substructure determination" C. Tonnelier, Ph. Jauffret, T. Hanser * and G. Kaufmann. Tetrahedron Vol. 3, No 6, pp. 351-358, 1990. * and modified in the These of T. Hanser "Apprentissage automatique * de méthodes de synthèse à partir d'exemples". Université Louis Pasteur * STRASBOURG 1993. * * @author Stephane Werner from IXELIS ma...@ix... * @created 17 juillet 2002 */ public class RGraph { // an RGraph is a list of RGraph nodes // each node keeping track of its // neighbours. List graph = null; // maximal number of iterations before // search break. int maxIteration = 10000; // dimensions of the compared graphs int firstGraphSize = 0; int secondGraphSize = 0; // constrains BitSet c1 = null; BitSet c2 = null; // current solution list List solutionList = null; // flag to define if we want to get all possible 'mappings' boolean findAllMap = false; // flag to define if we want to get all possible 'structures' boolean findAllStructure = true; // working variables boolean stop = false; int nbIteration = 0; BitSet graphBitSet = null; /** * Constructor for the RGraph object * Create an empty RGraph. */ public RGraph() { graph = new ArrayList(); solutionList = new ArrayList(); graphBitSet = new BitSet(); } /** * Returns the size of the first of the two * compared graphs * @return The size of the first of the two compared graphs */ public int getFirstGraphSize() { return firstGraphSize; } /** * Returns the size of the second of the two * compared graphs * @return The size of the second of the two compared graphs */ public int getSecondGraphSize() { return secondGraphSize; } /** * Sets the size of the first of the two * compared graphs * @param n1 The size of the second of the two compared graphs */ public void setFirstGraphSize(int n1) { firstGraphSize = n1; } /** * Returns the size of the second of the two * compared graphs * @param n2 The size of the second of the two compared graphs */ public void setSecondGraphSize(int n2) { secondGraphSize = n2; } /** * Reinitialisation of the TGraph */ public void clear() { graph.clear(); graphBitSet.clear(); } /** * Returns the graph object of this RGraph * @return The graph object, a list */ public List getGraph() { return this.graph; } /** * Adds a new node to the RGraph * @param newNode The node to add to the graph */ public void addNode(RNode newNode) { graph.add(newNode); graphBitSet.set(graph.size() - 1); } /** * Parsing of the RGraph. This is the main method * to perform a query. Given the constrains c1 and c2 * defining mandatory elements in G1 and G2 and given * the search options, this méthod builds an initial set * of starting nodes (B) and parses recursively the * RGraph to find a list of solution according to * these parameters. * * @param c1 constrain on the graph G1 * @param c2 constrain on the graph G2 * @param findAllStructure true if we want all results to be generated * @param findAllMap true is we want all possible 'mappings' */ public void parse(BitSet c1, BitSet c2, boolean findAllStructure, boolean findAllMap) { // initialize the list of solution solutionList.clear(); // builds the set of starting nodes // according to the constrains BitSet b = buildB(c1, c2); // setup options setAllStructure(findAllStructure); setAllMap(findAllMap); // parse recursively the RGraph parseRec(new BitSet(b.size()), b, new BitSet(b.size())); } /** * Parsing of the RGraph. This is the recursive method * to perform a query. The method will recursively * parse the RGraph thru connected nodes and visiting the * RGraph using allowed adjacency relationship. * * @param traversed node already parsed * @param extension possible extension node (allowed neighbours) * @param forbiden node forbiden (set of node incompatible with the current solution) */ private void parseRec(BitSet traversed, BitSet extension, BitSet forbidden) { BitSet newTraversed = null; BitSet newExtension = null; BitSet newForbidden = null; BitSet potentialNode = null; // if there is no more extension possible we // have reached a potential new solution if(extension.isEmpty()) { solution(traversed); } // carry on with each possible extension else { // calculates the set of nodes that may still // be reached at this stage (not forbiden) potentialNode = ((BitSet) graphBitSet.clone()); potentialNode.andNot(forbidden); potentialNode.or(traversed); // checks if we must continue the search // according to the potential node set if(mustContinue(potentialNode)) { // carry on research and update iteration count nbIteration++; // for each node in the set of possible extension (neighbours of // the current partial solution, include the node to the solution // and perse recursively the RGraph with the new context. for(int x = extension.nextSetBit(0); x >= 0 && !stop; x = extension.nextSetBit(x + 1)) { // evaluates the new set of forbidden nodes // by including the nodes not compatible with the // newly accepted node. newForbidden = (BitSet) forbidden.clone(); newForbidden.or(((RNode) graph.get(x)).forbidden); // if it is the first time we are here then // traversed is empty and we initialize the set of // possible extensions to the extension of the first // accepted node in the solution. if(traversed.isEmpty()) { newExtension = (BitSet) (((RNode) graph.get(x)).extension.clone()); } // else we simply update the set of solution by // including the neighbours of the newly accepted node else { newExtension = (BitSet) extension.clone(); newExtension.or(((RNode) graph.get(x)).extension); } // extension my not contain forbidden nodes newExtension.andNot(newForbidden); // create the new set of traversed node // (update current partial solution) // and add x to the set of forbidden node // (a node may only appear once in a solution) newTraversed = (BitSet) traversed.clone(); newTraversed.set(x); forbidden.set(x); // parse recursively the RGraph parseRec(newTraversed, newExtension, newForbidden); } } } } /** * Checks if a potantial solution is a real one * (not included in a previous solution) * and add this solution to the solution list * in case of success. * * @param traversed new potential solution */ private void solution(BitSet traversed) { boolean included = false; BitSet projG1 = projectG1(traversed); BitSet projG2 = projectG2(traversed); // the solution must follows the search constrains // (must contain the mandatory elements in G1 an G2) if(isContainedIn(c1, projG1) && isContainedIn(c2, projG2)) { // the solution should not be included in a previous solution // at the RGraph level. So we check against all prevous solution // On the other hand if a previous solution is included in the // new one, the previous solution is removed. for(ListIterator i = solutionList.listIterator(); i.hasNext() && !included; ) { BitSet sol = (BitSet) i.next(); if(!sol.equals(traversed)) { // if we asked to save all 'mappings' then keep this mapping if(findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) { // do nothing } // if the new solution is included mark it as included else if(isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) { included = true; } // if the previous solution is contained in the new one, remove the previous solution else if(isContainedIn(projectG1(sol), projG1) || isContainedIn(projectG2(sol), projG2)) { i.remove(); } } else { // solution already exists included = true; } } if(included == false) { // if it is really a new solution add it to the // list of current solution solutionList.add(traversed); } if(!findAllStructure) { // if we need only one solution // stop the search process // (e.g. substructure search) stop = true; } } } /** * Determine if there are potential soltution remaining * @param potentialNode set of remaining potential nodes * @return true if it is worse to continue the search */ private boolean mustContinue(BitSet potentialNode) { boolean result = true; boolean cancel = false; BitSet projG1 = projectG1(potentialNode); BitSet projG2 = projectG2(potentialNode); // if we reached the maximum number of // serach iterations than do not continue if(nbIteration >= maxIteration) { return false; } // if constrains may no more be fullfilled than stop. if(!isContainedIn(c1, projG1) || !isContainedIn(c2, projG2)) { return false; } // check if the solution potential is not included in an already // existing solution for(Iterator i = solutionList.iterator(); i.hasNext() && !cancel; ) { BitSet sol = (BitSet) i.next(); // if we want every 'mappings' do not stop if(findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) { // do nothing } // if it is not possible to do better than an already existing solution than stop. else if(isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) { result = false; cancel = true; } } return result; } /** * Builds the initial extension set. This is the * set of node tha may be used as seed for the * RGraph parsing. This set depends on the constrains * defined by the user. * @param c1 constraint in the graph G1 * @param c2 constraint in the graph G2 * @return */ private BitSet buildB(BitSet c1, BitSet c2) { this.c1 = c1; this.c2 = c2; BitSet bs = new BitSet(); // only nodes that fulfill the initial constrains // are allowed in the initial extension set : B for(Iterator i = graph.iterator(); i.hasNext(); ) { RNode rn = (RNode) i.next(); if((c1.get(rn.rMap.id1) || c1.isEmpty()) && (c2.get(rn.rMap.id2) || c2.isEmpty())) { bs.set(graph.indexOf(rn)); } } return bs; } /** * Returns the list of solutions * * @return The solution list */ public List getSolutions() { return solutionList; } /** * Converts a RGraph bitset (set of RNode) * to a list of RMap that represents the * mapping between to substructures in G1 and G2 * (the projection of the RGraph bitset on G1 * and G2) * * @param set the BitSet * @return the RMap list */ public List bitSetToRMap(BitSet set) { List rMapList = new ArrayList(); for(int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { RNode xNode = (RNode) graph.get(x); rMapList.add(xNode.rMap); } return rMapList; } /** * Sets the 'AllStructres' option. If true * all possible solutions will be generated. If false * the search will stop as soon as a solution is found. * (e.g. when we just want to know if a G2 is * a substructure of G1 or not) * * @param findAllStructure */ public void setAllStructure(boolean findAllStructure) { this.findAllStructure = findAllStructure; } /** * Sets the 'finAllMap' option. If true * all possible 'mappings' will be generated. If false * the search will keep only one 'mapping' per structure * association. * * @param findAllMap */ public void setAllMap(boolean findAllMap) { this.findAllMap = findAllMap; } /** * Sets the maxIteration for the RGraph parsing * * @param it The new maxIteration value */ public void setMaxIteration(int it) { this.maxIteration = it; } /** * Returns a string representation of the RGraph * @return the string representation of the RGraph */ public String toString() { String message = ""; int j = 0; for(Iterator i = graph.iterator(); i.hasNext(); ) { RNode rn = (RNode) i.next(); message += "-------------\n" + "RNode " + j + "\n" + rn.toString() + "\n"; j++; } return message; } ///////////////////////////////// // BitSet tools /** * Projects a RGraph bitset on the source graph G1 * @param set RGraph BitSet to project * @return The associate BitSet in G1 */ public BitSet projectG1(BitSet set) { BitSet projection = new BitSet(firstGraphSize); RNode xNode = null; for(int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { xNode = (RNode) graph.get(x); projection.set(xNode.rMap.id1); } return projection; } /** * Projects a RGraph bitset on the source graph G2 * @param set RGraph BitSet to project * @return The associate BitSet in G2 */ public BitSet projectG2(BitSet set) { BitSet projection = new BitSet(secondGraphSize); RNode xNode = null; for(int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { xNode = (RNode) graph.get(x); projection.set(xNode.rMap.id2); } return projection; } /** * Test if set A is contained in set B * @param A a bitSet * @param B a bitSet * @return true if A is contained in B */ private boolean isContainedIn(BitSet A, BitSet B) { boolean result = false; if(A.isEmpty()) { return true; } BitSet setA = (BitSet) A.clone(); setA.and(B); if(setA.equals(A)) { result = true; } return result; } } --- NEW FILE: RMap.java --- /* * $RCSfile: RMap.java,v $ * $Author: steinbeck $ * $Date: 2002/10/04 13:40:29 $ * $Revision: 1.1 $ * * Copyright (C) 1997-2002 The Chemistry Development Kit (CDK) project * * This code has been kindly provided by Stephane Werner * and Thierry Hanser from IXELIS ma...@ix... * * IXELIS sarl - Semantic Information Systems * 17 rue des Cèdres 67200 Strasbourg, France * Tel/Fax : +33(0)3 88 27 81 39 Email: ma...@ix... * * CDK Contact: cdk...@li... * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ package org.openscience.cdk.isomorphism.mcss; /** * An RMap implements the association between an edge (bond) in G1 and an edge * (bond) in G2, G1 and G2 being the compared graphs in a RGraph context. * *@author Stephane Werner from IXELIS ma...@ix... *@created 24 juillet 2002 */ public class RMap { int id1 = 0; int id2 = 0; /** * Constructor for the RMap * *@param id1 number of the edge (bond) in the graphe 1 *@param id2 number of the edge (bond) in the graphe 2 */ public RMap(int id1, int id2) { this.id1 = id1; this.id2 = id2; } /** * Sets the id1 attribute of the RMap object * *@param id1 The new id1 value */ public void setId1(int id1) { this.id1 = id1; } /** * Sets the id2 attribute of the RMap object * *@param id2 The new id2 value */ public void setId2(int id2) { this.id2 = id2; } /** * Gets the id1 attribute of the RMap object * *@return The id1 value */ public int getId1() { return id1; } /** * Gets the id2 attribute of the RMap object * *@return The id2 value */ public int getId2() { return id2; } } --- NEW FILE: RNode.java --- /* * $RCSfile: RNode.java,v $ * $Author: steinbeck $ * $Date: 2002/10/04 13:40:29 $ * $Revision: 1.1 $ * * Copyright (C) 1997-2002 The Chemistry Development Kit (CDK) project * * This code has been kindly provided by Stephane Werner * and Thierry Hanser from IXELIS ma...@ix... * * IXELIS sarl - Semantic Information Systems * 17 rue des Cèdres 67200 Strasbourg, France * Tel/Fax : +33(0)3 88 27 81 39 Email: ma...@ix... * * CDK Contact: cdk...@li... * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ package org.openscience.cdk.isomorphism.mcss; import java.util.BitSet; /** * Node of the resolution graphe (RGraph) An RNode represents an association * betwwen two edges of the source graphs G1 and G2 that are compared. Two * edges may be associated if they have at least one common feature. The * association is defined outside this class. The node keeps tracks of the ID * of the mapped edges (in an RMap), of its neighbours in the RGraph it belongs * to and of the set of incompatible nodes (nodes that may not be along with * this node in the same solution) * *@author Stephane Werner from IXELIS ma...@ix... *@created 17 juillet 2002 */ public class RNode { // G1/G2 mapping RMap rMap = null; // set of neighbour nodes in the RGraph BitSet extension = null; // set of incompatible nodes in the RGraph BitSet forbidden = null; /** * Constructor for the RNode object * *@param id1 number of the bond in the graphe 1 *@param id2 number of the bond in the graphe 2 */ public RNode(int id1, int id2) { rMap = new RMap(id1, id2); extension = new BitSet(); forbidden = new BitSet(); } /** * Sets the rMap attribute of the RNode object * *@param rMap The new rMap value */ public void setRMap(RMap rMap) { this.rMap = rMap; } /** * Sets the extension attribute of the RNode object * *@param extension The new extension value */ public void setExtension(BitSet extension) { this.extension = extension; } /** * Sets the forbidden attribute of the RNode object * *@param forbidden The new forbidden value */ public void setForbidden(BitSet forbidden) { this.forbidden = forbidden; } /** * Gets the rMap attribute of the RNode object * *@return The rMap value */ public RMap getRMap() { return rMap; } /** * Gets the extension attribute of the RNode object * *@return The extension value */ public BitSet getExtension() { return extension; } /** * Gets the forbidden attribute of the RNode object * *@return The forbidden value */ public BitSet getForbidden() { return forbidden; } /** * Returns a string representation of the RNode * *@return the string representation of the RNode */ public String toString() { return ("id1 : " + rMap.id1 + ", id2 : " + rMap.id2 + "\n" + "extension : " + extension + "\n" + "forbiden : " + forbidden); } } --- NEW FILE: RTools.java --- /* $RCSfile: RTools.java,v $ * $Author: steinbeck $ * $Date: 2002/10/04 13:40:29 $ * $Revision: 1.1 $ * * Copyright (C) 1997-2002 The Chemistry Development Kit (CDK) project * * This code has been kindly provided by Stephane Werner * and Thierry Hanser from IXELIS ma...@ix... * * IXELIS sarl - Semantic Information Systems * 17 rue des Cèdres 67200 Strasbourg, France * Tel/Fax : +33(0)3 88 27 81 39 Email: ma...@ix... * * CDK Contact: cdk...@li... * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ package org.openscience.cdk.isomorphism.mcss; import java.util.*; import java.awt.Dimension; import javax.swing.JPanel; import javax.swing.JFrame; import java.io.*; import org.openscience.cdk.*; import org.openscience.cdk.renderer.*; import org.openscience.cdk.geometry.*; import org.openscience.cdk.io.*; import org.openscience.cdk.exception.*; /** * This class implements a multipurpose structure comparaison tool. * It allows to find maximal common substructure, find the * mapping of a substructure in another structure, and the mapping of * 2 isomorph structure. * Structure comparaison may be associated to bond constraints * (mandatory bonds, e.g. scaffolds, reaction cores,...) on each source graph. * The constraint flexibility allows a number of interesting queries. * The substructure analysis relies on the RGraph generic class (see: RGraph) * This class implements the link between the RGraph model and the * the CDK model in this way the RGraph remains independant and may be used * in other contexts. * This algorithm derives from the algorithm described in "Machine Learning of * of generic Reactions : 3. An efficient Algorithm for Maximal Common * Substructure determination" C. Tonnelier, Ph. Jauffret, T. Hanser * and G. Kaufmann. Tetrahedron Vol. 3, No 6, pp. 351-358, 1990. * and modified in the These of T. Hanser "Apprentissage automatique * de méthodes de synthèse à partir d'exemples". Université Louis Pasteur * STRASBOURG 1993. * * < FONT COLOR="#FF0000"> * warning : As a result of the adjacency perception used in this algorithm * there is a single limitation : cyclopropane and isobutane are seen as isomorph * This is due to the fact that these two compounds are the only ones where * each bond is connected two each other bond (bonds are fully conected) * with the same number of bonds and still they have different structures * The algotihm could be easily enhanced with a simple atom mapping manager * to provide an atom level overlap definition that would reveal this case. * We decided not to penalize the whole procedure because of one single * exception query. Furthermore isomorphism may be discarded since the number of atoms are * not the same (3 != 4) and in most case this will be already * screened out by a fingerprint based filtering. * It is possible to add a special treatment for these special query. * < /FONT> * * @author Stephane Werner from IXELIS ma...@ix... * @created 17 juillet 2002 * */ public class RTools { final static int ID1 = 0; final static int ID2 = 1; /////////////////////////////////////////////////////////////////////////// // Query Methods // // This methods are simple applications of the RGraph model on atom containers // using different constrains and search options. They give an exemple of the // most common queries but of course it is possible to define other type of // queries exploiting the constrain and option combinations // //// // Isomorphism search /** * Tests if g1 and g2 are isomorph * * @param g1 first molecule * @param g2 second molecule * @return true if the 2 molecule are isomorph */ public static boolean isIsomorph(AtomContainer g1, AtomContainer g2) { return (getIsomorphMap(g1, g2) != null); } /** * Returns the first isomorph mapping found or null * * @param g1 first molecule * @param g2 second molecule * @return the first isomorph mapping found projected of g1 */ public static List getIsomorphMap(AtomContainer g1, AtomContainer g2) { List result = null; List rMapsList = search(g1, g2, getBitSet(g1), getBitSet(g1), false, false); if(!rMapsList.isEmpty()) result = (List) rMapsList.get(0); return result; } /** * Returns all the isomorph 'mappings' found between two * atom containers. * * @param g1 first molecule * @param g2 second molecule * @return the list of all the 'mappings' */ public static List getIsomorphMaps(AtomContainer g1, AtomContainer g2) { return search(g1, g2, getBitSet(g1), getBitSet(g1), true, true); } ///// // Subgraph search /** * Returns all the subgraph 'mappings' found for g2 in g1 * @param g1 first molecule * @param g2 second molecule * @return the list of all the 'mappings' found projected of g1 * (list of AtomContainer ) */ public static List getSubgraphMaps(AtomContainer g1, AtomContainer g2) { return search(g1, g2, new BitSet(), getBitSet(g2), true, true); } /** * Returns the first subgraph found for g2 in g1 * @param g1 first molecule * @param g2 second molecule * @return the first subgraph mapping found projected on g1 */ public static List getSubgraphMap(AtomContainer g1, AtomContainer g2) { List result = null; List rMapsList = search(g1, g2, new BitSet(), getBitSet(g2), false, false); if(!rMapsList.isEmpty()) result = (List) rMapsList.get(0); return result; } /** * Tests if g2 a subgraph of g1 * * @param g1 first molecule * @param g2 second molecule * @return true if g2 a subgraph on g1 */ public static boolean isSubgraph(AtomContainer g1, AtomContainer g2) { return (getSubgraphMap(g1, g2) != null); } //// // Maximum common substructure search /** * Returns all the maximal common substructure between 2 atom containers * * @param g1 first molecule * @param g2 second molecule * @return the list of all the maximal common substructure * found projected of g1 (list of AtomContainer ) */ public static List getOverlaps(AtomContainer g1, AtomContainer g2) { List rMapsList = search(g1, g2, new BitSet(), new BitSet(), true, false); // projection on G1 ArrayList graphList = projectList(rMapsList, g1, ID1); // reduction of set of solution (isomorphism and substructure // with different 'mappings' ArrayList reducedGraphList = getMaximum(graphList); return reducedGraphList; } ////////////////////////////////////////////////// // Internal methods /** * Builds the RGraph ( resolution graph ), from two atomContainer * (description of the two molecules to compare) * This is the interface point between the CDK model and * the generic MCSS algorithm based on the RGRaph. * @param g1 Description of the first molecule * @param g2 Description of the second molecule * @return the rGraph */ public static RGraph buildRGraph(AtomContainer g1, AtomContainer g2) { RGraph rGraph = new RGraph(); nodeConstructor(rGraph, g1, g2); arcConstructor(rGraph, g1, g2); return rGraph; } /** * Builds the nodes of the RGraph ( resolution graph ), from * two atom containers (description of the two molecules to compare) * * @param gr the target RGraph * @param ac1 description of the first molecule * @param ac2 description of the second molecule */ private static void nodeConstructor(RGraph gr, AtomContainer ac1, AtomContainer ac2) { // resets the target graph. gr.clear(); Bond[] bondsA1 = ac1.getBonds(); Bond[] bondsA2 = ac2.getBonds(); int k = 0; // compares each bond of G1 to each bond of G2 for(int i = 0; i < bondsA1.length; i++) { for(int j = 0; j < bondsA2.length; j++) { // if both bonds are compatible then create an association node // in the resolution graph if(bondsA1[i].getOrder() == bondsA2[j].getOrder() && ((bondsA1[i].getAtomAt(0).getSymbol().equals(bondsA2[j].getAtomAt(0).getSymbol()) && bondsA1[i].getAtomAt(1).getSymbol().equals(bondsA2[j].getAtomAt(1).getSymbol())) || (bondsA1[i].getAtomAt(0).getSymbol().equals(bondsA2[j].getAtomAt(1).getSymbol()) && bondsA1[i].getAtomAt(1).getSymbol().equals(bondsA2[j].getAtomAt(0).getSymbol())) ) ) { gr.addNode(new RNode(i, j)); } } } } /** * Build edges of the RGraphs * This method create the edge of the RGraph and * calculates the incompatibility and neighbourhood * relationships between RGraph nodes. * @param gr the rGraph * @param ac1 Description of the first molecule * @param ac2 Description of the second molecule */ private static void arcConstructor(RGraph gr, AtomContainer ac1, AtomContainer ac2) { // each node is incompatible with himself for(int i = 0; i < gr.graph.size(); i++) { RNode x = (RNode) gr.graph.get(i); x.forbidden.set(i); } Bond a1 = null; Bond a2 = null; Bond b1 = null; Bond b2 = null; Bond[] bondsA1 = ac1.getBonds(); Bond[] bondsA2 = ac2.getBonds(); gr.setFirstGraphSize(ac1.getBondCount()); gr.setSecondGraphSize(ac2.getBondCount()); for(int i = 0; i < gr.graph.size(); i++) { RNode x = (RNode) gr.graph.get(i); // two nodes are neighbours if their adjacency // relationship in are equivalent in G1 and G2 // else they are incompatible. for(int j = i + 1; j < gr.graph.size(); j++) { RNode y = (RNode) gr.graph.get(j); a1 = bondsA1[((RNode) gr.graph.get(i)).rMap.id1]; a2 = bondsA2[((RNode) gr.graph.get(i)).rMap.id2]; b1 = bondsA1[((RNode) gr.graph.get(j)).rMap.id1]; b2 = bondsA2[((RNode) gr.graph.get(j)).rMap.id2]; if(a1.equals(b1) || a2.equals(b2) || (!adjacency(a1, b1).equals(adjacency(a2, b2)))) { x.forbidden.set(j); y.forbidden.set(i); } else if(!adjacency(a1, b1).equals("")) { x.extension.set(j); y.extension.set(i); } } } } /** * Determines if 2 bond have 1 atom in common * * @param a first bond * @param b second bond * @return the symbol of the common atom or "" if * the 2 bonds have no common atom */ private static String adjacency(Bond a, Bond b) { String symbol = ""; if(a.contains(b.getAtomAt(0))) { symbol = b.getAtomAt(0).getSymbol(); } else if(a.contains(b.getAtomAt(1))) { symbol = b.getAtomAt(1).getSymbol(); } return symbol; } /** * Transforms an AtomContainer into a BitSet (which's size = number of bond * in the atomContainer, all the bit are set to true) * * @param ac AtomContainer to transform * @return The bitSet */ public static BitSet getBitSet(AtomContainer ac) { BitSet bs = null; int n = ac.getBondCount(); if(n != 0) { bs = new BitSet(n); bs.set(0, n, true); } else { bs = new BitSet(); } return bs; } /** * General Rgraph parsing method (usually not used directly) * This method is the entry point for the recursive search * adapted to the atom container input. * * @param g1 first molecule * @param g2 second molecule * @param c1 initial condition ( bonds from g1 that * must be contains in the solution ) * @param c2 initial condition ( bonds from g2 that * must be contains in the solution ) * @param findAllStructure if false stop at the first structure found * @param findAllMap if true search all the 'mappings' for one same * structure * @return a list of rMapList that represent the serach solutions */ public static List search(AtomContainer g1, AtomContainer g2, BitSet c1, BitSet c2, boolean findAllStructure, boolean findAllMap) { // reset result ArrayList rMapsList = new ArrayList(); // build the RGraph corresponding to this problem RGraph rGraph = buildRGraph(g1, g2); // parse the RGraph with the given constrains and options rGraph.parse(c1, c2, findAllStructure,findAllMap); List solutionList = rGraph.getSolutions(); // convertions of RGraph's internal solutions to G1/G2 mappings for(Iterator i = solutionList.iterator(); i.hasNext(); ) { BitSet set = (BitSet) i.next(); rMapsList.add(rGraph.bitSetToRMap(set)); } return rMapsList; } ////////////////////////////////////// // Manipulation tools /** * Projects a list of RMap on a molecule * * @param rMapList the list to project * @param g the molecule on which project * @param id the id in the RMap of the molecule g * @return an AtomContainer */ public static AtomContainer project(List rMapList, AtomContainer g, int id) { AtomContainer ac = new AtomContainer(); Bond[] bondList = g.getBonds(); Hashtable table = new Hashtable(); Atom a1 = null; Atom a2 = null; Atom a = null; Bond bond = null; for(Iterator i = rMapList.iterator(); i.hasNext(); ) { RMap rMap = (RMap) i.next(); if(RTools.ID1 == 0) { bond = bondList[rMap.id1]; } else { bond = bondList[rMap.id2]; } a = bond.getAtomAt(0); a1 = (Atom) table.get(a); if(a1 == null) { a1 = (Atom) a.clone(); ac.addAtom(a1); table.put(a, a1); } a = bond.getAtomAt(1); a2 = (Atom) table.get(a); if(a2 == null) { a2 = (Atom) a.clone(); ac.addAtom(a2); table.put(a, a2); } ac.addBond(new Bond(a1, a2, bond.getOrder())); } return ac; } /** * Project a list of RMapsList on a molecule * * @param rMapsList list of RMapsList to project * @param g the molecule on which project * @param id the id in the RMap of the molecule g * @return a list of AtomContainer */ public static ArrayList projectList(List rMapsList, AtomContainer g, int id) { ArrayList graphList = new ArrayList(); for(Iterator i = rMapsList.iterator(); i.hasNext(); ) { List rMapList = (List) i.next(); AtomContainer ac = project(rMapList, g, id); graphList.add(ac); } return graphList; } /** * remove all redundant solution * * @param graphList the list of structure to clean * @return the list cleaned */ private static ArrayList getMaximum(ArrayList graphList) { ArrayList reducedGraphList = (ArrayList) graphList.clone(); for(int i = 0; i < graphList.size(); i++) { AtomContainer gi = (AtomContainer) graphList.get(i); for(int j = i + 1; j < graphList.size(); j++) { AtomContainer gj = (AtomContainer) graphList.get(j); if(i != j) { // if G1 isomorph to G2 or G1 included in G2 then Gi // is discarded (redondant or irrevelant) if(isIsomorph(gi, gj) || isSubgraph(gi, gj)) { reducedGraphList.remove(gj); } } } } return reducedGraphList; } /** * Test utility on command line * * Usage : java RTools g1.mol g2.mol * */ public static void main(String[] args) { // loading the source graphs System.out.println("Loading... "); AtomContainer g1 = loadFile(new File(args[0])); AtomContainer g2 = loadFile(new File(args[1])); System.out.println("Searching... "); long start = System.currentTimeMillis(); // some trivial queries start = System.currentTimeMillis(); System.out.println("isIsomorph(g1,g2) : " + isIsomorph(g1,g2) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isIsomorph(g1,g1) : " + isIsomorph(g1,g1) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isIsomorph(g2,g2) : " + isIsomorph(g2,g2) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isSubGraph(g1,g2) : " + isSubgraph(g1,g2) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isSubGraph(g2,g1) : " + isSubgraph(g2,g1) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isSubGraph(g1,g1) : " + isSubgraph(g1,g1) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("isSubGraph(g2,g2) : " + isSubgraph(g2,g2) + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("getOverlaps(g1,g2) : " + getOverlaps(g1,g2).size() + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("getOverlaps(g2,g1) : " + getOverlaps(g2,g1).size() + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("getOverlaps(g1,g1) : " + getOverlaps(g1,g1).size() + " (" + (System.currentTimeMillis()- start) + " ms)"); start = System.currentTimeMillis(); System.out.println("getOverlaps(g1,g1) : " + getOverlaps(g2,g2).size() + " (" + (System.currentTimeMillis()- start) + " ms)"); } /** * Test purpose file reading method * Load a molfile into an atom container * @param file the file to load */ private static AtomContainer loadFile(File file) { ChemFile outFile = null; ChemObjectReader reader = null; ChemFile chemFile = new ChemFile(); try { FileReader fileReader = null; if(file.exists()) { fileReader = new FileReader(file); reader = new MDLReader(fileReader); } else System.err.println("Target file doas not exist:" + file); try { outFile = (ChemFile) reader.read((ChemObject) new ChemFile()); } catch(UnsupportedChemObjectException ex) { System.out.println(ex); } fileReader.close(); } catch(IOException e) { System.out.println(e); } return outFile.getChemSequence(0).getChemModel(0).getAllInOneContainer(); } } |