|
From: Stephanie C. <SC...@sc...> - 2011-09-30 18:26:05
|
We are seeing different results from the UniversalIsomorphismTester between CDK versions 1.0.3 and 1.4.4, particularly with aromatic groups. If the test code is correct, then version 1.4.4 may be wrong. The test code and its output are below.
/**
 * Prints a tab-separated table to standard output: a header row listing the
 * functional-group SMILES, then one row per test SMILES holding the result of
 * UniversalIsomorphismTester.isSubgraph(test molecule, functional group) for
 * each functional group. Both molecules are built with getMolecule. Any
 * exception stops the run and its stack trace is printed.
 *
 * @param args not used
 */
public static void main(String[] args) {
    final String[] fgSmiles = {
        "O=C1C=CC=CC1=O",
        "O=C1C=CC(=O)C=C1",
        "ClC=N"
    };
    // SIDs 26755257, 29215022
    final String[] testSmiles = {
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)[NH3+])O",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O.Cl",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O.Cl",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O",
        "C1CC2=CC=CC=C2C1NC3=C(C(=O)C4=C(C3=O)N=CC=C4)Cl",
        "CC1(OCC(O1)CSC2=C(C(=O)C3=C(C2=O)C=CC=N3)SCC4COC(O4)(C)C)C",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O",
        "CC1COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C",
        "C1=CC=C2C(=C1)C=CC3=C2C=CC4=C3C(=O)C=CC4=O",
        "CC1=CC(=O)C2=C(C1=O)C=CC=C2O",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O",
        "C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N.[Na+].[Na+].[Na+]",
        "CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5O)O)(C(=O)C)O)N)O",
        "C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N",
        "C1=CC(=C(C=C1CN=C(N)NC(=O)C2=C(N=C(C(=N2)Cl)N)N)Cl)Cl.Cl",
        "C1=CC2=C(C(=O)C=CC2=O)C(=C1)O"
    };
    try {
        // Header row: a label followed by one column per functional group.
        System.out.print("Smiles\t");
        for (int col = 0; col < fgSmiles.length; col++) {
            System.out.print(fgSmiles[col] + "\t");
        }
        System.out.println();

        // One row per test structure.
        for (int row = 0; row < testSmiles.length; row++) {
            final String targetSmiles = testSmiles[row];
            System.out.print(targetSmiles);
            final IMolecule target = getMolecule(targetSmiles);
            for (int col = 0; col < fgSmiles.length; col++) {
                // The query molecule is rebuilt for every cell, as in the original test.
                final IMolecule query = getMolecule(fgSmiles[col]);
                // Build the cell text first so nothing is printed if the match throws.
                final String cell = "\t" + UniversalIsomorphismTester.isSubgraph(target, query);
                System.out.print(cell);
            }
            System.out.println();
        }
    } catch (Exception e) {
        // Test driver: report the failure and stop.
        e.printStackTrace();
    }
}
/* CDK Version 1.4.4 */
/**
 * Builds a molecule from a SMILES string (variant written against CDK 1.4.4).
 * The string is read through a SMILESReader into a MoleculeSet, the first
 * molecule of that set is taken, and CDKHydrogenAdder.addImplicitHydrogens is
 * applied to it before it is returned.
 *
 * @param structure SMILES text to read
 * @return the first molecule of the set that was read, after the hydrogen adder ran
 * @throws CDKException declared for the CDK calls made here
 * @throws IOException declared by the signature
 */
public static IMolecule getMolecule(String structure) throws CDKException, IOException {
    final IChemObjectBuilder chemBuilder = DefaultChemObjectBuilder.getInstance();
    final SMILESReader smilesReader = new SMILESReader(new StringReader(structure));
    final MoleculeSet parsed =
            (MoleculeSet) smilesReader.read(chemBuilder.newInstance(new MoleculeSet().getClass()));
    // Only the first entry of the set is used.
    final IMolecule first = parsed.getMolecule(0);
    CDKHydrogenAdder.getInstance(chemBuilder).addImplicitHydrogens(first);
    return first;
}
/* CDK Version 1.0.3 */
/**
 * Builds a molecule from a SMILES string (variant written against CDK 1.0.3).
 * The string is read through a SMILESReader into a MoleculeSet, the first
 * molecule of that set is taken, and
 * HydrogenAdder.addImplicitHydrogensToSatisfyValency is applied to it before it
 * is returned.
 *
 * @param structure SMILES text to read
 * @return the first molecule of the set that was read, after the hydrogen adder ran
 * @throws CDKException declared for the CDK calls made here
 * @throws IOException declared by the signature
 */
public static IMolecule getMolecule(String structure) throws CDKException, IOException {
    final DefaultChemObjectBuilder chemBuilder = DefaultChemObjectBuilder.getInstance();
    final SMILESReader smilesReader = new SMILESReader(new StringReader(structure));
    final MoleculeSet parsed = (MoleculeSet) smilesReader.read(chemBuilder.newMoleculeSet());
    // Only the first entry of the set is used.
    final IMolecule first = parsed.getMolecule(0);
    new HydrogenAdder().addImplicitHydrogensToSatisfyValency(first);
    return first;
}
CDK 1.4.4
CDK 1.0.3
CDK 1.4.4
CDK 1.0.3
CDK 1.4.4
CDK 1.0.3
Smiles
O=C1C=CC(=O)C=C1
O=C1C=CC(=O)C=C1
O=C1C=CC=CC1=O
O=C1C=CC=CC1=O
ClC=N
ClC=N
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)[NH3+])O
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O.Cl
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O.Cl
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O
FALSE
TRUE
C1CC2=CC=CC=C2C1NC3=C(C(=O)C4=C(C3=O)N=CC=C4)Cl
FALSE
TRUE
CC1(OCC(O1)CSC2=C(C(=O)C3=C(C2=O)C=CC=N3)SCC4COC(O4)(C)C)C
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)CO)O)N)O
FALSE
TRUE
C1=CC=C2C(=C1)C=CC3=C2C=CC4=C3C(=O)C=CC4=O
FALSE
TRUE
CC1=CC(=O)C2=C(C1=O)C=CC=C2O
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5OC)O)(C(=O)C)O)N)O
FALSE
TRUE
CC1C(C(CC(O1)OC2CC(CC3=C(C4=C(C(=C23)O)C(=O)C5=C(C4=O)C=CC=C5O)O)(C(=O)C)O)N)O
FALSE
TRUE
C1=CC2=C(C(=O)C=CC2=O)C(=C1)O
FALSE
TRUE
CC1COC2=C1C(=O)C(=O)C3=C2C=CC4=C3CCCC4(C)C
FALSE
TRUE
C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N.[Na+].[Na+].[Na+]
FALSE
TRUE
C1=CC=C2C(=C1)C(=O)C3=C(C2=O)C(=C(C=C3NC4=CC(=C(C=C4)NC5=NC(=NC(=N5)NC6=CC=C(C=C6)S(=O)(=O)[O-])Cl)S(=O)(=O)[O-])S(=O)(=O)[O-])N
FALSE
TRUE
C1=CC(=C(C=C1CN=C(N)NC(=O)C2=C(N=C(C(=N2)Cl)N)N)Cl)Cl.Cl
FALSE
TRUE
|