Work at SourceForge, help us to make it a better place! We have an immediate need for a Support Technician in our San Francisco or Denver office.

Close

[r15020]: cdk-fingerprint-paper / trunk / src / makeHitStatistics.groovy Maximize Restore History

Download this file

makeHitStatistics.groovy    152 lines (133 with data), 5.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.isomorphism.UniversalIsomorphismTester;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
import org.openscience.cdk.fingerprint.ExtendedFingerprinter;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.fingerprint.MACCSFingerprinter;
import org.openscience.cdk.fingerprint.EStateFingerprinter;
import org.openscience.cdk.fingerprint.SubstructureFingerprinter;
// Command-line handling: -h/--help prints the usage text, while
// -q/--query-file and -d/--database-file each take one argument and are
// both required for the script to continue.
def cli = new CliBuilder(usage: 'makeHitStatistics.groovy -[qd]')
cli.h(longOpt: 'help', 'show usage information')
cli.q(longOpt: 'query-file', args: 1, argName: 'queryFile',
      'File containing queries')
cli.d(longOpt: 'database-file', args: 1, argName: 'databaseFile',
      'File containing the database')

def options = cli.parse(args)

// Print the usage text and stop when parsing failed, help was requested,
// or either of the two input files was not given.
if (!options || options.h || !options.q || !options.d) {
    cli.usage()
    return
}
/**
 * Builds a molecule from a SMILES string.
 *
 * The string is parsed with a SmilesParser backed by the default chem
 * object builder; atom types are then perceived and configured, and
 * aromaticity is detected, before the molecule is handed back.
 *
 * @param SMILES the SMILES string to parse
 * @return the molecule produced by the parser after the two
 *         post-processing steps
 */
def parse(SMILES) {
    def builder = DefaultChemObjectBuilder.getInstance()
    def molecule = new SmilesParser(builder).parseSmiles(SMILES)
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule)
    CDKHueckelAromaticityDetector.detectAromaticity(molecule)
    molecule
}
/**
 * Delegates to UniversalIsomorphismTester.isSubgraph with the test
 * molecule as first argument and the query molecule as second.
 *
 * @param testMolecule  molecule passed as the first argument
 * @param queryMolecule molecule passed as the second argument
 * @return whatever UniversalIsomorphismTester.isSubgraph returns
 */
def isSubgraph( testMolecule, queryMolecule ) {
    def result = UniversalIsomorphismTester.isSubgraph( testMolecule, queryMolecule )
    return result
}
/**
 * Tests whether every bit set in the query bit set is also set in the
 * test bit set, i.e. whether (test AND query) equals query.
 *
 * Neither argument is modified.
 *
 * @param testBitSet  bit set of the candidate (database) molecule
 * @param queryBitset bit set of the query
 * @return true when the query's bits are a subset of the test's bits
 */
def boolean bitSetMatches( testBitSet, queryBitset ) {
    // BitSet.and() returns void and modifies its receiver, so its result
    // cannot be compared directly; AND into a copy instead so the
    // caller's bit set stays intact.
    def common = testBitSet.clone();
    common.and( queryBitset );
    return queryBitset.equals( common );
}
/**
 * Computes a Fingerprinter fingerprint for each molecule and compares
 * them with bitSetMatches.
 *
 * @param testMolecule  molecule whose fingerprint is passed as the test bits
 * @param queryMolecule molecule whose fingerprint is passed as the query bits
 * @return the result of bitSetMatches on the two fingerprints
 */
def fingerprintMatches( testMolecule, queryMolecule ) {
    def generator = new Fingerprinter()
    def testBits = generator.getFingerprint( testMolecule )
    def queryBits = generator.getFingerprint( queryMolecule )
    bitSetMatches( testBits, queryBits )
}
/**
 * Computes an ExtendedFingerprinter fingerprint for each molecule and
 * compares them with bitSetMatches.
 *
 * @param testMolecule  molecule whose fingerprint is passed as the test bits
 * @param queryMolecule molecule whose fingerprint is passed as the query bits
 * @return the result of bitSetMatches on the two fingerprints
 */
def extendedFingerprintMatches( testMolecule, queryMolecule ) {
    def generator = new ExtendedFingerprinter()
    def testBits = generator.getFingerprint( testMolecule )
    def queryBits = generator.getFingerprint( queryMolecule )
    bitSetMatches( testBits, queryBits )
}
/**
 * Computes a MACCSFingerprinter fingerprint for each molecule and
 * compares them with bitSetMatches.
 *
 * @param testMolecule  molecule whose fingerprint is passed as the test bits
 * @param queryMolecule molecule whose fingerprint is passed as the query bits
 * @return the result of bitSetMatches on the two fingerprints
 */
def MACCSFingerprintMatches( testMolecule, queryMolecule ) {
    // The local is deliberately not named after the class it instantiates.
    def generator = new MACCSFingerprinter()
    def testBits = generator.getFingerprint( testMolecule )
    def queryBits = generator.getFingerprint( queryMolecule )
    bitSetMatches( testBits, queryBits )
}
/**
 * Computes an EStateFingerprinter fingerprint for each molecule and
 * compares them with bitSetMatches.
 *
 * @param testMolecule  molecule whose fingerprint is passed as the test bits
 * @param queryMolecule molecule whose fingerprint is passed as the query bits
 * @return the result of bitSetMatches on the two fingerprints
 */
def EStateFingerprintMatches( testMolecule, queryMolecule ) {
    // The local is deliberately not named after the class it instantiates.
    def generator = new EStateFingerprinter()
    def testBits = generator.getFingerprint( testMolecule )
    def queryBits = generator.getFingerprint( queryMolecule )
    bitSetMatches( testBits, queryBits )
}
/**
 * Computes a SubstructureFingerprinter fingerprint for each molecule and
 * compares them with bitSetMatches.
 *
 * @param testMolecule  molecule whose fingerprint is passed as the test bits
 * @param queryMolecule molecule whose fingerprint is passed as the query bits
 * @return the result of bitSetMatches on the two fingerprints
 */
def SubstructureFingerprintMatches( testMolecule, queryMolecule ) {
    // The local is deliberately not named after the class it instantiates.
    def generator = new SubstructureFingerprinter()
    def testBits = generator.getFingerprint( testMolecule )
    def queryBits = generator.getFingerprint( queryMolecule )
    bitSetMatches( testBits, queryBits )
}
// Reads the first whitespace-delimited token of every line of a file and
// returns the tokens as a list. Lines that are empty or start with
// whitespace have no such token and are skipped; indexing the matcher on
// such a line would otherwise abort the script with an
// IndexOutOfBoundsException.
def readSmiles = { fileName ->
    def result = [];
    new File(fileName).eachLine { line ->
        def m = line =~ /(^\S+)/;
        if ( m.find() ) {
            result.add( m.group(1) );
        }
    }
    return result;
}

// SMILES strings of the molecules to search in (-d) and to search for (-q).
def database = readSmiles(options.d);
def queries = readSmiles(options.q);
// Report how many molecules were read from each input file.
// Each message is built on one logical line: Groovy ends a statement at a
// newline, so a continuation line that merely starts with '+' is a
// separate expression and its text would never reach print.
print "Database file: " + options.d + ", " + database.size() + " molecules\n";
print "Query file: " + options.q + ", " + queries.size() + " molecules\n";
// Number of (query, database) pairs; only used for the progress display.
def total = queries.size() * database.size();
def i = 0;
println();
def oldPercent = "";
targetFile = new File("result.txt");
// One writer is kept open for the whole run instead of reopening the
// result file for every single pair; withWriter closes it (flushing all
// rows) even when an exception aborts the run.
targetFile.withWriter { out ->
    out.write( "Query SMILES, "
             + "Target SMILES, "
             + "Universal Isomorphism hit, "
             + "Fingerprint hit, "
             + "ExtendedFingerprint hit, "
             + "MACCSFingerprint hit, "
             + "EStateFingerprinter hit, "
             + "SubstructureFingerprinter hit\n" );
    for ( query in queries ) {
        // The query molecule does not depend on the database entry, so it
        // is parsed once per query rather than once per pair.
        def queryMolecule = parse(query);
        for ( smiles in database ) {
            def testMolecule = parse(smiles);
            // One CSV row per pair: both SMILES followed by the outcome of
            // each of the six matching methods.
            out.write(
                query + ", "
                + smiles + ", "
                + isSubgraph( testMolecule, queryMolecule ) + ", "
                + fingerprintMatches( testMolecule, queryMolecule ) + ", "
                + extendedFingerprintMatches( testMolecule, queryMolecule ) + ", "
                + MACCSFingerprintMatches( testMolecule, queryMolecule ) + ", "
                + EStateFingerprintMatches( testMolecule, queryMolecule ) + ", "
                + SubstructureFingerprintMatches( testMolecule, queryMolecule )
                + "\n" );
            // Count the pair just finished so the display ends at 100.0%.
            def percent = sprintf( "%5.1f", (double)((100 * ++i) / total) );
            // Only redraw the progress line when the shown value changes.
            if ( !oldPercent.equals(percent) ) {
                print percent + "% \r";
                oldPercent = percent;
            }
        }
    }
}
// Move past the carriage-return progress line so later console output
// does not overwrite it.
println();