Update of /cvsroot/cdk/cdk-plugins/wwmmplugin/src/uk/ac/cam/ch/wwmm/util In directory sc8-pr-cvs1:/tmp/cvs-serv26020 Added Files: BuildMolecule.java CanonicalizationDir.java Canonicalization.java CDKConverter.java Converter.java FileMerger2.java FileMerger.java FileSplitter.java Fingerprint.java FormulaGenerator.java JOELibConverter.java LegacyConverter.java MatrixBuilder.java MergeIChIandCML.java MoleculeProperties.java MolUtil.java multiExec.java SaxonTransform.java Scraper.java SplitSDF.java XUpdate.java Log Message: change them to cdk plugin --- NEW FILE: BuildMolecule.java --- package uk.ac.cam.ch.wwmm.util; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintStream; import java.io.StringReader; import java.io.StringWriter; import java.awt.BorderLayout; import java.util.StringTokenizer; import javax.swing.JFrame; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.openscience.cdk.Atom; import org.openscience.cdk.Bond; import org.openscience.cdk.ChemFile; import org.openscience.cdk.ChemModel; import org.openscience.cdk.ChemObject; import org.openscience.cdk.ChemSequence; import org.openscience.cdk.Molecule; import org.openscience.cdk.SetOfMolecules; import org.openscience.cdk.renderer.MoleculeViewer2D; import org.openscience.cdk.renderer.MoleculeViewer3D; import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.cdk.libio.jmol.Convertor; import org.openscience.cdk.io.CMLReader; import org.openscience.cdk.io.CMLWriter; import org.openscience.cdk.tools.SaturationChecker; import org.openscience.jmol.PublicJmol; import javax.vecmath.Point2d; import javax.vecmath.Point3d; import uk.co.demon.ursus.dom.PMRDOMUtil; /** license stuff goes here - probably Artistic - */ /**@author P.Murray-Rust, 2002 */ /**Tests building of molecule from scratch (atom by atom) */ public class BuildMolecule { /** constructor */ public BuildMolecule() { } /** build N=O and display */ public static void test1() { // make empty Molecule and add atoms and bonds Molecule molecule = new Molecule(); Atom atom1 = new Atom("N", new Point2d(1.0, 0.0)); Atom atom2 = new Atom("O"); atom2.setX2D(0.0); atom2.setY2D(1.0); Bond bond1 = new Bond(atom1, atom2, 2.0f); molecule.addAtom(atom1); molecule.addAtom(atom2); molecule.addBond(bond1); debug(molecule); MoleculeViewer2D mv2d = new MoleculeViewer2D(molecule); mv2d.display(); } /** build C1=CC1(=O) and test coordinate generation */ public static void test2() { // make empty Molecule and add atoms and bonds Molecule molecule = new Molecule(); Atom c1 = new Atom("C"); Atom c2 = new Atom("C"); Atom c3 = new Atom("C"); Atom o1 = new Atom("O"); molecule.addAtom(c1); molecule.addAtom(c2); molecule.addAtom(c3); molecule.addAtom(o1); molecule.addBond(new Bond(c1, c2, 2.0f)); molecule.addBond(new Bond(c1, c3, 1.0f)); molecule.addBond(new Bond(c2, c3, 1.0f)); molecule.addBond(new Bond(o1, c3, 2.0f)); boolean generate2d = true; // should generate coordinates MoleculeViewer2D.display(molecule, generate2d); Atom cl = new Atom("Cl"); molecule.addAtom(cl); molecule.addBond(new Bond(c1, cl, 1.0f)); // alternative StructureDiagramGenerator sdg = new StructureDiagramGenerator(molecule); try { sdg.generateCoordinates(); } catch (Exception sdge) { System.out.println("StructureDiagramGenerator threw Exception: "+sdge); } debug(molecule); System.out.println("Generating coordinates..."); molecule = sdg.getMolecule(); debug(molecule); MoleculeViewer2D mv2d = new MoleculeViewer2D(molecule); mv2d.display(); } static void debugBond(PrintStream s, Bond b) { s.println("Bond info======"); Atom[] atoms = b.getAtoms(); s.println("Bond 2D centre: "+b.get2DCenter()); s.println(b.getAtomAt(0).getSymbol()+"-"+b.getAtomAt(0).getSymbol()); try { s.println("Bond length: "+b.getLength()); } catch (Throwable e) { System.err.println("No bond length"); } // s.println("Bond order: "+b.getOrder()); // s.println("Bond stereo: "+b.getStereo()); } static void debug(Molecule molecule) { System.out.println("=== molecule ==="); // Returns the number of Atoms in this Container. int nAtoms = molecule.getAtomCount(); System.out.println("The molecule has "+nAtoms+" atoms"); java.util.Enumeration atoms = molecule.atoms(); while (atoms.hasMoreElements()) { Atom atom = (Atom) atoms.nextElement(); System.out.println("Atom: "+atom); } // Returns the array of bonds of this AtomContainer. System.out.println(" ---Bonds---"); Bond[] bonds = molecule.getBonds(); if (bonds == null || bonds.length == 0) { System.out.println("NO BONDS"); } else { // Returns the number of Bonds in this Container. int nBonds = molecule.getBondCount(); System.out.println("The molecule has "+nBonds+" bonds"); for (int i = 0; i < nBonds; i++) { Bond bond = bonds[i]; debugBond(System.out, bond); } } } static void convert(String filename) throws IOException, TestException { } static void testXYZ(String filename) throws IOException, TestException { BufferedReader br = new BufferedReader(new FileReader(filename)); String na = br.readLine(); if (na == null) { throw new TestException("Corrupt/empty XYZ file"); } String title = br.readLine(); if (title == null) { throw new TestException("Corrupt/empty XYZ file reading second line"); } int natoms = 0; try { natoms = Integer.parseInt(na); } catch (NumberFormatException nfe) { throw new TestException("XYZ file must start with an integer"); } Molecule molecule = new Molecule(); for (int i = 0; i < natoms; i++) { String line = br.readLine(); if (line == null) { throw new TestException("Not enough lines in XYZ file at line: "+(i+2)); } StringTokenizer st = new StringTokenizer(line); if (st.countTokens() != 4) { throw new TestException("Bad format of line in XYZ file: "+line); } String element = st.nextToken(); String x = st.nextToken(); String y = st.nextToken(); String z = st.nextToken(); double x3, y3, z3; try { x3 = new Double(x).doubleValue(); y3 = new Double(y).doubleValue(); z3 = new Double(z).doubleValue(); } catch (NumberFormatException nfe1) { throw new TestException("Bad coordinate in: "+line); } Atom a = new Atom(element, new Point3d(x3, y3, z3)); molecule.addAtom(a); } System.out.println("Read XYZ OK... natoms="+natoms); joinTheDots(molecule); // guessBondOrders(molecule); // debug(molecule); System.out.println("======================"); StructureDiagramGenerator sdg = new StructureDiagramGenerator(molecule); try { sdg.generateCoordinates(); molecule = sdg.getMolecule(); } catch (Exception sdge) { System.out.println("StructureDiagramGenerator threw Exception: "+sdge); } // debug(molecule); MoleculeViewer2D mv2d = new MoleculeViewer2D(molecule); mv2d.display(); // MoleculeViewer3D mv3d = new MoleculeViewer3D(molecule); // mv3d.display(); view3D(molecule); } static void testCML(String filename) throws Exception { Document doc = null; try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File(filename)); } catch (Exception ee) { System.err.println("Parser error: "+ee); return; } NodeList mols = doc.getElementsByTagName("molecule"); String out = "<cml>"; for (int i = 0; i < mols.getLength(); i++) { Element mol = (Element) mols.item(i); StringWriter sw = new StringWriter(); PMRDOMUtil.outputEventStream(mol, sw, 0, uk.co.demon.ursus.dom.PMRDOMUtil.PRETTY); sw.close(); StringReader sr = new StringReader(sw.toString()); CMLReader cr = new CMLReader(sr); ChemFile cf = (ChemFile) cr.read(new ChemFile()); // System.out.println("OBJ "+obj.getClass()); Molecule molecule = getMolecule(cf); joinTheDots(molecule); sw = new StringWriter(); CMLWriter cmlw = new CMLWriter(sw); cmlw.write(molecule); sw.close(); out += sw.toString(); } out += "</cml>"; FileWriter fw = new FileWriter(filename+".1"); fw.write(out); fw.close(); } public static Molecule getMolecule(ChemFile chemFile) { for (int i = 0; i < chemFile.getChemSequenceCount(); i++) { ChemModel[] cms = chemFile.getChemSequence(i).getChemModels(); for (int j = 0; j < cms.length; j++) { if (cms[j] == null) { continue; } SetOfMolecules som = cms[j].getSetOfMolecules(); for (int k = 0; k < som.getMoleculeCount(); k++) { Molecule mol = som.getMolecule(k); if (mol != null) return mol; } } } return null; } public static void view3D(Molecule m) { try { JFrame frame = new JFrame("CDK Molecule Viewer"); frame.getContentPane().setLayout(new BorderLayout()); org.openscience.jmol.PublicJmol jmol = org.openscience.jmol.PublicJmol.getJmol(frame); jmol.showChemFrame(Convertor.convert(m)); frame.getContentPane().add(jmol, BorderLayout.CENTER); } catch (Exception e) { e.printStackTrace(); } } public final static double TOLERANCE = 0.2; static void joinTheDots(Molecule molecule) { int natoms = molecule.getAtomCount(); Atom[] atoms = molecule.getAtoms(); for (int i = 1; i < natoms; i++) { Atom ai = atoms[i]; String elemi = ai.getSymbol(); double radi = getRadius(elemi); Point3d pi = ai.getPoint3D(); for (int j = 0; j < i; j++) { Atom aj = atoms[j]; String elemj = aj.getSymbol(); double radj = getRadius(elemj); Point3d pj = aj.getPoint3D(); double dist = pi.distance(pj); if (dist < radi + radj + TOLERANCE) { double paulingBO = getPaulingBO(dist, radi, radj); double cdkBO = 1.0; if (paulingBO < 1.3) { cdkBO = 1.0; } else if (paulingBO < 1.75) { cdkBO = 1.5; } else if (paulingBO < 2.5) { cdkBO = 2.0; } else if (paulingBO < 5) { cdkBO = 3.0; } Bond b = new Bond(ai, aj, cdkBO); molecule.addBond(b); } } } } public static double getPaulingBO(double length, double covr0, double covr1) { return Math.exp(2.303*(-length + covr0 + covr1)/0.7); } // not finished public static void guessBondOrders(Molecule molecule) { //SaturationChecker boolean start = true; int FINISHED = 0; for (int i = 0; i < molecule.getAtomCount(); i++) { Atom atom = molecule.getAtomAt(i); atom.flags = new boolean[10]; atom.flags[FINISHED] = false; if (atom.getSymbol().equals("C")) { atom.setMaxBondOrder(3.0); atom.setMaxBondOrderSum(4.0); } if (atom.getSymbol().equals("N")) { atom.setMaxBondOrder(3.0); atom.setMaxBondOrderSum(4.0); } if (atom.getSymbol().equals("O")) { atom.setMaxBondOrder(2.0); atom.setMaxBondOrderSum(2.0); } if (atom.getSymbol().equals("F")) { atom.setMaxBondOrder(1.0); atom.setMaxBondOrderSum(1.0); } if (atom.getSymbol().equals("H")) { atom.setMaxBondOrder(1.0); atom.setMaxBondOrderSum(1.0); } if (atom.getSymbol().equals("H")) { } else { int hc = 0; Atom[] ligands = molecule.getConnectedAtoms(atom); for (int j = 0; j < ligands.length; j++) { if (ligands[j].getSymbol().equals("H")) hc++; } atom.setHydrogenCount(hc); } } boolean change = true; while (change) { change = false; for (int i = 0; i < molecule.getAtomCount(); i++) { Atom atom = molecule.getAtomAt(i); if (atom.flags[FINISHED]) continue; System.out.println("AT....: "+atom.getSymbol()+ "/"+atom.getHydrogenCount()+ "/"+molecule.getBondCount(atom)+ "/"+molecule.getBondOrderSum(atom)+ "/"+atom.getMaxBondOrder()+ "/"+atom.getMaxBondOrderSum()+ "/"+molecule.getMinimumBondOrder(atom)+ "/"+molecule.getHighestCurrentBondOrder(atom) ); } for (int i = 0; i < molecule.getAtomCount(); i++) { Atom atom = molecule.getAtomAt(i); System.out.println("AT: "+atom.getSymbol()+ "/"+atom.getHydrogenCount()+ "/"+molecule.getBondCount(atom)+ "/"+molecule.getBondOrderSum(atom)+ "/"+molecule.getMinimumBondOrder(atom)+ "/"+molecule.getHighestCurrentBondOrder(atom) ); } } for (int i = 0; i < molecule.getAtomCount(); i++) { Atom atom = molecule.getAtomAt(i); // if we don't do this we get Index OOB later atom.flags = null; } } static double getRadius(String elem) { if (elem.equals("B")) return 0.83; if (elem.equals("C")) return 0.77; if (elem.equals("N")) return 0.7; if (elem.equals("O")) return 0.65; if (elem.equals("F")) return 0.60; if (elem.equals("Si")) return 1.1; if (elem.equals("P")) return 1.05; if (elem.equals("S")) return 1.0; if (elem.equals("Cl")) return 0.95; if (elem.equals("H")) return 0.35; return 1.5; } public static void main(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: java uk.ac.cam.ch.wwmm.util.BuildMolecule -TEST [testnumber] -FILE [filename]"); } int i = 0; while (i < args.length) { if (args[i].equals("-TEST")) { i++; if (args[i].equals("1")) { test1(); } else if (args[i].equals("2")) { test2(); } else { System.out.println("Unknown test: "+args[i]); } i++; } else if (args[i].equals("-FILE")) { i++; String file = args[i++]; int idx = file.lastIndexOf("."); if (idx != -1) { String suffix = file.substring(idx+1); if (suffix.equals("xyz")) { try { testXYZ(file); } catch (TestException te) { System.err.println("Broken XYZ file: "+file+" ["+te+"]"); } catch (IOException ioe) { System.err.println("ReadError: "+file+" ["+ioe+"]"); } } else if (suffix.equals("xml")) { try { testCML(file); } catch (TestException te) { System.err.println("Broken CML file: "+file+" ["+te+"]"); } catch (IOException ioe) { System.err.println("ReadError: "+file+" ["+ioe+"]"); } } else { System.out.println("Cannot read file type: "+file); } } } else { System.err.println("Unknown argument: "+args[i]); } } } }; class TestException extends Exception { public TestException() { } public TestException(String s) { super(s); } }; --- NEW FILE: CanonicalizationDir.java --- package uk.ac.cam.ch.wwmm.util; //import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileWriter; import org.w3c.dom.*; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.xml.security.c14n.Canonicalizer; public class CanonicalizationDir { public static void main(String args[]) throws Exception { //Parse the arguments if (args.length == 0) { System.out.println( "Usage: java uk.ac.cam.ch.wwmm.util.CanonicalizationDir -inputDir inputDir -outputDir outputDir"); } int i = 0; String inputDir=""; String outputDir=""; while (i < args.length) { if (false) { } else if (args[i].equals("-inputDir")) { i++; inputDir = args[i++]; } else if (args[i].equals("-outputDir")) { i++; outputDir = args[i++]; } else { System.err.println("Unknown argument: " + args[i]); i++; } } System.out.println(" inputDir = " + inputDir); System.out.println(" outputDir = " + outputDir); String inputFileName = ""; String inputFile=""; String outputFile=""; File dir = new File(inputDir); File testDir; if (dir.isDirectory()) { String[] files = dir.list(); System.out.println("There are " + files.length + " files"); for (int j = 0; j < files.length; j++) { System.out.println("j=" + j); inputFileName = files[j]; testDir = new File(inputDir + "/" + inputFileName); if (!testDir.isDirectory()) { inputFile = inputDir + "/" + inputFileName; outputFile = outputDir + "/" + inputFileName; //Canonicalize the input file org.apache.xml.security.Init.init(); DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(true); dfactory.setValidating(true); DocumentBuilder documentBuilder = dfactory.newDocumentBuilder(); // this is to throw away all validation warnings documentBuilder.setErrorHandler(new org.apache.xml.security.utils.IgnoreAllErrorHandler()); Document doc = documentBuilder.parse(new File(inputFile)); // after playing around, we have our document now Canonicalizer c14n = Canonicalizer.getInstance( "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"); byte outputBytes[] = c14n.canonicalizeSubtree(doc); FileWriter fw = new FileWriter(new File(outputFile)); fw.write(new String(outputBytes)); fw.close(); } else { System.out.println(" Directory: " + inputFileName); } } } } } --- NEW FILE: Canonicalization.java --- package uk.ac.cam.ch.wwmm.util; //import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileWriter; import org.w3c.dom.*; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.xml.security.c14n.Canonicalizer; public class Canonicalization { public static void main(String args[]) throws Exception { //Parse the arguments if (args.length == 0) { System.out.println( "Usage: java uk.ac.cam.ch.wwmm.util.Canonicalization -inputFile inputFile -outputFile outputFile"); exit(0); } int i = 0; String inputFile=""; String outputFile=""; while (i < args.length) { if (false) { exit(0); } else if (args[i].equals("-inputFile")) { i++; inputFile = args[i++]; } else if (args[i].equals("-outputFile")) { i++; outputFile = args[i++]; } else { System.err.println("Unknown argument: " + args[i]); i++; } } System.out.println(" inputFile = " + inputFile); System.out.println(" outputFile = " + outputFile); //Canonicalize the input file org.apache.xml.security.Init.init(); DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(true); dfactory.setValidating(true); DocumentBuilder documentBuilder = dfactory.newDocumentBuilder(); // this is to throw away all validation warnings documentBuilder.setErrorHandler(new org.apache.xml.security.utils.IgnoreAllErrorHandler()); // byte inputBytes[] = input.getBytes(); Document doc = documentBuilder.parse(new File(inputFile)); // after playing around, we have our document now Canonicalizer c14n = Canonicalizer.getInstance( "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"); byte outputBytes[] = c14n.canonicalizeSubtree(doc); // System.out.println(new String(outputBytes)); FileWriter fw = new FileWriter(new File(outputFile)); fw.write(outputBytes); fw.close(); } } --- NEW FILE: CDKConverter.java --- package uk.ac.cam.ch.wwmm.util; import java.io.File; import org.openscience.cdk.applications.FileConvertor; import org.apache.log4j.Logger; import org.apache.log4j.PropertyConfigurator; /** * Description of the Class This class is used to operate the cdk file * converter. Entire directories can be processed, and various chemical file * formats can be processed in batch. (currently mol, pdb work) * *@author Yong Zhang and Juergen Harter *@created 19 February 2003 */ public class CDKConverter { static Logger logger = Logger.getLogger(CDKConverter.class.getName()); /** * The main program for the CDKConverter class * *@param args The command line arguments *@exception Exception Description of the Exception */ public static void main(String[] args) throws Exception { PropertyConfigurator.configure("./build/log4j_CDK.properties"); File testDir; if (args.length == 0) { logger.info( "Usage: java uk.ac.cam.ch.wwmm.util.CDKConverter -inputFormat inputFormat -inputDir inputDir -outputFormat outputFormat -outputDir outputDir"); } int i = 0; String inputFormat = ""; String inputDir = ""; String outputFormat = ""; String outputDir = ""; while (i < args.length) { if (false) { } else if (args[i].equals("-inputFormat")) { i++; inputFormat = args[i++]; } else if (args[i].equals("-inputDir")) { i++; inputDir = args[i++]; } else if (args[i].equals("-outputFormat")) { i++; outputFormat = args[i++]; } else if (args[i].equals("-outputDir")) { i++; outputDir = args[i++]; } else { System.err.println("Unknown argument: " + args[i]); } } String inputFileName = ""; String outputFileName = ""; int loc = 0; File dir = new File(inputDir); logger.info(" inputFormat = " + inputFormat); logger.info(" inputDir = " + inputDir); logger.info(" outputFormat = " + outputFormat); logger.info(" outputDir = " + outputDir); FileConvertor fc = new FileConvertor(inputFormat, outputFormat); if (dir.isDirectory()) { String[] files = dir.list(); logger.info(" File numbers = " + files.length); for (int j = 0; j < files.length; j++) { inputFileName = files[j]; logger.info(inputFileName); testDir = new File(inputDir + "/" + inputFileName); if (!testDir.isDirectory()) { loc = inputFileName.indexOf("."); outputFileName = inputFileName.substring(0, loc) + "." + outputFormat; fc.convert(inputDir + "/" + inputFileName, outputDir + "/" + outputFileName); } else { logger.info(" Directory: " + inputFileName); } } } } } --- NEW FILE: Converter.java --- package uk.ac.cam.ch.wwmm.util; import java.awt.BorderLayout; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintStream; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.util.StringTokenizer; import java.util.Vector; import javax.swing.JFrame; //import org.openscience.jmol.PublicJmol; import javax.vecmath.Point2d; import javax.vecmath.Point3d; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory; import org.openscience.cdk.Atom; import org.openscience.cdk.Bond; import org.openscience.cdk.ChemFile; import org.openscience.cdk.ChemModel; import org.openscience.cdk.ChemObject; import org.openscience.cdk.ChemSequence; import org.openscience.cdk.Molecule; import org.openscience.cdk.SetOfMolecules; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.exception.UnsupportedChemObjectException; import org.openscience.cdk.io.CMLReader; import org.openscience.cdk.io.CMLWriter; //import org.openscience.cdk.libio.jmol.Convertor; import org.openscience.cdk.io.ChemObjectWriter; import org.openscience.cdk.io.IChIReader; import org.openscience.cdk.io.MDLReader; import org.openscience.cdk.io.MDLWriter; import org.openscience.cdk.layout.StructureDiagramGenerator; import org.openscience.cdk.renderer.MoleculeViewer2D; import org.openscience.cdk.renderer.MoleculeViewer3D; import org.openscience.cdk.tools.SaturationChecker; import org.w3c.dom.Document; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.NodeList; import uk.co.demon.ursus.dom.PMRDOMUtil; /** * license stuff goes here - probably Artistic - Tests building of molecule * from scratch (atom by atom) * *@author P.Murray-Rust, 2002 *@created 21 February 2003 */ public class Converter { /* * -- * public static void view3D(Molecule m) { * try * { * JFrame frame = new JFrame("CDK Molecule Viewer"); * frame.getContentPane().setLayout(new BorderLayout()); * org.openscience.jmol.PublicJmol jmol = org.openscience.jmol.PublicJmol.getJmol(frame); * jmol.showChemFrame(Convertor.convert(m)); * frame.getContentPane().add(jmol, BorderLayout.CENTER); * } catch (Exception e) * { * e.printStackTrace(); * } * } * -- */ /** Description of the Field */ public final static double TOLERANCE = 0.2; /** constructor */ public Converter() { } /** * Description of the Method * *@param s Description of the Parameter *@param b Description of the Parameter */ static void debugBond(PrintStream s, Bond b) { s.println("Bond info======"); Atom[] atoms = b.getAtoms(); s.println("Bond 2D centre: " + b.get2DCenter()); s.println(b.getAtomAt(0).getSymbol() + "-" + b.getAtomAt(0).getSymbol()); try { s.println("Bond length: " + b.getLength()); } catch (Throwable e) { System.err.println("No bond length"); } // s.println("Bond order: "+b.getOrder()); // s.println("Bond stereo: "+b.getStereo()); } /** * Description of the Method * *@param molecule Description of the Parameter */ static void debug(Molecule molecule) { System.out.println("=== molecule ==="); // Returns the number of Atoms in this Container. int nAtoms = molecule.getAtomCount(); System.out.println("The molecule has " + nAtoms + " atoms"); java.util.Enumeration atoms = molecule.atoms(); while (atoms.hasMoreElements()) { Atom atom = (Atom) atoms.nextElement(); System.out.println("Atom: " + atom); } // Returns the array of bonds of this AtomContainer. System.out.println(" ---Bonds---"); Bond[] bonds = molecule.getBonds(); if (bonds == null || bonds.length == 0) { System.out.println("NO BONDS"); } else { // Returns the number of Bonds in this Container. int nBonds = molecule.getBondCount(); System.out.println("The molecule has " + nBonds + " bonds"); for (int i = 0; i < nBonds; i++) { Bond bond = bonds[i]; debugBond(System.out, bond); } } } /** * Gets the molecule attribute of the Converter class * *@param chemFile Description of the Parameter *@return The molecule value */ public static Molecule getMolecule(ChemFile chemFile) { if (chemFile == null) { return null; } for (int i = 0; i < chemFile.getChemSequenceCount(); i++) { ChemModel[] cms = chemFile.getChemSequence(i).getChemModels(); for (int j = 0; j < cms.length; j++) { if (cms[j] == null) { continue; } SetOfMolecules som = cms[j].getSetOfMolecules(); for (int k = 0; k < som.getMoleculeCount(); k++) { Molecule mol = som.getMolecule(k); if (mol != null) { return mol; } } } } return null; } /** * Description of the Method * *@param molecule Description of the Parameter */ static void joinTheDots(Molecule molecule) { int natoms = molecule.getAtomCount(); Atom[] atoms = molecule.getAtoms(); for (int i = 1; i < natoms; i++) { Atom ai = atoms[i]; String elemi = ai.getSymbol(); double radi = getRadius(elemi); Point3d pi = ai.getPoint3D(); for (int j = 0; j < i; j++) { Atom aj = atoms[j]; String elemj = aj.getSymbol(); double radj = getRadius(elemj); Point3d pj = aj.getPoint3D(); double dist = pi.distance(pj); if (dist < radi + radj + TOLERANCE) { double paulingBO = getPaulingBO(dist, radi, radj); double cdkBO = 1.0; if (paulingBO < 1.3) { cdkBO = 1.0; } else if (paulingBO < 1.75) { cdkBO = 1.5; } else if (paulingBO < 2.5) { cdkBO = 2.0; } else if (paulingBO < 5) { cdkBO = 3.0; } Bond b = new Bond(ai, aj, cdkBO); molecule.addBond(b); } } } } /** * Gets the paulingBO attribute of the Converter class * *@param length Description of the Parameter *@param covr0 Description of the Parameter *@param covr1 Description of the Parameter *@return The paulingBO value */ public static double getPaulingBO(double length, double covr0, double covr1) { return Math.exp(2.303 * (-length + covr0 + covr1) / 0.7); } /** * Gets the radius attribute of the Converter class * *@param elem Description of the Parameter *@return The radius value */ static double getRadius(String elem) { if (elem.equals("B")) { return 0.83; } if (elem.equals("C")) { return 0.77; } if (elem.equals("N")) { return 0.7; } if (elem.equals("O")) { return 0.65; } if (elem.equals("F")) { return 0.60; } if (elem.equals("Si")) { return 1.1; } if (elem.equals("P")) { return 1.05; } if (elem.equals("S")) { return 1.0; } if (elem.equals("Cl")) { return 0.95; } if (elem.equals("H")) { return 0.35; } return 1.5; } /** * Description of the Method * *@param filename Description of the Parameter *@return Description of the Return Value *@exception IOException Description of the Exception *@exception CDKException Description of the Exception */ public static Molecule[] readIChI(String filename) throws IOException, CDKException { Document doc = null; try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File(filename)); } catch (Exception e) { throw new CDKException("Cannot read XML file: " + e); } NodeList molList = doc.getElementsByTagName("structure"); Molecule[] mols = new Molecule[molList.getLength()]; System.out.println("Reading IChI: " + mols.length); for (int i = 0; i < molList.getLength(); i++) { System.out.println("IChI" + (i + 1)); try { mols[i] = readIChI((Element) molList.item(i)); } catch (ArrayIndexOutOfBoundsException aoobe) { System.out.println("Array OOB: " + aoobe + " in mol: " + (i + 1)); // aoobe.printStackTrace(); } catch (NumberFormatException nfe) { System.out.println("NFE: " + nfe + " in mol: " + (i + 1)); } } return mols; } /** * Description of the Method * *@param sdfReader Description of the Parameter *@return Description of the Return Value *@exception IOException Description of the Exception *@exception CDKException Description of the Exception */ public static Molecule[] readSDF(Reader sdfReader) throws IOException, CDKException { BufferedReader br = new BufferedReader(sdfReader); String molContent = ""; Vector molVector = new Vector(); boolean mend = false; while (true) { String s = br.readLine(); if (s == null) { break; } if (s.equals("$$$$")) { MDLReader mr = new MDLReader(new StringReader(molContent)); ChemFile molFile = (ChemFile) mr.read((ChemObject) new ChemFile()); Molecule mol = Converter.getMolecule(molFile); if (mol == null) { throw new CDKException("Bad molecule: " + molContent); } molVector.addElement(mol); molContent = ""; mend = false; } else if (s.equals("M END")) { molContent += s + "\n"; mend = true; } else { if (!mend) { molContent += s + "\n"; } } } Molecule[] mols = new Molecule[molVector.size()]; for (int i = 0; i < molVector.size(); i++) { mols[i] = (Molecule) molVector.elementAt(i); } return mols; } /** * Description of the Method * *@param filename Description of the Parameter *@param suffixin Description of the Parameter *@return Description of the Return Value *@exception IOException Description of the Exception *@exception CDKException Description of the Exception */ public static Molecule[] readMolecules(String filename, String suffixin) throws IOException, CDKException { File file = new File(filename); FileReader fr = new FileReader(file); if (suffixin.equals("sdf")) { return Converter.readSDF(new FileReader(new File(filename))); } else if (suffixin.equals("mol")) { MDLReader mr = new MDLReader(fr); ChemFile cf = (ChemFile) mr.read((ChemObject) new ChemFile()); Molecule[] mols = new Molecule[1]; mols[0] = Converter.getMolecule(cf); return mols; } else if (suffixin.equals("ichi")) { Molecule[] mols = readIChI(filename); return mols; } else if (suffixin.equals("xml") || suffixin.equals("cml")) { Document doc = null; try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(file); } catch (Exception e) { throw new CDKException("Cannot read XML file: " + e); } NodeList molList = doc.getElementsByTagName("molecule"); Molecule[] mols = new Molecule[molList.getLength()]; for (int i = 0; i < molList.getLength(); i++) { mols[i] = readCML((Element) molList.item(i)); } return mols; } else { throw new CDKException("Cannot read format: " + suffixin); } } /** * Description of the Method * *@param molecule Description of the Parameter *@return Description of the Return Value *@exception IOException Description of the Exception *@exception UnsupportedChemObjectException Description of the Exception */ public static Molecule readCML(Element molecule) throws IOException, UnsupportedChemObjectException { StringWriter sw = new StringWriter(); PMRDOMUtil.outputEventStream(molecule, sw, PMRDOMUtil.PRETTY, 0); sw.close(); StringReader sr = new StringReader(sw.toString()); CMLReader cr = new CMLReader(sr); ChemFile cf = (ChemFile) cr.read((ChemObject) new ChemFile()); return Converter.getMolecule(cf); } /** * Description of the Method * *@param molecule Description of the Parameter *@return Description of the Return Value *@exception IOException Description of the Exception *@exception UnsupportedChemObjectException Description of the Exception */ public static Molecule readIChI(Element molecule) throws IOException, UnsupportedChemObjectException { StringWriter sw = new StringWriter(); PMRDOMUtil.outputEventStream(molecule, sw, PMRDOMUtil.PRETTY, 0); sw.close(); StringReader sr = new StringReader(sw.toString()); IChIReader ir = new IChIReader(sr); ChemFile cf = (ChemFile) ir.read((ChemObject) new ChemFile()); return Converter.getMolecule(cf); } /** * Description of the Method * *@param filename Description of the Parameter *@param number Description of the Parameter *@param mol Description of the Parameter *@param suffixout Description of the Parameter *@exception IOException Description of the Exception *@exception CDKException Description of the Exception */ public static void writeMolecule(String filename, int number, Molecule mol, String suffixout) throws IOException, CDKException { if (filename == null || suffixout == null) { System.out.println("Null output file"); return; } if (mol == null) { System.out.println("Null molecule skipped"); return; } filename = filename.substring(0, filename.length() - suffixout.length() - 1) + "_" + number + "." + suffixout; if (suffixout.equals("xml") || suffixout.equals("cml")) { System.out.println("Writing to file: " + filename); FileWriter fw = new FileWriter(filename); ChemObjectWriter cow = new CMLWriter(fw); try { if (mol != null) { cow.write(mol); } } catch (NullPointerException npe) { System.out.println("Null pointer : " + filename); } fw.close(); } else if (suffixout.equals("mol")) { FileWriter fw = new FileWriter(filename); ChemObjectWriter cow = new MDLWriter(fw); if (mol != null) { cow.write(mol); } fw.close(); } else { throw new CDKException("Cannot write format: " + suffixout); } } /** * Description of the Method * *@param filename Description of the Parameter *@param mols Description of the Parameter *@param suffixout Description of the Parameter *@exception IOException Description of the Exception *@exception CDKException Description of the Exception */ public static void writeMolecules(String filename, Molecule[] mols, String suffixout) throws IOException, CDKException { if (suffixout.equals("xml") || suffixout.equals("cml")) { FileWriter fw = new FileWriter(filename); fw.write("<moleculeList>\n"); ChemObjectWriter cow = new CMLWriter(fw); for (int i = 0; i < mols.length; i++) { System.out.print(" " + i); cow.write(mols[i]); } fw.write("</moleculeList>\n"); fw.close(); } else if (suffixout.equals("sdf")) { FileWriter fw = new FileWriter(filename); ChemObjectWriter cow = new MDLWriter(fw); for (int i = 0; i < mols.length; i++) { cow.write(mols[i]); fw.write("$$$$\n"); } fw.close(); } else { throw new CDKException("Cannot write format: " + suffixout); } } /** * The main program for the Converter class * *@param args The command line arguments *@exception Exception Description of the Exception */ public static void main(String[] args) throws Exception { // try { if (args.length == 0) { System.out.println("Usage: java uk.ac.cam.ch.wwmm.util.Converter -TEST [testnumber] -FILE [filename]"); } int i = 0; String filein = null; String suffixin = null; String fileout = null; String suffixout = null; boolean concat = false; while (i < args.length) { if (false) { } else if (args[i].equals("-CONCAT")) { concat = true; } else if (args[i].equals("-IN")) { i++; filein = args[i++]; int idx = filein.lastIndexOf("."); if (idx != -1) { suffixin = filein.substring(idx + 1); } } else if (args[i].equals("-OUT")) { i++; fileout = args[i++]; int idx = fileout.lastIndexOf("."); if (idx != -1) { suffixout = fileout.substring(idx + 1); } } else { System.err.println("Unknown argument: " + args[i]); } } if ("sdf".equals(suffixout)) { concat = true; } if (filein != null) { Molecule[] mols = readMolecules(filein, suffixin); if (mols == null || mols.length == 0) { System.err.println("NO MOLECULES READ"); } else { if (!concat) { for (int j = 0; j < mols.length; j++) { writeMolecule(fileout, j + 1, mols[j], suffixout); } } else { writeMolecules(fileout, mols, suffixout); } } } // } catch (NullPointerException npe) { // npe.printStackTrace(); // } } } --- NEW FILE: FileMerger2.java --- package uk.ac.cam.ch.wwmm.util; import java.io.*; /** * Description of the Class * *@author Administrator *@created 21 February 2003 */ public class FileMerger { /** * Description of the Method * *@param filename Description of the Parameter *@return Description of the Return Value *@exception Exception Description of the Exception *@exception IOException Description of the Exception */ private static String findCASNCI(String filename) throws Exception, IOException { BufferedReader in; String inLine; String casno; String ncino; in = new BufferedReader(new FileReader(new File(filename))); inLine = in.readLine(); //NCI number is the first of the mol file ncino = inLine; //System.out.println(inLine); //Find the CAS no. while ((inLine != null) && (inLine.indexOf("<CAS_RN>") == -1)) { inLine = in.readLine(); } //Read the CAS no casno = in.readLine(); in.close(); return "<name convention=\"NCI\">" + ncino.trim() + "</name>\n" + "<name convention=\"CAS\">" + casno.trim() + "</name>\n"; } /** * The main program for the FileMerger class * *@param args The command line arguments *@exception Exception Description of the Exception *@exception IOException Description of the Exception */ public static void main(String[] args) throws Exception, IOException { // defines in, out as buffered readers BufferedReader inMol; BufferedReader inCML; BufferedWriter outCML; String inLineMol; String inLineCML; String casNCI; //String[] filesMol="", filesCML=""; // if the program is used incorrectly this will inform the user on how it should be done if (args.length == 0) { System.out.println("Usage: FileMerger readdir1 readdir2 writefile"); System.exit(0); } //open the input directories File dirMol = new File(args[0]); String[] filesMol = dirMol.list(); // if (dirMol.isDirectory()) { // String[] filesMol = dirMol.list(); // } else { // System.out.println("readdir1 is not a directory!"); // System.exit(0); //} File dirCML = new File(args[1]); //if (dirCML.isDirectory()) { String[] filesCML = dirCML.list(); // } else { // System.out.println("readdir2 is not a directory!"); // System.exit(0); //} //Open the write file outCML = new BufferedWriter(new FileWriter(args[2])); outCML.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"); outCML.write("<entryList>\n"); for (int i = 0; i < filesMol.length; i++) { //open the input file one by one in two directories inCML = new BufferedReader(new FileReader(new File(args[1] + "/" + filesCML[i]))); //discard the first two lines of the CML file, such as //<?xml version="1.0"?> //<list title="sequence"> inLineCML = inCML.readLine(); inLineCML = inCML.readLine(); inLineCML = inCML.readLine(); //Find the CAS no in the Mol file outCML.write("<entry id=\"e" + Integer.toString(i + 1) + "\">\n"); casNCI = findCASNCI(args[0] + "/" + filesMol[i]); outCML.write(casNCI); //Write to the end of the CML file, and write out to the big XML file //Discard the lines after </molecule>, such as //</list> while ((inLineCML != null) && (inLineCML.indexOf("</molecule>") == -1)) { outCML.write(inLineCML + "\n"); inLineCML = inCML.readLine(); } //Write the end tag of entry outCML.write("</molecule>\n"); outCML.write("</entry>\n"); inCML.close(); } outCML.write("</entryList>\n"); outCML.write("</xml>"); outCML.flush(); outCML.close(); } } --- NEW FILE: FileMerger.java --- package uk.ac.cam.ch.wwmm.util; /** * Merges seperate .cml files (was employed for NCI_diversity set) into one big * .xml file, and while merging, it compares each existing .cml file to the * corresponding .mol file and takes the CAS number from there, adding it to * the .cml file at the correct position. * *@author Yong Zhang 2002 *@author Juergen Harter 2002 License: artistic license */ import java.io.*; /** * Description of the Class * *@author jharter *@created 21 February 2003 */ public class FileMerger { /** * Description of the Method * *@param filename Description of the Parameter *@return Description of the Return Value *@exception Exception Description of the Exception *@exception IOException Description of the Exception */ private static String findCASNCI(String filename) throws Exception, IOException { BufferedReader in; String inLine; String casno; String ncino; in = new BufferedReader(new FileReader(new File(filename))); inLine = in.readLine(); //NCI number is the first of the mol file ncino = inLine; //System.out.println(inLine); //Find the CAS no. while ((inLine != null) && (inLine.indexOf("<CAS_RN>") == -1)) { inLine = in.readLine(); } //Read the CAS no casno = in.readLine(); in.close(); return "<name convention=\"NCI\">" + ncino.trim() + "</name>\n" + "<name convention=\"CAS\">" + casno.trim() + "</name>\n"; } /** * The main program for the FileMerger class * *@param args The command line arguments *@exception Exception Description of the Exception *@exception IOException Description of the Exception */ public static void main(String[] args) throws Exception, IOException { // defines in, out as buffered readers BufferedReader inMol; // defines in, out as buffered readers BufferedReader inCML; BufferedWriter outCML; String inLineMol; String inLineCML; String casNCI; //String[] filesMol="", filesCML=""; // if the program is used incorrectly this will inform the user on how it should be done if (args.length == 0) { System.out.println("Usage: FileMerger readdir1 readdir2 writefile"); System.exit(0); } //open the input directories File dirMol = new File(args[0]); String[] filesMol = dirMol.list(); // if (dirMol.isDirectory()) { // String[] filesMol = dirMol.list(); // } else { // System.out.println("readdir1 is not a directory!"); // System.exit(0); //} File dirCML = new File(args[1]); //if (dirCML.isDirectory()) { String[] filesCML = dirCML.list(); // } else { // System.out.println("readdir2 is not a directory!"); // System.exit(0); //} //Open the write file outCML = new BufferedWriter(new FileWriter(args[2])); outCML.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"); outCML.write("<entryList>\n"); for (int i = 0; i < filesMol.length; i++) { //open the input file one by one in two directories inCML = new BufferedReader(new FileReader(new File(args[1] + "/" + filesCML[i]))); //discard the first two lines of the CML file, such as //<?xml version="1.0"?> //<list title="sequence"> inLineCML = inCML.readLine(); inLineCML = inCML.readLine(); inLineCML = inCML.readLine(); //Find the CAS no in the Mol file outCML.write("<entry id=\"e" + Integer.toString(i + 1) + "\">\n"); casNCI = findCASNCI(args[0] + "/" + filesMol[i]); outCML.write(casNCI); //Write to the end of the CML file, and write out to the big XML file //Discard the lines after </molecule>, such as //</list> while ((inLineCML != null) && (inLineCML.indexOf("</molecule>") == -1)) { outCML.write(inLineCML + "\n"); inLineCML = inCML.readLine(); } //Write the end tag of entry outCML.write("</molecule>\n"); outCML.write("</entry>\n"); inCML.close(); } outCML.write("</entryList>\n"); outCML.write("</xml>"); outCML.flush(); outCML.close(); } } --- NEW FILE: FileSplitter.java --- package uk.ac.cam.ch.wwmm.util; /** * Uses split to break up an input file around the regular expression which is * user defined. * *@author Joe Townsend 2002 *@adapted from PerkinReader *@author J.M.Goodman 2002 @ modified by Yong Zhang and Juergen Harter * License: artistic license This is a test file to experiment with regular * expressions and to practise using file reading and writing techniques */ /** modified to split a .bin file to read in and split mol files */ import java.io.*; /** * Description of the Class * *@author Administrator *@created 21 February 2003 */ public class FileSplitter { /** * The main program for the FileSplitter class * *@param args The command line arguments *@exception Exception Description of the Exception *@exception IOException Description of the Exception */ public static void main(String[] args) throws Exception, IOException { // defines in, out and ink as buffered readers BufferedReader in; BufferedWriter out; // if the program is used incorrectly this will inform the user on how it should be done if (args.length == 0) { System.out.println("Usage: FileSplitter readfile writedir filenameprefix"); System.exit(0); } //open the input file in = new BufferedReader(new FileReader(new File(args[0]))); int i = 0; String inLine; inLine = in.readLine(); // performs operations until end of file is reached while (inLine != null) { // split input with the pattern System.out.println(inLine); i++; out = new BufferedWriter(new FileWriter(args[1] + "/" + args[2] + Integer.toString(i) + ".mol")); //inLine = in.readLine(); //System.out.println(inLine); while ((inLine != null) && (inLine.indexOf("$$$$") == -1)) { out.write(inLine); out.newLine(); inLine = in.readLine(); } inLine = in.readLine(); out.flush(); out.close(); } in.close(); } } --- NEW FILE: Fingerprint.java --- package uk.ac.cam.ch.wwmm.util; import java.io.FileWriter; import java.io.IOException; import java.util.BitSet; import javax.xml.parsers.DocumentBuilderFactory; import org.openscience.cdk.Molecule; import org.openscience.cdk.exception.CDKException; import org.openscience.cdk.fingerprint.Fingerprinter; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.xmlcml.cml.CMLException; import uk.co.demon.ursus.dom.PMRDOMUtil; /** * Description of the Class * *@author Administrator *@created 21 February 2003 */ public class Fingerprint { /** Description of the Field */ protected Molecule molecule; /** Description of the Field */ protected BitSet bs; /** Description of the Field */ protected long uid; /** Constructor for the Fingerprint object */ public Fingerprint() { } /** * Sets the molecule attribute of the Fingerprint object * *@param mol The new molecule value *@exception CDKException Description of the Exception */ public void setMolecule(Molecule mol) throws CDKException { this.molecule = mol; bs = Fingerprinter.getFingerprint(mol); } /** * Gets the uID attribute of the Fingerprint object * *@return The uID value */ public String getUID() { return Integer.toHexString(bs.hashCode()); } /** * Gets the bitSet attribute of the Fingerprint object * *@return The bitSet value */ public BitSet getBitSet() { return bs; } /** * Description of the Method * *@return Description of the Return Value */ public String toString() { return "bs: " + bs + "uid: " + this.getUID(); } /** * Description of the Method * *@param doc Description of the Parameter *@return Description of the Return Value */ public Element createElement(Document doc) { Element fp = doc.createElement("cdkFingerprint"); fp.setAttribute("bitset", bs.toString()); fp.setAttribute("uid", "" + this.getUID()); return fp; } /** * Description of the Method * *@param infile Description of the Parameter *@return Description of the Return Value */ public static Molecule readCDK(String infile) { Molecule mol = null; // code goes here return mol; } /** * A unit test for JUnit * *@param mol Description of the Parameter *@param outfile Description of the Parameter *@exception Exception Description of the Exception */ public static void test(Molecule mol, String outfile) throws Exception { if (mol == null) { throw new CMLException("Null molecule in Fingerprint test"); } Fingerprint fp = new Fingerprint(); fp.setMolecule(mol); if (outfile.equals("")) { System.out.println("Fingerprint: " + fp); } else if (outfile.endsWith(".xml")) { Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); Element top = doc.createElement("top"); doc.appendChild(top); Element fpElem = fp.createElement(doc); top.appendChild(fpElem); FileWriter fw = new FileWriter(outfile); PMRDOMUtil.outputEventStream(doc.getDocumentElement(), fw, PMRDOMUtil.PRETTY, 0); fw.close(); } } /** * adds FP to Document (e.g. for concatenation). If empty element, creates * toplevel <fpList> * *@param doc The feature to be added to the ToDocument attribute */ public void addToDocument(Document doc) { Element fp = this.createElement(doc); Element top = doc.getDocumentElement(); if (top == null) { top = doc.createElement("fpList"); doc.appendChild(top); } top.appendChild(fp); } /** * Description of the Method * *@param cmlFile Description of the Parameter *@return Description of the Return Value *@exception Exception Description of the Exception */ public static Fingerprint[] parse(String cmlFile) throws Exception { Molecule[] mols = MolUtil.readCMLEntries(cmlFile); Fingerprint[] fps = new Fingerprint[mols.length]; for (int i = 0; i < mols.length; i++) { fps[i] = new Fingerprint(); fps[i].setMolecule(mols[i]); } return fps; } /** * The main program for the Fingerprint class * *@param args The command line arguments *@exception Exception Description of the Exception */ public static void main(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: uk.ac.cam.ch.wwmm.util.Fingerprint -IN file -OUT file"); } String infile = ""; String outfile = ""; int i = 0; while (i < args.length) { if (args[i].equalsIgnoreCase("-in")) { infile = args[++i]; i++; } else if (args[i].equalsIgnoreCase("-out")) { outfile = args[++i]; i++; } else { System.err.println("Bad arg: " + args[i++]); } } if (!infile.equals("")) { System.out.println("Input from: " + infile); if (infile.endsWith(".xml") || infile.endsWith(".cml")) { Molecule[] mols = MolUtil.readCMLEntries(infile); System.out.println("MOLS: " + mols.length); Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); for (i = 0; i < mols.length; i++) { Fingerprint fp = new Fingerprint(); fp.setMolecule(mols[i]); fp.addToDocument(doc); } if (!outfile.equals("")) { FileWriter fw = new FileWriter(outfile); PMRDOMUtil.outputEventStream(doc.getDocumentElement(), fw, PMRDOMUtil.PRETTY, 0); fw.close(); } } else if (infile.endsWith(".mol")) { Molecule mol = MolUtil.readMDL(infile); test(mol, outfile); } else { throw new Exception("Cannot read files of type: " + infile); } } } } --- NEW FILE: FormulaGenerator.java --- package uk.ac.cam.ch.wwmm.util; import java.io.FileWriter; import java.io.IOException; import java.util.BitSet; import javax.xml.parsers.DocumentBuilderFactory; //import org.openscience.cdk.tools.MFAnalyser; import org.openscience.cdk.Molecule; import org.openscience.cdk.exception.CDKException; import org.w3c.dom.Document; import org.w3c.dom.Element; //import org.openscience.cdk.fingerprint.Fingerprinter; import org.xmlcml.cml.CMLException; import uk.ac.cam.ch.wwmm.util.MolUtil; import uk.co.demon.ursus.dom.PMRDOMUtil; /** * Generates CML2-compliant formula from a molecule (CDK or (future) JUMBO) * *@author YY *@author JH *@created 21 February 2003 */ public class FormulaGenerator { /** Description of the Field */ protected Molecule molecule; /** Description of the Field */ protected String formula; /** Description of the Field */ protected long uid; /** Creates a new FormulaGenerator object. */ public FormulaGenerator() { } /** * set Molecule for FG must be called before use * *@param mol in CDK representation *@throws CDKException invalid molecule */ public void setMolecule(Molecule mol) throws CDKException { if (mol == null) { throw new CDKException("null molecule"); } int num = 0; int flag = 0; int numOfAtom = mol.getAtomCount(); StringBuffer fm = new StringBuffer(); String[] elements = new String[numOfAtom]; int[] elementCount = new int[numOfAtom]; for (int i = 0; i < numOfAtom; i++) { flag = 0; String symbol = mol.getAtomAt(i).getSymbol(); if (num == 0) { elements[num] = symbol; elementCount[num]++; num++; } else { for (int j = 0; j < num; j++) { if (elements[j].equals(symbol)) { elementCount[j]++; flag = 1; } } if (flag == 0) { elements[num] = symbol; elementCount[num]++; num++; } } } for (int i = 0; i < num; i++) { if (i > 0) { fm.append(" "); } fm.append(elements[i] + " " + elementCount[i]); } formula = fm.toString(); System.out.println("formula=" + formula); } /** * returns string as * *@return "formula"+formula */ public String toString() { return "formula: " + formula; } /** * DOCUMENT ME! * *@param doc DOCUMENT ME! *@return DOCUMENT ME! */ public Element createElement(Document doc) { Element fg = doc.createElement("cdkFormulaGenerator"); fg.setAttribute("formula", formula.toString()); return fg; } /** * DOCUMENT ME! * *@param infile DOCUMENT ME! *@return DOCUMENT ME! *@exception Exception Description of the Exception */ public static Molecule readCDK(String infile) throws Exception { Molecule mol = null; if (true) { throw new Exception("readCDK not yet implemented"); } // code goes here return mol; } /** * tests FG on a molecule and optionally outputs fomula to file * *@param mol *@param outfile *@throws Exception (CDKException, IOException, * ParserConfigurationException...) */ public static void test(Molecule mol, String outfile) throws Exception { if (mol == null) { throw new CMLException("Null molecule in FormulaGenerator test"); } FormulaGenerator fg = new FormulaGenerator(); fg.setMolecule(mol); ... [truncated message content] |