From: <eg...@us...> - 2008-05-07 17:28:26
|
Revision: 10916 http://cdk.svn.sourceforge.net/cdk/?rev=10916&view=rev Author: egonw Date: 2008-05-07 09:50:41 -0700 (Wed, 07 May 2008) Log Message: ----------- Added a PubChem Substance XML reader (yes, PubChem SubstanceS XML is basically what SDF is to MDL molfile) Modified Paths: -------------- cdk/trunk/src/main/org/openscience/cdk/io/pubchemxml/PubChemXMLHelper.java Added Paths: ----------- cdk/trunk/src/data/asn/pubchem/sid577309.xml cdk/trunk/src/main/org/openscience/cdk/io/PCSubstanceXMLReader.java cdk/trunk/src/main/org/openscience/cdk/io/formats/PubChemSubstanceXMLFormat.java cdk/trunk/src/test/org/openscience/cdk/io/PCSubstanceXMLReaderTest.java Added: cdk/trunk/src/data/asn/pubchem/sid577309.xml =================================================================== --- cdk/trunk/src/data/asn/pubchem/sid577309.xml (rev 0) +++ cdk/trunk/src/data/asn/pubchem/sid577309.xml 2008-05-07 16:50:41 UTC (rev 10916) @@ -0,0 +1,257 @@ +<?xml version="1.0"?> +<PC-Substance + xmlns="http://www.ncbi.nlm.nih.gov" + xmlns:xs="http://www.w3.org/2001/XMLSchema-instance" + xs:schemaLocation="http://www.ncbi.nlm.nih.gov ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem.xsd" +> + <PC-Substance_sid> + <PC-ID> + <PC-ID_id>577309</PC-ID_id> + <PC-ID_version>2</PC-ID_version> + </PC-ID> + </PC-Substance_sid> + <PC-Substance_source> + <PC-Source> + <PC-Source_db> + <PC-DBTracking> + <PC-DBTracking_name>DTP/NCI</PC-DBTracking_name> + <PC-DBTracking_source-id> + <Object-id> + <Object-id_str>371097</Object-id_str> + </Object-id> + </PC-DBTracking_source-id> + </PC-DBTracking> + </PC-Source_db> + </PC-Source> + </PC-Substance_source> + <PC-Substance_synonyms> + <PC-Substance_synonyms_E>NSC371097</PC-Substance_synonyms_E> + </PC-Substance_synonyms> + <PC-Substance_comment> + <PC-Substance_comment_E>DTP/NCI from SANSS. 
Release-June 2007.</PC-Substance_comment_E> + <PC-Substance_comment_E>Structure Evaluation:No Comparision - Unparameterized Atom - Ge.</PC-Substance_comment_E> + <PC-Substance_comment_E>Deposition record created from database webdb on host dtpiv1.ncifcrf.gov on Feb 22, 2008</PC-Substance_comment_E> + </PC-Substance_comment> + <PC-Substance_xref> + <PC-XRefData> + <PC-XRefData_regid>371097</PC-XRefData_regid> + </PC-XRefData> + <PC-XRefData> + <PC-XRefData_dburl>http://dtp.nci.nih.gov/</PC-XRefData_dburl> + </PC-XRefData> + <PC-XRefData> + <PC-XRefData_sburl>http://dtp.nci.nih.gov/dtpstandard/servlet/dwindex?searchtype=NSC&amp;outputformat=html&amp;searchlist=371097</PC-XRefData_sburl> + </PC-XRefData> + </PC-Substance_xref> + <PC-Substance_compound> + <PC-Compounds> + <PC-Compound> + <PC-Compound_id> + <PC-CompoundType> + <PC-CompoundType_type value="deposited">0</PC-CompoundType_type> + </PC-CompoundType> + </PC-Compound_id> + <PC-Compound_atoms> + <PC-Atoms> + <PC-Atoms_aid> + <PC-Atoms_aid_E>1</PC-Atoms_aid_E> + <PC-Atoms_aid_E>2</PC-Atoms_aid_E> + <PC-Atoms_aid_E>3</PC-Atoms_aid_E> + <PC-Atoms_aid_E>4</PC-Atoms_aid_E> + <PC-Atoms_aid_E>5</PC-Atoms_aid_E> + <PC-Atoms_aid_E>6</PC-Atoms_aid_E> + <PC-Atoms_aid_E>7</PC-Atoms_aid_E> + <PC-Atoms_aid_E>8</PC-Atoms_aid_E> + <PC-Atoms_aid_E>9</PC-Atoms_aid_E> + <PC-Atoms_aid_E>10</PC-Atoms_aid_E> + <PC-Atoms_aid_E>11</PC-Atoms_aid_E> + <PC-Atoms_aid_E>12</PC-Atoms_aid_E> + <PC-Atoms_aid_E>13</PC-Atoms_aid_E> + <PC-Atoms_aid_E>14</PC-Atoms_aid_E> + <PC-Atoms_aid_E>15</PC-Atoms_aid_E> + <PC-Atoms_aid_E>16</PC-Atoms_aid_E> + <PC-Atoms_aid_E>17</PC-Atoms_aid_E> + <PC-Atoms_aid_E>18</PC-Atoms_aid_E> + <PC-Atoms_aid_E>19</PC-Atoms_aid_E> + </PC-Atoms_aid> + <PC-Atoms_element> + <PC-Element value="a">255</PC-Element> + <PC-Element value="a">255</PC-Element> + <PC-Element value="ge">32</PC-Element> + <PC-Element value="ge">32</PC-Element> + <PC-Element value="o">8</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element
value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + <PC-Element value="c">6</PC-Element> + </PC-Atoms_element> + <PC-Atoms_label> + <PC-AtomString> + <PC-AtomString_aid>1</PC-AtomString_aid> + <PC-AtomString_value>R</PC-AtomString_value> + </PC-AtomString> + <PC-AtomString> + <PC-AtomString_aid>2</PC-AtomString_aid> + <PC-AtomString_value>R</PC-AtomString_value> + </PC-AtomString> + </PC-Atoms_label> + </PC-Atoms> + </PC-Compound_atoms> + <PC-Compound_bonds> + <PC-Bonds> + <PC-Bonds_aid1> + <PC-Bonds_aid1_E>1</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>2</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>3</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>3</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>3</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>3</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>4</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>4</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>4</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>6</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>6</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>7</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>7</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>8</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>9</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>12</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>13</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>14</PC-Bonds_aid1_E> + <PC-Bonds_aid1_E>15</PC-Bonds_aid1_E> + </PC-Bonds_aid1> + <PC-Bonds_aid2> + <PC-Bonds_aid2_E>4</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>5</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>5</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>6</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>12</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>13</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>7</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>14</PC-Bonds_aid2_E> + 
<PC-Bonds_aid2_E>15</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>8</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>9</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>10</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>11</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>10</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>11</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>16</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>17</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>18</PC-Bonds_aid2_E> + <PC-Bonds_aid2_E>19</PC-Bonds_aid2_E> + </PC-Bonds_aid2> + <PC-Bonds_order> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="double">2</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="double">2</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="double">2</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + <PC-BondType value="single">1</PC-BondType> + </PC-Bonds_order> + </PC-Bonds> + </PC-Compound_bonds> + <PC-Compound_coords> + <PC-Coordinates> + <PC-Coordinates_type> + <PC-CoordinateType value="twod">1</PC-CoordinateType> + <PC-CoordinateType value="submitted">3</PC-CoordinateType> + </PC-Coordinates_type> + <PC-Coordinates_aid> + <PC-Coordinates_aid_E>1</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>2</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>3</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>4</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>5</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>6</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>7</PC-Coordinates_aid_E> + 
<PC-Coordinates_aid_E>8</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>9</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>10</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>11</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>12</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>13</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>14</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>15</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>16</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>17</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>18</PC-Coordinates_aid_E> + <PC-Coordinates_aid_E>19</PC-Coordinates_aid_E> + </PC-Coordinates_aid> + <PC-Coordinates_conformers> + <PC-Conformer> + <PC-Conformer_x> + <PC-Conformer_x_E>4.5</PC-Conformer_x_E> + <PC-Conformer_x_E>2</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>2.5</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>4.366</PC-Conformer_x_E> + <PC-Conformer_x_E>2.634</PC-Conformer_x_E> + <PC-Conformer_x_E>4.366</PC-Conformer_x_E> + <PC-Conformer_x_E>2.634</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>4.5</PC-Conformer_x_E> + <PC-Conformer_x_E>3.5</PC-Conformer_x_E> + <PC-Conformer_x_E>2.5</PC-Conformer_x_E> + <PC-Conformer_x_E>4.366</PC-Conformer_x_E> + <PC-Conformer_x_E>5</PC-Conformer_x_E> + <PC-Conformer_x_E>2.634</PC-Conformer_x_E> + <PC-Conformer_x_E>2</PC-Conformer_x_E> + </PC-Conformer_x> + <PC-Conformer_y> + <PC-Conformer_y_E>-2</PC-Conformer_y_E> + <PC-Conformer_y_E>2.866</PC-Conformer_y_E> + <PC-Conformer_y_E>2</PC-Conformer_y_E> + <PC-Conformer_y_E>-2</PC-Conformer_y_E> + <PC-Conformer_y_E>2</PC-Conformer_y_E> + <PC-Conformer_y_E>1</PC-Conformer_y_E> + <PC-Conformer_y_E>-1</PC-Conformer_y_E> + <PC-Conformer_y_E>0.5</PC-Conformer_y_E> + <PC-Conformer_y_E>0.5</PC-Conformer_y_E> + <PC-Conformer_y_E>-0.5</PC-Conformer_y_E> + 
<PC-Conformer_y_E>-0.5</PC-Conformer_y_E> + <PC-Conformer_y_E>3</PC-Conformer_y_E> + <PC-Conformer_y_E>2</PC-Conformer_y_E> + <PC-Conformer_y_E>-3</PC-Conformer_y_E> + <PC-Conformer_y_E>-2</PC-Conformer_y_E> + <PC-Conformer_y_E>3.5</PC-Conformer_y_E> + <PC-Conformer_y_E>2.866</PC-Conformer_y_E> + <PC-Conformer_y_E>-3.5</PC-Conformer_y_E> + <PC-Conformer_y_E>-2.866</PC-Conformer_y_E> + </PC-Conformer_y> + </PC-Conformer> + </PC-Coordinates_conformers> + </PC-Coordinates> + </PC-Compound_coords> + <PC-Compound_charge>0</PC-Compound_charge> + </PC-Compound> + </PC-Compounds> + </PC-Substance_compound> +</PC-Substance> + Added: cdk/trunk/src/main/org/openscience/cdk/io/PCSubstanceXMLReader.java =================================================================== --- cdk/trunk/src/main/org/openscience/cdk/io/PCSubstanceXMLReader.java (rev 0) +++ cdk/trunk/src/main/org/openscience/cdk/io/PCSubstanceXMLReader.java 2008-05-07 16:50:41 UTC (rev 10916) @@ -0,0 +1,149 @@ +/* $Revision: 10774 $ $Author: egonw $ $Date: 2008-05-03 08:50:01 +0200 (Sat, 03 May 2008) $ + * + * Copyright (C) 2008 Egon Willighagen <eg...@us...> + * + * Contact: cdk...@li... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may distribute + * with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ +package org.openscience.cdk.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; + +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IChemObject; +import org.openscience.cdk.interfaces.IChemObjectBuilder; +import org.openscience.cdk.interfaces.IMolecule; +import org.openscience.cdk.io.formats.IResourceFormat; +import org.openscience.cdk.io.formats.PubChemSubstanceXMLFormat; +import org.openscience.cdk.io.pubchemxml.PubChemXMLHelper; +import org.xmlpull.v1.XmlPullParser; +import org.xmlpull.v1.XmlPullParserFactory; + +/** + * Reads a molecule from PubChem Substance XML input. The parser is advanced to the + * first PC-Compound start tag, which is then handed to PubChemXMLHelper.parseMolecule(); + * null is returned when the input contains no such tag. + * + * @cdk.module io + * @cdk.svnrev $Revision: 10774 $ + * + * @cdk.keyword file format, PubChem Substance XML + */ +public class PCSubstanceXMLReader extends DefaultChemObjectReader { + + private Reader input; + private XmlPullParser parser; + private PubChemXMLHelper parserHelper; + private IChemObjectBuilder builder; + + IMolecule molecule = null; + + /** + * Construct a new reader from a Reader type object.
+ * + * @param input reader from which input is read + */ + public PCSubstanceXMLReader(Reader input) throws Exception { + setReader(input); + } + + public PCSubstanceXMLReader(InputStream input) throws Exception { + setReader(input); + } + + public PCSubstanceXMLReader() throws Exception { + this(new StringReader("")); + } + + public IResourceFormat getFormat() { + return PubChemSubstanceXMLFormat.getInstance(); + } + + public void setReader(Reader input) throws CDKException { + try { + XmlPullParserFactory factory = XmlPullParserFactory.newInstance( + System.getProperty(XmlPullParserFactory.PROPERTY_NAME), null + ); + factory.setNamespaceAware(true); + parser = factory.newPullParser(); + this.input = input; + parser.setInput(input); + } catch (Exception exception) { + throw new CDKException("Error while creating reader: " + exception.getMessage(), exception); + } + } + + public void setReader(InputStream input) throws CDKException { + setReader(new InputStreamReader(input)); + } + + public boolean accepts(Class classObject) { + Class[] interfaces = classObject.getInterfaces(); + for (int i=0; i<interfaces.length; i++) { + if (IMolecule.class.equals(interfaces[i])) return true; + } + return false; + } + + public IChemObject read(IChemObject object) throws CDKException { + if (object instanceof IMolecule) { + try { + parserHelper = new PubChemXMLHelper(object.getBuilder()); + builder = object.getBuilder(); + return readMolecule((IMolecule)object); + } catch (IOException e) { + throw new CDKException("An IO Exception occured while reading the file.", e); + } catch (CDKException e) { + throw e; + } catch (Exception e) { + throw new CDKException("An error occured: " + e.getMessage(), e); + } + } else { + throw new CDKException("Only supported is reading of ChemFile objects."); + } + } + + public void close() throws IOException { + input.close(); + } + + // private procedures + + private IMolecule readMolecule(IMolecule file) throws Exception { + boolean foundCompound
= false; + while (parser.next() != XmlPullParser.END_DOCUMENT) { + if (parser.getEventType() == XmlPullParser.START_TAG) { + if (parser.getName().equals("PC-Compound")) { + foundCompound = true; + break; + } + } + } + if (foundCompound) { + return parserHelper.parseMolecule(parser, builder); + } + return null; + } + +} Added: cdk/trunk/src/main/org/openscience/cdk/io/formats/PubChemSubstanceXMLFormat.java =================================================================== --- cdk/trunk/src/main/org/openscience/cdk/io/formats/PubChemSubstanceXMLFormat.java (rev 0) +++ cdk/trunk/src/main/org/openscience/cdk/io/formats/PubChemSubstanceXMLFormat.java 2008-05-07 16:50:41 UTC (rev 10916) @@ -0,0 +1,79 @@ +/* $Revision$ $Author$ $Date$ + * + * Copyright (C) 2008 Egon Willighagen <eg...@us...> + * + * Contact: cdk...@li... + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ */ +package org.openscience.cdk.io.formats; + +import org.openscience.cdk.tools.DataFeatures; + +/** Format descriptor for PubChem Substance XML, shared through getInstance(): XML based, name extension xml; reader and writer class names are reported as null. + * @cdk.module io + * @cdk.svnrev $Revision$ + * @cdk.set io-formats + */ +public class PubChemSubstanceXMLFormat implements IChemFormatMatcher { + + private static IResourceFormat myself = null; + + private PubChemSubstanceXMLFormat() {} + + public static IResourceFormat getInstance() { + if (myself == null) myself = new PubChemSubstanceXMLFormat(); + return myself; + } + + public String getFormatName() { + return "PubChem Substance XML"; + } + + public String getMIMEType() { + return null; + } + public String getPreferredNameExtension() { + return getNameExtensions()[0]; + } + public String[] getNameExtensions() { + return new String[]{"xml"}; + } + + public String getReaderClassName() { + return null; + } + public String getWriterClassName() { + return null; + } + + public boolean isXMLBased() { + return true; + } + + public int getSupportedDataFeatures() { + return DataFeatures.NONE; + } + + public int getRequiredDataFeatures() { + return DataFeatures.NONE; + } + + public boolean matches(int lineNumber, String line) { /* true only for line 2 starting with a PC-Substance tag that is not a PC-Substances tag */ + if (lineNumber == 2 && line.startsWith("<PC-Substance") && + !line.startsWith("<PC-Substances")) return true; + return false; + } +} Modified: cdk/trunk/src/main/org/openscience/cdk/io/pubchemxml/PubChemXMLHelper.java =================================================================== --- cdk/trunk/src/main/org/openscience/cdk/io/pubchemxml/PubChemXMLHelper.java 2008-05-07 16:25:42 UTC (rev 10915) +++ cdk/trunk/src/main/org/openscience/cdk/io/pubchemxml/PubChemXMLHelper.java 2008-05-07 16:50:41 UTC (rev 10916) @@ -156,9 +156,14 @@ if (EL_ELEMENT.equals(parser.getName())) { int atomicNumber = Integer.parseInt(parser.nextText()); IElement element = factory.getElement(atomicNumber); - IAtom atom = molecule.getBuilder().newAtom(element.getSymbol()); - atom.setAtomicNumber(element.getAtomicNumber()); - molecule.addAtom(atom); + if (element == null)
{ + IAtom atom = molecule.getBuilder().newPseudoAtom(); + molecule.addAtom(atom); + } else { + IAtom atom = molecule.getBuilder().newAtom(element.getSymbol()); + atom.setAtomicNumber(element.getAtomicNumber()); + molecule.addAtom(atom); + } } } } Added: cdk/trunk/src/test/org/openscience/cdk/io/PCSubstanceXMLReaderTest.java =================================================================== --- cdk/trunk/src/test/org/openscience/cdk/io/PCSubstanceXMLReaderTest.java (rev 0) +++ cdk/trunk/src/test/org/openscience/cdk/io/PCSubstanceXMLReaderTest.java 2008-05-07 16:50:41 UTC (rev 10916) @@ -0,0 +1,75 @@ +/* $Revision: 10838 $ $Author: egonw $ $Date: 2008-05-05 23:03:59 +0200 (Mon, 05 May 2008) $ + * + * Copyright (C) 2006-2007 Egon Willighagen <eg...@us...> + * + * Contact: cdk...@li... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may distribute + * with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +package org.openscience.cdk.io; + +import java.io.InputStream; + +import junit.framework.Test; +import junit.framework.TestSuite; + +import org.openscience.cdk.CDKTestCase; +import org.openscience.cdk.Molecule; +import org.openscience.cdk.interfaces.IMolecule; +import org.openscience.cdk.interfaces.IPseudoAtom; +import org.openscience.cdk.tools.LoggingTool; + +/** Tests PCSubstanceXMLReader: accepts(Molecule.class), and reading data/asn/pubchem/sid577309.xml (expects 19 atoms, 19 bonds and a pseudo atom at index 0). + * @cdk.module test-io + */ +public class PCSubstanceXMLReaderTest extends CDKTestCase { + + private LoggingTool logger; + + public PCSubstanceXMLReaderTest(String name) { + super(name); + logger = new LoggingTool(this); + } + + public static Test suite() { + return new TestSuite(PCSubstanceXMLReaderTest.class); + } + + public void testAccepts() throws Exception { + PCSubstanceXMLReader reader = new PCSubstanceXMLReader(); + assertTrue(reader.accepts(Molecule.class)); + } + + public void testReading() throws Exception { + String filename = "data/asn/pubchem/sid577309.xml"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + PCSubstanceXMLReader reader = new PCSubstanceXMLReader(ins); + IMolecule molecule = (IMolecule)reader.read(new Molecule()); + assertNotNull(molecule); + + // check atom stuff + assertEquals(19, molecule.getAtomCount()); + assertTrue(molecule.getAtom(0) instanceof IPseudoAtom); + + // check bond stuff + assertEquals(19, molecule.getBondCount()); + assertNotNull(molecule.getBond(3)); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |