From: Nicolas <ni...@us...> - 2005-03-05 17:26:34
|
Update of /cvsroot/jmol/Jmol/src/org/jmol/smiles In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8560/src/org/jmol/smiles Added Files: SmilesAtom.java SmilesBond.java SmilesParser.java InvalidSmilesException.java SmilesMolecule.java Log Message: SMILES Parser --- NEW FILE: SmilesAtom.java --- /* $RCSfile: SmilesAtom.java,v $ * $Author: nicove $ * $Date: 2005/03/05 17:26:22 $ * $Revision: 1.1 $ * * Copyright (C) 2005 The Jmol Development Team * * Contact: jmo...@li... * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. 
*/ package org.jmol.smiles; /** * Atom in a SmilesMolecule */ public class SmilesAtom { private int number; private String symbol; private Integer atomicMass; private int charge; private Integer hydrogenCount; private SmilesBond[] bonds; private int bondsCount; private final static int INITIAL_BONDS = 4; /** * SmilesAtom constructor * * @param number Atom number in the molecule */ public SmilesAtom(int number) { this.number = number; this.symbol = null; this.atomicMass = null; this.charge = 0; bonds = new SmilesBond[INITIAL_BONDS]; bondsCount = 0; } /** * Creates missing hydrogen * * @param molecule Molecule containing the atom */ public void createMissingHydrogen(SmilesMolecule molecule) { // Determing max count int count = 0; if (hydrogenCount == null) { if (symbol != null) { if (symbol.equals("B")) { count = 3; } else if (symbol.equals("Br")) { count = 1; } else if (symbol.equals("C")) { count = 4; } else if (symbol.equals("Cl")) { count = 1; } else if (symbol.equals("F")) { count = 1; } else if (symbol.equals("I")) { count = 1; } else if (symbol.equals("N")) { count = 3; } else if (symbol.equals("O")) { count = 2; } else if (symbol.equals("P")) { count = 3; } else if (symbol.equals("S")) { count = 2; } } for (int i = 0; i < bondsCount; i++) { SmilesBond bond = bonds[i]; switch (bond.getBondType()) { case SmilesBond.TYPE_SINGLE: count -= 1; break; case SmilesBond.TYPE_DOUBLE: count -= 2; break; case SmilesBond.TYPE_TRIPLE: count -= 3; break; } } } else { count = hydrogenCount.intValue(); } // Adding hydrogens for (int i = 0; i < count; i++) { SmilesAtom hydrogen = molecule.createAtom(); molecule.createBond(this, hydrogen, SmilesBond.TYPE_SINGLE); hydrogen.setSymbol("H"); } } public int getNumber() { return number; } public String getSymbol() { return symbol; } public void setSymbol(String symbol) { this.symbol = symbol; } public Integer getAtomicMass() { return atomicMass; } public void setAtomicMass(Integer mass) { this.atomicMass = mass; } public int 
getCharge() { return charge; } public void setCharge(int charge) { this.charge = charge; } public Integer getHydrogenCount() { return hydrogenCount; } public void setHydrogenCount(Integer count) { this.hydrogenCount = count; } public int getBondsCount() { return bondsCount; } public SmilesBond getBond(int number) { if ((number >= 0) && (number < bondsCount)) { return bonds[number]; } return null; } public void addBond(SmilesBond bond) { if (bondsCount >= bonds.length) { SmilesBond[] tmp = new SmilesBond[bonds.length * 2]; for (int i = 0; i < bonds.length; i++) { tmp[i] = bonds[i]; } bonds = tmp; } bonds[bondsCount] = bond; bondsCount++; } } --- NEW FILE: SmilesBond.java --- /* $RCSfile: SmilesBond.java,v $ * $Author: nicove $ * $Date: 2005/03/05 17:26:22 $ * $Revision: 1.1 $ * * Copyright (C) 2005 The Jmol Development Team * * Contact: jmo...@li... * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. 
*/ package org.jmol.smiles; /** * Bond in a SmilesMolecule */ public class SmilesBond { // Bond orders public final static int TYPE_UNKOWN = -1; public final static int TYPE_SINGLE = 1; public final static int TYPE_DOUBLE = 2; public final static int TYPE_TRIPLE = 3; public final static int TYPE_AROMATIC = 4; // Bond expressions public final static char CODE_SINGLE = '-'; public final static char CODE_DOUBLE = '='; public final static char CODE_TRIPLE = '#'; public final static char CODE_AROMATIC = ':'; private SmilesAtom atom1; private SmilesAtom atom2; private int bondType; /** * SmilesBond constructor * * @param atom1 First atom * @param atom2 Second atom * @param bondType Bond type */ public SmilesBond(SmilesAtom atom1, SmilesAtom atom2, int bondType) { this.atom1 = atom1; this.atom2 = atom2; this.bondType = bondType; } /** * @param code Bond code * @return Bond type */ public static int getBondTypeFromCode(char code) { switch (code) { case CODE_SINGLE: return TYPE_SINGLE; case CODE_DOUBLE: return TYPE_DOUBLE; case CODE_TRIPLE: return TYPE_TRIPLE; case CODE_AROMATIC: return TYPE_AROMATIC; } return TYPE_UNKOWN; } public SmilesAtom getAtom1() { return atom1; } public void setAtom1(SmilesAtom atom) { this.atom1 = atom; } public SmilesAtom getAtom2() { return atom2; } public void setAtom2(SmilesAtom atom) { this.atom2 = atom; } public int getBondType() { return bondType; } public void setBondType(int bondType) { this.bondType = bondType; } } --- NEW FILE: SmilesParser.java --- /* $RCSfile: SmilesParser.java,v $ * $Author: nicove $ * $Date: 2005/03/05 17:26:22 $ * $Revision: 1.1 $ * * Copyright (C) 2005 The Jmol Development Team * * Contact: jmo...@li... * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. */ package org.jmol.smiles; /** * Parses a SMILES String to create a SmilesMolecule. * The SMILES specification has been found at http://www.daylight.com/smiles/. * * Currently this parser supports only parts of the SMILES specification. * * An example on how to use it: * <pre> * try { * SmilesParser sp = new SmilesParser(); * SmilesMolecule sm = sp.parseSmiles("CC(C)C(=O)O"); * // Use the resulting molecule * } catch (InvalidSmilesException e) { * // Exception management * } * </pre> * * @see <a href="http://www.daylight.com/smiles/">SMILES Home Page</a> */ public class SmilesParser { private SmilesBond[] ringBonds; /** * SmilesParser constructor */ public SmilesParser() { ringBonds = null; } /** * Parse a SMILES String * * @param smiles SMILES String * @return Molecule corresponding to smiles * @throws InvalidSmilesException */ public SmilesMolecule parseSmiles(String smiles) throws InvalidSmilesException { if (smiles == null) { throw new InvalidSmilesException("SMILES expressions must not be null"); } // First pass SmilesMolecule molecule = new SmilesMolecule(); parseSmiles(molecule, smiles, null); // Implicit hydrogren creation for (int i = 0; i< molecule.getAtomsCount(); i++) { SmilesAtom atom = molecule.getAtom(i); atom.createMissingHydrogen(molecule); } // Check for rings if (ringBonds != null) { for (int i = 0; i < ringBonds.length; i++) { if (ringBonds[i] != null) { throw new InvalidSmilesException("Open ring"); } } } return molecule; } /** * Parse a part of a SMILES String * * @param molecule 
Resulting molecule * @param smiles SMILES String * @param currentAtom Current atom * @throws InvalidSmilesException */ private void parseSmiles( SmilesMolecule molecule, String smiles, SmilesAtom currentAtom) throws InvalidSmilesException { if ((smiles == null) || (smiles.length() == 0)) { return; } // Branching int index = 0; char firstChar = smiles.charAt(index); if (firstChar == '(') { index++; int currentIndex = index; int parenthesisCount = 1; while ((currentIndex < smiles.length()) && (parenthesisCount > 0)) { switch (smiles.charAt(currentIndex)) { case '(': parenthesisCount++; break; case ')': parenthesisCount--; break; } currentIndex++; } if (parenthesisCount != 0) { throw new InvalidSmilesException("Unbalanced parenthesis"); } String subSmiles = smiles.substring(index, currentIndex - 1); parseSmiles(molecule, subSmiles, currentAtom); index = currentIndex; } // Bonds firstChar = smiles.charAt(index); int bondType = SmilesBond.getBondTypeFromCode(firstChar); if (bondType != SmilesBond.TYPE_UNKOWN) { if (currentAtom == null) { throw new InvalidSmilesException("Bond without a previous atom"); } index++; } // Atom firstChar = smiles.charAt(index); if ((firstChar >= '0') && (firstChar <= '9')) { // Ring String subSmiles = smiles.substring(index, index + 1); parseRing(molecule, subSmiles, currentAtom, bondType); index++; } else if (firstChar == '%') { // Ring index++; if ((smiles.charAt(index) < 0) || (smiles.charAt(index) > 9)) { throw new InvalidSmilesException("Ring number must follow the % sign"); } int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) >= '0') && (smiles.charAt(currentIndex) <= '9')) { currentIndex++; } String subSmiles = smiles.substring(index, currentIndex); parseRing(molecule, subSmiles, currentAtom, bondType); index = currentIndex; } else if (firstChar == '[') { // Atom definition index++; int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) != ']')) 
{ currentIndex++; } if (currentIndex >= smiles.length()) { throw new InvalidSmilesException("Unmatched ["); } String subSmiles = smiles.substring(index, currentIndex); currentAtom = parseAtom(molecule, subSmiles, currentAtom, bondType, true); index = currentIndex + 1; } else if (((firstChar >= 'a') && (firstChar <= 'z')) || ((firstChar >= 'A') && (firstChar <= 'Z')) || (firstChar == '*')) { // Atom definition int size = 1; if (index + 1 < smiles.length()) { char secondChar = smiles.charAt(index + 1); if ((firstChar >= 'A') && (firstChar <= 'Z') && (secondChar >= 'a') && (secondChar <= 'z')) { size = 2; } } String subSmiles = smiles.substring(index, index + size); currentAtom = parseAtom(molecule, subSmiles, currentAtom, bondType, false); index += size; } // Next part of the SMILES String if (index < smiles.length()) { String subSmiles = smiles.substring(index); parseSmiles(molecule, subSmiles, currentAtom); } } /** * Parses an atom definition * * @param molecule Resulting molecule * @param smiles SMILES String * @param currentAtom Current atom * @param bondType Bond type * @param complete Indicates if is a complete definition (between []) * @return New atom * @throws InvalidSmilesException */ private SmilesAtom parseAtom( SmilesMolecule molecule, String smiles, SmilesAtom currentAtom, int bondType, boolean complete) throws InvalidSmilesException { if ((smiles == null) || (smiles.length() == 0)) { throw new InvalidSmilesException("Empty atom definition"); } // Atomic mass int index = 0; char firstChar = smiles.charAt(index); Integer atomicMass = null; if ((firstChar >= '0') && (firstChar <= '9')) { int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) >= '0') && (smiles.charAt(currentIndex) <= '9')) { currentIndex++; } String sub = smiles.substring(index, currentIndex); try { atomicMass = Integer.valueOf(sub); } catch (NumberFormatException e) { throw new InvalidSmilesException("Non numeric atomic mass"); } index = 
currentIndex; } // Symbol if (index >= smiles.length()) { throw new InvalidSmilesException("Missing atom symbol"); } firstChar = smiles.charAt(index); if (((firstChar < 'a') || (firstChar > 'z')) && ((firstChar < 'A') || (firstChar > 'Z')) && (firstChar != '*')) { throw new InvalidSmilesException("Unexpected atom symbol"); } int size = 1; if (index + 1 < smiles.length()) { char secondChar = smiles.charAt(index + 1); if ((firstChar >= 'A') && (firstChar <= 'Z') && (secondChar >= 'a') && (secondChar <= 'z')) { size = 2; } } String atomSymbol = smiles.substring(index, index + size); index += size; // Chirality if (index < smiles.length()) { //TODO } // Hydrogen count Integer hydrogenCount = null; if (index < smiles.length()) { firstChar = smiles.charAt(index); if (firstChar == 'H') { index++; int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) >= '0') && (smiles.charAt(currentIndex) <= '9')) { currentIndex++; } if (currentIndex > index) { String sub = smiles.substring(index, currentIndex); try { hydrogenCount = Integer.valueOf(sub); } catch (NumberFormatException e) { throw new InvalidSmilesException("Non numeric hydrogen count"); } } else { hydrogenCount = new Integer(1); } index = currentIndex; } } if ((hydrogenCount == null) && (complete)) { hydrogenCount = new Integer(0); } // Charge int charge = 0; if (index < smiles.length()) { firstChar = smiles.charAt(index); if ((firstChar == '+') || (firstChar == '-')) { int count = 1; index++; if (index < smiles.length()) { char nextChar = smiles.charAt(index); if ((nextChar >= '0') && (nextChar <= '9')) { int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) >= '0') && (smiles.charAt(currentIndex) <= '9')) { currentIndex++; } String sub = smiles.substring(index, currentIndex); try { count = Integer.parseInt(sub); } catch (NumberFormatException e) { throw new InvalidSmilesException("Non numeric charge"); } index = currentIndex; } else 
{ int currentIndex = index; while ((currentIndex < smiles.length()) && (smiles.charAt(currentIndex) == firstChar)) { currentIndex++; count++; } index = currentIndex; } } if (firstChar == '+') { charge = count; } else { charge = -count; } } } // Final check if (index < smiles.length()) { throw new InvalidSmilesException("Unexpected characters after atom definition"); } // Create atom if (bondType == SmilesBond.TYPE_UNKOWN) { bondType = SmilesBond.TYPE_SINGLE; } SmilesAtom newAtom = molecule.createAtom(); if (currentAtom != null) { molecule.createBond(currentAtom, newAtom, bondType); } newAtom.setSymbol(atomSymbol); newAtom.setAtomicMass(atomicMass); newAtom.setCharge(charge); newAtom.setHydrogenCount(hydrogenCount); return newAtom; } /** * Parses a ring definition * * @param molecule Resulting molecule * @param smiles SMILES String * @param currentAtom Current atom * @param bondType Bond type * @throws InvalidSmilesException */ private void parseRing( SmilesMolecule molecule, String smiles, SmilesAtom currentAtom, int bondType) throws InvalidSmilesException { // Extracting ring number int ringNum = 0; try { ringNum = Integer.parseInt(smiles); } catch (NumberFormatException e) { throw new InvalidSmilesException("Non numeric ring identifier"); } // Checking rings buffer is big enough if (ringBonds == null) { ringBonds = new SmilesBond[10]; for (int i = 0; i < ringBonds.length; i++) { ringBonds[i] = null; } } if (ringNum >= ringBonds.length) { SmilesBond[] tmp = new SmilesBond[ringNum + 1]; for (int i = 0; i < ringBonds.length; i++) { tmp[i] = ringBonds[i]; } for (int i = ringBonds.length; i < tmp.length; i++) { tmp[i] = null; } } // Ring management if (ringBonds[ringNum] == null) { ringBonds[ringNum] = molecule.createBond(currentAtom, null, bondType); } else { if (bondType == SmilesBond.TYPE_UNKOWN) { bondType = ringBonds[ringNum].getBondType(); if (bondType == SmilesBond.TYPE_UNKOWN) { bondType = SmilesBond.TYPE_SINGLE; } } else { if 
((ringBonds[ringNum].getBondType() != SmilesBond.TYPE_UNKOWN) && (ringBonds[ringNum].getBondType() != bondType)) { throw new InvalidSmilesException("Incoherent bond type for ring"); } } ringBonds[ringNum].setBondType(bondType); ringBonds[ringNum].setAtom2(currentAtom); currentAtom.addBond(ringBonds[ringNum]); ringBonds[ringNum] = null; } } /* private static void outputMolecule(SmilesMolecule molecule) { for (int i = 0; i < molecule.getAtomsCount(); i++) { SmilesAtom atom = molecule.getAtom(i); System.out.print("Atom (" + i + "): " + atom.getSymbol()); System.out.print(" :"); for (int j = 0; j < atom.getBondsCount(); j++) { SmilesBond bond = atom.getBond(j); System.out.print( " " + bond.getAtom1().getNumber() + "-" + bond.getAtom2().getNumber() + "-" + bond.getBondType()); } System.out.println(); } } private static void testMolecule(String smiles) { try { SmilesParser parser = new SmilesParser(); SmilesMolecule molecule = parser.parseSmiles(smiles); System.out.println("SMILES: " + smiles); outputMolecule(molecule); } catch (InvalidSmilesException e) { System.out.println("Erreur: " + e); } } public static void main(String[] args) { testMolecule("[S]"); testMolecule("[Au]"); testMolecule("C"); testMolecule("P"); testMolecule("S"); testMolecule("Cl"); testMolecule("[OH-]"); testMolecule("[OH-1]"); testMolecule("[Fe+2]"); testMolecule("[Fe++]"); testMolecule("[235U]"); testMolecule("[*+2]"); testMolecule("CC"); testMolecule("C-C"); testMolecule("[CH3]-[CH3]"); testMolecule("C=O"); testMolecule("C#N"); testMolecule("C=C"); testMolecule("C=CC=C"); testMolecule("CC(C)C(=O)O"); testMolecule("FC(F)F"); testMolecule("C(F)(F)F"); testMolecule("O=Cl(=O)(=O)[O-]"); testMolecule("CCCC(C(=O)O)CCC"); testMolecule("C1CCCCC1"); testMolecule("C=1CCCCC1"); } */ } --- NEW FILE: InvalidSmilesException.java --- /* $RCSfile: InvalidSmilesException.java,v $ * $Author: nicove $ * $Date: 2005/03/05 17:26:22 $ * $Revision: 1.1 $ * * Copyright (C) 2005 The Jmol Development Team * * Contact: 
/**
 * Exception thrown for invalid SMILES String
 */
public class InvalidSmilesException extends Exception {

  /** Exceptions are serializable: pin the version of the serialized form. */
  private static final long serialVersionUID = 1L;

  /**
   * Creates an exception without message nor cause.
   */
  public InvalidSmilesException() {
    super();
  }

  /**
   * Creates an exception with a message.
   *
   * @param message Description of the problem found in the SMILES String
   */
  public InvalidSmilesException(String message) {
    super(message);
  }

  /**
   * Creates an exception wrapping another one.
   *
   * @param cause Exception at the origin of this one
   */
  public InvalidSmilesException(Throwable cause) {
    super(cause);
  }

  /**
   * Creates an exception with a message, wrapping another one.
   *
   * @param message Description of the problem found in the SMILES String
   * @param cause Exception at the origin of this one
   */
  public InvalidSmilesException(String message, Throwable cause) {
    super(message, cause);
  }
}
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. */ package org.jmol.smiles; /** * Molecule created from a SMILES String */ public class SmilesMolecule { private SmilesAtom[] atoms; private int atomsCount; private SmilesBond[] bonds; private int bondsCount; private final static int INITIAL_ATOMS = 16; private final static int INITIAL_BONDS = 16; /** * SmilesMolecule constructor */ public SmilesMolecule() { atoms = new SmilesAtom[INITIAL_ATOMS]; atomsCount = 0; bonds = new SmilesBond[INITIAL_BONDS]; bondsCount = 0; } /* ============================================================= */ /* Atoms */ /* ============================================================= */ public SmilesAtom createAtom() { if (atomsCount >= atoms.length) { SmilesAtom[] tmp = new SmilesAtom[atoms.length * 2]; for (int i = 0; i < atoms.length; i++) { tmp[i] = atoms[i]; } atoms = tmp; } SmilesAtom atom = new SmilesAtom(atomsCount); atoms[atomsCount] = atom; atomsCount++; return atom; } public int getAtomsCount() { return atomsCount; } public SmilesAtom getAtom(int number) { if ((number >= 0) && (number < atomsCount)) { return atoms[number]; } return null; } /* ============================================================= */ /* Bonds */ /* ============================================================= */ public SmilesBond createBond( SmilesAtom atom1, SmilesAtom atom2, int bondType) { if (bondsCount >= bonds.length) { SmilesBond[] tmp = new SmilesBond[bonds.length * 2]; for (int i = 0; i < bonds.length; i++) { tmp[i] = bonds[i]; } bonds = tmp; } SmilesBond bond = new SmilesBond(atom1, atom2, bondType); bonds[bondsCount] = bond; bondsCount++; if (atom1 != null) { atom1.addBond(bond); } if (atom2 != null) { atom2.addBond(bond); } return bond; } public int getBondsCount() { return bondsCount; } public SmilesBond getBond(int number) { if 
((number >= 0) && (number < bondsCount)) { return bonds[number]; } return null; } } |