From: <sus...@us...> - 2007-04-26 21:30:37
|
Revision: 8270 http://svn.sourceforge.net/cdk/?rev=8270&view=rev Author: sushil_ronghe Date: 2007-04-26 14:30:30 -0700 (Thu, 26 Apr 2007) Log Message: ----------- Major change in design Modified Paths: -------------- trunk/cdk/src/org/openscience/cdk/smiles/smarts/SMARTSParser.jj Modified: trunk/cdk/src/org/openscience/cdk/smiles/smarts/SMARTSParser.jj =================================================================== --- trunk/cdk/src/org/openscience/cdk/smiles/smarts/SMARTSParser.jj 2007-04-26 19:42:00 UTC (rev 8269) +++ trunk/cdk/src/org/openscience/cdk/smiles/smarts/SMARTSParser.jj 2007-04-26 21:30:30 UTC (rev 8270) @@ -1,721 +1,868 @@ - - -options { -STATIC=false; -OPTIMIZE_TOKEN_MANAGER=true; -FORCE_LA_CHECK=true; -} - -PARSER_BEGIN(SMARTSParser) -package org.openscience.cdk.smiles.smarts; -import java.io.StringReader; -import org.openscience.cdk.Atom; -import org.openscience.cdk.ChemObject; -import org.openscience.cdk.config.IsotopeFactory; -import org.openscience.cdk.exception.CDKException; -import org.openscience.cdk.isomorphism.matchers.IQueryAtom; -import org.openscience.cdk.isomorphism.matchers.IQueryBond; -import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; -import org.openscience.cdk.isomorphism.matchers.SymbolQueryAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.AliphaticAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.AnyAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.AnyOrderQueryBond; -import org.openscience.cdk.isomorphism.matchers.smarts.AromaticAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.AromaticQueryBond; -import org.openscience.cdk.isomorphism.matchers.smarts.DegreeAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.FormalChargeAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.ImplicitHCountAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSBond; -import 
org.openscience.cdk.isomorphism.matchers.smarts.TotalHCountAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.OrderQueryBond; -import org.openscience.cdk.isomorphism.matchers.smarts.RingAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.TotalRingConnectionAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.OperatorContainer; -import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSOperatorAtom; -import org.openscience.cdk.isomorphism.matchers.smarts.TotalConnectionAtom; -import org.openscience.cdk.tools.LoggingTool; - -/* $RCSfile$ - * $Original Author: egonw $ - * $code extension: sushil ronghe$ - * $Date: 2007-04-18 (Wed, 18 April 2007) $ - * $Revision: 6382 $ - * - * Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * (or see http://www.gnu.org/copyleft/lesser.html) - */ - -/** - * This parser implements a nearly complete subset of the SMARTS syntax as defined on - * <a href="http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html">the - * Daylight website</a>. 
- * - * <p>Example code using SMARTS substructure search looks like: - * <pre> - * SmilesParser sp = new SmilesParser(); - * AtomContainer atomContainer = sp.parseSmiles("CC(=O)OC(=O)C"); - * QueryAtomContainer query = SMARTSParser.parse("C*C"); - * boolean queryMatch = UniversalIsomorphismTester.isSubgraph(atomContainer, query); - * </pre> - * - * <p>See the cdk.test.smiles.smarts.ParserTest for examples of the implemented - * subset. - * - * @see org.openscience.cdk.isomorphism.matchers.smarts.SMARTSAtom - * - * @author Sushil Ronghe - * @cdk.created 2007-04-29 - * - * @cdk.require ant1.6 - * @cdk.module smarts - * - * @cdk.keyword SMARTS - * @cdk.keyword substructure search - */ -public class SMARTSParser { - - private IQueryAtom previousAtom = null; - private SMARTSBond currentBond = null; - private QueryAtomContainer container = null; - private QueryAtomContainer opcontainer = new QueryAtomContainer (); - private OperatorContainer OperMap = new OperatorContainer(); - private final static LoggingTool logger = new LoggingTool(SMARTSParser.class); - - public static QueryAtomContainer parse(String stringToParse) throws CDKException { - QueryAtomContainer container = null; - - StringReader stringReader = new StringReader(stringToParse); - try { - SMARTSParser parser = new SMARTSParser(stringReader); - container = parser.Parse(); - } catch (ParseException exception) { - throw new CDKException("The string " + stringToParse + " is not a valid" + - " SMARTS string: " + exception.getMessage(), exception); - } - return container; - } - -} -PARSER_END(SMARTSParser) - -JAVACODE -void AddAnyAtom() { - Token tok = getToken(-1); - logger.debug("Adding SMARTS atom: " + tok.image); - AnyAtom atom = new AnyAtom(); - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddAliphaticAtom() { - logger.debug("Adding aliphatic atom"); - AliphaticAtom atom = new AliphaticAtom(); - container.addAtom(atom); - 
if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddELementAromaticAtom(){ - Token tok = getToken(-1); - logger.debug("Adding aromatic atom"); - String ss = new String(tok.image); - AromaticAtom atom = new AromaticAtom(new Atom(ss.toUpperCase())); - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddAromaticAtom() { - logger.debug("Adding aromatic atom"); - AromaticAtom atom = new AromaticAtom(); - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddCurrentBond(IQueryAtom atom, IQueryAtom atom2) { - if (currentBond == null) { - // add default bond - currentBond = new AnyOrderQueryBond(); - } - Atom[] atoms = new Atom[2]; - atoms[0] = (Atom)atom; - atoms[1] = (Atom)atom2; - currentBond.setAtoms(atoms); - container.addBond(currentBond); - currentBond = null; -} - -JAVACODE -void AddNewElementAtom() { - Token tok = getToken(-1); - logger.debug("Adding SMARTS atom: " + tok.image); - SymbolQueryAtom atom = new SymbolQueryAtom(new Atom(tok.image)); - container.addAtom(atom); -} -JAVACODE -void AddTotalConnectionAtom(){ - TotalConnectionAtom atom; - Token tok = getToken(-1); - logger.debug("adding smarts atoms:" + tok.image); - String ss = new String(tok.image); - if(ss=="X"){ - atom = new TotalConnectionAtom(); - } - else{ - atom=new TotalConnectionAtom(Integer.parseInt(tok.image)); - } - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; - -} - - -JAVACODE -void AddNewNumberAtom() { - Token tok = getToken(0); - logger.debug("Adding SMARTS atom: " + tok.image); - try { - String sym = IsotopeFactory.getInstance(new ChemObject().getBuilder()).getElement(Integer.parseInt(tok.image)).getSymbol(); - SymbolQueryAtom atom = new SymbolQueryAtom(new Atom(sym)); - container.addAtom(atom); - if 
(previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; - } catch (Exception exp) { - logger.debug("Exception caught: " + exp); - } -} - -JAVACODE -void AddNewDegreeAtom() { - DegreeAtom atom; - Token tok = getToken(-1); - logger.debug("Adding SMARTS degree atom: " + tok.image); - String ss = new String(tok.image); - if(ss.equals("D")){ - atom=new DegreeAtom(); - } - else{ - atom=new DegreeAtom(Integer.parseInt(tok.image)); - } - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddNewFormalChargeAtom() { - Token tok = getToken(-1); - logger.debug("Adding SMARTS formal charge atom: " + tok.image); - String ss = new String(tok.image); - int count = getSignCount(ss); - FormalChargeAtom atom = new FormalChargeAtom(count); - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} -JAVACODE - -void AddNewRingAtom(){ -RingAtom atom; -Token tt = getToken(-1); -logger.debug("adding SMARTS Ring atom: "+ tt.image); -String ss = new String(tt.image); -if(ss.equals("r")){ - atom=new RingAtom(); -} -else{ - atom=new RingAtom(Integer.parseInt(tt.image)); -} -container.addAtom(atom); -if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} -JAVACODE - -void AddTotalRingConnectionAtom(){ -TotalRingConnectionAtom atom; -Token tt = getToken(-1); -logger.debug("adding SMARTS format charge atom: "+ tt.image); -String ss = new String(tt.image); -if(ss.equals("x")){ - atom=new TotalRingConnectionAtom(); -} -else{ - atom=new TotalRingConnectionAtom(Integer.parseInt(tt.image)); -} -container.addAtom(atom); -if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} -JAVACODE -int getSignCount(String sss) { - int count=0; - for (int i=0;i<sss.length();i++){ - if(sss.charAt(i)=='+'|sss.charAt(i)=='-'){ - count++; - } - } - return count; -} - -JAVACODE -void 
AddNewImplicitHCountAtom() { - ImplicitHCountAtom atom; - Token tok = getToken(-1); - logger.debug("Adding SMARTS implicit Hcount atom: " + tok.image); - String ss = new String(tok.image); - if(ss.equals("h")){ - atom=new ImplicitHCountAtom(); - } - else{ - atom=new ImplicitHCountAtom(Integer.parseInt(tok.image)); - } - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} - -JAVACODE -void AddNewTotalHCountAtom() { - TotalHCountAtom atom; - Token tok = getToken(-1); - logger.debug("Adding SMARTS total Hcount atom: " + tok.image); - String ss = new String(tok.image); - if(ss.equals("H")){ - atom=new TotalHCountAtom(); - } - else{ - atom=new TotalHCountAtom(Integer.parseInt(tok.image)); - } - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; -} -JAVACODE -void setProperty(){ - IQueryAtom atom = (IQueryAtom)container.getLastAtom(); - if(atom instanceof TotalHCountAtom || atom instanceof TotalConnectionAtom){ - container.removeAtom(atom); - IQueryAtom Tatom = (IQueryAtom)container.getLastAtom(); - container.removeAtom(Tatom); - atom.setSymbol(Tatom.getSymbol()); - container.addAtom(atom); - previousAtom = atom; - } - else{ - IQueryAtom atomT = (IQueryAtom)container.getLastAtom(); - if (previousAtom != null) AddCurrentBond(previousAtom, atomT); - previousAtom = atomT; - } - - - -} -JAVACODE -void AddOperator(){ - Token tok = getToken(-1); - try{ - OperMap.addElement(new String(tok.image)); - opcontainer.addAtom (previousAtom); - container.removeAtom(previousAtom); - previousAtom = null; - } - catch(java.lang.NullPointerException NPT){ - //System.out.println("error caught"); - NPT.printStackTrace(); - } - -} -JAVACODE -void AddOperatorAtom(){ - Token tok = getToken(-1); - logger.debug("Adding SMARTS total OP atom: " + tok.image); - container.setOperator("!"); - -} -JAVACODE -void AddFinalOperatorExpression(){ - Token tok = getToken(-1); - 
logger.debug("Adding SMARTS total OPF atom: " + tok.image); - if(OperMap.size()!=0 && opcontainer.getAtomCount()!=0){ - opcontainer.addAtom (previousAtom); - container.removeAtom(previousAtom); - SMARTSOperatorAtom atom = new SMARTSOperatorAtom(opcontainer,OperMap); - container.addAtom(atom); - if (previousAtom != null) AddCurrentBond(previousAtom, atom); - previousAtom = atom; - } - OperMap = new OperatorContainer(); - opcontainer = new QueryAtomContainer (); -} -JAVACODE -void AddOperatorExpression(){ - Token tok = getToken(-1); - logger.debug("Adding SMARTS total OPEX atom: " + tok.image); -} - -JAVACODE -void SetCurrentBondToOrderQuery(double order) { - currentBond = new OrderQueryBond(); - currentBond.setOrder(order); -} - -JAVACODE -void SetCurrentBondToAnyOrder() { - currentBond = new AnyOrderQueryBond(); -} - -JAVACODE -void SetCurrentBondToAromatic() { - currentBond = new AromaticQueryBond(); -} - -QueryAtomContainer Parse() : -{} -{ - { - container = new QueryAtomContainer(); - previousAtom = null; - } - - (SMARTS())+ <EOF> - - { return container; } -} - -void SMARTS() : -{} -{ - AtomExpression() [ BondExpression() ] -} - -void AtomExpression() : -{} -{ - Element() | SMARTSAtomExpression() -} - - -void SMARTSAtomExpression() : -{} -{ - "*" AddAnyAtom() | - "a" AddAromaticAtom() | - "A" AddAliphaticAtom() | - AromaticAtms() | - (InBracketsSMARTSAtom()) -} - -void InBracketsSMARTSAtom() : -{} -{ - "[" - FinalOperatorExpression() - - "]" -} - -void ExpressionAtom(): -{} -{ - (Degree() | - TotalHCount() | - ImplicitHCount() | - TotalRingConnection() | - RingAtom() | - AtomicNumber() | - AromaticAtms() | - ElementWithComplexExp() | - TotalConnectionAtom() | - ChargeAtom() ) -} -void OperatorAtom(): -{} -{ - <excl> ExpressionAtom() AddOperatorAtom() -} - -void AtomicNumber() : -{} -{ - <HASH> (<DIGIT> | <DIGITS>) AddNewNumberAtom() -} -void OperatorExpression(): -{} -{ - ( Operator() (OperatorAtom() | ExpressionAtom()))* - AddOperatorExpression() -} -void 
FinalOperatorExpression(): -{} -{ - (OperatorAtom() | ExpressionAtom()) [OperatorExpression()] - AddFinalOperatorExpression() -} - -void ElementWithComplexExp(): -{String tok=null;} -{ - Element() [TotalHCount()|TotalConnectionAtom()] setProperty() - -} -void TotalHCount() : -{} -{ - <H> [<DIGIT>] AddNewTotalHCountAtom() -} -void Element() : -{} -{ - (TwoCharElement() | OneCharElement()) - AddNewElementAtom() -} - -void ChargeAtom(): -{Token t;} -{ - (("+"|"-") | <SIGNS>) AddNewFormalChargeAtom() -} - -void ImplicitHCount() : -{} -{ - <h> [<DIGIT>] AddNewImplicitHCountAtom() -} - -void Degree() : -{} -{ - <D> [<DIGIT>] AddNewDegreeAtom() -} -void RingAtom() : -{} -{ - - <r> [<DIGIT>] AddNewRingAtom() -} -void TotalRingConnection(): -{} -{ - <x> [<DIGIT>] AddTotalRingConnectionAtom() -} -void TotalConnectionAtom(): -{} -{ - <X> [<DIGIT>] AddTotalConnectionAtom() -} - - -void BondExpression() : -{} -{ - ("-" SetCurrentBondToOrderQuery(1.0) ) | - ("=" SetCurrentBondToOrderQuery(2.0) ) | - (<HASH> SetCurrentBondToOrderQuery(3.0) ) | - (":" SetCurrentBondToAromatic() ) | - ("~" SetCurrentBondToAnyOrder() ) -} - -void TwoCharElement() : -{} -{ - <HE> | <LI> | <BE> | <NE> | <NA> | <MG> | <AL> | <SI> | <AR> | <CA> | <SC> | - <TI> | <CR> | <MN> | <FE> | <CO> | <NI> | <CU> | <ZN> | <GA> | <GE> | <AS> | - <SE> | <BR> | <KR> | <RB> | <SR> | <ZR> | <NB> | <MO> | <TC> | <RU> | <RH> | - <PD> | <AG> | <CD> | <IN> | <SN> | <SB> | <TE> | <XE> | <CS> | <BA> | <LA> | - <HF> | <TA> | <RE> | <OS> | <IR> | <PT> | <AU> | <HG> | <TL> | <PB> | <BI> | - <PO> | <AT> | <RN> | <FR> | <RA> | <AC> | <TH> | <PA> -} - -void OneCharElement() : -{} -{ - <H> | <B> | <C> | <N> | <O> | <F> | <P> | <S> | <K> | <V> | <Y> | <I> | <U> - -} -String CharElement(): -{} -{ - (<B> | <C> | <N> | <O> | <F> | <P> | <S> | <K> | <V> | <Y> | <I> | <U>) - {return getToken(-1).image;} -} -void Operator(): -{} -{ - (<coln>|<coma>|<amps>) AddOperator() -} - -void AromaticAtms(): -{} -{ - (<c>|<n>|<o>|<p>|<s>) 
AddELementAromaticAtom() -} -TOKEN: -{ - < DIGIT: ["0"-"9"] > -} - -TOKEN: -{ - <SIGN: ["+","-"] > -} -TOKEN: -{ - <SIGNS: (["+","-"])+> -} -TOKEN: /* Opetator*/ -{ - < excl: "!" > -} -TOKEN: -{ - < amps :"&" > -} -TOKEN: -{ - <coma:","> -} -TOKEN: -{ - <coln:";"> -} -TOKEN: -{ - <h:"h"> -} -TOKEN: -{ - <D:"D"> -} -TOKEN: -{ - <X:"X"> -} -TOKEN: -{ - <r:"r"> -} -TOKEN: -{ - <x:"x"> -} -TOKEN: -{ - <HASH:"#"> -} -TOKEN : -{ - < DIGITS: (["0"-"9"])+ > -} -TOKEN : -{ - <c:"c"> - |<n:"n"> - |<o:"o"> - |<s:"s"> - |<p:"p"> -} - -TOKEN : /* ELEMENTS */ -{ - < H: "H" > -| < HE: "He" > -| < LI: "Li" > -| < BE: "Be" > -| < B: "B" > -| < C: "C" > -| < N: "N" > -| < O: "O" > -| < F: "F" > -| < NE: "Ne" > -| < NA: "Na" > -| < MG: "Mg" > -| < AL: "Al" > -| < SI: "Si" > -| < P: "P" > -| < S: "S" > -| < CL: "Cl" > -| < AR: "Ar" > -| < K: "K" > -| < CA: "Ca" > -| < SC: "Sc" > -| < TI: "Ti" > -| < V: "V" > -| < CR: "Cr" > -| < MN: "Mn" > -| < FE: "Fe" > -| < CO: "Co" > -| < NI: "Ni" > -| < CU: "Cu" > -| < ZN: "Zn" > -| < GA: "Ga" > -| < GE: "Ge" > -| < AS: "As" > -| < SE: "Se" > -| < BR: "Br" > -| < KR: "Kr" > -| < RB: "Rb" > -| < SR: "Sr" > -| < Y: "Y" > -| < ZR: "Zr" > -| < NB: "Nb" > -| < MO: "Mo" > -| < TC: "Tc" > -| < RU: "Ru" > -| < RH: "Rh" > -| < PD: "Pd" > -| < AG: "Ag" > -| < CD: "Cd" > -| < IN: "In" > -| < SN: "Sn" > -| < SB: "Sb" > -| < TE: "Te" > -| < I: "I" > -| < XE: "Xe" > -| < CS: "Cs" > -| < BA: "Ba" > -| < LA: "La" > -| < HF: "Hf" > -| < TA: "Ta" > -| < W: "W" > -| < RE: "Re" > -| < OS: "Os" > -| < IR: "Ir" > -| < PT: "Pt" > -| < AU: "Au" > -| < HG: "Hg" > -| < TL: "Tl" > -| < PB: "Pb" > -| < BI: "Bi" > -| < PO: "Po" > -| < AT: "At" > -| < RN: "Rn" > -| < FR: "Fr" > -| < RA: "Ra" > -| < AC: "Ac" > -| < TH: "Th" > -| < PA: "Pa" > -| < U: "U" > -} + + +options { +STATIC=false; +OPTIMIZE_TOKEN_MANAGER=true; +FORCE_LA_CHECK=true; +} + +PARSER_BEGIN(SMARTSParser) +package org.openscience.cdk.smiles.smarts; +import java.io.StringReader; +import org.openscience.cdk.Atom; 
+import org.openscience.cdk.ChemObject; +import org.openscience.cdk.config.IsotopeFactory; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.isomorphism.matchers.IQueryAtom; +import org.openscience.cdk.isomorphism.matchers.IQueryBond; +import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; +import org.openscience.cdk.isomorphism.matchers.SymbolQueryAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.AliphaticAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.AnyAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.AnyOrderQueryBond; +import org.openscience.cdk.isomorphism.matchers.smarts.AromaticAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.AromaticQueryBond; +import org.openscience.cdk.isomorphism.matchers.smarts.DegreeAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.FormalChargeAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.ImplicitHCountAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSBond; +import org.openscience.cdk.isomorphism.matchers.smarts.TotalHCountAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.OrderQueryBond; +import org.openscience.cdk.isomorphism.matchers.smarts.RingAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.TotalRingConnectionAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.OperatorContainer; +import org.openscience.cdk.isomorphism.matchers.smarts.SMARTSOperatorAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.TotalConnectionAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.ColumnAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.BracketAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.RecurseAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.MassAtom; +import org.openscience.cdk.isomorphism.matchers.smarts.TotalValencyAtom; +import 
org.openscience.cdk.tools.LoggingTool; + +/* $RCSfile$ + * $Original Author: egonw $ + * $code extension: sushil ronghe$ + * $Date: 2007-04-18 (Wed, 18 April 2007) $ + * $Revision: 6382 $ + * + * Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * (or see http://www.gnu.org/copyleft/lesser.html) + */ + +/** + * This parser implements a nearly complete subset of the SMARTS syntax as defined on + * <a href="http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html">the + * Daylight website</a>. + * + * <p>Example code using SMARTS substructure search looks like: + * <pre> + * SmilesParser sp = new SmilesParser(); + * AtomContainer atomContainer = sp.parseSmiles("CC(=O)OC(=O)C"); + * QueryAtomContainer query = SMARTSParser.parse("C*C"); + * boolean queryMatch = UniversalIsomorphismTester.isSubgraph(atomContainer, query); + * </pre> + * + * <p>See the cdk.test.smiles.smarts.ParserTest for examples of the implemented + * subset. 
+ * + * @see org.openscience.cdk.isomorphism.matchers.smarts.SMARTSAtom + * + * @author Sushil Ronghe + * @cdk.created 2007-04-29 + * + * @cdk.require ant1.6 + * @cdk.module smarts + * + * @cdk.keyword SMARTS + * @cdk.keyword substructure search + */ +public class SMARTSParser { + + private IQueryAtom previousAtom = null; + private SMARTSBond currentBond = null; + private QueryAtomContainer lastatomcontainer = new QueryAtomContainer (); + private IQueryAtom lastatom; + private QueryAtomContainer container = null; + private QueryAtomContainer atomcontainer = new QueryAtomContainer (); + private QueryAtomContainer smartexpress = new QueryAtomContainer(); + private static boolean bstart=false; + private static boolean cstart=false; + private static boolean rstart=false; + private int columncounter=0; + private OperatorContainer OperMap = new OperatorContainer(); + private final static LoggingTool logger = new LoggingTool(SMARTSParser.class); + + public static QueryAtomContainer parse(String stringToParse) throws CDKException { + QueryAtomContainer container = null; + + StringReader stringReader = new StringReader(stringToParse); + try { + SMARTSParser parser = new SMARTSParser(stringReader); + container = parser.Parse(); + } catch (ParseException exception) { + throw new CDKException("The string " + stringToParse + " is not a valid" + + " SMARTS string: " + exception.getMessage(), exception); + } + catch(Exception ex){ + ex.printStackTrace(); + } + return container; + } + +} +PARSER_END(SMARTSParser) + +JAVACODE +void StartRecursion(){ + rstart = true; +} +JAVACODE +void AddStartBracket(){ + if(rstart){} + else{ + logger.debug("Bracket start is on: "); + container.addAtom(new BracketAtom()); + bstart = true; + } + +} +JAVACODE +void AddEndBracket(){ + logger.debug("Bracket end: "); + if(rstart && !bstart){rstart= false;} + + else if(bstart) + caseBraket(); +} +JAVACODE +void caseBraket(){ +SMARTSOperatorAtom atom; + while(!(container.getLastAtom() instanceof 
BracketAtom)){ + IQueryAtom tempatom = (IQueryAtom)container.getLastAtom(); + atomcontainer.addAtom(tempatom); + container.removeAtom(tempatom); + } + IQueryAtom BA =(IQueryAtom)container.getLastAtom(); + container.removeAtom(BA); + if(OperMap.size()<1){ + atom = new SMARTSOperatorAtom(atomcontainer); + } + else{ + atom = new SMARTSOperatorAtom(atomcontainer,OperMap); + } + container.addAtom(atom); + ///////////////////////////////////////////////////////////////// + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + ///////////////////////////////////////////////////////////////// + bstart = false; + OperMap = new OperatorContainer(); + atomcontainer = new QueryAtomContainer(); +} +JAVACODE +void AddStartColumn(){ + logger.debug("Column start is on: "); + if(!cstart){ + cstart = true; + lastatom = (IQueryAtom)container.getLastAtom(); + columncounter++; + } + if(cstart){ + lastatomcontainer.addAtom((IQueryAtom)container.getLastAtom()); + columncounter++; + } + +} +JAVACODE +void AddEndColumn(){ + logger.debug("Column end is on: "); + columncounter--; + if(rstart && !cstart){ + rstart= false; + } + else if(cstart){ + if(columncounter == 1){ + previousAtom = lastatom; + lastatomcontainer = new QueryAtomContainer(); + cstart = false; + } + else if(lastatomcontainer.getAtomCount() > 0){ + previousAtom = (IQueryAtom)lastatomcontainer.getLastAtom(); + + } + else{ + cstart = false; + } + } + +} + +JAVACODE +void AddMassAtom(){ + Token tok = getToken(-1); + logger.debug("Adding Mass Atom: "+ tok.image); + MassAtom atom = new MassAtom(Integer.parseInt(tok.image)); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE +void AddAnyAtom() { + Token tok = getToken(-1); + logger.debug("Adding SMARTS atom: " + tok.image); + AnyAtom atom = new AnyAtom(); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, 
atom); + previousAtom = atom; + } +} + +JAVACODE +void AddAliphaticAtom() { + logger.debug("Adding aliphatic atom"); + AliphaticAtom atom = new AliphaticAtom(); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } + + } + +JAVACODE +void AddELementAromaticAtom(){ + Token tok = getToken(-1); + logger.debug("Adding aromatic atom"); + String ss = new String(tok.image); + AromaticAtom atom = new AromaticAtom(new Atom(ss.toUpperCase())); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} + +JAVACODE +void AddAromaticAtom() { + logger.debug("Adding aromatic atom"); + AromaticAtom atom = new AromaticAtom(); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } + +} + +JAVACODE +void AddCurrentBond(IQueryAtom atom, IQueryAtom atom2) { + if (currentBond == null) { + // add default bond + currentBond = new AnyOrderQueryBond(); + } + Atom[] atoms = new Atom[2]; + atoms[0] = (Atom)atom; + atoms[1] = (Atom)atom2; + currentBond.setAtoms(atoms); + container.addBond(currentBond); + currentBond = null; +} + +JAVACODE +void AddNewElementAtom() { + Token tok = getToken(-1); + logger.debug("Adding SMARTS atom: " + tok.image); + SymbolQueryAtom atom = new SymbolQueryAtom(new Atom(tok.image)); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE +void AddTotalConnectionAtom(){ + TotalConnectionAtom atom; + Token tok = getToken(-1); + logger.debug("adding smarts atoms:" + tok.image); + String ss = new String(tok.image); + if(ss=="X"){ + atom = new TotalConnectionAtom(); + } + else{ + atom=new TotalConnectionAtom(Integer.parseInt(tok.image)); + } + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + 
previousAtom = atom; + } + +} + + +JAVACODE +void AddNewNumberAtom() { + Token tok = getToken(0); + logger.debug("Adding SMARTS atom: " + tok.image); + try { + String sym = IsotopeFactory.getInstance(new ChemObject().getBuilder()).getElement(Integer.parseInt(tok.image)).getSymbol(); + SymbolQueryAtom atom = new SymbolQueryAtom(new Atom(sym)); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } + } catch (Exception exp) { + logger.debug("Exception caught: " + exp); + } +} + +JAVACODE +void AddNewDegreeAtom() { + DegreeAtom atom; + Token tok = getToken(-1); + logger.debug("Adding SMARTS degree atom: " + tok.image); + String ss = new String(tok.image); + if(ss.equals("D")){ + atom=new DegreeAtom(); + } + else{ + atom=new DegreeAtom(Integer.parseInt(tok.image)); + } + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} + +JAVACODE +void AddNewFormalChargeAtom() { + Token tok = getToken(-1); + FormalChargeAtom atom; + logger.debug("Adding SMARTS formal charge atom: " + tok.image); + String ss = new String(tok.image); + if(ss.equals("+") || ss.equals("-")) + atom = new FormalChargeAtom(1); + else + atom = new FormalChargeAtom(Integer.parseInt(tok.image)); + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE + +void AddNewRingAtom(){ +RingAtom atom; +Token tt = getToken(-1); +logger.debug("adding SMARTS Ring atom: "+ tt.image); +String ss = new String(tt.image); +if(ss.equals("r")){ + atom=new RingAtom(); +} +else{ + atom=new RingAtom(Integer.parseInt(tt.image)); +} +container.addAtom(atom); +if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE + +void AddTotalRingConnectionAtom(){ +TotalRingConnectionAtom atom; +Token tt = getToken(-1); 
+logger.debug("adding SMARTS format charge atom: "+ tt.image); +String ss = new String(tt.image); +if(ss.equals("x")){ + atom=new TotalRingConnectionAtom(); +} +else{ + atom=new TotalRingConnectionAtom(Integer.parseInt(tt.image)); +} +container.addAtom(atom); +if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE +int getSignCount(String sss) { + int count=0; + for (int i=0;i<sss.length();i++){ + if(sss.charAt(i)=='+'|sss.charAt(i)=='-'){ + count++; + } + } + return count; +} + +JAVACODE +void AddNewImplicitHCountAtom() { + ImplicitHCountAtom atom; + Token tok = getToken(-1); + logger.debug("Adding SMARTS implicit Hcount atom: " + tok.image); + String ss = new String(tok.image); + if(ss.equals("h")){ + atom=new ImplicitHCountAtom(); + } + else{ + atom=new ImplicitHCountAtom(Integer.parseInt(tok.image)); + } + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} + +JAVACODE +void AddNewTotalHCountAtom() { + TotalHCountAtom atom; + Token tok = getToken(-1); + logger.debug("Adding SMARTS total Hcount atom: " + tok.image); + String ss = new String(tok.image); + if(ss.equals("H")){ + atom=new TotalHCountAtom(); + } + else{ + atom=new TotalHCountAtom(Integer.parseInt(tok.image)); + } + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } +} +JAVACODE +void AddValencyAtom(){ + TotalValencyAtom atom; + Token tok = getToken(-1); + logger.debug("Adding SMARTS total Hcount atom: " + tok.image); + String ss = new String(tok.image); + if(ss.equals("v")){ + atom=new TotalValencyAtom(); + } + else{ + atom=new TotalValencyAtom(Integer.parseInt(tok.image)); + } + container.addAtom(atom); + if(!bstart ){ + if (previousAtom != null) AddCurrentBond(previousAtom, atom); + previousAtom = atom; + } + +} +JAVACODE +void AddOperator(){ + Token tok = getToken(-1); + 
try{ + OperMap.addElement(tok.image); + } + catch(java.lang.NullPointerException NPT){ + //System.out.println("error caught"); + NPT.printStackTrace(); + } + +} +JAVACODE +void AddOperatorAtom(){ + Token tok = getToken(-1); + logger.debug("Adding SMARTS total OP atom: " + tok.image); + container.setOperator("!"); + +} +JAVACODE +void SetCurrentBondToOrderQuery(double order) { + currentBond = new OrderQueryBond(); + currentBond.setOrder(order); +} + +JAVACODE +void SetCurrentBondToAnyOrder() { + currentBond = new AnyOrderQueryBond(); +} + +JAVACODE +void SetCurrentBondToAromatic() { + currentBond = new AromaticQueryBond(); +} + +QueryAtomContainer Parse() : +{} +{ + { + container = new QueryAtomContainer(); + previousAtom = null; + } + ((SMARTS())+ |ConsumeToken()) <EOF> + { return container; } +} + +void SMARTS() : +{} +{ + (AtomExpression()) [ BondExpression() ] +} +void BondExpression() : +{} +{ + + ("=" SetCurrentBondToOrderQuery(2.0) ) | + (<HASH> SetCurrentBondToOrderQuery(3.0) ) | + (":" SetCurrentBondToAromatic() ) | + ("~" SetCurrentBondToAnyOrder() ) +} +void AtomExpression(): +{} +{ + ( + "*" AddAnyAtom() | + "a" AddAromaticAtom() | + "A" AddAliphaticAtom() | + Recursion() | + Element() | + StartBracket() | + EndBracket() | + StartColumn() | + EndColumn() | + Degree() | + TotalHCount() | + ImplicitHCount() | + TotalRingConnection() | + RingAtom() | + AtomicNumber() | + AromaticAtms() | + TotalConnectionAtom() | + OperatorAtom() | + Operator() | + ChargeAtom() | + MassAtom() + + ) +} +void ConsumeToken(): +{} +{ + <DOT> +} +void OperatorAtom(): +{} +{ + <excl> AtomExpression() AddOperatorAtom() +} +void MassAtom(): +{} +{ + <DIGIT> AddMassAtom() +} + +void StartBracket(): +{} +{ + <STARTOFBRACKET> AddStartBracket() +} +void Recursion(): +{} +{ + <$> [<STARTOFBRACKET>|<STARTOFCOLUMN>] StartRecursion() +} +void EndBracket(): +{} +{ + <ENDOFBRACKET> AddEndBracket() +} +void StartColumn(): +{} +{ + <STARTOFCOLUMN> AddStartColumn() +} +void EndColumn(): +{} +{ 
+    <ENDOFCOLUMN> AddEndColumn()
+}
+
+/* <HASH> followed by <DIGIT> or <DIGITS>, then AddNewNumberAtom(). */
+void AtomicNumber() :
+{}
+{
+    <HASH> (<DIGIT> | <DIGITS>) AddNewNumberAtom()
+}
+
+/* <H> with an optional single <DIGIT>, then AddNewTotalHCountAtom(). */
+void TotalHCount() :
+{}
+{
+    <H> [<DIGIT>] AddNewTotalHCountAtom()
+}
+
+/* A two-character or one-character element symbol, then AddNewElementAtom(). */
+void Element() :
+{}
+{
+    (TwoCharElement() | OneCharElement())
+    AddNewElementAtom()
+}
+
+/*
+ * "+" or "-" with an optional single <DIGIT>, then AddNewFormalChargeAtom().
+ * The unused local Token declaration was removed.
+ */
+void ChargeAtom():
+{}
+{
+    ("+"|"-") [<DIGIT>] AddNewFormalChargeAtom()
+}
+
+/*
+ * <v> with an optional single <DIGIT>, then AddValencyAtom().
+ * NOTE(review): this production is not listed among the alternatives of
+ * AtomExpression(), so it appears unreachable from Parse() - confirm whether
+ * it should be added there.
+ */
+void ValenyAtom():
+{}
+{
+    <v> [<DIGIT>] AddValencyAtom()
+}
+
+/* <h> with an optional single <DIGIT>, then AddNewImplicitHCountAtom(). */
+void ImplicitHCount() :
+{}
+{
+    <h> [<DIGIT>] AddNewImplicitHCountAtom()
+}
+
+/* <D> with an optional single <DIGIT>, then AddNewDegreeAtom(). */
+void Degree() :
+{}
+{
+    <D> [<DIGIT>] AddNewDegreeAtom()
+}
+
+/* <r> with an optional single <DIGIT>, then AddNewRingAtom(). */
+void RingAtom() :
+{}
+{
+    <r> [<DIGIT>] AddNewRingAtom()
+}
+
+/* <x> with an optional single <DIGIT>, then AddTotalRingConnectionAtom(). */
+void TotalRingConnection():
+{}
+{
+    <x> [<DIGIT>] AddTotalRingConnectionAtom()
+}
+
+/* <X> with an optional single <DIGIT>, then AddTotalConnectionAtom(). */
+void TotalConnectionAtom():
+{}
+{
+    <X> [<DIGIT>] AddTotalConnectionAtom()
+}
+
+/* Any of the two-letter element symbol tokens. */
+void TwoCharElement() :
+{}
+{
+    <HE> | <LI> | <BE> | <NE> | <NA> | <MG> | <AL> | <SI> | <AR> | <CA> | <SC> |
+    <TI> | <CR> | <MN> | <FE> | <CO> | <NI> | <CU> | <ZN> | <GA> | <GE> | <AS> |
+    <SE> | <BR> | <KR> | <RB> | <SR> | <ZR> | <NB> | <MO> | <TC> | <RU> | <RH> |
+    <PD> | <AG> | <CD> | <IN> | <SN> | <SB> | <TE> | <XE> | <CS> | <BA> | <LA> |
+    <HF> | <TA> | <RE> | <OS> | <IR> | <PT> | <AU> | <HG> | <TL> | <PB> | <BI> |
+    <PO> | <AT> | <RN> | <FR> | <RA> | <AC> | <TH> | <PA> | <CL>
+}
+
+/*
+ * Any of the one-letter element symbol tokens. <W> is included because the
+ * W token is declared in the element token list but was previously not
+ * accepted by any production. <H> is deliberately left to TotalHCount().
+ */
+void OneCharElement() :
+{}
+{
+    <B> | <C> | <N> | <O> | <F> | <P> | <S> | <K> | <V> | <Y> | <I> | <W> | <U>
+}
+
+/* One of <coln>, <coma> or <amps>, then AddOperator(). */
+void Operator():
+{}
+{
+    (<coln>|<coma>|<amps>) AddOperator()
+}
+
+/* One of the lower-case tokens c, n, o, p, s, then AddELementAromaticAtom(). */
+void AromaticAtms():
+{}
+{
+    (<c>|<n>|<o>|<p>|<s>) AddELementAromaticAtom()
+}
+
+TOKEN:
+{
+    < DIGIT: ["0"-"9"] >
+}
+
+TOKEN:
+{
+    <SIGN: ["+","-"] >
+}
+TOKEN:
+{
+    <SIGNS: (["+","-"])+>
+}
+TOKEN:
+{
+    <$:"$">
+}
+TOKEN: /* Operator */
+{
+    < excl: "!"
>
+}
+
+/* Remaining logical operators. Note: the token named "coln" matches ";". */
+TOKEN:
+{
+    < amps: "&" >
+  | < coma: "," >
+  | < coln: ";" >
+}
+
+/* Single-character tokens used by the atom-level productions, plus "." and "#". */
+TOKEN:
+{
+    < h: "h" >
+  | < D: "D" >
+  | < X: "X" >
+  | < r: "r" >
+  | < DOT: "." >
+  | < x: "x" >
+  | < v: "v" >
+  | < HASH: "#" >
+}
+
+/* Grouping symbols: "[" "]" are the bracket tokens, "(" ")" the "column" tokens. */
+TOKEN:
+{
+    < STARTOFBRACKET: "[" >
+  | < ENDOFBRACKET: "]" >
+  | < STARTOFCOLUMN: "(" >
+  | < ENDOFCOLUMN: ")" >
+}
+
+/* One or more decimal digits. */
+TOKEN :
+{
+    < DIGITS: (["0"-"9"])+ >
+}
+
+/* Lower-case symbols accepted by AromaticAtms(). */
+TOKEN :
+{
+    < c: "c" >
+  | < n: "n" >
+  | < o: "o" >
+  | < s: "s" >
+  | < p: "p" >
+}
+
+TOKEN : /* ELEMENTS */
+{
+    < H: "H" >   | < HE: "He" >
+  | < LI: "Li" > | < BE: "Be" > | < B: "B" >   | < C: "C" >   | < N: "N" >
+  | < O: "O" >   | < F: "F" >   | < NE: "Ne" >
+  | < NA: "Na" > | < MG: "Mg" > | < AL: "Al" > | < SI: "Si" > | < P: "P" >
+  | < S: "S" >   | < CL: "Cl" > | < AR: "Ar" >
+  | < K: "K" >   | < CA: "Ca" > | < SC: "Sc" > | < TI: "Ti" > | < V: "V" >
+  | < CR: "Cr" > | < MN: "Mn" > | < FE: "Fe" > | < CO: "Co" > | < NI: "Ni" >
+  | < CU: "Cu" > | < ZN: "Zn" > | < GA: "Ga" > | < GE: "Ge" > | < AS: "As" >
+  | < SE: "Se" > | < BR: "Br" > | < KR: "Kr" >
+  | < RB: "Rb" > | < SR: "Sr" > | < Y: "Y" >   | < ZR: "Zr" > | < NB: "Nb" >
+  | < MO: "Mo" > | < TC: "Tc" > | < RU: "Ru" > | < RH: "Rh" > | < PD: "Pd" >
+  | < AG: "Ag" > | < CD: "Cd" > | < IN: "In" > | < SN: "Sn" > | < SB: "Sb" >
+  | < TE: "Te" > | < I: "I" >   | < XE: "Xe" >
+  | < CS: "Cs" > | < BA: "Ba" > | < LA: "La" > | < HF: "Hf" > | < TA: "Ta" >
+  | < W: "W" >   | < RE: "Re" > | < OS: "Os" > | < IR: "Ir" > | < PT: "Pt" >
+  | < AU: "Au" > | < HG: "Hg" > | < TL: "Tl" > | < PB: "Pb" > | < BI: "Bi" >
+  | < PO: "Po" > | < AT: "At" > | < RN: "Rn" >
+  | < FR: "Fr" > | < RA: "Ra" > | < AC: "Ac" > | < TH: "Th" > | < PA: "Pa" >
+  | < U: "U" >
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |