From: <tho...@us...> - 2009-03-02 20:48:43
|
Revision: 14333 http://cdk.svn.sourceforge.net/cdk/?rev=14333&view=rev Author: thomaskuhn Date: 2009-03-02 20:48:39 +0000 (Mon, 02 Mar 2009) Log Message: ----------- Extracts the intermed id's from a mol file Added Paths: ----------- cdk-taverna/trunk/src/main/java/org/openscience/cdk/applications/taverna/tools/ExtractIntermedIDFromMolFile.java Added: cdk-taverna/trunk/src/main/java/org/openscience/cdk/applications/taverna/tools/ExtractIntermedIDFromMolFile.java =================================================================== --- cdk-taverna/trunk/src/main/java/org/openscience/cdk/applications/taverna/tools/ExtractIntermedIDFromMolFile.java (rev 0) +++ cdk-taverna/trunk/src/main/java/org/openscience/cdk/applications/taverna/tools/ExtractIntermedIDFromMolFile.java 2009-03-02 20:48:39 UTC (rev 14333) @@ -0,0 +1,193 @@ +/* $RCSfile$ + * $Author: egonw $ + * $Date: 2008-05-05 12:58:11 +0200 (Mo, 05 Mai 2008) $ + * $Revision: 10819 $ + * + * Copyright (C) 2008 by Thomas Kuhn <tho...@us...> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may distribute + * with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.openscience.cdk.applications.taverna.tools; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.embl.ebi.escience.baclava.DataThing; +import org.openscience.cdk.applications.taverna.scuflworkers.cdk.CDKLocalWorker; + +import uk.ac.soton.itinnovation.taverna.enactor.entities.TaskExecutionException; + +/** + * Class which implements a local worker for the cdk-taverna project which + * provides the possibility to extract the orginial ID of the molecule. + * In this specific case will it be the EXNO. This is an unique id used within the Chapman and Hall database. + * + * + * @author Thomas Kuhn + * + */ +public class ExtractIntermedIDFromMolFile implements CDKLocalWorker { + + // Region: class variables + + /** + * Names of the input ports + */ + private String[] inputNames = new String[] { "inputMOLFileList", "Origin" }; + /** + * Names of the output ports + */ + private String[] outputNames = new String[] { "outputMOLFileList", "IDList", "Origin" }; + + // Region: XML tags/attributes + + // End of region + + // End of region + + // Region: get / set properties + + // End of region + + // Region: public methods + + // Region: input and output definition + + /** + * Get the input port names + */ + public String[] inputNames() { + return inputNames; + } + + /** + * Get the input port types + */ + public String[] inputTypes() { + return new String[] { CDKLocalWorker.STRING_ARRAY, CDKLocalWorker.STRING_ARRAY }; + } + + /** + * Get the output port names + */ + public String[] outputNames() { + return outputNames; + } + + /** + * Get the output port types + */ + public String[] outputTypes() { + return new String[] { CDKLocalWorker.STRING_ARRAY, CDKLocalWorker.STRING_ARRAY, CDKLocalWorker.STRING_ARRAY}; + } + + // End of region + + // Region: local worker execution + + /** + * Executes this processor. This worker gets a list of strings. Each string + * contains a MDL MOL file. Each MOL file contains the original ID of this molecule. + * This ID in this specific case is it the EXNO is used as unique identifier within the Chapman and Hall Database + * + * @param inputs + * The input map which contains the input data + * @return The map which contains the result of this worker + * @throws TaskExecutionException + * If the execution of this method failed. + */ + @SuppressWarnings("unchecked") + public Map<String, DataThing> execute(Map<String, DataThing> inputs) + throws TaskExecutionException { + List<String> inputList = null; + List<String> originList = null; + List<String> originResultList = null; + boolean addOrigin = false; + boolean onlyOneOrigin = false; + + Map<String, DataThing> outputs = new HashMap<String, DataThing>(); + if (inputs.get(inputNames[0]) != null) { + inputList = (List<String>)((DataThing)inputs.get(inputNames[0])).getDataObject(); + } else { + return null; + } + if (inputs.get(inputNames[1]) != null) { + originList = (List<String>)((DataThing)inputs.get(inputNames[1])).getDataObject(); + if (originList != null && originList.size() > 0) { + addOrigin = true; + } + if (originList.size() == 1) { + onlyOneOrigin = true; + } + } + List<String> listOfIDNumbers = new ArrayList<String>(100); + + try { + originResultList = new ArrayList<String>(); +// Pattern pattern = Pattern.compile("\\n>\\s<IMD-No>\\s+\\(\\D+\\.?\\D?\\)\\n(\\D{3}\\d\\d-?\\D?)\\n\\n>\\s<"); +// Pattern pattern = Pattern.compile("\\n>\\s<IMD-No>\\s+\\(\\D+\\.?\\D?\\)\\n(IMD-\\D{6})\\n\\n>\\s<"); + Pattern pattern = Pattern.compile("\\n>\\s+<IMD-No>\\s+\\(\\d+\\.?\\d*\\)\\n(IMD-\\d{6})\\n\\n>"); +// Pattern pattern = Pattern.compile("(IMD-\\d{6})"); + + Matcher matcher; + + for (String molFile : inputList) { + matcher = pattern.matcher(molFile); + if (matcher.find()) { + listOfIDNumbers.add(matcher.group(1)); + if (onlyOneOrigin) { + originResultList.add(originList.get(0)); + } + } + else + { + listOfIDNumbers.add("ID_Not_Found"); + } + } + if (addOrigin) { + if (!onlyOneOrigin) { + originResultList = originList; + } + } + } catch (Exception exception) { + throw new TaskExecutionException(exception); + } + outputs.put(outputNames[0], new DataThing(inputList)); + outputs.put(outputNames[1], new DataThing(listOfIDNumbers)); + outputs.put(outputNames[2], new DataThing(originResultList)); + return outputs; + } + + // End of region + + // Region: XML processing methods + + // End of region + + // End of region + + // Region: private methods + + // End of region + +} Property changes on: cdk-taverna/trunk/src/main/java/org/openscience/cdk/applications/taverna/tools/ExtractIntermedIDFromMolFile.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |