From: <mig...@us...> - 2006-05-26 12:08:22
|
Revision: 6317 Author: miguelrojasch Date: 2006-05-26 05:06:58 -0700 (Fri, 26 May 2006) ViewCVS: http://svn.sourceforge.net/cdk/?rev=6317&view=rev Log Message: ----------- new package for weka libio. And tests Modified Paths: -------------- trunk/cdk/build.xml Added Paths: ----------- trunk/cdk/jar/weka.jar trunk/cdk/src/META-INF/libio-weka.cdkdepends trunk/cdk/src/META-INF/libio-weka.datafiles trunk/cdk/src/META-INF/libio-weka.libdepends trunk/cdk/src/META-INF/test-libio-weka.cdkdepends trunk/cdk/src/META-INF/test-libio-weka.datafiles trunk/cdk/src/META-INF/test-libio-weka.devellibdepends trunk/cdk/src/META-INF/test-libio-weka.libdepends trunk/cdk/src/data/arff/ trunk/cdk/src/data/arff/Table1.arff trunk/cdk/src/data/arff/Table2.arff trunk/cdk/src/org/openscience/cdk/libio/weka/ trunk/cdk/src/org/openscience/cdk/libio/weka/Weka.java trunk/cdk/src/org/openscience/cdk/test/libio/weka/ trunk/cdk/src/org/openscience/cdk/test/libio/weka/WekaTest.java Modified: trunk/cdk/build.xml =================================================================== --- trunk/cdk/build.xml 2006-05-25 13:49:35 UTC (rev 6316) +++ trunk/cdk/build.xml 2006-05-26 12:06:58 UTC (rev 6317) @@ -593,6 +593,7 @@ <antcall target="compile-module"><param name="module" value="extra"/></antcall> <antcall target="compile-module"><param name="module" value="smiles"/></antcall> <antcall target="compile-module"><param name="module" value="libio-cml"/></antcall> + <antcall target="compile-module"><param name="module" value="libio-weka"/></antcall> <antcall target="compile-module"><param name="module" value="pdb-cml"/></antcall> <antcall target="compile-module"><param name="module" value="reaction"/></antcall> <antcall target="compile-module"><param name="module" value="charges"/></antcall> @@ -616,6 +617,7 @@ <antcall target="compile-module"><param name="module" value="test-io"/></antcall> <antcall target="compile-module"><param name="module" value="test-extra"/></antcall> <antcall target="compile-module"><param name="module" value="test-smiles"/></antcall> + <antcall target="compile-module"><param name="module" value="test-libio-weka"/></antcall> <antcall target="compile-module"><param name="module" value="test-reaction"/></antcall> <antcall target="compile-module"><param name="module" value="test-forcefield"/></antcall> <antcall target="compile-module"><param name="module" value="test-valencycheck"/></antcall> @@ -644,6 +646,7 @@ <includesfile name="${metainf}/io-jmol.libdepends"/> <includesfile name="${metainf}/io.libdepends"/> <includesfile name="${metainf}/libio-cml.libdepends"/> + <includesfile name="${metainf}/libio-weka.libdepends"/> <includesfile name="${metainf}/nonotify.libdepends"/> <includesfile name="${metainf}/pdb-cml.libdepends"/> <includesfile name="${metainf}/pdb.libdepends"/> @@ -675,6 +678,7 @@ <include name="cdk-qsar.jar"/> <include name="cdk-qsar-cml.jar"/> <include name="cdk-qsar-pdb.jar"/> + <include name="cdk-libio-weka.jar"/> <include name="cdk-reaction.jar"/> <include name="cdk-render.jar"/> <include name="cdk-standard.jar"/> @@ -854,7 +858,8 @@ <antcall target="test-module"><param name="module" value="forcefield"/></antcall> <antcall target="test-module"><param name="module" value="extra"/></antcall> <antcall target="test-module"><param name="module" value="valencycheck"/></antcall> - <antcall target="test-module"><param name="module" value="reaction"/></antcall> + <antcall target="test-module"><param name="module" value="reaction"/></antcall> + <antcall target="test-module"><param name="module" value="libio-weka"/></antcall> <antcall target="test-module"><param name="module" value="smiles"/></antcall> <antcall target="test-module"><param name="module" value="experimental"/></antcall> <antcall target="test-module"><param name="module" value="qsar"/></antcall> Added: trunk/cdk/jar/weka.jar =================================================================== (Binary files differ) Property changes on: trunk/cdk/jar/weka.jar ___________________________________________________________________ Name: svn:mime-type + application/octet-stream Added: trunk/cdk/src/META-INF/libio-weka.cdkdepends =================================================================== --- trunk/cdk/src/META-INF/libio-weka.cdkdepends (rev 0) +++ trunk/cdk/src/META-INF/libio-weka.cdkdepends 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,2 @@ + + Added: trunk/cdk/src/META-INF/libio-weka.datafiles =================================================================== --- trunk/cdk/src/META-INF/libio-weka.datafiles (rev 0) +++ trunk/cdk/src/META-INF/libio-weka.datafiles 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1 @@ +data/arff/** Added: trunk/cdk/src/META-INF/libio-weka.libdepends =================================================================== --- trunk/cdk/src/META-INF/libio-weka.libdepends (rev 0) +++ trunk/cdk/src/META-INF/libio-weka.libdepends 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,2 @@ +weka.jar + Added: trunk/cdk/src/META-INF/test-libio-weka.cdkdepends =================================================================== --- trunk/cdk/src/META-INF/test-libio-weka.cdkdepends (rev 0) +++ trunk/cdk/src/META-INF/test-libio-weka.cdkdepends 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,4 @@ +cdk-core.jar +cdk-data.jar +cdk-libio-weka.jar +cdk-test-core.jar Added: trunk/cdk/src/META-INF/test-libio-weka.datafiles =================================================================== --- trunk/cdk/src/META-INF/test-libio-weka.datafiles (rev 0) +++ trunk/cdk/src/META-INF/test-libio-weka.datafiles 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1 @@ +data/arff/** Added: trunk/cdk/src/META-INF/test-libio-weka.devellibdepends =================================================================== --- trunk/cdk/src/META-INF/test-libio-weka.devellibdepends (rev 0) +++ trunk/cdk/src/META-INF/test-libio-weka.devellibdepends 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,2 @@ +junit.jar + Added: trunk/cdk/src/META-INF/test-libio-weka.libdepends =================================================================== --- trunk/cdk/src/META-INF/test-libio-weka.libdepends (rev 0) +++ trunk/cdk/src/META-INF/test-libio-weka.libdepends 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,2 @@ +weka.jar + Added: trunk/cdk/src/data/arff/Table1.arff =================================================================== --- trunk/cdk/src/data/arff/Table1.arff (rev 0) +++ trunk/cdk/src/data/arff/Table1.arff 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,27 @@ +@relation 'table1' +@attribute 'EffectivePolarizabilityDescriptor' real +@attribute 'SigmaElectronegativityDescriptor' real +@attribute 'ProtonTotalPartialChargeDescriptor' real +@attribute 'predicted valency' real +@data +0.39 , 9.62 , -0.15 ,12.74, +1.64 , 9.77 , -0.13 ,11.3 , +1.06 , 12.56 , -0.16 ,13.0 , +1.26 , 10.51 , -0.05 ,12.6 , +1.45 , 10.15 , -0.09 ,11.9 , +2.27 , 9.8 , -0.13 ,11.01, +1.89 , 10.54 , -0.05 ,12.5 , +1.06 , 10.15 , 0.01 ,12.9 , +2.59 , 9.81 , -0.13 ,10.88, +2.74 , 9.81 , -0.13 ,10.84, +2.79 , 9.85 , -0.12 ,11.21, +0.39 , 9.14 , -0.11 ,11.67, +1.64 , 9.28 , -0.1 ,10.53, +1.06 , 10.34 , 0.03 ,12.0 , +1.06 , 10.15 , 0.01 ,11.83, +2.27 , 9.31 , -0.09 ,10.28, +2.59 , 9.31 , -0.09 ,10.18, +2.74 , 9.31 , -0.09 ,10.15, +2.79 , 9.35 , -0.09 ,10.43, +0.39 , 9.06 , -0.11 ,10.38, +1.64 , 9.2 , -0.09 ,9.5 , Added: trunk/cdk/src/data/arff/Table2.arff =================================================================== --- trunk/cdk/src/data/arff/Table2.arff (rev 0) +++ trunk/cdk/src/data/arff/Table2.arff 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,7 @@ +@relation 'table1' +@attribute 'EffectivePolarizabilityDescriptor' real +@attribute 'SigmaElectronegativityDescriptor' real +@attribute 'ProtonTotalPartialChargeDescriptor' real +@attribute 'ProtonTotalPa' real +@data +0.39 , 9.06 , -0.11,0.0 Added: trunk/cdk/src/org/openscience/cdk/libio/weka/Weka.java =================================================================== --- trunk/cdk/src/org/openscience/cdk/libio/weka/Weka.java (rev 0) +++ trunk/cdk/src/org/openscience/cdk/libio/weka/Weka.java 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,226 @@ +/* $RCSfile$ + * $Author: egonw $ + * $Date: 2006-05-01 10:43:42 +0200 (Mo, 01 Mai 2006) $ + * $Revision: 6095 $ + * + * Copyright (C) 2003-2006 The Chemistry Development Kit (CDK) project + * + * Contact: cdk...@li... + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + */ +package org.openscience.cdk.libio.weka; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; + +import weka.classifiers.Classifier; +import weka.core.Instance; +import weka.core.Instances; + + +/** + * <p>Weka class is a library which use the program WEKA: a Machine Learning Project.</p> + * To inizalizate weka class is typically done like: <pre> + * Classifier lr = new LinearRegression(); + * weka.setDataset("/some/where/dataTraining.arff", lr); + * String testARFF = "/some/where/dataTest.arff"; + * double[] result = weka.getPrediction(testARFF); + * </pre> + * You have also the possibility to introduce directly values, done like: + * <pre> + * Classifier lr = new LinearRegression(); + * String[] attrib = {"aX","bX","cX","PY" }; + * int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC, }; + * double[][] x = {{0.39,9.62 ,-0.15 }, + * {1.64,9.77 ,-0.13}, + * {1.06,12.56,-0.16}, + * double[] y = {12.74,11.3 ,13.0}; + * weka.setDataset(attrib, typAttrib, y, x, lr); + * double[] testX = {0.39,9.06,-0.11}; + * double resultY = weka.getPrediction(testX); + * </pre> + * @author Miguel Rojas + * @cdk.created 2006-05-23 + * @cdk.module libio-weka + * @cdk.keyword weka,Machine Learning + * @cdk.depends weka.jar + */ +public class Weka { + + public static final int NUMERIC = 0; + public static final int NOMINAL = 1; + + /** type of classifier*/ + private Classifier classifier; + + private Instances labeled; + /** + * Constructor of the Weka + */ + public Weka() { + } + /** + * Set the file format arff to analize which contains the dataset and the type of classifier + * + * @param setDataset Path of the dataset file format arff to train + * @param classifier Type of Classifier + * @return The Instances value + * @throws Exception + */ + public Instances setDataset(String pathTable, Classifier classifier) throws Exception{ + this.classifier = classifier; + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathTable); + Reader insr = new InputStreamReader(ins); + Instances instances = new Instances(new BufferedReader(insr)); + instances.setClassIndex(instances.numAttributes() - 1); + + labeled = new Instances(instances); + classifier.buildClassifier(labeled); + + for (int i = 0; i < instances.numInstances(); i++) { + double clsLabel = classifier.classifyInstance(instances.instance(i)); + labeled.instance(i).setClassValue(clsLabel); + } + return labeled; + } + /** + * + * Set the array which contains the dataset and the type of classifier. This method + * will be used for classifier which work with numerical values. + * + * @param attrib String with the attribut names + * @param typAttrib Attribute type: NUMERICAL or NOMINAL. + * @param x An array of independent variables. The observations should be in the rows + * and the variables should be in the columns + * @param y An array containing the dependent variable. It is possible numeric or string. + * @param classifier Type of Classifier + * @return The Instances value + * @throws Exception + */ + public Instances setDataset(String[] attrib, int[] typAttrib, Object[]y, double[][] x, Classifier classifier) throws Exception{ + return setDataset(attrib, typAttrib ,null,y,x,classifier); + } + /** + * + * Set the array which contains the dataset and the type of classifier.This method + * will be used for classifier which work with String values. + * + * @param attrib String with the attribut names. + * @param typAttrib Attribute type: NUMERICAL or NOMINAL. + * @param classAttrib String with the attribut class. + * @param x An array of independent variables. The observations should be in the rows + * and the variables should be in the columns + * @param y An array containing the dependent variable. It is possible numeric or string. + * @param classifier Type of classifier + * @return The Instances value + * @throws Exception + */ + public Instances setDataset(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, double[][] x, Classifier classifier) throws Exception{ + this.classifier = classifier; + Reader reader = createAttributes(attrib,typAttrib,classAttrib,y,x); + Instances instances = new Instances(reader); + instances.setClassIndex(instances.numAttributes() - 1); + labeled = new Instances(instances); + classifier.buildClassifier(labeled); + + for (int i = 0; i < instances.numInstances(); i++) { + double clsLabel = classifier.classifyInstance(instances.instance(i)); + labeled.instance(i).setClassValue(clsLabel); + } + return labeled; + } + /** + * Return of the predicted value + * + * @param value An array of independent variables which contians the values with whose to test + * @return Result of the prediction + * @throws Exception + */ + public double getPrediction(double[] value) throws Exception{ + Instance instance = new Instance(labeled.numAttributes()); + instance.setDataset(labeled); + for(int i = 0 ; i < value.length ; i++) + instance.setValue(i, value[i]); +// instance.setValue(value.length, 0.0); + return classifier.classifyInstance(instance); + } + /** + * Return of the predicted value + * + * @param pathARRF path of the file format arff which contians the values with whose to test. + * @return Result of the prediction. + * @throws Exception + */ + public double[] getPrediction(String pathARFF) throws Exception{ + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(pathARFF); + Reader insr = new InputStreamReader(ins); + Instances test = new Instances(new BufferedReader(insr)); + double[] result = new double[test.numInstances()]; + for(int i = 0 ; i < test.numInstances(); i++){ + result[i] = classifier.classifyInstance(test.instance(i)); + } + return result; + } + /** + * create a Reader with necessary attributes to iniziate a Instances for weka. + * + * @param attrib String with the attribut class + * @param typAttrib Attribute type: NOMINAL or NUMERIC. + * @param y An array containing the independent variable. + * @param x An array of dependent variables. + * @return The Reader containing the attributes + * @throws IOException + */ + private Reader createAttributes(String[] attrib, int[] typAttrib, String[] classAttrib, Object[]y, double[][] x) throws IOException{ + String string ="@relation table1 \n"; + for(int i = 0; i < attrib.length ; i++){ + string += ("@attribute "+attrib[i]); + if(typAttrib[i] == NUMERIC) + string += " numeric \n"; + else if(typAttrib[i] == NOMINAL) + string += " string \n"; + } + + if(classAttrib != null){ + string += "@attribute class "; + string += "{"; + for(int i = 0; i < classAttrib.length ; i++){ + string += (classAttrib[i]); + if(i != classAttrib.length -1) + string += ","; + } + string += "}\n"; + } + + string += ("@data "); + if(x != null && y != null){ + for(int j = 0 ; j < x.length; j++){ + for(int i = 0 ; i < x[0].length ; i++){ + string += x[j][i]+","; + } + string += y[j]+", \n"; + } + } + + + Reader reader = new StringReader(string); + return reader; + } +} \ No newline at end of file Added: trunk/cdk/src/org/openscience/cdk/test/libio/weka/WekaTest.java =================================================================== --- trunk/cdk/src/org/openscience/cdk/test/libio/weka/WekaTest.java (rev 0) +++ trunk/cdk/src/org/openscience/cdk/test/libio/weka/WekaTest.java 2006-05-26 12:06:58 UTC (rev 6317) @@ -0,0 +1,139 @@ +/* $RCSfile$ + * $Author: egonw $ + * $Date: 2006-05-01 10:49:56 +0200 (Mo, 01 Mai 2006) $ + * $Revision: 6096 $ + * + * Copyright (C) 2004-2006 The Chemistry Development Kit (CDK) project + * + * Contact: cdk...@sl... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may distribute + * with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * */ +package org.openscience.cdk.test.libio.weka; + +import junit.framework.Test; +import junit.framework.TestSuite; + +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.libio.weka.Weka; +import org.openscience.cdk.test.CDKTestCase; + +import weka.classifiers.functions.LinearRegression; +import weka.classifiers.trees.J48; + +/** + * TestCase for Weka class. + * + * @cdk.module test-libio-weka + * + */ +public class WekaTest extends CDKTestCase { + + private Weka weka; + /** + * Constructor for the WekaTest object + * + */ + public WekaTest() { + weka = new Weka(); + } + /** + * A unit test suite for JUnit + * + *@return The test suite + */ + public static Test suite() { + return new TestSuite(WekaTest.class); + } + /** + * A unit test for JUnit. Test usign file arff format, algorithm = Lineal Regression + */ + public void test1() throws ClassNotFoundException, CDKException, java.lang.Exception { + LinearRegression lr = new LinearRegression(); + String[] options = new String[4]; + options[0] = "-U"; + options[1] = "0"; + options[2] = "-R"; + options[3] = "0.00000008"; + lr.setOptions(options); + weka.setDataset("data/arff/Table1.arff", lr); + double[] result = weka.getPrediction("data/arff/Table2.arff"); + assertNotNull(result); + } + /** + * A unit test for JUnit. Test using Array, algorithm = Lineal Regression + */ + public void test2() throws ClassNotFoundException, CDKException, java.lang.Exception { + LinearRegression lr = new LinearRegression(); + String[] options = new String[4]; + options[0] = "-U"; + options[1] = "0"; + options[2] = "-R"; + options[3] = "0.00000008"; + lr.setOptions(options); + weka.setDataset("data/arff/Table1.arff", lr); + double[] testX = {0.39,9.06,-0.11}; + double result = weka.getPrediction(testX); +// assertNotNull(result); + } + /** + * A unit test for JUnit. Test using Array, algorithm = Lineal Regression + */ + public void test3() throws ClassNotFoundException, CDKException, java.lang.Exception { + LinearRegression lr = new LinearRegression(); + String[] attrib = {"aX","bX","cX", "PY" }; + int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC}; + double[][] x = {{0.39,9.62 ,-0.15 },{1.64 , 9.77 , -0.13},{1.06 , 12.56 , -0.16}, + {1.26 , 10.51 , -0.05},{1.45 , 10.15 , -0.09},{2.27 , 9.8 , -0.13}, + {1.89 , 10.54 , -0.05},{1.06 , 10.15 , 0.01},{2.59 , 9.81 , -0.13}, + {2.74 , 9.81 , -0.13},{2.79 , 9.85 , -0.12},{0.39 , 9.14 , -0.11}, + {1.64 , 9.28 , -0.1}, {1.06 , 10.34 , 0.03},{1.06 , 10.15 , 0.01}, + {2.27 , 9.31 , -0.09},{2.59 , 9.31 , -0.09},{2.74 , 9.31 , -0.09}, + {2.79 , 9.35 , -0.09},{0.39 , 9.06 , -0.11},{1.64 , 9.2 , -0.09}}; + double[] y = { 12.74,11.3 ,13.0 ,12.6 ,11.9 ,11.01,12.5 ,12.9 ,10.88,10.84,11.21,11.67, + 10.53,12.0 ,11.83,10.28,10.28,10.15,10.43,10.38,9.5 }; + Double[] yD = new Double[y.length]; + for(int i = 0 ; i< yD.length; i++) + yD[i] = new Double(y[i]); + weka.setDataset(attrib, typAttrib, yD, x, lr); + double[] testX = {0.39,9.06,-0.11}; + double resultY = weka.getPrediction(testX); +// assertNotNull(resultY); + } + /** + * A unit test for JUnit. Test using Array, algorithm = J48 + */ + public void test4() throws ClassNotFoundException, CDKException, java.lang.Exception { + String[] options = new String[1]; + options[0] = "-U"; + J48 j48 = new J48(); + j48.setOptions(options); + + String[] attrib = {"aX","bX","cX"}; + int[] typAttrib = {Weka.NUMERIC,Weka.NUMERIC,Weka.NUMERIC}; + String[] classAttrib = {"A_","B_","C_"}; + double[][] x = {{10,10 ,10 },{10 , 10 , -10},{-10 , -10 , -10}, + {11,11 ,11 },{11 , 11 , -11},{-11 , -11 , -11}}; + String[] y = { "A_","B_" ,"C_","A_","B_" ,"C_"}; + weka.setDataset(attrib, typAttrib, classAttrib, y, x, j48); + double[] testX = {10,10,-11}; + double resultY = weka.getPrediction(testX); +// assertNotNull(resultY); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |