From: <jen...@us...> - 2008-03-13 06:55:17
|
Revision: 707 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=707&view=rev Author: jenslehmann Date: 2008-03-12 23:55:13 -0700 (Wed, 12 Mar 2008) Log Message: ----------- - wrote script for training on one and testing on another conf file - small algorithm improvements Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java trunk/src/dl-learner/org/dllearner/learningproblems/ScoreTwoValued.java trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/utilities/TestValidation.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-12 23:15:31 UTC (rev 706) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-13 06:55:13 UTC (rev 707) @@ -267,7 +267,7 @@ long lastReductionTime = System.nanoTime(); // try a traversal after 100 seconds long traversalInterval = 1000l * 1000000000l; - long reductionInterval = 100l * 1000000000l; + long reductionInterval = 300l * 1000000000l; long currentTime; while(!solutionFound && !stop) { Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-12 23:15:31 UTC (rev 706) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-13 06:55:13 UTC (rev 707) @@ -21,7 +21,8 @@ import java.util.List; -import org.dllearner.core.owl.DatatypeValueRestriction; +import org.dllearner.core.owl.BooleanValueRestriction; +import org.dllearner.core.owl.DatatypeSomeRestriction; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Thing; import org.dllearner.utilities.ConceptComparator; @@ -78,7 +79,7 @@ private double expansionPenaltyFactor; private double gainBonusFactor; private double nodeChildPenalty = 0.0001; - private double startNodeBonus = 0.8; + private double startNodeBonus = 1.0; // examples private int nrOfNegativeExamples; @@ -144,11 +145,16 @@ // do not count TOP symbols (in particular in ALL r.TOP and EXISTS r.TOP) // as they provide no extra information if(description instanceof Thing) - bonus = 1; + bonus = 2; + if(description instanceof BooleanValueRestriction) + bonus = -1; + // some bonus for doubles because they are already penalised by length 3 - if(description instanceof DatatypeValueRestriction) - bonus = 1; + if(description instanceof DatatypeSomeRestriction) { +// System.out.println(description); + bonus = 3; + } List<Description> children = description.getChildren(); for(Description child : children) { Modified: trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java =================================================================== --- trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-12 23:15:31 UTC (rev 706) +++ trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-13 06:55:13 UTC (rev 707) @@ -112,6 +112,16 @@ // list of all "hasProperty" test private static Set<String> tests = new TreeSet<String>(); + // we ignore the ames test since its distribution in PTE-2 is so + // different from the training substances that a different testing + // strategy was probably in use + private static boolean ignoreAmes = false; + private static boolean ignoreSalmonella = false;; + private static boolean ignoreCytogenCa = false; + // if true we learn carcinogenic, if false we learn non-carcinogenic + private static boolean learnCarcinogenic = true; + private static boolean useNewGroups = true; + /** * @param args * No arguments supported. @@ -168,8 +178,10 @@ // define properties including domain and range String kbString = "DPDOMAIN(" + getURI2("charge") + ") = " + getURI2("Atom") + ".\n"; kbString += "DPRANGE(" + getURI2("charge") + ") = DOUBLE.\n"; - kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n"; - kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n"; + if(!ignoreAmes) { + kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n"; + kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n"; + } kbString += "OPDOMAIN(" + getURI2("hasAtom") + ") = " + getURI2("Compound") + ".\n"; kbString += "OPRANGE(" + getURI2("hasAtom") + ") = " + getURI2("Atom") + ".\n"; kbString += "OPDOMAIN(" + getURI2("hasBond") + ") = " + getURI2("Compound") + ".\n"; @@ -177,7 +189,10 @@ kbString += "OPDOMAIN(" + getURI2("inBond") + ") = " + getURI2("Bond") + ".\n"; kbString += "OPRANGE(" + getURI2("inBond") + ") = " + getURI2("Atom") + ".\n"; kbString += "OPDOMAIN(" + getURI2("hasStructure") + ") = " + getURI2("Compound") + ".\n"; - kbString += "OPRANGE(" + getURI2("hasStructure") + ") = " + getURI2("Structure") + ".\n"; + kbString += "OPRANGE(" + getURI2("hasStructure") + ") = " + getURI2("Structure") + ".\n"; + kbString += getURI2("Di") + " SUB " + getURI2("Structure") + ".\n"; + kbString += getURI2("Halide") + " SUB " + getURI2("Structure") + ".\n"; + kbString += getURI2("Ring") + " SUB " + getURI2("Structure") + ".\n"; KB kb2 = KBParser.parseKBFile(kbString); kb.addKB(kb2); @@ -195,7 +210,7 @@ // are in ames.pl [the rest is negative in Prolog by CWA], so // we add negative test results here) for(String compound : compounds) { - if(!compoundsAmes.contains(compound)) { + if(!ignoreAmes && !compoundsAmes.contains(compound)) { BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compound, "amesTestPositive", false); kb.addAxiom(ames); } @@ -255,15 +270,19 @@ // generating test examples for PTE-1 // => put all in one file, because they were used as training for PTE-2 - // File confPTE1File = new File("examples/carcinogenesis/testpte1.conf"); - // Files.clearFile(confPTE1File); + File confPTE1File = new File("examples/carcinogenesis/testpte1.conf"); + Files.clearFile(confPTE1File); File testPTE1Positives = new File(prologDirectory + "pte1.f"); File testPTE1Negatives = new File(prologDirectory + "pte1.n"); List<Individual> posPTE1Examples = getExamples(testPTE1Positives); List<Individual> negPTE1Examples = getExamples(testPTE1Negatives); appendPosExamples(confTrainFile, posPTE1Examples); - appendNegExamples(confTrainFile, negPTE1Examples); + appendNegExamples(confTrainFile, negPTE1Examples); + Files.clearFile(confPTE1File); + Files.appendFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n"); + appendPosExamples(confPTE1File, posPTE1Examples); + appendNegExamples(confPTE1File, negPTE1Examples); // create a PTE-2 test file File confPTE2File = new File("examples/carcinogenesis/testpte2.conf"); @@ -288,10 +307,12 @@ // remaining stuff or use closed world assumption in the // TBox dematerialisation later on if(headName.equals("ames")) { + if(!ignoreAmes) { String compoundName = head.getArgument(0).toPLString(); BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compoundName, "amesTestPositive", true); axioms.add(ames); compoundsAmes.add(compoundName); + } } else if (headName.equals("atm")) { String compoundName = head.getArgument(0).toPLString(); String atomName = head.getArgument(1).toPLString(); @@ -350,48 +371,67 @@ } else if (headName.equals("has_property")) { String compoundName = head.getArgument(0).toPLString(); String testName = head.getArgument(1).toPLString(); - String resultStr = head.getArgument(2).toPLString(); - boolean testResult = (resultStr.equals("p")) ? true : false; - - // create a new datatype property if it does not exist already - if(!tests.contains(testName)) { - String axiom1 = "DPDOMAIN(" + getURI2(testName) + ") = " + getURI2("Compound") + ".\n"; - String axiom2 = "DPRANGE(" + getURI2(testName) + ") = BOOLEAN.\n"; - KB kb = KBParser.parseKBFile(axiom1 + axiom2); - axioms.addAll(kb.getAxioms()); + if(!(ignoreSalmonella && testName.equals("salmonella")) + && !(ignoreCytogenCa && testName.equals("cytogen_ca"))) { + String resultStr = head.getArgument(2).toPLString(); + boolean testResult = (resultStr.equals("p")) ? true : false; + + // create a new datatype property if it does not exist already + if(!tests.contains(testName)) { + String axiom1 = "DPDOMAIN(" + getURI2(testName) + ") = " + getURI2("Compound") + ".\n"; + String axiom2 = "DPRANGE(" + getURI2(testName) + ") = BOOLEAN.\n"; + KB kb = KBParser.parseKBFile(axiom1 + axiom2); + axioms.addAll(kb.getAxioms()); + } + // create an axiom with the test result + DatatypePropertyAssertion dpa = getBooleanDatatypePropertyAssertion(compoundName, testName, + testResult); + axioms.add(dpa); } - // create an axiom with the test result - DatatypePropertyAssertion dpa = getBooleanDatatypePropertyAssertion(compoundName, testName, - testResult); - axioms.add(dpa); // either parse this or ashby_alert - not both - ashby_alert contains // all information in ind already } else if (headName.equals("ind") || headName.equals("ring_no")) { + // parse this only if the new groups are not parsed +// if(!useNewGroups) { String compoundName = head.getArgument(0).toPLString(); String structureName = head.getArgument(1).toPLString(); - int count = Integer.parseInt(head.getArgument(2).toPLString()); +// int count = Integer.parseInt(head.getArgument(2).toPLString()); // upper case first letter String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);; String structureInstance = structureName + "-" + structureNr; - if (!bondTypes.contains(structureClass)) { - NamedClass subClass = getAtomicConcept(structureClass); - SubClassAxiom sc = new SubClassAxiom(subClass, getAtomicConcept("Structure")); - axioms.add(sc); - structureTypes.add(structureClass); - } + addStructureSubclass(axioms, structureClass); - for(int i=0; i<count; i++) { +// for(int i=0; i<count; i++) { ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance); axioms.add(op); // make e.g. halide10-382 instance of Bond-3 ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance); axioms.add(ca); structureNr++; - } +// } +// } } else if (headName.equals("ashby_alert")) { // ... currently ignored ... } else if (newGroups.contains(headName)) { + if(useNewGroups) { + String compoundName = head.getArgument(0).toPLString(); + String structureName = headName; +// int count = Integer.parseInt(head.getArgument(2).toPLString()); + // upper case first letter + String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);; + String structureInstance = structureName + "-" + structureNr; + + addStructureSubclass(axioms, structureClass); + +// for(int i=0; i<count; i++) { + ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance); + axioms.add(op); + ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance); + axioms.add(ca); + structureNr++; +// } + } } else { // print clauses which are not supported yet System.out.println("unsupported clause"); @@ -402,6 +442,23 @@ return axioms; } + private static void addStructureSubclass(List<Axiom> axioms, String structureClass) { + // build in more fine-grained subclasses e.g. Di+number is subclass of Di + if (!structureTypes.contains(structureClass)) { + NamedClass nc = getAtomicConcept("Structure"); + if(structureClass.contains("Di")) + nc = getAtomicConcept("Di"); + else if(structureClass.contains("ring") || structureClass.contains("Ring")) + nc = getAtomicConcept("Ring"); + else if(structureClass.contains("halide") || structureClass.contains("Halide")) + nc = getAtomicConcept("Halide"); + NamedClass subClass = getAtomicConcept(structureClass); + SubClassAxiom sc = new SubClassAxiom(subClass, nc); + axioms.add(sc); + structureTypes.add(structureClass); + } + } + // takes a *.f or *.n file as input and returns the // contained examples private static List<Individual> getExamples(File file) throws FileNotFoundException, IOException, ParseException { @@ -419,7 +476,10 @@ private static void appendPosExamples(File file, List<Individual> examples) { StringBuffer content = new StringBuffer(); for(Individual example : examples) { - content.append("+\""+example.toString()+"\"\n"); + if(learnCarcinogenic) + content.append("+\""+example.toString()+"\"\n"); + else + content.append("-\""+example.toString()+"\"\n"); } Files.appendFile(file, content.toString()); } @@ -427,7 +487,10 @@ private static void appendNegExamples(File file, List<Individual> examples) { StringBuffer content = new StringBuffer(); for(Individual example : examples) { - content.append("-\""+example.toString()+"\"\n"); + if(learnCarcinogenic) + content.append("-\""+example.toString()+"\"\n"); + else + content.append("+\""+example.toString()+"\"\n"); } Files.appendFile(file, content.toString()); } @@ -582,6 +645,7 @@ * <p>Positives (19): <br /> * <ul> * <li>t3 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCACAFD4-123F-7908-7B521E4F665EFBD9</li> + * <li>t4 (3CE+NE) - contradicts IJCAI-97 paper and should probably be case 75-52-5 instead of 75-52-8: http://ntp.niehs.nih.gov/index.cfm?objectid=BCE49084-123F-7908-7BE127F7AF1FFBB5</li> * <li>t5: paper</li> * <li>t7: paper</li> * <li>t8: paper</li> @@ -607,7 +671,7 @@ * <ul> * <li>t1 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD9FF53C-123F-7908-7B123DAE0A25B122 </li> * <li>t2 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF8651E-123F-7908-7B21DD5ED83CD0FF </li> - * <li>t4: paper</li> + * <li><strike>t4: paper</strike></li> * <li>t6: paper</li> * <li>t11: paper</li> * <li>t13 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD136ED6-123F-7908-7B619EE79F2FD062</li> @@ -623,21 +687,47 @@ * <li>t29: probably a negative (see http://ntp.niehs.nih.gov/index.cfm?objectid=BD855EA1-123F-7908-7B573FC3C08188DC) but * no tests directly for this substance</li> * </ul> + * + * <p>The following examples are probably not part of the IJCAI PTE-2 challenge + * (reports younger than 1998): + * <ul> + * <li>pos: t21 (5/99), t25 (9/04), t30(10/01)</li> + * <li>neg: t26 (5/99), t27 (05/01), t28 (05/00), t29 (09/02)</li> + * </ul> * </p> + * </p> * @return A string for all examples as used in the conf file. */ public static String getPTE2Examples() { - String[] pos = new String[] {"t3","t5","t7","t8","t9","t10","t12", - "t14","t15","t16","t18","t19","t20","t21","t22","t23","t24", - "t25","t30"}; - String[] neg = new String[] {"t1", "t2", "t4", "t6", "t11", "t13", - "t17","t26","t27","t28"}; + String[] pos = new String[] {"t3","t4","t5","t7","t8", + "t9", + "t10","t12", + "t14","t15","t16","t18","t19","t20", + "t21", + "t22", + "t23", + "t24", + "t25", + "t30"}; + String[] neg = new String[] {"t1", "t2", + "t6", "t11", "t13", + "t17","t26","t27", + "t28","t29" + }; String ret = ""; - for(String posEx : pos) - ret += "+" + getURI2(posEx) + "\n"; - for(String negEx : neg) - ret += "-" + getURI2(negEx) + "\n"; + for(String posEx : pos) { + if(learnCarcinogenic) + ret += "+" + getURI2(posEx) + "\n"; + else + ret += "-" + getURI2(posEx) + "\n"; + } + for(String negEx : neg) { + if(learnCarcinogenic) + ret += "-" + getURI2(negEx) + "\n"; + else + ret += "+" + getURI2(negEx) + "\n"; + } return ret; } Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ScoreTwoValued.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ScoreTwoValued.java 2008-03-12 23:15:31 UTC (rev 706) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ScoreTwoValued.java 2008-03-13 06:55:13 UTC (rev 707) @@ -54,9 +54,9 @@ String str = ""; str += "score: " + score + "\n"; str += "accuracy: " + (1 + classificationScore) + "\n"; - str += "posAsPos: " + posAsPos + "\n"; - str += "positive examples classified as negative: " + posAsNeg + "\n"; - str += "negative examples classified as positive: " + negAsPos + "\n"; + str += "posAsPos (" + posAsPos.size() + "): " + posAsPos + "\n"; + str += "positive examples classified as negative (" + posAsNeg.size() + "): " + posAsNeg + "\n"; + str += "negative examples classified as positive (" + negAsPos.size() + "): " + negAsPos + "\n"; return str; } Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-12 23:15:31 UTC (rev 706) +++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-13 06:55:13 UTC (rev 707) @@ -286,7 +286,9 @@ ConceptTransformation.cleanConceptNonRecursive(mc); ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator); - refinements.add(mc); + // check whether the intersection is OK (sanity checks), then add it + if(checkIntersection(mc)) + refinements.add(mc); } } @@ -431,6 +433,17 @@ } } + // check for double datatype properties + if(c instanceof DatatypeSomeRestriction && + description instanceof DatatypeSomeRestriction) { + DataRange dr = ((DatatypeSomeRestriction)c).getDataRange(); + DataRange dr2 = ((DatatypeSomeRestriction)description).getDataRange(); + // it does not make sense to have statements like height >= 1.8 AND height >= 1.7 + if((dr instanceof DoubleMaxValue && dr2 instanceof DoubleMaxValue) + ||(dr instanceof DoubleMinValue && dr2 instanceof DoubleMinValue)) + skip = true; + } + // perform a disjointness check when named classes are added; // this can avoid a lot of superfluous computation in the algorithm e.g. // when A1 looks good, so many refinements of the form (A1 OR (A2 AND A3)) @@ -465,6 +478,33 @@ return refinements; } + // when a child of an intersection is refined and reintegrated into the + // intersection, we can perform some sanity checks; + // method returns true if everything is OK and false otherwise + private boolean checkIntersection(Intersection intersection) { + // rule 1: max. restrictions at most once + boolean maxDoubleOccurence = false; + // rule 2: min restrictions at most once + boolean minDoubleOccurence = false; + for(Description child : intersection.getChildren()) { + if(child instanceof DatatypeSomeRestriction) { + DataRange dr = ((DatatypeSomeRestriction)child).getDataRange(); + if(dr instanceof DoubleMaxValue) { + if(maxDoubleOccurence) + return false; + else + maxDoubleOccurence = true; + } else if(dr instanceof DoubleMinValue) { + if(minDoubleOccurence) + return false; + else + minDoubleOccurence = true; + } + } + } + return true; + } + private void computeTopRefinements(int maxLength) { computeTopRefinements(maxLength, null); } Added: trunk/src/dl-learner/org/dllearner/utilities/TestValidation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/TestValidation.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/TestValidation.java 2008-03-13 06:55:13 UTC (rev 707) @@ -0,0 +1,75 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.utilities; + +import java.io.File; +import java.util.Set; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.cli.Start; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.ReasoningService; +import org.dllearner.core.Score; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; + +/** + * @author Jens Lehmann + * + */ +public class TestValidation { + + private static Logger logger = Logger.getRootLogger(); + + public static void main(String args[]) throws ComponentInitException { + + // create logger (a simple logger which outputs + // its messages to the console) + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.setLevel(Level.DEBUG); + + String filenameTrain = args[0]; + String filenameTest = args[1]; + + Start start = new Start(new File(filenameTrain)); + start.start(false); + Description solution = start.getLearningAlgorithm().getBestSolution(); + + logger.setLevel(Level.WARN); + + Start startTest = new Start(new File(filenameTest)); + ReasoningService rs = startTest.getReasoningService(); + LearningProblem lp = startTest.getLearningProblem(); + + Set<Individual> result = rs.retrieval(solution); + System.out.println("retrieval result: " + result); + + Score score = lp.computeScore(solution); + System.out.println(score); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |