From: <jen...@us...> - 2008-03-16 19:03:58
|
Revision: 713 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=713&view=rev Author: jenslehmann Date: 2008-03-16 12:03:46 -0700 (Sun, 16 Mar 2008) Log Message: ----------- partial cardinality restriction learning support Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/Info.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java trunk/src/dl-learner/org/dllearner/core/ComponentManager.java trunk/src/dl-learner/org/dllearner/core/ComponentPool.java trunk/src/dl-learner/org/dllearner/core/Reasoner.java trunk/src/dl-learner/org/dllearner/core/ReasonerComponent.java trunk/src/dl-learner/org/dllearner/core/ReasoningService.java trunk/src/dl-learner/org/dllearner/core/owl/ObjectCardinalityRestriction.java trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java trunk/src/dl-learner/org/dllearner/reasoning/DIGReasoner.java trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java trunk/src/dl-learner/org/dllearner/reasoning/FastRetrievalReasoner.java trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java trunk/src/dl-learner/org/dllearner/utilities/ConceptComparator.java trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java Modified: trunk/src/dl-learner/org/dllearner/Info.java =================================================================== --- trunk/src/dl-learner/org/dllearner/Info.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/Info.java 2008-03-16 19:03:46 UTC (rev 713) @@ -3,6 +3,6 @@ package org.dllearner; public class Info { - public static final String build = "2008-02-18"; + public static final String build = "2008-03-16"; } \ No newline at end of file Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-16 19:03:46 UTC (rev 713) @@ -30,7 +30,6 @@ import java.util.SortedSet; import java.util.TreeSet; -import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.algorithms.refinement.RefinementOperator; import org.dllearner.core.LearningProblem; @@ -39,8 +38,8 @@ import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.Intersection; +import org.dllearner.core.owl.Thing; import org.dllearner.core.owl.Union; -import org.dllearner.core.owl.Thing; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosOnlyDefinitionLP; import org.dllearner.refinementoperators.RhoDRDown; @@ -116,7 +115,7 @@ // the divide&conquer approach in many ILP programs using a // clause by clause search; after a period of time the candidate // set is reduced to focus CPU time on the most promising concepts - private boolean useCandidateReduction = true; + private boolean useCandidateReduction = false; private int candidatePostReductionSize = 30; // setting to true gracefully stops the algorithm @@ -214,6 +213,11 @@ posOnly = false; nrOfPositiveExamples = lp.getPositiveExamples().size(); nrOfNegativeExamples = lp.getNegativeExamples().size(); + +// System.out.println(nrOfPositiveExamples); +// System.out.println(nrOfNegativeExamples); +// System.exit(0); + } else if(learningProblem instanceof PosOnlyDefinitionLP) { PosOnlyDefinitionLP lp = (PosOnlyDefinitionLP) learningProblem; this.posOnlyLearningProblem = lp; @@ -236,10 +240,40 @@ this.useShortConceptConstruction = useShortConceptConstruction; baseURI = rs.getBaseURI(); - logger.setLevel(Level.DEBUG); +// logger.setLevel(Level.DEBUG); } public void start() { + /* +// String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\". TOP)))"; + String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND (\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE)))"; + try { + NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Compound"); + Description d = KBParser.parseConcept(conceptStr); +// SortedSet<Description> ds = (SortedSet<Description>) operator.refine(d,15,null,struc); +// System.out.println(ds); + + System.out.println(RhoDRDown.checkIntersection((Intersection)d)); + + + Set<Individual> coveredNegatives = rs.instanceCheck(d, learningProblem.getNegativeExamples()); + Set<Individual> coveredPositives = rs.instanceCheck(d, learningProblem.getPositiveExamples()); + ExampleBasedNode ebn = new ExampleBasedNode(d); + ebn.setCoveredExamples(coveredPositives, coveredNegatives); + extendNodeProper(ebn,15); + + // Individual i = new Individual("http://dl-learner.org/carcinogenesis#d101"); +// for(Individual i : learningProblem.getPositiveExamples()) +// rs.instanceCheck(ds.last(), i); + + System.out.println("finished"); + } catch (ParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + System.exit(0); + */ + // calculate quality threshold required for a solution allowedMisclassifications = (int) Math.round(noise * nrOfExamples); @@ -290,8 +324,10 @@ if(useCandidateReduction && (currentTime - lastReductionTime > reductionInterval)) { reduceCandidates(); lastReductionTime = System.nanoTime(); +// Logger.getRootLogger().setLevel(Level.TRACE); } + System.out.println("next expanded: " + candidates.last().getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); // chose best node according to heuristics bestNode = candidates.last(); // extend best node @@ -304,7 +340,8 @@ // newCandidates has been filled during node expansion candidates.addAll(newCandidates); candidatesStable.addAll(newCandidates); - +// System.out.println("done"); + if(writeSearchTree) { // String treeString = ""; String treeString = "best node: " + bestNode+ "\n"; @@ -338,13 +375,13 @@ logger.info(" " + c + " (length " + c.getLength() +", depth " + c.getDepth() + ")"); } } - System.out.println("size of candidate set: " + candidates.size()); + logger.debug("size of candidate set: " + candidates.size()); printStatistics(true); if(stop) - System.out.println("Algorithm stopped."); + logger.info("Algorithm stopped."); else - System.out.println("Algorithm terminated succesfully."); + logger.info("Algorithm terminated succesfully."); } // we apply the operator recursively until all proper refinements up @@ -397,14 +434,18 @@ childConceptsDeletionTimeNs += System.nanoTime() - childConceptsDeletionTimeNsStart; +// if(refinements.size()<30) +// System.out.println("refinements: " + refinements); + long evaluateSetCreationTimeNsStart = System.nanoTime(); // alle Konzepte, die länger als horizontal expansion sind, müssen ausgewertet // werden - Set<Description> toEvaluateConcepts = new TreeSet<Description>(conceptComparator); + TreeSet<Description> toEvaluateConcepts = new TreeSet<Description>(conceptComparator); Iterator<Description> it = refinements.iterator(); // for(Concept refinement : refinements) { - while(it.hasNext()) { + while(it.hasNext()) { + Description refinement = it.next(); if(refinement.getLength()>node.getHorizontalExpansion()) { // sagt aus, ob festgestellt wurde, ob refinement proper ist @@ -413,7 +454,6 @@ // 1. short concept construction if(useShortConceptConstruction) { - // kurzes Konzept konstruieren Description shortConcept = ConceptTransformation.getShortConcept(refinement, conceptComparator); int n = conceptComparator.compare(shortConcept, concept); @@ -422,11 +462,14 @@ if(n==0) { propernessTestsAvoidedByShortConceptConstruction++; propernessDetected = true; + + System.out.println("refinement " + refinement + " can be shortened"); +// System.exit(0); } } // 2. too weak test - if(!propernessDetected && useTooWeakList) { + if(!propernessDetected && useTooWeakList) { if(refinement instanceof Intersection) { boolean tooWeakElement = containsTooWeakElement((Intersection)refinement); if(tooWeakElement) { @@ -454,14 +497,28 @@ } // properness konnte nicht vorher ermittelt werden - if(!propernessDetected) + if(!propernessDetected) { toEvaluateConcepts.add(refinement); +// if(!res) { +// System.out.println("already in: " + refinement); +// Comparator comp = toEvaluateConcepts.comparator(); +// for(Description d : toEvaluateConcepts) { +// if(comp.compare(d,refinement)==0) +// System.out.println("see: " + d); +// } +// } + } } + +// System.out.println("handled " + refinement + " length: " + refinement.getLength() + " (new size: " + toEvaluateConcepts.size() + ")"); + } evaluateSetCreationTimeNs += System.nanoTime() - evaluateSetCreationTimeNsStart; +// System.out.println("intermediate 1"); + // System.out.println(toEvaluateConcepts.size()); Set<Description> improperConcepts = null; @@ -476,6 +533,11 @@ } } +// if(toEvaluateConcepts.size()<10) +// System.out.println("to evaluate: " + toEvaluateConcepts); +// else +// System.out.println("to evaluate: more than 10"); + long improperConceptsRemovalTimeNsStart = System.nanoTime(); // die improper Konzepte werden von den auszuwertenden gelöscht, d.h. // alle proper concepts bleiben übrig (einfache Umbenennung) @@ -486,6 +548,13 @@ refinements.removeAll(properConcepts); improperConceptsRemovalTimeNs += System.nanoTime() - improperConceptsRemovalTimeNsStart; +// if(refinements.size()<10) +// System.out.println("refinements: " + refinements); +// else +// System.out.println("refinements: more than 10"); +// +// System.out.println("improper concepts: " + improperConcepts); + for(Description refinement : properConcepts) { long redundancyCheckTimeNsStart = System.nanoTime(); boolean nonRedundant = properRefinements.add(refinement); @@ -603,18 +672,18 @@ } } - // es sind jetzt noch alle Konzepte übrig, die improper refinements sind // auf jedem dieser Konzepte wird die Funktion erneut aufgerufen, da sich // proper refinements ergeben könnten for(Description refinement : refinements) { // for(int i=0; i<=recDepth; i++) // System.out.print(" "); - // System.out.println("call: " + refinement + " [maxLength " + maxLength + "]"); +// System.out.println("call: " + refinement + " [maxLength " + maxLength + "]"); extendNodeProper(node, refinement, maxLength, recDepth+1); // for(int i=0; i<=recDepth; i++) // System.out.print(" "); - // System.out.println("finished: " + refinement + " [maxLength " + maxLength + "]"); + // System.out.println("finished: " + refinement + " [maxLength " + maxLength + "]"); +// System.exit(0); } } @@ -637,26 +706,25 @@ // + nrOfNegativeExamples - bestNode.getCoveredNegatives().size())/(double)nrOfExamples); // Refinementoperator auf Konzept anwenden // String bestNodeString = "currently best node: " + bestNode + " accuracy: " + df.format(accuracy) + "%"; - System.out.println("start node: " + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); + logger.debug("start node: " + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); String bestNodeString = "currently best node: " + bestNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI); // searchTree += bestNodeString + "\n"; - System.out.println(bestNodeString); + logger.debug(bestNodeString); String expandedNodeString = "next expanded node: " + candidates.last().getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI); // searchTree += expandedNodeString + "\n"; - System.out.println(expandedNodeString); - System.out.println("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime)); - System.out.println("size of candidate set: " + candidates.size()); + logger.debug(expandedNodeString); + logger.debug("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime)); + logger.debug("size of candidate set: " + candidates.size()); // System.out.println("properness max recursion depth: " + maxRecDepth); // System.out.println("max. number of one-step refinements: " + maxNrOfRefinements); // System.out.println("max. number of children of a node: " + maxNrOfChildren); - System.out.println("subsumption time: " + Helper.prettyPrintNanoSeconds(rs.getSubsumptionReasoningTimeNs())); - System.out.println("instance check time: " + Helper.prettyPrintNanoSeconds(rs.getInstanceCheckReasoningTimeNs())); - System.out.println("retrieval time: " + Helper.prettyPrintNanoSeconds(rs.getRetrievalReasoningTimeNs())); + logger.debug("subsumption time: " + Helper.prettyPrintNanoSeconds(rs.getSubsumptionReasoningTimeNs())); + logger.debug("instance check time: " + Helper.prettyPrintNanoSeconds(rs.getInstanceCheckReasoningTimeNs())); + logger.debug("retrieval time: " + Helper.prettyPrintNanoSeconds(rs.getRetrievalReasoningTimeNs())); } if(computeBenchmarkInformation) { - long reasoningTime = rs.getOverallReasoningTimeNs(); double reasoningPercentage = 100 * reasoningTime/(double)algorithmRuntime; long propWithoutReasoning = propernessCalcTimeNs-propernessCalcReasoningTimeNs; @@ -674,24 +742,24 @@ double onnfTimePercentage = 100 * ConceptTransformation.onnfTimeNs/(double)algorithmRuntime; double shorteningTimePercentage = 100 * ConceptTransformation.shorteningTimeNs/(double)algorithmRuntime; - System.out.println("reasoning percentage: " + df.format(reasoningPercentage) + "%"); - System.out.println(" subsumption check time: " + df.format(subPercentage) + "%"); - System.out.println("proper calculation percentage (wo. reasoning): " + df.format(propPercentage) + "%"); - System.out.println(" deletion time percentage: " + df.format(deletionPercentage) + "%"); - System.out.println(" refinement calculation percentage: " + df.format(refinementPercentage) + "%"); - System.out.println(" m calculation percentage: " + df.format(mComputationTimePercentage) + "%"); - System.out.println(" top calculation percentage: " + df.format(topComputationTimePercentage) + "%"); - System.out.println(" redundancy check percentage: " + df.format(redundancyCheckPercentage) + "%"); - System.out.println(" evaluate set creation time percentage: " + df.format(evaluateSetCreationTimePercentage) + "%"); - System.out.println(" improper concepts removal time percentage: " + df.format(improperConceptsRemovalTimePercentage) + "%"); - System.out.println("clean time percentage: " + df.format(cleanTimePercentage) + "%"); - System.out.println("onnf time percentage: " + df.format(onnfTimePercentage) + "%"); - System.out.println("shortening time percentage: " + df.format(shorteningTimePercentage) + "%"); + logger.debug("reasoning percentage: " + df.format(reasoningPercentage) + "%"); + logger.debug(" subsumption check time: " + df.format(subPercentage) + "%"); + logger.debug("proper calculation percentage (wo. reasoning): " + df.format(propPercentage) + "%"); + logger.debug(" deletion time percentage: " + df.format(deletionPercentage) + "%"); + logger.debug(" refinement calculation percentage: " + df.format(refinementPercentage) + "%"); + logger.debug(" m calculation percentage: " + df.format(mComputationTimePercentage) + "%"); + logger.debug(" top calculation percentage: " + df.format(topComputationTimePercentage) + "%"); + logger.debug(" redundancy check percentage: " + df.format(redundancyCheckPercentage) + "%"); + logger.debug(" evaluate set creation time percentage: " + df.format(evaluateSetCreationTimePercentage) + "%"); + logger.debug(" improper concepts removal time percentage: " + df.format(improperConceptsRemovalTimePercentage) + "%"); + logger.debug("clean time percentage: " + df.format(cleanTimePercentage) + "%"); + logger.debug("onnf time percentage: " + df.format(onnfTimePercentage) + "%"); + logger.debug("shortening time percentage: " + df.format(shorteningTimePercentage) + "%"); } - System.out.println("properness tests (reasoner/short concept/too weak list): " + propernessTestsReasoner + "/" + propernessTestsAvoidedByShortConceptConstruction + logger.debug("properness tests (reasoner/short concept/too weak list): " + propernessTestsReasoner + "/" + propernessTestsAvoidedByShortConceptConstruction + "/" + propernessTestsAvoidedByTooWeakList); - System.out.println("concept tests (reasoner/too weak list/overly general list/redundant concepts): " + conceptTestsReasoner + "/" + logger.debug("concept tests (reasoner/too weak list/overly general list/redundant concepts): " + conceptTestsReasoner + "/" + conceptTestsTooWeakList + "/" + conceptTestsOverlyGeneralList + "/" + redundantConcepts); } @@ -737,8 +805,8 @@ Set<Individual> currentCoveredNeg = startNode.getCoveredNegatives(); double currentAccuracy = startNode.getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples); int currentMisclassifications = nrOfPositiveExamples - currentCoveredPos.size() + currentCoveredNeg.size(); - System.out.println("tree traversal start node " + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); - System.out.println("tree traversal start accuracy: " + currentAccuracy); + logger.debug("tree traversal start node " + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); + logger.debug("tree traversal start accuracy: " + currentAccuracy); int i=0; // start from the most promising nodes NavigableSet<ExampleBasedNode> reverseView = candidatesStable.descendingSet(); @@ -775,9 +843,9 @@ ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator); // System.out.println("extended concept to: " + mc); - System.out.println("misclassifications: " + misclassifications); - System.out.println("misclassified positives: " + misclassifiedPositives); - System.out.println("accuracy: " + accuracy); + logger.debug("misclassifications: " + misclassifications); + logger.debug("misclassified positives: " + misclassifiedPositives); + logger.debug("accuracy: " + accuracy); // update variables currentDescription = mc; @@ -787,8 +855,8 @@ currentAccuracy = accuracy; if(accuracy > 1 - noise) { - System.out.println("traversal found " + mc); - System.out.println("accuracy: " + accuracy); + logger.info("traversal found " + mc); + logger.info("accuracy: " + accuracy); System.exit(0); } } @@ -844,9 +912,9 @@ i++; } candidates.retainAll(promisingNodes); - System.out.println("searched " + i + " nodes and picked the following promising descriptions:"); + logger.debug("searched " + i + " nodes and picked the following promising descriptions:"); for(ExampleBasedNode node : promisingNodes) - System.out.println(node.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); + logger.debug(node.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); } /* Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-16 19:03:46 UTC (rev 713) @@ -21,7 +21,6 @@ import java.util.List; -import org.dllearner.core.owl.BooleanValueRestriction; import org.dllearner.core.owl.DatatypeSomeRestriction; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Thing; @@ -145,10 +144,10 @@ // do not count TOP symbols (in particular in ALL r.TOP and EXISTS r.TOP) // as they provide no extra information if(description instanceof Thing) - bonus = 2; + bonus = 1; - if(description instanceof BooleanValueRestriction) - bonus = -1; +// if(description instanceof BooleanValueRestriction) +// bonus = -1; // some bonus for doubles because they are already penalised by length 3 if(description instanceof DatatypeSomeRestriction) { Modified: trunk/src/dl-learner/org/dllearner/core/ComponentManager.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/ComponentManager.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/ComponentManager.java 2008-03-16 19:03:46 UTC (rev 713) @@ -355,6 +355,10 @@ pool.unregisterComponent(component); } + public void freeAllComponents() { + pool.clearComponents(); + } + public <T> T getConfigOptionValue(Component component, ConfigOption<T> option) { T object = pool.getLastValidConfigValue(component, option); if(object==null) Modified: trunk/src/dl-learner/org/dllearner/core/ComponentPool.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/ComponentPool.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/ComponentPool.java 2008-03-16 19:03:46 UTC (rev 713) @@ -70,4 +70,11 @@ lastValidConfigValue.get(component).put(entry.getOption(), entry.getValue()); } + // unregisters all components + public void clearComponents() { + components = new LinkedList<Component>(); + lastValidConfigValue = new HashMap<Component,Map<ConfigOption<?>,Object>>(); + configEntryHistory = new HashMap<Component,List<ConfigEntry<?>>>(); + } + } Modified: trunk/src/dl-learner/org/dllearner/core/Reasoner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/Reasoner.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/Reasoner.java 2008-03-16 19:03:46 UTC (rev 713) @@ -128,4 +128,5 @@ public SortedSet<Individual> getIndividuals(); + public void releaseKB(); } Modified: trunk/src/dl-learner/org/dllearner/core/ReasonerComponent.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/ReasonerComponent.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/ReasonerComponent.java 2008-03-16 19:03:46 UTC (rev 713) @@ -251,4 +251,6 @@ throw new ReasoningMethodUnsupportedException(); } + public abstract void releaseKB(); + } Modified: trunk/src/dl-learner/org/dllearner/core/ReasoningService.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/ReasoningService.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/ReasoningService.java 2008-03-16 19:03:46 UTC (rev 713) @@ -603,6 +603,10 @@ return reasoner.getPrefixes(); } + public void releaseKB() { + reasoner.releaseKB(); + } + public long getInstanceCheckReasoningTimeNs() { return instanceCheckReasoningTimeNs; } Modified: trunk/src/dl-learner/org/dllearner/core/owl/ObjectCardinalityRestriction.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/owl/ObjectCardinalityRestriction.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/core/owl/ObjectCardinalityRestriction.java 2008-03-16 19:03:46 UTC (rev 713) @@ -13,7 +13,7 @@ } public int getLength() { - return 1 + role.getLength() + getChild(0).getLength(); + return 2 + role.getLength() + getChild(0).getLength(); } public int getNumber() { Modified: trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java =================================================================== --- trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-16 19:03:46 UTC (rev 713) @@ -118,6 +118,7 @@ private static boolean ignoreAmes = false; private static boolean ignoreSalmonella = false;; private static boolean ignoreCytogenCa = false; + private static boolean includeMutagenesis = true; // if true we learn carcinogenic, if false we learn non-carcinogenic private static boolean learnCarcinogenic = true; private static boolean useNewGroups = true; @@ -134,10 +135,11 @@ // TODO: newgroups are not mapped currently String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl", - "ind_nos.pl", "ind_pos.pl", "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl", - "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl" + "ind_nos.pl", "ind_pos.pl"}; + // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl", + // "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl" // "train.b" => not a pure Prolog file but Progol/Aleph specific - }; + // }; File owlFile = new File("examples/carcinogenesis/pte.owl"); Program program = null; @@ -182,6 +184,10 @@ kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n"; kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n"; } + if(includeMutagenesis) { + kbString += "DPDOMAIN(" + getURI2("isMutagenic") + ") = " + getURI2("Compound") + ".\n"; + kbString += "DPRANGE(" + getURI2("isMutagenic") + ") = BOOLEAN.\n"; + } kbString += "OPDOMAIN(" + getURI2("hasAtom") + ") = " + getURI2("Compound") + ".\n"; kbString += "OPRANGE(" + getURI2("hasAtom") + ") = " + getURI2("Atom") + ".\n"; kbString += "OPDOMAIN(" + getURI2("hasBond") + ") = " + getURI2("Compound") + ".\n"; @@ -205,6 +211,10 @@ for (Axiom axiom : axioms) kb.addAxiom(axiom); } + + if(includeMutagenesis) + addMutagenesis(kb); + // special handling for ames test (we assume the ames test // was performed on all compounds but only the positive ones // are in ames.pl [the rest is negative in Prolog by CWA], so @@ -395,42 +405,39 @@ // if(!useNewGroups) { String compoundName = head.getArgument(0).toPLString(); String structureName = head.getArgument(1).toPLString(); -// int count = Integer.parseInt(head.getArgument(2).toPLString()); + int count = Integer.parseInt(head.getArgument(2).toPLString()); // upper case first letter String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);; String structureInstance = structureName + "-" + structureNr; addStructureSubclass(axioms, structureClass); -// for(int i=0; i<count; i++) { + for(int i=0; i<count; i++) { ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance); axioms.add(op); // make e.g. halide10-382 instance of Bond-3 ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance); axioms.add(ca); structureNr++; + } // } -// } } else if (headName.equals("ashby_alert")) { // ... currently ignored ... } else if (newGroups.contains(headName)) { if(useNewGroups) { String compoundName = head.getArgument(0).toPLString(); String structureName = headName; -// int count = Integer.parseInt(head.getArgument(2).toPLString()); // upper case first letter String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);; String structureInstance = structureName + "-" + structureNr; addStructureSubclass(axioms, structureClass); -// for(int i=0; i<count; i++) { ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance); axioms.add(op); ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance); axioms.add(ca); structureNr++; -// } } } else { // print clauses which are not supported yet @@ -732,4 +739,36 @@ return ret; } + private static void addMutagenesis(KB kb) { + String[] mutagenicCompounds = new String[] { + "d101", "d104", "d106", "d107", "d112", "d113", "d117", + "d121", "d123", "d126", "d128", "d13", "d135", "d137", + "d139", "d140", "d143", "d144", "d145", "d146", "d147", + "d152", "d153", "d154", "d155", "d156", "d159", "d160", + "d161", "d163", "d164", "d166", "d168", "d171", "d173", + "d174", "d177", "d179", "d18", "d180", "d182", "d183", + "d185", "d186", "d187", "d188", "d189", "d19", "d191", + "d192", "d193", "d195", "d197", "d2", "d201", "d202", + "d205", "d206", "d207", "d211", "d214", "d215", "d216", + "d224", "d225", "d227", "d228", "d229", "d231", "d235", + "d237", "d239", "d242", "d245", "d246", "d249", "d251", + "d254", "d257", "d258", "d261", "d264", "d266", "d269", + "d27", "d270", "d271", "d28", "d288", "d292", "d297", + "d300", "d308", "d309", "d311", "d313", "d314", "d322", + "d323", "d324", "d329", "d330", "d332", "d334", "d35", + "d36", "d37", "d38", "d41", "d42", "d48", "d50", "d51", + "d54", "d58", "d61", "d62", "d63", "d66", "d69", "d72", + "d76", "d77", "d78", "d84", "d86", "d89", "d92", "d96"}; + TreeSet<String> mutagenic = new TreeSet<String>(Arrays.asList(mutagenicCompounds)); + + for(String compound : compounds) { + if(mutagenic.contains(compound)) { + BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", true); + kb.addAxiom(muta); + } else { + BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", false); + kb.addAxiom(muta); + } + } + } } Modified: trunk/src/dl-learner/org/dllearner/reasoning/DIGReasoner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/DIGReasoner.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/reasoning/DIGReasoner.java 2008-03-16 19:03:46 UTC (rev 713) @@ -760,6 +760,7 @@ return identifier; } + @Override public void releaseKB() { connector.releaseKB(kbURI); } Modified: trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java 2008-03-16 19:03:46 UTC (rev 713) @@ -50,6 +50,9 @@ import org.dllearner.core.owl.Negation; import org.dllearner.core.owl.Nothing; import org.dllearner.core.owl.ObjectAllRestriction; +import org.dllearner.core.owl.ObjectCardinalityRestriction; +import org.dllearner.core.owl.ObjectMaxCardinalityRestriction; +import org.dllearner.core.owl.ObjectMinCardinalityRestriction; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.core.owl.ObjectPropertyExpression; import org.dllearner.core.owl.ObjectPropertyHierarchy; @@ -290,6 +293,82 @@ return false; } return true; + } else if (description instanceof ObjectMinCardinalityRestriction) { + ObjectPropertyExpression ope = ((ObjectCardinalityRestriction) description).getRole(); + if (!(ope instanceof ObjectProperty)) + throw new ReasoningMethodUnsupportedException("Instance check for description " + + description + " unsupported. Inverse object properties not supported."); + ObjectProperty op = (ObjectProperty) ope; + Description child = description.getChild(0); + Map<Individual, SortedSet<Individual>> mapping = opPos.get(op); + + if (mapping == null) { + logger.warn("Instance check of a description with an undefinied property (" + op + + ")."); + return true; + } + + int number = ((ObjectCardinalityRestriction) description).getNumber(); + int nrOfFillers = 0; + + SortedSet<Individual> roleFillers = opPos.get(op).get(individual); + // return false if there are none or not enough role fillers + if (roleFillers == null || roleFillers.size() < number) + return false; + + int index = 0; + for (Individual roleFiller : roleFillers) { + index++; + if (instanceCheck(child, roleFiller)) { + nrOfFillers++; + if(nrOfFillers == number) + return true; + // earyl abort: e.g. >= 10 hasStructure.Methyl; + // if there are 11 fillers and 2 are not Methyl, the result is false + } /* else { + if(roleFillers.size() - index < number) + return false; + }*/ + } + return false; + } else if (description instanceof ObjectMaxCardinalityRestriction) { + ObjectPropertyExpression ope = ((ObjectCardinalityRestriction) description).getRole(); + if (!(ope instanceof ObjectProperty)) + throw new ReasoningMethodUnsupportedException("Instance check for description " + + description + " unsupported. Inverse object properties not supported."); + ObjectProperty op = (ObjectProperty) ope; + Description child = description.getChild(0); + Map<Individual, SortedSet<Individual>> mapping = opPos.get(op); + + if (mapping == null) { + logger.warn("Instance check of a description with an undefinied property (" + op + + ")."); + return true; + } + + int number = ((ObjectCardinalityRestriction) description).getNumber(); + int nrOfFillers = 0; + + SortedSet<Individual> roleFillers = opPos.get(op).get(individual); + // return false if there are none or not enough role fillers + if (roleFillers == null || roleFillers.size() > number) + return true; + + int index = 0; + for (Individual roleFiller : roleFillers) { + index++; + if (instanceCheck(child, roleFiller)) { + nrOfFillers++; + if(nrOfFillers == number) + return false; + // earyl abort: e.g. <= 5 hasStructure.Methyl; + // if there are 6 fillers and 2 are not Methyl, the result is true + } /* else { + if(roleFillers.size() - index <= number) + return true; + } */ + } + return true; } else if (description instanceof BooleanValueRestriction) { DatatypeProperty dp = ((BooleanValueRestriction)description).getRestrictedPropertyExpresssion(); boolean value = ((BooleanValueRestriction)description).getBooleanValue(); @@ -503,6 +582,19 @@ @Override public Description getRange(ObjectProperty objectProperty) { return rc.getRange(objectProperty); + } + + @Override + public Map<Individual, SortedSet<Individual>> getRoleMembers(ObjectProperty atomicRole) { + return opPos.get(atomicRole); + } + + /* (non-Javadoc) + * @see org.dllearner.core.ReasonerComponent#releaseKB() + */ + @Override + public void releaseKB() { + rc.releaseKB(); } } Modified: trunk/src/dl-learner/org/dllearner/reasoning/FastRetrievalReasoner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/FastRetrievalReasoner.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/reasoning/FastRetrievalReasoner.java 2008-03-16 19:03:46 UTC (rev 713) @@ -182,5 +182,13 @@ */ public Map<String, String> getPrefixes() { return rc.getPrefixes(); - } + } + + /* (non-Javadoc) + * @see org.dllearner.core.ReasonerComponent#releaseKB() + */ + @Override + public void releaseKB() { + rc.releaseKB(); + } } Modified: trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java 2008-03-16 19:03:46 UTC (rev 713) @@ -1021,4 +1021,18 @@ return prefixes; } + /* (non-Javadoc) + * @see org.dllearner.core.ReasonerComponent#releaseKB() + */ + @Override + public void releaseKB() { + try { + reasoner.clearOntologies(); + reasoner.dispose(); + } catch (OWLReasonerException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-16 19:03:46 UTC (rev 713) @@ -32,6 +32,7 @@ import java.util.TreeSet; import java.util.Map.Entry; +import org.apache.log4j.Logger; import org.dllearner.algorithms.refinement.RefinementOperator; import org.dllearner.core.ReasoningService; import org.dllearner.core.owl.BooleanValueRestriction; @@ -47,6 +48,9 @@ import org.dllearner.core.owl.Negation; import org.dllearner.core.owl.Nothing; import org.dllearner.core.owl.ObjectAllRestriction; +import org.dllearner.core.owl.ObjectCardinalityRestriction; +import org.dllearner.core.owl.ObjectMaxCardinalityRestriction; +import org.dllearner.core.owl.ObjectMinCardinalityRestriction; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.core.owl.ObjectPropertyExpression; import org.dllearner.core.owl.ObjectQuantorRestriction; @@ -72,6 +76,10 @@ */ public class RhoDRDown implements RefinementOperator { + @SuppressWarnings({"unused"}) + private static Logger logger = Logger + .getLogger(RhoDRDown.class); + private ReasoningService rs; // hierarchies @@ -82,6 +90,9 @@ private Map<DatatypeProperty,Description> dpDomains = new TreeMap<DatatypeProperty,Description>(); private Map<ObjectProperty,Description> opRanges = new TreeMap<ObjectProperty,Description>(); + // maximum number of fillers for eeach role + private Map<ObjectProperty,Integer> maxNrOfFillers = new TreeMap<ObjectProperty,Integer>(); + // start concept (can be used to start from an arbitrary concept, needs // to be Thing or NamedClass), note that when you use e.g. Compound as // start class, then the algorithm should start the search with class @@ -137,6 +148,7 @@ private boolean applyExistsFilter = true; private boolean useAllConstructor = true; private boolean useExistsConstructor = true; + private boolean useCardinalityRestrictions = true; private boolean useNegation = true; private boolean useBooleanDatatypes = true; private boolean useDoubleDatatypes = true; @@ -181,7 +193,36 @@ computeSplits(dp); } + // determine the maximum number of fillers for each role + for(ObjectProperty op : rs.getAtomicRoles()) { + int maxFillers = 0; + Map<Individual,SortedSet<Individual>> opMembers = rs.getRoleMembers(op); + for(SortedSet<Individual> inds : opMembers.values()) { + if(inds.size()>maxFillers) + maxFillers = inds.size(); + } + maxNrOfFillers.put(op, maxFillers); + } + /* + String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\". TOP)))"; + try { + NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Compound"); + Description d = KBParser.parseConcept(conceptStr); + SortedSet<Description> ds = (SortedSet<Description>) refine(d,15,null,struc); + System.out.println(ds); + + Individual i = new Individual("http://dl-learner.org/carcinogenesis#d101"); + rs.instanceCheck(ds.first(), i); + + } catch (ParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + System.exit(0); + */ + + /* NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Atom"); ObjectProperty op = new ObjectProperty("http://dl-learner.org/carcinogenesis#hasAtom"); ObjectSomeRestriction oar = new ObjectSomeRestriction(op,Thing.instance); @@ -222,7 +263,7 @@ public Set<Description> refine(Description description, int maxLength, List<Description> knownRefinements, Description currDomain) { -// System.out.println(description + " " + currDomain + " " + maxLength); +// logger.trace(description + " " + currDomain + " " + maxLength); // actions needing to be performed if this is the first time the // current domain is used @@ -336,6 +377,15 @@ for(ObjectProperty moreSpecialRole : moreSpecialRoles) refinements.add(new ObjectSomeRestriction(moreSpecialRole, description.getChild(0))); + // rule 3: EXISTS r.D => >= 2 r.D + // (length increases by 1 so we have to check whether max length is sufficient) + if(useCardinalityRestrictions) { + if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) { + ObjectMinCardinalityRestriction min = new ObjectMinCardinalityRestriction(2,role,description.getChild(0)); + refinements.add(min); + } + } + } else if (description instanceof ObjectAllRestriction) { ObjectPropertyExpression role = ((ObjectQuantorRestriction)description).getRole(); Description range = opRanges.get(role); @@ -360,6 +410,28 @@ refinements.add(new ObjectAllRestriction(moreSpecialRole, description.getChild(0))); } + // rule 4: ALL r.D => <= (maxFillers-1) r.D + // (length increases by 1 so we have to check whether max length is sufficient) + if(useCardinalityRestrictions) { + if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) { + ObjectMaxCardinalityRestriction max = new ObjectMaxCardinalityRestriction(maxNrOfFillers.get(ar)-1,role,description.getChild(0)); + refinements.add(max); + } + } + } else if (description instanceof ObjectCardinalityRestriction) { + if(description instanceof ObjectMaxCardinalityRestriction) { + // <= x r.C => <= (x-1) r.C + ObjectMaxCardinalityRestriction max = (ObjectMaxCardinalityRestriction) description; + int number = max.getNumber(); + if(number > 0) + refinements.add(new ObjectMaxCardinalityRestriction(number-1,max.getRole(),max.getChild(0))); + } else if(description instanceof ObjectMinCardinalityRestriction) { + // >= x r.C => >= (x+1) r.C + ObjectMinCardinalityRestriction min = (ObjectMinCardinalityRestriction) description; + int number = min.getNumber(); + if(number < maxNrOfFillers.get(min.getRole())) + refinements.add(new ObjectMinCardinalityRestriction(number+1,min.getRole(),min.getChild(0))); + } } else if (description instanceof DatatypeSomeRestriction) { DatatypeSomeRestriction dsr = (DatatypeSomeRestriction) description; @@ -434,6 +506,7 @@ } // check for double datatype properties + /* if(c instanceof DatatypeSomeRestriction && description instanceof DatatypeSomeRestriction) { DataRange dr = ((DatatypeSomeRestriction)c).getDataRange(); @@ -442,7 +515,7 @@ if((dr instanceof DoubleMaxValue && dr2 instanceof DoubleMaxValue) ||(dr instanceof DoubleMinValue && dr2 instanceof DoubleMinValue)) skip = true; - } + }*/ // perform a disjointness check when named classes are added; // this can avoid a lot of superfluous computation in the algorithm e.g. @@ -462,7 +535,9 @@ ConceptTransformation.cleanConceptNonRecursive(mc); ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator); - refinements.add(mc); + // last check before intersection is added + if(checkIntersection(mc)) + refinements.add(mc); } } } @@ -481,11 +556,15 @@ // when a child of an intersection is refined and reintegrated into the // intersection, we can perform some sanity checks; // method returns true if everything is OK and false otherwise - private boolean checkIntersection(Intersection intersection) { + // TODO: can be implemented more efficiently if the newly added child + // is given as parameter + public static boolean checkIntersection(Intersection intersection) { // rule 1: max. restrictions at most once boolean maxDoubleOccurence = false; // rule 2: min restrictions at most once boolean minDoubleOccurence = false; + // rule 3: no double boolean datatypes + TreeSet<DatatypeProperty> occuredDP = new TreeSet<DatatypeProperty>(); for(Description child : intersection.getChildren()) { if(child instanceof DatatypeSomeRestriction) { DataRange dr = ((DatatypeSomeRestriction)child).getDataRange(); @@ -500,7 +579,14 @@ else minDoubleOccurence = true; } + } else if(child instanceof BooleanValueRestriction) { + DatatypeProperty dp = (DatatypeProperty) ((BooleanValueRestriction)child).getRestrictedPropertyExpression(); +// System.out.println("dp: " + dp); + // return false if the boolean property exists already + if(!occuredDP.add(dp)) + return false; } +// System.out.println(child.getClass()); } return true; } Modified: trunk/src/dl-learner/org/dllearner/utilities/ConceptComparator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/ConceptComparator.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/utilities/ConceptComparator.java 2008-03-16 19:03:46 UTC (rev 713) @@ -12,6 +12,9 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Nothing; import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.ObjectCardinalityRestriction; +import org.dllearner.core.owl.ObjectMaxCardinalityRestriction; +import org.dllearner.core.owl.ObjectMinCardinalityRestriction; import org.dllearner.core.owl.ObjectSomeRestriction; import org.dllearner.core.owl.Intersection; import org.dllearner.core.owl.SimpleDoubleDataRange; @@ -172,6 +175,46 @@ return roleCompare; } else return -1; + } else if(concept1 instanceof ObjectMinCardinalityRestriction) { + if(concept2.getChildren().size()<1 || concept2 instanceof Negation || concept2 instanceof ObjectQuantorRestriction) + return 1; + // first criterion: object property + // second criterion: number + // third criterion: children + else if(concept2 instanceof ObjectMinCardinalityRestriction) { + int roleCompare = rc.compare(((ObjectCardinalityRestriction)concept1).getRole(), ((ObjectCardinalityRestriction)concept2).getRole()); + if(roleCompare == 0) { + Integer number1 = ((ObjectCardinalityRestriction)concept1).getNumber(); + Integer number2 = ((ObjectCardinalityRestriction)concept2).getNumber(); + int numberCompare = number1.compareTo(number2); + if(numberCompare == 0) + return compare(concept1.getChild(0), concept2.getChild(0)); + else + return numberCompare; + } else + return roleCompare; + } else + return -1; + } else if(concept1 instanceof ObjectMaxCardinalityRestriction) { + if(concept2.getChildren().size()<1 || concept2 instanceof Negation || concept2 instanceof ObjectQuantorRestriction || concept2 instanceof ObjectMinCardinalityRestriction) + return 1; + // first criterion: object property + // second criterion: number + // third criterion: children + else if(concept2 instanceof ObjectMaxCardinalityRestriction) { + int roleCompare = rc.compare(((ObjectCardinalityRestriction)concept1).getRole(), ((ObjectCardinalityRestriction)concept2).getRole()); + if(roleCompare == 0) { + Integer number1 = ((ObjectCardinalityRestriction)concept1).getNumber(); + Integer number2 = ((ObjectCardinalityRestriction)concept2).getNumber(); + int numberCompare = number1.compareTo(number2); + if(numberCompare == 0) + return compare(concept1.getChild(0), concept2.getChild(0)); + else + return numberCompare; + } else + return roleCompare; + } else + return -1; } else if(concept1 instanceof Intersection) { if(concept2.getChildren().size()<2) return 1; Modified: trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java 2008-03-14 22:58:35 UTC (rev 712) +++ trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java 2008-03-16 19:03:46 UTC (rev 713) @@ -21,9 +21,11 @@ import java.io.File; import java.text.DecimalFormat; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Random; import java.util.Set; import org.apache.log4j.ConsoleAppender; @@ -65,20 +67,27 @@ else leaveOneOut = true; + if(folds < 2) { + System.out.println("At least 2 fold needed."); + System.exit(0); + } + // create logger (a simple logger which outputs // its messages to the console) SimpleLayout layout = new SimpleLayout(); ConsoleAppender consoleAppender = new ConsoleAppender(layout); logger.removeAllAppenders(); logger.addAppender(consoleAppender); - logger.setLevel(Level.WARN); + logger.setLevel(Level.WARN); + // disable OWL API info output + java.util.logging.Logger.getLogger("").setLevel(java.util.logging.Level.WARNING); new CrossValidation(file, folds, leaveOneOut); } public CrossValidation(File file, int folds, boolean leaveOneOut) { - + DecimalFormat df = new DecimalFormat(); ComponentManager cm = ComponentManager.getInstance(); @@ -104,10 +113,13 @@ if(lp instanceof PosNegLP) { + // get examples and shuffle them to Set<Individual> posExamples = ((PosNegLP)lp).getPositiveExamples(); List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); + Collections.shuffle(posExamplesList, new Random(1)); Set<Individual> negExamples = ((PosNegLP)lp).getNegativeExamples(); List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); + Collections.shuffle(negExamplesList, new Random(2)); // sanity check whether nr. of folds makes sense for this benchmark if(!leaveOneOut && (posExamples.size()<folds && negExamples.size()<folds)) { @@ -138,6 +150,9 @@ int[] splitsPos = calculateSplits(posExamples.size(),folds); int[] splitsNeg = calculateSplits(negExamples.size(),folds); +// System.out.println(splitsPos[0]); +// System.out.println(splitsNeg[0]); + // calculating training and test sets for(int i=0; i<folds; i++) { Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); @@ -180,16 +195,45 @@ Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); cm.applyConfigEntry(lp, "positiveExamples", pos); cm.applyConfigEntry(lp, "negativeExamples", neg); +// System.out.println("pos: " + pos.size()); +// System.out.println("neg: " + neg.size()); +// System.exit(0); + // es fehlt init zwischendurch + LearningAlgorithm la = start.getLearningAlgorithm(); + // init again, because examples have changed + try { + la.init(); + } catch (ComponentInitException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } long algorithmStartTime = System.nanoTime(); la.start(); long algorithmDuration = System.nanoTime() - algorithmStartTime; runtime.addNumber(algorithmDuration/(double)1000000000); Description concept = la.getBestSolution(); - int correctExamples = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold)) - + getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold)); + + Set<Individual> tmp = rs.instanceCheck(concept, testSetsPos.get(currFold)); + Set<Individual> tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); + Set<Individual> tmp3 = rs.instanceCheck(concept, testSetsNeg.get(currFold)); + + System.out.println("test set errors pos: " + tmp2); + System.out.println("test set errors neg: " + tmp3); + + // calculate training accuracies + int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, trainingSetsPos.get(currFold)); + int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, trainingSetsNeg.get(currFold)); + int trainingCorrectExamples = trainingCorrectPosClassified + trainingCorrectNegClassified; + double trainingAccuracy = 100*((double)trainingCorrectExamples/(trainingSetsPos.get(currFold).size()+ + trainingSetsNeg.get(currFold).size())); + + // calculate test accuracies + int correctPosClassified = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold)); + int correctNegClassified = getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold)); + int correctExamples = correctPosClassified + correctNegClassified; double currAccuracy = 100*((double)correctExamples/(testSetsPos.get(currFold).size()+ testSetsNeg.get(currFold).size())); accuracy.addNumber(currAccuracy); @@ -197,10 +241,17 @@ length.addNumber(concept.getLength()); System.out.println("fold " + currFold + " (" + file + "):"); + System.out.println(" training: " + pos.size() + " positive and " + neg.size() + " negative examples"); + System.out.println(" testing: " + correctPosClassified + "/" + testSetsPos.get(currFold).size() + " correct positives, " + + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + " correct negatives"); System.out.println(" concept: " + concept); - System.out.println(" accuracy: " + df.format(currAccuracy) + "%"); + System.out.println(" accuracy: " + df.format(currAccuracy) + "% (" + df.format(trainingAccuracy) + "% on training set)"); System.out.println(" length: " + df.format(concept.getLength())); System.out.println(" runtime: " + df.format(algorithmDuration/(double)1000000000) + "s"); + + // free all resources + start.getReasoningService().releaseKB(); + cm.freeAllComponents(); } System.out.println(); @@ -211,12 +262,12 @@ } - private int getCorrectPosClassified(ReasoningService rs, Description concept, Set<Individual> posClassified) { - return rs.instanceCheck(concept, posClassified).size(); + private int getCorrectPosClassified(ReasoningService rs, Description concept, Set<Individual> testSetPos) { + return rs.instanceCheck(concept, testSetPos).size(); } - private int getCorrectNegClassified(ReasoningService rs, Description concept, Set<Individual> negClassified) { - return negClassified.size() - rs.instanceCheck(concept, negClassified).size(); + private int getCorrectNegClassified(ReasoningService rs, Description concept, Set<Individual> testSetNeg) { + return testSetNeg.size() - rs.instanceCheck(concept, testSetNeg).size(); } private Set<Individual> getTestingSet(List<Individual> examples, int[] splits, int fold) { @@ -229,6 +280,8 @@ // the split corresponds to the ends of the folds int toIndex = splits[fold]; +// System.out.println("from " + fromIndex + " to " + toIndex); + Set<Individual> testingSet = new HashSet<Individual>(); // +1 because 2nd element is exclusive in subList method testingSet.addAll(examples.subList(fromIndex, toIndex)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |