From: <jen...@us...> - 2008-03-09 19:26:51
|
Revision: 699 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=699&view=rev Author: jenslehmann Date: 2008-03-09 12:25:46 -0700 (Sun, 09 Mar 2008) Log Message: ----------- small algorithm bug fixes and reasoning improvements Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java trunk/src/dl-learner/org/dllearner/core/owl/BooleanValueRestriction.java trunk/src/dl-learner/org/dllearner/core/owl/DatatypeValueRestriction.java trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java 2008-03-09 19:25:46 UTC (rev 699) @@ -47,8 +47,8 @@ private Set<Individual> coveredNegatives; // the method by which quality was evaluated in this node - public enum QualityEvaluationMethod { TOP, REASONER, TOO_WEAK_LIST, OVERLY_GENERAL_LIST }; - private QualityEvaluationMethod qualityEvaluationMethod = QualityEvaluationMethod.TOP; + public enum QualityEvaluationMethod { START, REASONER, TOO_WEAK_LIST, OVERLY_GENERAL_LIST }; + private QualityEvaluationMethod qualityEvaluationMethod = QualityEvaluationMethod.START; // all properties of a node in the search tree private Description concept; Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-09 19:25:46 UTC (rev 699) @@ -599,6 +599,7 @@ // + nrOfNegativeExamples - bestNode.getCoveredNegatives().size())/(double)nrOfExamples); // Refinementoperator auf Konzept anwenden // String bestNodeString = "currently best node: " + bestNode + " accuracy: " + df.format(accuracy) + "%"; + System.out.println("start node: " + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); String bestNodeString = "currently best node: " + bestNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI); // searchTree += bestNodeString + "\n"; System.out.println(bestNodeString); @@ -612,6 +613,7 @@ // System.out.println("max. number of children of a node: " + maxNrOfChildren); System.out.println("subsumption time: " + Helper.prettyPrintNanoSeconds(rs.getSubsumptionReasoningTimeNs())); System.out.println("instance check time: " + Helper.prettyPrintNanoSeconds(rs.getInstanceCheckReasoningTimeNs())); + System.out.println("retrieval time: " + Helper.prettyPrintNanoSeconds(rs.getRetrievalReasoningTimeNs())); } if(computeBenchmarkInformation) { Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-09 19:25:46 UTC (rev 699) @@ -19,6 +19,10 @@ */ package org.dllearner.algorithms.refexamples; +import java.util.List; + +import org.dllearner.core.owl.BooleanValueRestriction; +import org.dllearner.core.owl.Description; import org.dllearner.utilities.ConceptComparator; /** @@ -73,13 +77,14 @@ private double expansionPenaltyFactor; private double gainBonusFactor; private double nodeChildPenalty = 0.0001; + private double startNodeBonus = 0.8; // examples private int nrOfNegativeExamples; private int nrOfExamples; public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples) { - this(nrOfPositiveExamples, nrOfNegativeExamples, 0.03, 0.5); + this(nrOfPositiveExamples, nrOfNegativeExamples, 0.02, 0.5); } public MultiHeuristic(int nrOfPositiveExamples, int nrOfNegativeExamples, double expansionPenaltyFactor, double gainBonusFactor) { @@ -113,8 +118,11 @@ if(parent != null) { double parentAccuracy = getAccuracy(parent.getCoveredPositives().size(),parent.getCoveredNegatives().size()); gain = accuracy - parentAccuracy; + } else { + accuracy += startNodeBonus; } - return accuracy + gainBonusFactor * gain - expansionPenaltyFactor * node.getHorizontalExpansion() - nodeChildPenalty * node.getChildren().size(); + int he = node.getHorizontalExpansion() - getHeuristicLengthBonus(node.getConcept()); + return accuracy + gainBonusFactor * gain - expansionPenaltyFactor * he - nodeChildPenalty * node.getChildren().size(); } private double getAccuracy(int coveredPositives, int coveredNegatives) { @@ -127,4 +135,18 @@ return multi.getNodeScore(node); } + // this function can be used to give some constructs a length bonus + // compared to their syntactic length + private static int getHeuristicLengthBonus(Description description) { + int bonus = 0; + + if(description instanceof BooleanValueRestriction) + bonus = 1; + + List<Description> children = description.getChildren(); + for(Description child : children) { + bonus += getHeuristicLengthBonus(child); + } + return bonus; + } } Modified: trunk/src/dl-learner/org/dllearner/core/owl/BooleanValueRestriction.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/owl/BooleanValueRestriction.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/core/owl/BooleanValueRestriction.java 2008-03-09 19:25:46 UTC (rev 699) @@ -50,5 +50,12 @@ public boolean getBooleanValue() { return booleanValue; } - + + /** + * Boolean value restrictions have length 2, because they encode two + * pieces of information: the property and the boolean value. + */ + public int getLength() { + return 2; + } } Modified: trunk/src/dl-learner/org/dllearner/core/owl/DatatypeValueRestriction.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/owl/DatatypeValueRestriction.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/core/owl/DatatypeValueRestriction.java 2008-03-09 19:25:46 UTC (rev 699) @@ -29,7 +29,7 @@ * @author Jens Lehmann * */ -public class DatatypeValueRestriction extends ValueRestriction { +public abstract class DatatypeValueRestriction extends ValueRestriction { public DatatypeValueRestriction(DatatypeProperty restrictedPropertyExpression, Constant value) { super(restrictedPropertyExpression, value); @@ -55,9 +55,12 @@ /* (non-Javadoc) * @see org.dllearner.core.owl.KBElement#getLength() */ - public int getLength() { - return 1 + restrictedPropertyExpression.getLength() + value.getLength(); - } + // we do not add the + 1 here because e.g. for boolean values we + // probably do not want to add it while for double value we may + // add it (because "<=" ">=" are possible while boolean has only "=") +// public int getLength() { +// return 1 + restrictedPropertyExpression.getLength() + value.getLength(); +// } /* (non-Javadoc) * @see org.dllearner.core.owl.KBElement#toString(java.lang.String, java.util.Map) Modified: trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/reasoning/OWLAPIReasoner.java 2008-03-09 19:25:46 UTC (rev 699) @@ -659,6 +659,31 @@ return map; } + public Map<Individual, SortedSet<Double>> getDoubleValues(DatatypeProperty datatypeProperty) { + OWLDataProperty prop = getOWLAPIDescription(datatypeProperty); + Map<Individual, SortedSet<Double>> map = new TreeMap<Individual, SortedSet<Double>>(); + for(Individual i : individuals) { + OWLIndividual ind = factory.getOWLIndividual(URI.create(i.getName())); + + // get all related individuals via OWL API + Set<OWLConstant> inds = null; + try { + inds = reasoner.getRelatedValues(ind, prop); + } catch (OWLReasonerException e) { + e.printStackTrace(); + } + + // convert data back to DL-Learner structures + SortedSet<Double> is = new TreeSet<Double>(); + for(OWLConstant oi : inds) { + Double d = Double.parseDouble(oi.getLiteral()); + is.add(d); + } + map.put(i, is); + } + return map; + } + @Override public Map<Individual, SortedSet<Constant>> getDatatypeMembers(DatatypeProperty datatypeProperty) { OWLDataProperty prop = getOWLAPIDescription(datatypeProperty); Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-09 17:49:11 UTC (rev 698) +++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-09 19:25:46 UTC (rev 699) @@ -129,6 +129,7 @@ private boolean useExistsConstructor = true; private boolean useNegation = true; private boolean useBooleanDatatypes = true; + private boolean disjointChecks = true; private boolean instanceBasedDisjoints = true; // caches for reasoner queries @@ -164,18 +165,38 @@ dpDomains.put(dp, rs.getDomain(dp)); } - -// NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Structure"); + /* + NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Compound"); + ObjectProperty op = new ObjectProperty("http://dl-learner.org/carcinogenesis#hasAtom"); + ObjectAllRestriction oar = new ObjectAllRestriction(op,struc); + String str = "((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS FALSE) OR ALL \"http://dl-learner.org/carcinogenesis#hasAtom\".TOP)"; + Description desc = null; + try { + desc = KBParser.parseConcept(str); + } catch (ParseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } +// System.out.println(oar.getLength()); // computeTopRefinements(3,struc); // for(Description d : topARefinements.get(struc).get(1)) { // bei 3 ist noch alles OK, bei 4 seltsamer Fehler mit Union -// for(Description d : refine(Thing.instance,3,null,struc)) { + Set<Description> ds = refine(struc,10,null,struc); + Set<Description> improper = new HashSet<Description>(); + for(Description d : ds) { // if(d instanceof Union) -// System.out.println(d); -// } + if(rs.subsumes(d, struc)) { + improper.add(d); + System.out.println(d); + } + } // System.out.println(refine(Thing.instance,7,null,struc).size()); -// System.exit(0); + System.out.println(ds.size()); + System.out.println(improper.size()); + System.exit(0); + */ + if(startClass != null) this.startClass = startClass; } @@ -199,15 +220,16 @@ public Set<Description> refine(Description description, int maxLength, List<Description> knownRefinements, Description currDomain) { -// System.out.println(description + " " + currDomain); +// System.out.println(description + " " + currDomain + " " + maxLength); // actions needing to be performed if this is the first time the // current domain is used if(!(currDomain instanceof Thing) && !topARefinementsLength.containsKey(currDomain)) topARefinementsLength.put((NamedClass)currDomain, 0); - // TODO: check whether using list or set makes more sense + // check whether using list or set makes more sense // here; and whether HashSet or TreeSet should be used + // => TreeSet because duplicates are possible Set<Description> refinements = new TreeSet<Description>(conceptComparator); // used as temporary variable @@ -261,7 +283,8 @@ // (non-recursive variant because only depth 1 was modified) ConceptTransformation.cleanConceptNonRecursive(mc); ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator); - + + refinements.add(mc); } } @@ -270,6 +293,8 @@ // refine one of the elements for(Description child : description.getChildren()) { +// System.out.println("union child: " + child + " " + maxLength + " " + description.getLength() + " " + child.getLength()); + // refine child tmp = refine(child, maxLength - description.getLength()+child.getLength(),null,currDomain); @@ -377,9 +402,10 @@ // this can avoid a lot of superfluous computation in the algorithm e.g. // when A1 looks good, so many refinements of the form (A1 OR (A2 AND A3)) // are generated which are all equal to A1 due to disjointness of A2 and A3 - if(c instanceof NamedClass && isDisjoint(description, c)) - // refinements.add(mc); + if(disjointChecks && c instanceof NamedClass && description instanceof NamedClass && isDisjoint(description, c)) { skip = true; +// System.out.println(c + " ignored when refining " + description); + } if(!skip) { Intersection mc = new Intersection(); @@ -545,14 +571,23 @@ SortedSet<Description> m1 = rs.getMoreSpecialConcepts(new Thing()); m.put(1,m1); + SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator); if(useNegation) { Set<Description> m2tmp = rs.getMoreGeneralConcepts(new Nothing()); - SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator); for(Description c : m2tmp) { m2.add(new Negation(c)); } - m.put(2,m2); } + + // boolean datatypes, e.g. testPositive = true + if(useBooleanDatatypes) { + Set<DatatypeProperty> booleanDPs = rs.getBooleanDatatypeProperties(); + for(DatatypeProperty dp : booleanDPs) { + m2.add(new BooleanValueRestriction(dp,true)); + m2.add(new BooleanValueRestriction(dp,false)); + } + } + m.put(2,m2); SortedSet<Description> m3 = new TreeSet<Description>(conceptComparator); if(useExistsConstructor) { @@ -571,15 +606,6 @@ } } - // boolean datatypes, e.g. testPositive = true - if(useBooleanDatatypes) { - Set<DatatypeProperty> booleanDPs = rs.getBooleanDatatypeProperties(); - for(DatatypeProperty dp : booleanDPs) { - m3.add(new BooleanValueRestriction(dp,true)); - m3.add(new BooleanValueRestriction(dp,false)); - } - } - m.put(3,m3); mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; @@ -600,6 +626,7 @@ SortedSet<Description> m1 = rs.getMoreSpecialConcepts(nc); mA.get(nc).put(1,m1); + SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator); if(useNegation) { // the definition in the paper is more complex, but acutally // we only have to insert the most specific concepts satisfying @@ -608,7 +635,7 @@ // subClassOf A and thus: if A and B are disjoint then also A' // and B; if not A AND B = B then also not A' AND B = B SortedSet<Description> m2tmp = rs.getMoreGeneralConcepts(new Nothing()); - SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator); + for(Description c : m2tmp) { if(c instanceof Thing) m2.add(c); @@ -617,13 +644,23 @@ if(!isNotADisjoint(a, nc) && isNotAMeaningful(a, nc)) m2.add(new Negation(a)); } + } + } + + // compute applicable properties + computeMg(nc); + + // boolean datatypes, e.g. testPositive = true + if(useBooleanDatatypes) { + Set<DatatypeProperty> booleanDPs = mgbd.get(nc); + for(DatatypeProperty dp : booleanDPs) { + m2.add(new BooleanValueRestriction(dp,true)); + m2.add(new BooleanValueRestriction(dp,false)); } - mA.get(nc).put(2,m2); } + + mA.get(nc).put(2,m2); - // compute applicable properties - computeMg(nc); - SortedSet<Description> m3 = new TreeSet<Description>(conceptComparator); if(useExistsConstructor) { for(ObjectProperty r : mgr.get(nc)) { @@ -640,15 +677,6 @@ } } - // boolean datatypes, e.g. testPositive = true - if(useBooleanDatatypes) { - Set<DatatypeProperty> booleanDPs = mgbd.get(nc); - for(DatatypeProperty dp : booleanDPs) { - m3.add(new BooleanValueRestriction(dp,true)); - m3.add(new BooleanValueRestriction(dp,false)); - } - } - mA.get(nc).put(3,m3); mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |