From: <lor...@us...> - 2012-05-02 15:25:24
|
Revision: 3675 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3675&view=rev Author: lorenz_b Date: 2012-05-02 14:52:45 +0000 (Wed, 02 May 2012) Log Message: ----------- Refactored measure for disjointness. Added popularity precomputation for classes. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-05-02 09:26:50 UTC (rev 3674) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/DisjointClassesLearner.java 2012-05-02 14:52:45 UTC (rev 3675) @@ -21,6 +21,7 @@ import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -44,6 +45,7 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.learningproblems.AxiomScore; import org.dllearner.learningproblems.Heuristics; import org.slf4j.Logger; @@ -84,6 +86,8 @@ private Set<NamedClass> allClasses; + private int popularity; + public DisjointClassesLearner(SparqlEndpointKS ks){ this.ks = ks; } @@ -119,9 +123,13 @@ fetchedRows = 0; currentlyBestEvaluatedDescriptions = new ArrayList<EvaluatedDescription>(); - //TODO + //we return here if the class contains no instances + popularity = reasoner.getPopularity(classToDescribe); + if(popularity == 0){ + return; + } - //at first get all existing classes in knowledgebase + //at first get all existing classes in knowledge base allClasses = getAllClasses(); allClasses.remove(classToDescribe); @@ -316,17 +324,24 @@ //firstly, create disjoint classexpressions which not occur and give score of 1 for(NamedClass cls : completeDisjointclasses){ if(useClassPopularity){ - int popularity = 0; + int overlap = 0; + int pop; if(ks.isRemote()){ - popularity = reasoner.getIndividualsCount(cls); + pop = reasoner.getPopularity(cls); } else { - popularity = ((LocalModelBasedSparqlEndpointKS)ks).getModel().getOntClass(cls.getName()).listInstances().toSet().size(); + pop = ((LocalModelBasedSparqlEndpointKS)ks).getModel().getOntClass(cls.getName()).listInstances().toSet().size(); } //we skip classes with no instances - if(popularity == 0) continue; - double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(popularity, 0); - double accuracy = (confidenceInterval[0] + confidenceInterval[1]) / 2; - evalDesc = new EvaluatedDescription(cls, new AxiomScore(1- accuracy)); + if(pop == 0) continue; + + //we compute the estimated precision + double precision = accuracy(pop, overlap); + //we compute the estimated recall + double recall = accuracy(popularity, overlap); + //compute the overall score + double score = 1 - fMEasure(precision, recall); + + evalDesc = new EvaluatedDescription(cls, new AxiomScore(score)); } else { evalDesc = new EvaluatedDescription(cls, new AxiomScore(1)); } @@ -335,23 +350,51 @@ } //secondly, create disjoint classexpressions with score 1 - (#occurence/#all) - for(Entry<NamedClass, Integer> entry : sortByValues(class2Count)){ - //drop classes from OWL and RDF namespace - if(entry.getKey().getName().startsWith(OWL2.getURI()) || entry.getKey().getName().startsWith(RDF.getURI()))continue; -// evalDesc = new EvaluatedDescription(entry.getKey(), -// new AxiomScore(1 - (entry.getValue() / (double)all))); - double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(total, entry.getValue()); - double accuracy = (confidenceInterval[0] + confidenceInterval[1]) / 2; - evalDesc = new EvaluatedDescription(entry.getKey(), - new AxiomScore(1 - accuracy)); - evalDescs.add(evalDesc); + for (Entry<NamedClass, Integer> entry : sortByValues(class2Count)) { + NamedClass cls = entry.getKey(); + // drop classes from OWL and RDF namespace + if (cls.getName().startsWith(OWL2.getURI()) || cls.getName().startsWith(RDF.getURI())) + continue; + if (useClassPopularity) { + int overlap = entry.getValue(); + int pop; + if (ks.isRemote()) { + pop = reasoner.getPopularity(cls); + } else { + pop = ((LocalModelBasedSparqlEndpointKS) ks).getModel() + .getOntClass(cls.getName()).listInstances().toSet() + .size(); + } + // we skip classes with no instances + if (pop == 0) + continue; + + // we compute the estimated precision + double precision = accuracy(pop, overlap); + // we compute the estimated recall + double recall = accuracy(popularity, overlap); + // compute the overall score + double score = 1 - fMEasure(precision, recall); + + evalDesc = new EvaluatedDescription(cls, new AxiomScore(score)); + } else { + evalDesc = new EvaluatedDescription(cls, new AxiomScore(1)); + } } class2Count.put(classToDescribe, total); return evalDescs; } + private double accuracy(int total, int success){ + double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(total, success); + return (confidenceInterval[0] + confidenceInterval[1]) / 2; + } + private double fMEasure(double precision, double recall){ + return 2 * precision * recall / (precision + recall); + } + private void keepMostGeneralClasses(Set<NamedClass> classes){ if(ks.isRemote()){ if(reasoner.isPrepared()){ @@ -384,12 +427,14 @@ } public static void main(String[] args) throws Exception{ -// SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); - SparqlEndpointKS ks = new LocalModelBasedSparqlEndpointKS(new URL("http://dl-learner.svn.sourceforge.net/viewvc/dl-learner/trunk/examples/swore/swore.rdf?revision=2217")); + SparqlEndpointKS ks = new SparqlEndpointKS(new SparqlEndpoint(new URL("http://dbpedia.aksw.org:8902/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); + ks = new LocalModelBasedSparqlEndpointKS(new URL("http://dl-learner.svn.sourceforge.net/viewvc/dl-learner/trunk/examples/swore/swore.rdf?revision=2217")); + ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); DisjointClassesLearner l = new DisjointClassesLearner(ks); - l.setClassToDescribe(new NamedClass("http://ns.softwiki.de/req/CustomerRequirement")); + l.setClassToDescribe(new NamedClass("http://dbpedia.org/ontology/Book")); l.init(); -// l.getReasoner().prepareSubsumptionHierarchy(); + l.getReasoner().prepareSubsumptionHierarchy(); + l.getReasoner().precomputeClassPopularity(); // System.out.println(l.getReasoner().getClassHierarchy().getSubClasses(new NamedClass("http://dbpedia.org/ontology/Athlete"), false));System.exit(0); l.start(); Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java 2012-05-02 09:26:50 UTC (rev 3674) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java 2012-05-02 14:52:45 UTC (rev 3675) @@ -702,6 +702,10 @@ public Set<NamedClass> getAllClasses() { Set<NamedClass> classes = new TreeSet<NamedClass>(); String query = "PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT ?c WHERE {?c a owl:Class} LIMIT 1000"; + /* + * String query = "PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " + + "SELECT ?c WHERE {{?c a owl:Class} UNION {?c rdfs:subClassOf ?d} UNION {?d rdfs:subClassOf ?c}} LIMIT 1000"; + */ SparqlQuery sq = new SparqlQuery(query, sparqlEndpoint); ResultSet q = sq.send(false); while (q.hasNext()) { Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2012-05-02 09:26:50 UTC (rev 3674) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2012-05-02 14:52:45 UTC (rev 3675) @@ -93,7 +93,9 @@ private ClassHierarchy hierarchy; private OntModel model; + private Map<NamedClass, Integer> classPopularityMap; + public SPARQLReasoner(SparqlEndpointKS ks) { this.ks = ks; @@ -111,6 +113,36 @@ this.model = model; } + public void precomputeClassPopularity(){ + logger.info("Precomputing class popularity ..."); + classPopularityMap = new HashMap<NamedClass, Integer>(); + + Set<NamedClass> classes = new SPARQLTasks(ks.getEndpoint()).getAllClasses(); + String queryTemplate = "SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>}"; + + ResultSet rs; + for(NamedClass nc : classes){ + rs = executeSelectQuery(String.format(queryTemplate, nc.getName())); + int cnt = rs.next().getLiteral("cnt").getInt(); + classPopularityMap.put(nc, cnt); + } + } + + public int getPopularity(NamedClass nc){ + if(classPopularityMap.containsKey(nc)){ + return classPopularityMap.get(nc); + } else { + System.out.println("Cache miss: " + nc); + String queryTemplate = "SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>}"; + + ResultSet rs = executeSelectQuery(String.format(queryTemplate, nc.getName())); + int cnt = rs.next().getLiteral("cnt").getInt(); + classPopularityMap.put(nc, cnt); + return cnt; + } + + } + public final ClassHierarchy prepareSubsumptionHierarchy() { logger.info("Preparing subsumption hierarchy ..."); long startTime = System.currentTimeMillis(); @@ -915,6 +947,10 @@ this.cache = cache; } + public void setUseCache(boolean useCache) { + this.useCache = useCache; + } + private boolean executeAskQuery(String query){ boolean ret; if(ks.isRemote()){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |