From: <lor...@us...> - 2012-07-16 12:52:49
|
Revision: 3794 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3794&view=rev Author: lorenz_b Date: 2012-07-16 12:52:38 +0000 (Mon, 16 Jul 2012) Log Message: ----------- Added class to compute PMI. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-16 07:10:22 UTC (rev 3793) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-16 12:52:38 UTC (rev 3794) @@ -155,6 +155,8 @@ private Set<String> relevantKeywords; + private boolean useDomainRangeRestriction = true; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -305,6 +307,10 @@ reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); } + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { + this.useDomainRangeRestriction = useDomainRangeRestriction; + } + /* * Only for Evaluation useful. 
*/ @@ -619,31 +625,66 @@ Query q = new Query(query.getQuery()); boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); +// System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ +// System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); +// System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); + } else { + for(Description nc : range.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allRanges.addAll(superClasses); + } + } + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + 
if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); + if(domain instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); } else { - for(Description nc : range.getChildren()){ + for(Description nc : domain.getChildren()){ superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); + allDomains.addAll(superClasses); } } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Description type = new NamedClass(typeURI); @@ -651,46 +692,13 @@ allTypes.addAll(superClasses); allTypes.add(type); - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); 
- allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); + System.err.println("DROPPING: \n" + q.toString()); + } else { + } } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; -// System.err.println("DROPPING: \n" + q.toString()); - } else { - - } } } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PMI.java 2012-07-16 12:52:38 UTC (rev 3794) @@ -0,0 +1,187 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Property; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + +public class PMI { + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + + public PMI(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + } + + public double getDirectedPMI(ObjectProperty prop, NamedClass cls){ + System.out.println(String.format("Computing PMI(%s, 
%s)", prop, cls)); + String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Class occurence: " + classOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Property occurence: " + propertyOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?o a <%s>}", prop.getName(), cls.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Co-occurence: " + coOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double total = rs.next().getLiteral("cnt").getInt(); + System.out.println("Total: " + total); + + if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ + return 0; + } + + double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + + return pmi; + } + + public double getDirectedPMI(NamedClass cls, Property prop){ + System.out.println(String.format("Computing PMI(%s, %s)", cls, prop)); + String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>}", cls.getName()); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Class occurence: " + classOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", 
prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double propertyOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Property occurence: " + propertyOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName()); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double coOccurenceCnt = rs.next().getLiteral("cnt").getInt(); + System.out.println("Co-occurence: " + coOccurenceCnt); + + query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s ?p ?o}"); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + double total = rs.next().getLiteral("cnt").getInt(); + System.out.println("Total: " + total); + + if(classOccurenceCnt == 0 || propertyOccurenceCnt == 0 || coOccurenceCnt == 0){ + return 0; + } + + double pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); + + return pmi; + } + + /** + * Returns the direction of the given triple, computed by calculating the PMI values of each combination. + * @param subject + * @param predicate + * @param object + * @return -1 if the given triple should be reversed, else 1. 
+ */ + public int getDirection(NamedClass subject, ObjectProperty predicate, NamedClass object){ + System.out.println(String.format("Computing direction between [%s, %s, %s]", subject, predicate, object)); + double pmi_obj_pred = getDirectedPMI(object, predicate);System.out.println("PMI(OBJECT, PREDICATE): " + pmi_obj_pred); + double pmi_pred_subj = getDirectedPMI(predicate, subject);System.out.println("PMI(PREDICATE, SUBJECT): " + pmi_pred_subj); + double pmi_subj_pred = getDirectedPMI(subject, predicate);System.out.println("PMI(SUBJECT, PREDICATE): " + pmi_subj_pred); + double pmi_pred_obj = getDirectedPMI(predicate, object);System.out.println("PMI(PREDICATE, OBJECT): " + pmi_pred_obj); + + double threshold = 2.0; + + double value = ((pmi_obj_pred + pmi_pred_subj) - (pmi_subj_pred + pmi_pred_obj)); + System.out.println("(PMI(OBJECT, PREDICATE) + PMI(PREDICATE, SUBJECT)) - (PMI(SUBJECT, PREDICATE) + PMI(PREDICATE, OBJECT)) = " + value); + + if( value > threshold){ + System.out.println(object + "---" + predicate + "--->" + subject); + return -1; + } else { + System.out.println(subject + "---" + predicate + "--->" + object); + return 1; + } + } + + public Map<ObjectProperty, Integer> getMostFrequentProperties(NamedClass cls1, NamedClass cls2){ + Map<ObjectProperty, Integer> prop2Cnt = new HashMap<ObjectProperty, Integer>(); + String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?x1 a <%s>. ?x2 a <%s>. 
?x1 ?p ?x2} GROUP BY ?p", cls1, cls2); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + ObjectProperty p = new ObjectProperty(qs.getResource("p").getURI()); + int cnt = qs.getLiteral("cnt").getInt(); + prop2Cnt.put(p, cnt); + } + return prop2Cnt; + } + + public static void main(String[] args) { + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + ExtractionDBCache cache = new ExtractionDBCache("cache"); + String NS = "http://dbpedia.org/ontology/"; + + PMI pmiGen = new PMI(endpoint, cache); + System.out.println(pmiGen.getDirectedPMI( + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Person"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirectedPMI( + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Writer"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirectedPMI( + new NamedClass(NS+ "Book"), + new ObjectProperty(NS + "author")) + ); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirection( + new NamedClass(NS+ "Writer"), + new ObjectProperty(NS + "author"), + new NamedClass(NS+ "Book"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getDirection( + new NamedClass(NS+ "Person"), + new ObjectProperty(NS + "starring"), + new NamedClass(NS+ "Film"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Person"), + new NamedClass(NS+ "Film"))); + + System.out.println("#########################################"); + + System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Film"), + new NamedClass(NS+ "Actor"))); + + System.out.println("#########################################"); + + 
System.out.println(pmiGen.getMostFrequentProperties( + new NamedClass(NS+ "Film"), + new NamedClass(NS+ "Person"))); + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |