From: <lor...@us...> - 2012-06-28 13:44:58
|
Revision: 3766 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3766&view=rev Author: lorenz_b Date: 2012-06-28 13:44:49 +0000 (Thu, 28 Jun 2012) Log Message: ----------- Added object to precompute popularity of entities. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-06-26 14:34:56 UTC (rev 3765) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-06-28 13:44:49 UTC (rev 3766) @@ -285,7 +285,7 @@ private String getDistinctSPARQLQuery(QueryTree<String> tree){ String query = tree.toSPARQLQueryString(); - query = "SELECT DISTINCT " + query.substring(7); +// query = "SELECT DISTINCT " + query.substring(7); return query; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-26 14:34:56 UTC (rev 3765) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-28 13:44:49 UTC (rev 3766) @@ -46,6 +46,7 @@ import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; @@ -139,6 +140,8 @@ private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; + private PopularityMap popularityMap; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -777,33 +780,46 @@ } private double getProminenceValue(String uri, SlotType type){ - int cnt = 1; - String query = null; - if(type == SlotType.CLASS){ - query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY - || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ - query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + Integer popularity = null; + if(popularityMap != null){ + if(type == SlotType.CLASS || type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + popularity = popularityMap.getPopularity(uri); + } + } + if(popularity == null){ + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = executeSelect(query); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + popularity = qs.get(projectionVar).asLiteral().getInt(); + } } - query = String.format(query, uri); - ResultSet rs = executeSelect(query); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - cnt = qs.get(projectionVar).asLiteral().getInt(); - } + // if(cnt == 0){ // return 0; // } // return Math.log(cnt); - return cnt; + return popularity; } + public void setPopularityMap(PopularityMap popularityMap) { + this.popularityMap = popularityMap; + } private List<String> pruneList(List<String> words){ Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/PopularityMap.java 2012-06-28 13:44:49 UTC (rev 3766) @@ -0,0 +1,163 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.core.owl.DatatypeProperty; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + +public class PopularityMap { + + enum EntityType { + CLASS, PROPERTY, RESOURCE + } + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + private String file; + + private Map<String, Integer> class2Popularity = new HashMap<String, Integer>(); + private Map<String, Integer> property2Popularity = new HashMap<String, Integer>(); + private Map<String, Integer> resource2Popularity = new HashMap<String, Integer>(); + + public PopularityMap(String file, SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.file = file; + this.endpoint = endpoint; + this.cache = cache; + + } + + public void init() { + boolean deserialized = deserialize(); + if(!deserialized){ + // load popularity of classes + for (NamedClass nc : new SPARQLTasks(endpoint).getAllClasses()) { + System.out.println("Computing popularity for " + nc); + String query = String.format("SELECT COUNT(?s) WHERE {?s a <%s>}", nc.getName()); + int popularity = loadPopularity(query); + class2Popularity.put(nc.getName(), Integer.valueOf(popularity)); + } + // load popularity of properties + for (ObjectProperty op : new SPARQLTasks(endpoint).getAllObjectProperties()) { + System.out.println("Computing popularity for " + op); + String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", op.getName()); + int popularity = loadPopularity(query); + class2Popularity.put(op.getName(), Integer.valueOf(popularity)); + } + for (DatatypeProperty dp : new SPARQLTasks(endpoint).getAllDataProperties()) { + System.out.println("Computing popularity for " + dp); + String query = String.format("SELECT COUNT(*) WHERE {?s <%s> ?o}", dp.getName()); + int popularity = loadPopularity(query); + class2Popularity.put(dp.getName(), Integer.valueOf(popularity)); + } + serialize(); + } + } + + private void serialize(){ + ObjectOutputStream oos = null; + try { + oos = new ObjectOutputStream(new FileOutputStream(new File(file))); + oos.writeObject(class2Popularity); + } catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } finally { + if(oos != null){ + try { + oos.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + } + } + + private boolean deserialize(){ + File mapFile = new File(file); + if(mapFile.exists()){ + ObjectInputStream ois = null; + try { + ois = new ObjectInputStream(new FileInputStream(new File(file))); + class2Popularity = (Map<String, Integer>) ois.readObject(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } finally { + if(ois != null){ + try { + ois.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + } + return true; + } + return false; + } + + private int loadPopularity(String query){ + int pop = 0; + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + pop = qs.get(projectionVar).asLiteral().getInt(); + } + return pop; + } + + public int getPopularity(String uri, EntityType entityType){ + Integer popularity; + if(entityType == EntityType.CLASS){ + popularity = class2Popularity.get(uri); + } else if(entityType == EntityType.PROPERTY){ + popularity = property2Popularity.get(uri); + } else { + popularity = resource2Popularity.get(uri); + } + return popularity; + } + + public Integer getPopularity(String uri){ + Integer popularity = class2Popularity.get(uri); + if(popularity == null){ + popularity = property2Popularity.get(uri); + } + if(popularity == null){ + popularity = resource2Popularity.get(uri); + } + return popularity; + } + + public static void main(String[] args) { + new PopularityMap("dbpedia_popularity.map", SparqlEndpoint.getEndpointDBpedia(), new ExtractionDBCache("cache")).init(); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |