From: <jen...@us...> - 2009-05-22 14:14:39
Revision: 1766 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1766&view=rev Author: jenslehmann Date: 2009-05-22 13:47:11 +0000 (Fri, 22 May 2009) Log Message: ----------- DBpedia->LGD city matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java trunk/src/dl-learner/org/dllearner/learningproblems/PosOnlyLP.java trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -28,7 +28,6 @@ import org.dllearner.utilities.datastructures.StringTuple; import org.dllearner.utilities.owl.OWLVocabulary; -import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.query.ResultSetFormatter; Modified: trunk/src/dl-learner/org/dllearner/learningproblems/PosOnlyLP.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/PosOnlyLP.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/learningproblems/PosOnlyLP.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -28,10 +28,8 @@ import java.util.SortedSet; import java.util.TreeSet; -import org.dllearner.core.EvaluatedDescription; import org.dllearner.core.LearningProblem; import 
org.dllearner.core.ReasonerComponent; -import org.dllearner.core.configurators.PosNegLPStandardConfigurator; import org.dllearner.core.configurators.PosOnlyLPConfigurator; import org.dllearner.core.options.CommonConfigMappings; import org.dllearner.core.options.ConfigEntry; Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -172,6 +172,7 @@ private boolean useNegation = true; private boolean useBooleanDatatypes = true; private boolean useDoubleDatatypes = true; + @SuppressWarnings("unused") private boolean useStringDatatypes = false; private boolean disjointChecks = true; private boolean instanceBasedDisjoints = true; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -32,6 +32,9 @@ import java.util.Date; import java.util.LinkedList; +import org.apache.log4j.Logger; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.utilities.Files; @@ -50,6 +53,8 @@ */ public class DBpediaLinkedGeoData { + private static Logger logger = Logger.getLogger(DBpediaLinkedGeoData.class); + // chose between nt and dat private static String dbpediaFileFormat = "dat"; static File dbpediaFile = new File("log/DBpedia_POIs." 
+ dbpediaFileFormat); @@ -57,12 +62,15 @@ private static File matchingFile = new File("log/DBpedia_GeoData_Links.nt"); private static File missesFile = new File("log/DBpedia_GeoData_Misses.dat"); - private static double scoreThreshold = 0.8; + private static double scoreThreshold = 0.9; private static StringDistance distance = new Jaro(); + private static String usedDatatype = "xsd:decimal"; + // public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointDBpedia(); public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); + private static SPARQLTasks lgd = new SPARQLTasks(new Cache("cache/matcher/"), geoDataEndpoint); // read in DBpedia ontology such that we perform taxonomy reasoning // private static ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.DBPEDIA_OWL); @@ -283,19 +291,20 @@ // (we make sure that returned points are in the same POI class) String queryStr = "select ?point ?lat ?long ?name ?name_en ?name_int where { "; queryStr += LGDPoint.getSPARQLRestriction(dbpediaPoint.getPoiClass(), "?point"); - queryStr += "?point <http://linkedgeodata.org/vocabulary/latitude> ?lat ."; - queryStr += "FILTER (xsd:float(?lat) > " + minLat + ") ."; - queryStr += "FILTER (xsd:float(?lat) < " + maxLat + ") ."; - queryStr += "?point <http://linkedgeodata.org/vocabulary/longitude> ?long ."; - queryStr += "FILTER (xsd:float(?long) > " + minLong + ") ."; - queryStr += "FILTER (xsd:float(?long) < " + maxLong + ") ."; - queryStr += "?point <http://linkedgeodata.org/vocabulary/name> ?name ."; - queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name%25en> ?name_en } ."; - queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name_int> ?name_int } ."; + queryStr += "?point <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; + queryStr += "FILTER ("+usedDatatype+"(?lat) > " + minLat + ") ."; 
+ queryStr += "FILTER ("+usedDatatype+"(?lat) < " + maxLat + ") ."; + queryStr += "?point <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long ."; + queryStr += "FILTER ("+usedDatatype+"(?long) > " + minLong + ") ."; + queryStr += "FILTER ("+usedDatatype+"(?long) < " + maxLong + ") ."; + queryStr += "?point <http://linkedgeodata.org/vocabulary#name> ?name ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary#name%25en> ?name_en } ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary#name_int> ?name_int } ."; queryStr += "}"; - SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); - ResultSet rs = query.send(); +// SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); +// ResultSet rs = query.send(); + ResultSet rs = lgd.queryAsResultSet(queryStr); double highestScore = 0; String bestURI = null; @@ -312,6 +321,10 @@ // from LGD we take name, name%25en, name, int_name String dbpediaLabel1 = dbpediaPoint.getLabel(); String dbpediaLabel2 = dbpediaPoint.getPlainLabel(); + +// System.out.println("label 1: " + dbpediaLabel1); +// System.out.println("label 2: " + dbpediaLabel2); + String lgdLabel1 = qs.getLiteral("name").toString(); stringSimilarity = distance.score(dbpediaLabel1, lgdLabel1); stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel1), stringSimilarity); @@ -340,10 +353,10 @@ } if(highestScore > scoreThreshold) { - System.out.println("Match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + logger.info("Match: " + highestScore + " " + bestLabel + " (" + dbpediaPoint.getUri() + " --> " + bestURI + ")"); return URI.create(bestURI); } else { - System.out.println("No match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + logger.info("No match: " + highestScore + " " + bestLabel + " (" + dbpediaPoint.getUri() + " --/-> " + bestURI + ")"); return null; } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 
=================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -25,7 +25,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; @@ -46,8 +47,12 @@ // number of decimals indicates a large object) private int decimalCount; - Pattern pattern = Pattern.compile("\\w+"); + // all word symbols + space + Pattern pattern = Pattern.compile("[\\w| ]+"); + // use a cache such that DBpedia points can be quickly constructed from URIs + private static SPARQLTasks st = new SPARQLTasks(new Cache("cache/dbpedia_points/"), DBpediaLinkedGeoData.dbpediaEndpoint); + /** * Constructs a DBpedia point using SPARQL. * @param uri URI of DBpedia resource. 
@@ -66,8 +71,9 @@ queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%>) ."; queryStr += "} }"; - SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); - ResultSet rs = query.send(); +// SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); + ResultSet rs = st.queryAsResultSet(queryStr); +// ResultSet rs = query.send(); classes = new String[] { }; List<String> classList = new LinkedList<String>(); Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -19,12 +19,9 @@ */ package org.dllearner.scripts.matching; -import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; import java.io.IOException; import java.net.URI; -import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.zip.DataFormatException; @@ -86,10 +83,10 @@ URI matchedURI = null; if(dbpediaPoint.getPoiClass() == POIClass.CITY) { - logger.trace("searching match for " + match.getKey() + "..."); + logger.info("Eval: searching match for " + match.getKey() + "..."); matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); } else { - System.out.println("skipping"); + System.out.println("skipping " + dbpediaPoint.getUri() + " (not detected as a city)"); continue; } @@ -98,15 +95,15 @@ // no match found if(matchedURI == null) { noMatchCount++; - logger.trace(" ... no match found"); + logger.info("Eval: ... no match found"); // correct match found } else if(matchedURI.equals(testURI)) { correctMatchCount++; - logger.trace(" ... " + testURI + " correctly detected"); + logger.info("Eval: ... 
" + testURI + " correctly detected"); // incorrect match found } else { incorrectMatchCount++; - logger.trace(" ... " + matchedURI + " detected, but " + testURI + " is correct"); + logger.info("Eval: ... " + matchedURI + " detected, but " + testURI + " is correct"); } tests++; @@ -153,7 +150,7 @@ public static void main(String args[]) throws IOException, DataFormatException { - Logger.getRootLogger().setLevel(Level.TRACE); + Logger.getRootLogger().setLevel(Level.INFO); // test file File testFile = new File("log/geodata/owlsameas_en.dat"); // map for collecting matches @@ -161,8 +158,8 @@ // perform evaluation and print results Evaluation eval = new Evaluation(matches); System.out.println(eval.getTests() + " points tested (" + eval.getDiscarded() + " discarded)"); - System.out.println("precision: " + eval.getPrecision()); - System.out.println("recall: " + eval.getRecall()); + System.out.println("precision: " + eval.getPrecision() + " (" + eval.getCorrectMatchCount() + "/" + eval.getMatchCount() + ")"); + System.out.println("recall: " + eval.getRecall() + " (" + eval.getCorrectMatchCount() + "/" + eval.getTests() + ")"); } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -51,7 +51,7 @@ public static String getSPARQLRestriction(POIClass poiClass, String variable) { switch(poiClass) { - case CITY : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"city\" ."; + case CITY : return "{ " + variable + " <http://linkedgeodata.org/vocabulary#place> \"city\" } UNION {" + variable + " <http://linkedgeodata.org/vocabulary#place> \"village\" } UNION {" + variable + " <http://linkedgeodata.org/vocabulary#place> \"town\" } UNION {" + variable + " 
<http://linkedgeodata.org/vocabulary#place> \"suburb\" }"; default: throw new Error("Cannot restrict."); } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java 2009-05-21 08:46:28 UTC (rev 1765) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java 2009-05-22 13:47:11 UTC (rev 1766) @@ -29,17 +29,10 @@ import java.util.zip.DataFormatException; import org.dllearner.algorithms.celoe.CELOE; -import org.dllearner.algorithms.refinement2.ROLComponent2; import org.dllearner.core.ComponentInitException; import org.dllearner.core.ComponentManager; import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.core.ReasonerComponent; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.ObjectValueRestriction; -import org.dllearner.core.owl.Union; import org.dllearner.kb.manipulator.Manipulator; import org.dllearner.kb.manipulator.StringToResource; import org.dllearner.kb.manipulator.Rule.Months; @@ -47,7 +40,6 @@ import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.learningproblems.EvaluatedDescriptionPosOnly; import org.dllearner.learningproblems.PosOnlyLP; import org.dllearner.reasoning.FastInstanceChecker; @@ -79,6 +71,12 @@ // test whether the dbpediaURI is a city String query = "ASK {<"+dbpediaURI+"> a <http://dbpedia.org/ontology/City>}"; boolean isCity = dbpedia.ask(query); + if(!isCity) { + // DBpedia ontology does not capture all cities, so we also use UMBEL, YAGO + String query2 = "ASK {<"+dbpediaURI+"> a ?x . 
FILTER(?x LIKE <%City%>) }"; + String query3 = "ASK {<"+dbpediaURI+"> a ?x . FILTER(?x LIKE <%Cities%>) }"; + isCity = dbpedia.ask(query2) || dbpedia.ask(query3); + } // System.out.println(isCity + " " + lgdURI); if(isCity) { positives.add(lgdURI.toString()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.