From: <jen...@us...> - 2009-05-25 16:15:44
|
Revision: 1767 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1767&view=rev Author: jenslehmann Date: 2009-05-25 16:15:37 +0000 (Mon, 25 May 2009) Log Message: ----------- extended DBpedia->LGD matching to cover 7 different POI types Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -52,7 +52,7 @@ * the Endpoint the sparql queries will be send to */ public SPARQLTasks(final SparqlEndpoint sparqlEndpoint) { - super(); +// super(); this.cache = null; this.sparqlEndpoint = sparqlEndpoint; } @@ -64,7 +64,7 @@ * the Endpoint the sparql queries will be send to */ public SPARQLTasks(final Cache cache, final SparqlEndpoint sparqlEndpoint) { - super(); +// super(); this.cache = cache; this.sparqlEndpoint = sparqlEndpoint; } @@ -472,7 +472,6 @@ /** * low level, executes query returns ResultSet. - * TODO: Why convert from result set to JSON and back? See method below. * * @param sparqlQueryString * The query @@ -480,7 +479,13 @@ */ public ResultSetRewindable queryAsResultSet(String sparqlQueryString) { SparqlQuery sq = new SparqlQuery(sparqlQueryString, sparqlEndpoint); - return sq.send(); + if(cache == null) { + return sq.send(); + } else { + // get JSON from cache and convert to result set + String json = cache.executeSparqlQuery(sq); + return SparqlQuery.convertJSONtoResultSet(json); + } } /** Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -70,7 +70,7 @@ // public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointDBpedia(); public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); - private static SPARQLTasks lgd = new SPARQLTasks(new Cache("cache/matcher/"), geoDataEndpoint); + private static SPARQLTasks lgd = new SPARQLTasks(new Cache("cache/lgd/"), geoDataEndpoint); // read in DBpedia ontology such that we perform taxonomy reasoning // private static ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.DBPEDIA_OWL); @@ -266,27 +266,39 @@ fos.close(); } + /** + * The main matching method. The matching is directed from DBpedia to LGD, + * i.e. given a POI in DBpedia, we try to find a match in LGD. + * + * @param dbpediaPoint The DBpedia point. + * @return The URI of the matched LGD point or null if no match was found. + * @throws IOException Thrown if a query or linked data access does not work. + */ public static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) throws IOException { // 1 degree is about 111 km (depending on the specific point) - int distanceThresholdMeters = 1000; + double distanceThresholdMeters = dbpediaPoint.getPoiClass().getMaxBox(); boolean quiet = true; if(useSparqlForGettingNearbyPoints) { - // TODO: convert from meters to lat/long - double distanceThresholdLat = 0.5; - double distanceThresholdLong = 0.5; - - // Triplify: $1= , $2= , $3 = distance in meters + // deprecated: direct specification of long/lat difference +// double distanceThresholdLat = 0.5; +// double distanceThresholdLong = 0.5; + // create a box around the point +// double minLat2 = dbpediaPoint.getGeoLat() - distanceThresholdLat; +// double maxLat2 = dbpediaPoint.getGeoLat() + distanceThresholdLat; +// double minLong2 = dbpediaPoint.getGeoLong() - distanceThresholdLong; +// double maxLong2 = dbpediaPoint.getGeoLong() + distanceThresholdLong; + + // Triplify: $1 = latitude, $2 = longitude, $3 = distance in meters + // LGD uses integer for lat/long (standard values multiplied by 10000000) // $box='longitude between CEIL(($2-($3/1000)/abs(cos(radians($1))*111))*10000000) and CEIL(($2+($3/1000)/abs(cos(radians($1))*111))*10000000) // AND latitude between CEIL(($1-($3/1000/111))*10000000) and CEIL(($1+($3/1000/111))*10000000)'; - // create a box around the point - double minLat = dbpediaPoint.getGeoLat() - distanceThresholdLat; - double maxLat = dbpediaPoint.getGeoLat() + distanceThresholdLat; - double minLong = dbpediaPoint.getGeoLong() - distanceThresholdLong; - double maxLong = dbpediaPoint.getGeoLong() + distanceThresholdLong; - + double minLat = dbpediaPoint.getGeoLat()-(distanceThresholdMeters/1000/111); + double maxLat = dbpediaPoint.getGeoLat()+(distanceThresholdMeters/1000/111); + double minLong = dbpediaPoint.getGeoLong()-(distanceThresholdMeters/1000)/Math.abs(Math.cos(Math.toRadians(dbpediaPoint.getGeoLat()))*111); + double maxLong = dbpediaPoint.getGeoLong()+(distanceThresholdMeters/1000)/Math.abs(Math.cos(Math.toRadians(dbpediaPoint.getGeoLat()))*111); // query all points in the box corresponding to this class // (we make sure that returned points are in the same POI class) String queryStr = "select ?point ?lat ?long ?name ?name_en ?name_int where { "; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -68,7 +68,7 @@ queryStr += "<"+uri+"> rdfs:label ?label . "; queryStr += "OPTIONAL { <"+uri+"> rdf:type ?type . "; queryStr += "FILTER (!(?type LIKE <http://dbpedia.org/ontology/Resource>)) ."; - queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%>) ."; + queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%> || ?type LIKE <http://umbel.org/umbel/sc/%>) ."; queryStr += "} }"; // SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); @@ -147,10 +147,28 @@ return str + ")"; } + // maps classes (in DBpedia ontology or otherwise) to supported POI classes private POIClass getPOIClass(String[] classes) { for(String clazz : classes) { +// System.out.println("class: " + clazz); if(clazz.equals("http://dbpedia.org/ontology/City")) { return POIClass.CITY; + } if(clazz.equals("http://umbel.org/umbel/sc/City")) { + return POIClass.CITY; + } else if(clazz.equals("http://dbpedia.org/ontology/Lake")) { + return POIClass.LAKE; + } else if(clazz.equals("http://dbpedia.org/ontology/University")) { + return POIClass.UNIVERSITY; + } else if(clazz.equals("http://dbpedia.org/ontology/School")) { + return POIClass.SCHOOL; + } else if(clazz.equals("http://dbpedia.org/ontology/Country")) { + return POIClass.COUNTRY; + } else if(clazz.equals("http://dbpedia.org/ontology/Airport")) { + return POIClass.AIRPORT; + } else if(clazz.equals("http://umbel.org/umbel/sc/Airfield")) { + return POIClass.AIRPORT; + } else if(clazz.equals("http://dbpedia.org/ontology/Station")) { + return POIClass.RAILWAY_STATION; } } return null; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -22,6 +22,8 @@ import java.io.File; import java.io.IOException; import java.net.URI; +import java.util.Date; +import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.zip.DataFormatException; @@ -47,6 +49,11 @@ private double precision; private double recall; + private Map<POIClass, Integer> testsPerClass = new HashMap<POIClass, Integer>(); + private Map<POIClass, Integer> noMatchPerClass = new HashMap<POIClass, Integer>(); + private Map<POIClass, Integer> correctMatchPerClass = new HashMap<POIClass, Integer>(); + private Map<POIClass, Integer> incorrectMatchPerClass = new HashMap<POIClass, Integer>(); + private static Logger logger = Logger.getLogger(Evaluation.class); // map from DBpedia to LinkedGeoData @@ -58,6 +65,14 @@ correctMatchCount = 0; incorrectMatchCount = 0; + // init counts + for(POIClass poiClass : POIClass.values()) { + testsPerClass.put(poiClass, 0); + noMatchPerClass.put(poiClass, 0); + correctMatchPerClass.put(poiClass, 0); + incorrectMatchPerClass.put(poiClass, 0); + } + for(Entry<URI,URI> match : testMatches.entrySet()) { // find point in DBpedia file: // approach 1: @@ -82,12 +97,16 @@ URI matchedURI = null; - if(dbpediaPoint.getPoiClass() == POIClass.CITY) { - logger.info("Eval: searching match for " + match.getKey() + "..."); + if(dbpediaPoint.getPoiClass() == null) { + if(dbpediaPoint.getClasses().length == 0) { + System.out.println("skipping " + dbpediaPoint.getUri() + " (unknown POI type)"); + } else { + System.out.println("skipping " + dbpediaPoint.getUri() + " (unsupported POI type)"); + } + continue; + } else { + logger.info("Eval: searching match for " + match.getKey() + "(" + dbpediaPoint.getPoiClass() + ") ..."); matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); - } else { - System.out.println("skipping " + dbpediaPoint.getUri() + " (not detected as a city)"); - continue; } URI testURI = match.getValue(); @@ -95,18 +114,22 @@ // no match found if(matchedURI == null) { noMatchCount++; + inc(noMatchPerClass, dbpediaPoint.getPoiClass()); logger.info("Eval: ... no match found"); // correct match found } else if(matchedURI.equals(testURI)) { correctMatchCount++; + inc(correctMatchPerClass, dbpediaPoint.getPoiClass()); logger.info("Eval: ... " + testURI + " correctly detected"); // incorrect match found } else { incorrectMatchCount++; + inc(incorrectMatchPerClass, dbpediaPoint.getPoiClass()); logger.info("Eval: ... " + matchedURI + " detected, but " + testURI + " is correct"); } tests++; + inc(testsPerClass, dbpediaPoint.getPoiClass()); } matchCount = correctMatchCount + incorrectMatchCount; @@ -148,6 +171,50 @@ return discarded; } + private void inc(Map<POIClass,Integer> map, POIClass poiClass) { +// if(map.containsKey(poiClass)) { + map.put(poiClass, map.get(poiClass)+1); +// } else { +// map.put(poiClass, 1); +// } + } + + public Integer getCorrectMatchPerClass(POIClass poiClass) { + return correctMatchPerClass.get(poiClass); + } + + public Integer getIncorrectMatchPerClass(POIClass poiClass) { + return incorrectMatchPerClass.get(poiClass); + } + + public Integer getTestsPerClass(POIClass poiClass) { + return testsPerClass.get(poiClass); + } + + public Integer getMatchPerClass(POIClass poiClass) { + return incorrectMatchPerClass.get(poiClass) + correctMatchPerClass.get(poiClass); + } + + public Integer getNoMatchPerClass(POIClass poiClass) { + return noMatchPerClass.get(poiClass); + } + + public double getPrecisionPerClass(POIClass poiClass) { + if(getMatchPerClass(poiClass) == 0) { + return 0; + } else { + return correctMatchPerClass.get(poiClass) / (double) getMatchPerClass(poiClass); + } + } + + public double getRecallPerClass(POIClass poiClass) { + if(testsPerClass.get(poiClass) == 0) { + return 0; + } else { + return correctMatchPerClass.get(poiClass) / (double) testsPerClass.get(poiClass); + } + } + public static void main(String args[]) throws IOException, DataFormatException { Logger.getRootLogger().setLevel(Level.INFO); @@ -156,10 +223,24 @@ // map for collecting matches Map<URI,URI> matches = Utility.getMatches(testFile); // perform evaluation and print results + System.out.println(new Date()); Evaluation eval = new Evaluation(matches); + System.out.println(new Date()); + + for(POIClass poiClass : POIClass.values()) { + System.out.println(); + System.out.println("summary for POI class " + poiClass + ":"); + System.out.println(eval.getTestsPerClass(poiClass) + " points tested"); + System.out.println("precision: " + eval.getPrecisionPerClass(poiClass) + " (" + eval.getCorrectMatchPerClass(poiClass) + "/" + eval.getMatchPerClass(poiClass) + ")"); + System.out.println("recall: " + eval.getRecallPerClass(poiClass) + " (" + eval.getCorrectMatchPerClass(poiClass) + "/" + eval.getTestsPerClass(poiClass) + ")"); + } + + System.out.println(""); + System.out.println("Overall summary:"); System.out.println(eval.getTests() + " points tested (" + eval.getDiscarded() + " discarded)"); System.out.println("precision: " + eval.getPrecision() + " (" + eval.getCorrectMatchCount() + "/" + eval.getMatchCount() + ")"); System.out.println("recall: " + eval.getRecall() + " (" + eval.getCorrectMatchCount() + "/" + eval.getTests() + ")"); + } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -52,6 +52,12 @@ public static String getSPARQLRestriction(POIClass poiClass, String variable) { switch(poiClass) { case CITY : return "{ " + variable + " <http://linkedgeodata.org/vocabulary#place> \"city\" } UNION {" + variable + " <http://linkedgeodata.org/vocabulary#place> \"village\" } UNION {" + variable + " <http://linkedgeodata.org/vocabulary#place> \"town\" } UNION {" + variable + " <http://linkedgeodata.org/vocabulary#place> \"suburb\" }"; + case UNIVERSITY : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"university\" . "; + case SCHOOL : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"school\" . "; + case AIRPORT : return variable + " <http://linkedgeodata.org/vocabulary#aeroway> \"aerodrome\" . "; + case LAKE : return variable + " <http://linkedgeodata.org/vocabulary#natural> \"water\" . "; + case COUNTRY : return variable + " <http://linkedgeodata.org/vocabulary#place> \"country\" . "; + case RAILWAY_STATION : return variable + " <http://linkedgeodata.org/vocabulary#railway> \"station\" . "; default: throw new Error("Cannot restrict."); } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-22 13:47:11 UTC (rev 1766) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-25 16:15:37 UTC (rev 1767) @@ -28,6 +28,44 @@ */ public enum POIClass { - CITY, COUNTRY + // 50 km box + CITY (50000), + // 5 km box + AIRPORT (5000), + + // 10 km box + UNIVERSITY (10000), + + // 10 km box - usage unclear +// MUNICIPALITY (10000), + + SCHOOL (10000), + + RAILWAY_STATION (10000), + + // 1000 km box + LAKE (1000000000), + + // 5000 km box + COUNTRY (5000000); + + private double maxBox; + + POIClass(double maxBox) { + this.maxBox = maxBox; + } + + /** + * Maximum distance coordinates and actual position of + * this POI type can differ in meters. Retrieving POIs within + * the box specified by this distance should always contain + * the POI itself. + * + * @return The distance in meters different typical POIs for this + * type can differ. + */ + public double getMaxBox() { + return maxBox; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |