From: <jen...@us...> - 2009-05-29 07:46:49
|
Revision: 1780
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1780&view=rev
Author:   jenslehmann
Date:     2009-05-29 07:46:41 +0000 (Fri, 29 May 2009)

Log Message:
-----------
further fine tuning of geo matching

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java
    trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java

Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-28 14:21:36 UTC (rev 1779)
+++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-29 07:46:41 UTC (rev 1780)
@@ -214,9 +214,9 @@
  double matchFreq = 100*matches/(double)total;
  double matchCountFreq = 100*matches/(double)counter;
  long diffMs = currDate.getTime() - startDate.getTime();
- long diffMinutes = diffMs / (60 * 1000);
  long diffHours = diffMs / (60 * 60 * 1000);
- double pointPercentage = total / (double) totalPOICount;
+ long diffMinutes = diffMs / (60 * 1000) - diffHours * 60;
+ double pointPercentage = 100 * total / (double) totalPOICount;
  double pointsPerMs = total / (double) diffMs;
  double pointsPerHour = 3600 * 1000 * pointsPerMs;
  long estimatedMs = totalPOICount * diffMs / total;
@@ -396,7 +396,9 @@
  queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary#name_int> ?name_int } .";
  // filter out ways => we assume that it is always better to match a point and not a way
  // (if there is a way, there should also be a point but not vice versa)
-// queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) .";
+ // => according to OSM data model, ways do not have longitude/latitude, so we should
+ // always match nodes and not ways (TODO: discuss with Soeren)
+ queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) .";
  queryStr += "}";
  // SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint);
@@ -408,10 +410,8 @@
  String bestLabel = null;
  while(rs.hasNext()) {
  QuerySolution qs = rs.nextSolution();
+ String lgdURI = qs.getResource("point").toString();
- // measure string similarity and proximity
- // TODO: incomplete
-
  // step 1: string similarity
  double stringSimilarity;
  // from DBpedia we take the full label and an abbreviated version;
@@ -442,18 +442,24 @@
  double lat = qs.getLiteral("lat").getDouble();
  double lon = qs.getLiteral("long").getDouble();
  double distance = spatialDistance(dbpediaPoint.getGeoLat(), dbpediaPoint.getGeoLong(), lat, lon);
- double frac = distance / dbpediaPoint.getPoiClass().getMaxBox();
- double distanceScore = Math.pow(frac-1,4);
+ double frac = Math.min(1,distance / dbpediaPoint.getPoiClass().getMaxBox());
+ double distanceScore = Math.pow(frac-1,2);
+// System.out.println(dbpediaPoint.getPoiClass().getMaxBox());
+// System.out.println(distance);
+// System.out.println(frac);
+// System.out.println(distanceScore);
+// System.out.println("===============");
+
  double score = 0.8 * stringSimilarity + 0.2 * distanceScore;
  // if there is a node and a way, we prefer the node (better representative)
- if(qs.getResource("point").toString().contains("/way/")) {
- score -= 0.02;
- }
+// if(lgdURI.contains("/way/")) {
+// score -= 0.02;
+// }
  if(score > highestScore) {
  highestScore = score;
- bestURI = qs.getResource("point").getURI();
+ bestURI = lgdURI;
  bestLabel = lgdLabel1;
  }

Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-28 14:21:36 UTC (rev 1779)
+++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-29 07:46:41 UTC (rev 1780)
@@ -31,8 +31,8 @@
  // 50 km box
  CITY (50000),
- // 5 km box
- AIRPORT (5000),
+ // 10 km box
+ AIRPORT (10000),
  // 10 km box
  UNIVERSITY (10000),
@@ -53,11 +53,11 @@
  // 10 km box
  MOUNTAIN (10000),
- // 10000 km box (continents?)
- ISLAND (10000000),
+ // 1000 km box (continents are not counted as islands in UMBEL and DBpedia ontology)
+ ISLAND (1000000),
- // 1 km box
- STADIUM (1000),
+ // 2 km box
+ STADIUM (2000),
  // 1000 km box
  RIVER (1000000),

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|