From: <jen...@us...> - 2009-05-20 08:50:59
|
Revision: 1762 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1762&view=rev Author: jenslehmann Date: 2009-05-20 08:50:53 +0000 (Wed, 20 May 2009) Log Message: ----------- prepared SPARQL based matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -70,7 +70,7 @@ // true = SPARQL is used for retrieving close points; // false = Triplify spatial extension is used - private static boolean useSparqlForGettingNearbyPoints = false; + private static boolean useSparqlForGettingNearbyPoints = true; public static void main(String[] args) throws IOException { @@ -266,36 +266,87 @@ if(useSparqlForGettingNearbyPoints) { // TODO: convert from meters to lat/long - double distanceThresholdLat = 0.3; - double distanceThresholdLong = 0.3; + double distanceThresholdLat = 0.5; + double distanceThresholdLong = 0.5; + // Triplify: $1= , $2= , $3 = distance in meters + // $box='longitude between CEIL(($2-($3/1000)/abs(cos(radians($1))*111))*10000000) and CEIL(($2+($3/1000)/abs(cos(radians($1))*111))*10000000) + // AND latitude between CEIL(($1-($3/1000/111))*10000000) and CEIL(($1+($3/1000/111))*10000000)'; + // create a box around the point double minLat = dbpediaPoint.getGeoLat() - distanceThresholdLat; double maxLat = dbpediaPoint.getGeoLat() + distanceThresholdLat; double minLong = dbpediaPoint.getGeoLong() - distanceThresholdLong; double maxLong = dbpediaPoint.getGeoLong() + distanceThresholdLong; - // query all points in the box - String queryStr = "select ?point ?lat ?long ?name where { "; + // query all points in the box corresponding to this class + // (we make sure that returned points are in the same POI class) + String queryStr = "select ?point ?lat ?long ?name ?name_en ?name_int where { "; + queryStr += LGDPoint.getSPARQLRestriction(dbpediaPoint.getPoiClass(), "?point"); queryStr += "?point <http://linkedgeodata.org/vocabulary/latitude> ?lat ."; queryStr += "FILTER (xsd:float(?lat) > " + minLat + ") ."; queryStr += "FILTER (xsd:float(?lat) < " + maxLat + ") ."; queryStr += "?point <http://linkedgeodata.org/vocabulary/longitude> ?long ."; queryStr += "FILTER (xsd:float(?long) > " + minLong + ") ."; queryStr += "FILTER (xsd:float(?long) < " + maxLong + ") ."; - queryStr += "?point <http://linkedgeodata.org/vocabulary/name> ?name ."; + queryStr += "?point <http://linkedgeodata.org/vocabulary/name> ?name ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name%25en> ?name_en } ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name_int> ?name_int } ."; queryStr += "}"; SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); ResultSet rs = query.send(); + double highestScore = 0; + String bestURI = null; + String bestLabel = null; while(rs.hasNext()) { -// QuerySolution qs = rs.nextSolution(); + QuerySolution qs = rs.nextSolution(); // measure string similarity and proximity // TODO: incomplete - } - return null; + + // step 1: string similarity + double stringSimilarity; + // from DBpedia we take the full label and an abbreviated version; + // from LGD we take name, name%25en, name, int_name + String dbpediaLabel1 = dbpediaPoint.getLabel(); + String dbpediaLabel2 = dbpediaPoint.getPlainLabel(); + String lgdLabel1 = qs.getLiteral("name").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel1); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel1), stringSimilarity); + if(qs.contains("name_en")) { + String lgdLabel2 = qs.getLiteral("name_en").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel2); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel2), stringSimilarity); + } + if(qs.contains("name_int")) { + String lgdLabel3 = qs.getLiteral("name_int").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel3); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel3), stringSimilarity); + } + + // step 2: spatial distance + // ... not yet taken into account ... see spatialDistance() method below + + double score = stringSimilarity; + + if(score > highestScore) { + highestScore = score; + bestURI = qs.getResource("point").getURI(); + bestLabel = lgdLabel1; + } + + } + + if(highestScore > scoreThreshold) { + System.out.println("Match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + return URI.create(bestURI); + } else { + System.out.println("No match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + return null; + } + // use Tripliy spatial extension } else { @@ -349,4 +400,24 @@ } } } + + // returns distance between two points in meters + public static double spatialDistance(double lat1, double long1, double lat2, double long2) { +// $distance='ROUND(1000*1.609 * 3956 * 2 * ASIN(SQRT( POWER(SIN(($1 - latitude/10000000) * pi()/180 / 2), 2) + +// COS($1 * pi()/180) * COS(latitude/10000000 * pi()/180) * POWER(SIN(($2 - longitude/10000000) * pi() +// /180 / 2), 2) ) )) AS distance'; +// double distance = 1000 * 1.609 * 3956 * 2 * +// Math.asin(Math.sqrt(Math.pow(Math.sin((lat1 - lat2)/1000000, b))); + + // implementation according to http://www.movable-type.co.uk/scripts/latlong.html + double r = 6371000; // meters + double dLat = Math.toRadians(lat2-lat1); + double dLon = Math.toRadians(long2-long1); + double a = Math.sin(dLat/2) * Math.sin(dLat/2) + + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2)) * + Math.sin(dLon/2) * Math.sin(dLon/2); + double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)); + double distance = r * c; + return distance; + } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -22,6 +22,8 @@ import java.net.URI; import java.util.LinkedList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.dllearner.kb.sparql.SparqlQuery; @@ -35,8 +37,6 @@ * */ public class DBpediaPoint extends Point { - - private URI uri; private String label; @@ -46,13 +46,14 @@ // number of decimals indicates a large object) private int decimalCount; + Pattern pattern = Pattern.compile("\\w+"); /** * Constructs a DBpedia point using SPARQL. * @param uri URI of DBpedia resource. */ public DBpediaPoint(URI uri) throws Exception { - super(0,0); + super(uri, null, 0,0); this.uri = uri; // construct DBpedia query @@ -91,30 +92,35 @@ } classes = classList.toArray(classes); + poiClass = getPOIClass(classes); } public DBpediaPoint(URI uri, String label, String[] classes, double geoLat, double geoLong, int decimalCount) { - super(geoLat,geoLong); - this.uri = uri; + super(uri, null, geoLat,geoLong); this.label = label; this.classes = classes; this.decimalCount = decimalCount; + poiClass = getPOIClass(classes); } /** - * @return the uri - */ - public URI getUri() { - return uri; - } - - /** * @return the label */ public String getLabel() { return label; } + /** + * + * @return Returns only first characters until a special symbol occurs, i.e. instead + * of "Stretton, Derbyshire" it returns "Stretton". + */ + public String getPlainLabel() { + Matcher matcher = pattern.matcher(label); + matcher.find(); + return label.substring(0, matcher.end()); + } + public String[] getClasses() { return classes; } @@ -134,4 +140,13 @@ } return str + ")"; } + + private POIClass getPOIClass(String[] classes) { + for(String clazz : classes) { + if(clazz.equals("http://dbpedia.org/ontology/City")) { + return POIClass.CITY; + } + } + return null; + } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -72,7 +72,7 @@ // memory to efficiently evaluate a lot of parameter settings without // requiring to perform slow HTTP or SPARQL requests - logger.trace("searching match for " + match.getKey() + "..."); +// logger.trace("searching match for " + match.getKey() + "..."); DBpediaPoint dbpediaPoint = null; try { @@ -82,8 +82,17 @@ discarded++; continue; } - URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + URI matchedURI = null; + + if(dbpediaPoint.getPoiClass() == POIClass.CITY) { + logger.trace("searching match for " + match.getKey() + "..."); + matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + } else { + System.out.println("skipping"); + continue; + } + URI testURI = match.getValue(); // no match found Copied: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java (from rev 1760, trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +import java.net.URI; + +/** + * A LinkedGeoData point. + * + * @author Jens Lehmann + * + */ +public class LGDPoint extends Point { + + private double name; + + public LGDPoint(URI uri, double geoLat, double geoLong) { + super(uri, null, geoLat, geoLong); + } + + /** + * @return the name + */ + public double getName() { + return name; + } + + /** + * @param name the name to set + */ + public void setName(double name) { + this.name = name; + } + + public static String getSPARQLRestriction(POIClass poiClass, String variable) { + switch(poiClass) { + case CITY : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"city\" ."; + default: throw new Error("Cannot restrict."); + } + } +} Property changes on: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java ___________________________________________________________________ Added: svn:mergeinfo + Deleted: trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -1,96 +0,0 @@ -/** - * Copyright (C) 2007-2009, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.scripts.matching; - -/** - * @author Jens Lehmann - * - */ -public class OSMPoint { - - private long id; - - private double geoLat; - - private double geoLong; - - private double name; - - public OSMPoint(long id) { - this.id = id; - } - - /** - * @return the geoLat - */ - public double getGeoLat() { - return geoLat; - } - - /** - * @param geoLat the geoLat to set - */ - public void setGeoLat(double geoLat) { - this.geoLat = geoLat; - } - - /** - * @return the geoLong - */ - public double getGeoLong() { - return geoLong; - } - - /** - * @param geoLong the geoLong to set - */ - public void setGeoLong(double geoLong) { - this.geoLong = geoLong; - } - - /** - * @return the name - */ - public double getName() { - return name; - } - - /** - * @param name the name to set - */ - public void setName(double name) { - this.name = name; - } - - /** - * @return the id - */ - public long getId() { - return id; - } - - /** - * @param id the id to set - */ - public void setId(long id) { - this.id = id; - } - -} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +/** + * Contains all types of points of interests (POIs) we are + * interested in. + * + * @author Jens Lehmann + * + */ +public enum POIClass { + + CITY, COUNTRY + +} Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -19,6 +19,8 @@ */ package org.dllearner.scripts.matching; +import java.net.URI; + /** * A geo location. * @@ -26,12 +28,18 @@ * */ public class Point { - + protected double geoLat; protected double geoLong; - public Point(double geoLat, double geoLong) { + protected URI uri; + + protected POIClass poiClass; + + public Point(URI uri, POIClass poiClass, double geoLat, double geoLong) { + this.uri = uri; + this.poiClass = poiClass; this.geoLat = geoLat; this.geoLong = geoLong; } @@ -44,4 +52,12 @@ return geoLong; } + public URI getUri() { + return uri; + } + + public POIClass getPoiClass() { + return poiClass; + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |