From: <jen...@us...> - 2009-04-01 11:59:26
|
Revision: 1678 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1678&view=rev Author: jenslehmann Date: 2009-04-01 11:59:16 +0000 (Wed, 01 Apr 2009) Log Message: ----------- simple match finder for Auerbachs Keller Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-01 11:17:54 UTC (rev 1677) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-01 11:59:16 UTC (rev 1678) @@ -19,7 +19,12 @@ */ package org.dllearner.scripts.matching; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; import java.net.URI; +import java.net.URL; +import java.net.URLConnection; import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -36,13 +41,15 @@ */ public class DBpediaLinkedGeoData { - public static void main(String[] args) { + public static void main(String[] args) throws IOException { // we start from the DBpedia URI and try to find the corresponding // OSM URI (assuming that each location having coordinates in Wikipedia also // exists in OSM) URI dbpediaURI = URI.create("http://dbpedia.org/resource/Auerbachs_Keller"); + int distanceThresholdMeters = 100; + // use official DBpedia endpoint (switch to db0 later) SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); SPARQLTasks st = new SPARQLTasks(endpoint); @@ -60,6 +67,27 @@ System.out.println("lat: " + geoLat + ", long: " + geoLong); + URL linkedGeoDataURL = new URL("http://linkedgeodata.org/triplify/near/"+geoLat+","+geoLong+"/"+distanceThresholdMeters); + + // TODO: replace by SPARQL query + + URLConnection conn = linkedGeoDataURL.openConnection(); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuffer sb = new StringBuffer(); + String line=""; +// int pointID = 0; + while ((line = rd.readLine()) != null) + { + if(line.contains("Auerbach")) { + System.out.println(line); + } + + sb.append(line); + } + rd.close(); + +// System.out.println(sb.toString()); + } } Added: trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java 2009-04-01 11:59:16 UTC (rev 1678) @@ -0,0 +1,96 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +/** + * @author Jens Lehmann + * + */ +public class OSMPoint { + + private long id; + + private double geoLat; + + private double geoLong; + + private double name; + + public OSMPoint(long id) { + this.id = id; + } + + /** + * @return the geoLat + */ + public double getGeoLat() { + return geoLat; + } + + /** + * @param geoLat the geoLat to set + */ + public void setGeoLat(double geoLat) { + this.geoLat = geoLat; + } + + /** + * @return the geoLong + */ + public double getGeoLong() { + return geoLong; + } + + /** + * @param geoLong the geoLong to set + */ + public void setGeoLong(double geoLong) { + this.geoLong = geoLong; + } + + /** + * @return the name + */ + public double getName() { + return name; + } + + /** + * @param name the name to set + */ + public void setName(double name) { + this.name = name; + } + + /** + * @return the id + */ + public long getId() { + return id; + } + + /** + * @param id the id to set + */ + public void setId(long id) { + this.id = id; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-04-20 17:32:21
|
Revision: 1720 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1720&view=rev Author: jenslehmann Date: 2009-04-20 17:32:06 +0000 (Mon, 20 Apr 2009) Log Message: ----------- continued ontology matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-20 15:12:11 UTC (rev 1719) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -22,13 +22,10 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; +import java.io.FileReader; import java.io.IOException; -import java.io.InputStreamReader; import java.net.URI; -import java.net.URL; -import java.net.URLConnection; -import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; @@ -44,9 +41,13 @@ */ public class DBpediaLinkedGeoData { - private static File dbpediaFile = new File("log/DBpedia_POIs.nt"); + // chose between nt and csv + private static String dbpediaFileFormat = "csv"; + private static File dbpediaFile = new File("log/DBpedia_POIs." + dbpediaFileFormat); private static boolean regenerateFile = false; + private static File matchingFile = new File("log/DBpedia_GeoData_Links.nt"); + private static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); @@ -57,56 +58,37 @@ createDBpediaFile(); } + FileOutputStream fos = new FileOutputStream(matchingFile, true); // read file point by point - // for each point: call match method - - System.exit(0); - - // we start from the DBpedia URI and try to find the corresponding - // OSM URI (assuming that each location having coordinates in Wikipedia also - // exists in OSM) - URI dbpediaURI = URI.create("http://dbpedia.org/resource/Auerbachs_Keller"); - - int distanceThresholdMeters = 100; - - // use official DBpedia endpoint (switch to db0 later) - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - SPARQLTasks st = new SPARQLTasks(endpoint); - - // query latitude and longitude - String query = "SELECT ?lat ?long WHERE { "; - query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; - query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long . } LIMIT 1"; - - // perform query and read lat and long from results - ResultSet results = st.queryAsResultSet(query); - QuerySolution qs = results.nextSolution(); - String geoLat = qs.getLiteral("lat").getString(); - String geoLong = qs.getLiteral("long").getString(); - - System.out.println("lat: " + geoLat + ", long: " + geoLong); - - URL linkedGeoDataURL = new URL("http://linkedgeodata.org/triplify/near/"+geoLat+","+geoLong+"/"+distanceThresholdMeters); - - // TODO: replace by SPARQL query - - URLConnection conn = linkedGeoDataURL.openConnection(); - BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); - StringBuffer sb = new StringBuffer(); - String line=""; -// int pointID = 0; - while ((line = rd.readLine()) != null) - { - if(line.contains("Auerbach")) { - System.out.println(line); + BufferedReader br = new BufferedReader(new FileReader(dbpediaFile)); + String line; + int counter = 0; + int matches = 0; + while ((line = br.readLine()) != null) { + + // read line and convert it into an object + String[] parts = line.split(","); + URI uri = URI.create(parts[0]); + String label = parts[1]; + double geoLat = new Double(parts[2]); + double geoLong = new Double(parts[3]); + DBpediaPoint dp = new DBpediaPoint(uri, label, geoLat, geoLong); + + // find match (we assume there is exactly one match) + URI matchURI = findGeoDataMatch(dp); + if(matchURI != null) { + String matchStr = "<" + uri + "> <http://www.w3.org/2002/07/owl#sameAs> <" + matchURI + "> .\n"; + fos.write(matchStr.getBytes()); + matches++; } + counter++; - sb.append(line); + if(counter % 1000 == 0) { + System.out.println(counter + " points processed. " + matches + " matches found."); + } } - rd.close(); - -// System.out.println(sb.toString()); - + br.close(); + fos.close(); } // downloads information about DBpedia into a separate file @@ -140,10 +122,15 @@ String geoLong = qs.getLiteral("long").getString(); String label = qs.getLiteral("label").getString(); - String content = "<" + object + ">" + " <http://www.w3.org/2000/01/rdf-schema#label> \"" + label + "\" .\n"; - content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#lat> \"" + geoLat + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; - content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#long> \"" + geoLong + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; - + String content = ""; + if(dbpediaFileFormat.equals("nt")) { + content += "<" + object + ">" + " <http://www.w3.org/2000/01/rdf-schema#label> \"" + label + "\" .\n"; + content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#lat> \"" + geoLat + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; + content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#long> \"" + geoLong + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; + } else { + content += object + ",\"" + label + "\"," + geoLat + "," + geoLong + "\n"; + } + fos.write(content.getBytes()); counter++; @@ -154,13 +141,56 @@ } while(counter == limit); - fos.close(); - } - private static URI findLinkedGeoDataMatch(DBpediaPoint dbpediaPoint) { + private static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) { + // get all GeoData points close to the given point +// SparqlQuery query = new SparqlQuery("", geoDataEndpoint); + + /* + int distanceThresholdMeters = 100; + + // use official DBpedia endpoint (switch to db0 later) + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + SPARQLTasks st = new SPARQLTasks(endpoint); + + // query latitude and longitude + String query = "SELECT ?lat ?long WHERE { "; + query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; + query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long . } LIMIT 1"; + + // perform query and read lat and long from results + ResultSet results = st.queryAsResultSet(query); + QuerySolution qs = results.nextSolution(); + String geoLat = qs.getLiteral("lat").getString(); + String geoLong = qs.getLiteral("long").getString(); + + System.out.println("lat: " + geoLat + ", long: " + geoLong); + + URL linkedGeoDataURL = new URL("http://linkedgeodata.org/triplify/near/"+geoLat+","+geoLong+"/"+distanceThresholdMeters); + + // TODO: replace by SPARQL query + + URLConnection conn = linkedGeoDataURL.openConnection(); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuffer sb = new StringBuffer(); + String line=""; +// int pointID = 0; + while ((line = rd.readLine()) != null) + { + if(line.contains("Auerbach")) { + System.out.println(line); + } + + sb.append(line); + } + rd.close(); + +// System.out.println(sb.toString()); + + */ return null; } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-04-20 15:12:11 UTC (rev 1719) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -19,10 +19,38 @@ */ package org.dllearner.scripts.matching; +import java.net.URI; + /** + * A geo location in DBpedia. + * * @author Jens Lehmann * */ -public class DBpediaPoint { +public class DBpediaPoint extends Point { + private URI uri; + + private String label; + + public DBpediaPoint(URI uri, String label, double geoLat, double geoLong) { + super(geoLat,geoLong); + this.uri = uri; + this.label = label; + } + + /** + * @return the uri + */ + public URI getUri() { + return uri; + } + + /** + * @return the label + */ + public String getLabel() { + return label; + } + } Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +/** + * A geo location. + * + * @author Jens Lehmann + * + */ +public class Point { + + private double geoLat; + + private double geoLong; + + public Point(double geoLat, double geoLong) { + this.geoLat = geoLat; + this.geoLong = geoLong; + } + + public double getGeoLat() { + return geoLat; + } + + public double getGeoLong() { + return geoLong; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-05-05 11:52:41
|
Revision: 1735 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1735&view=rev Author: kurzum Date: 2009-05-05 11:52:27 +0000 (Tue, 05 May 2009) Log Message: ----------- code for testing existing matching Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java Added: trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LearnCriteria.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,120 @@ +package org.dllearner.scripts.matching; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.CSVFileToArray; +import org.dllearner.utilities.datastructures.StringTuple; + +import com.wcohen.ss.Jaro; +import com.wcohen.ss.api.StringDistance; + +public class LearnCriteria { + ArrayList<SameCollect>sameAs =new ArrayList<SameCollect>() ; + Mcollect m ; + /** + * @param args + */ + public static void main(String[] args) { + LearnCriteria lc = new LearnCriteria(); + @SuppressWarnings("unused") + StringDistance distance = new Jaro(); + try{ + CSVFileToArray csv = new CSVFileToArray("osmdata/owlsameas_en.csv"); + ArrayList<String> al =null ; + + while ((al = csv.next()) != null){ + //System.out.println(al); + if(al.size()!=2)continue; + //if(distance.score(al.get(0), al.get(1))>=0.7){ + //System.out.println(distance.score(al.get(0), al.get(1))); + //System.out.println(al); + //} + //String dbpedia = al.get(1).replace("%25", "%"); + String dbpedia = al.get(1); + lc.sameAs.add(new SameCollect(al.get(0), dbpedia)); + } + }catch (Exception e) { + e.printStackTrace(); + } + + Mcollect m = new Mcollect(); + SPARQLTasks dbpedia = new SPARQLTasks(new Cache("matching"), SparqlEndpoint.getEndpointLOCALDBpedia()); + int countzerold = 0; + int countzerodb = 0; + for (int x = 0; x<lc.sameAs.size();x++) { + SameCollect s = lc.sameAs.get(x); + String query = "SELECT * WHERE {<"+s.db+"> ?p ?o}"; + s.dbdata = dbpedia.queryAsRDFNodeTuple(query, "?p", "?o"); + s.lddata = lc.getLinkedData(s.ld); +// System.exit(0); +// System.out.println(s.lddata); +// for (StringTuple string : s.lddata ) { +// System.out.println(string); +// } + m.add(s); + if(s.dbdata.size() == 0){ + System.out.println(s.db); + countzerodb+=1; + } + //if( s.lddata.size() == 0)countzerold+=1; +// if(x>110) break; + System.out.println(x); + } + + System.out.println(countzerodb); + System.out.println(countzerold); + // System.exit(0); + + System.out.println(m); + //System.out.println(lc.sameAs); + //System.out.println(lc.sameAs.size()); + + + } + + public SortedSet<StringTuple> getLinkedData(String url){ + SortedSet<StringTuple> result = new TreeSet<StringTuple>(); + try{ + URL linkedGeoDataURL = new URL(url); + + URLConnection conn = linkedGeoDataURL.openConnection(); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + String line=""; + @SuppressWarnings("unused") + boolean oneLine = false; + while ((line = rd.readLine()) != null) + + { oneLine = true; +// System.out.println(line);continue; + line = line.replace("<"+url+"#id>", ""); + line = line.replace("<"+url+">", ""); + + + String p = line.substring(line.indexOf("<")+1,line.indexOf(">") ); + line = line.substring(line.indexOf(">")+1); + line = line.substring(0,line.lastIndexOf(".")); + line = line.trim(); + line = line.substring(1); + String o = line.substring(0,line.length()-1); +// System.out.println(new StringTuple(p,o)); + result.add(new StringTuple(p,o)); + } + + rd.close(); + + }catch (Exception e) { + e.printStackTrace(); + } + return result; + } + +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Mcollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,63 @@ +package org.dllearner.scripts.matching; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.dllearner.utilities.datastructures.RDFNodeTuple; +import org.dllearner.utilities.datastructures.StringTuple; + +import com.wcohen.ss.Jaro; +import com.wcohen.ss.api.StringDistance; + +public class Mcollect { + String name; + List<Pcollect> props = new ArrayList<Pcollect>(); + + + public void add(SameCollect s){ + StringDistance distance = new Jaro(); + + for (RDFNodeTuple db : s.dbdata) { + for (StringTuple ld : s.lddata) { +// System.out.println(ld.b); +// System.out.println(db.b.toString()); +// System.out.println(istance.score(ld.b,db.b.toString())); + if ( distance.score(ld.b,db.b.toString())>=0.90){ + boolean found = false; + for (Pcollect p : props){ + + if(p.ldp.equals(ld.a) && p.dbp.equals( db.a.toString())){ + p.count +=1; + found = true; + } + } + if(found==false){ + props.add(new Pcollect(ld.a, db.a.toString())); + } + + + }; + + } + } + } + + @SuppressWarnings("unchecked") + @Override + public String toString(){ + //SortedSet<Pcollect> s = new TreeSet<Pcollect>(); +// for(Pcollect one : s){ +// s.add(one); +// +// } + String ret = ""; + Collections.sort(props ); + for(int a=0; a<props.size();a++){ + + ret+= props.get(a).toString()+"\n"; + + } + return ret; + } +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Pcollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,34 @@ +package org.dllearner.scripts.matching; + +public class Pcollect implements Comparable<Pcollect>{ + + String ldp; + String dbp; + int count=1; + + + + + public Pcollect(String ldp, String dbp) { + super(); + this.ldp = ldp.trim(); + this.dbp = dbp.trim(); + } + + + @Override + public String toString(){ + String ret = "count : "+count+" : "+ldp+ " = "+dbp; + + return ret; + } + + public int compareTo(Pcollect in){ + + Pcollect other = (Pcollect) in; + if(this.count==other.count)return 0; + if( this.count>other.count){ + return -1; + }else {return 1;} + } +} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/SameCollect.java 2009-05-05 11:52:27 UTC (rev 1735) @@ -0,0 +1,23 @@ +package org.dllearner.scripts.matching; + +import java.util.SortedSet; + +import org.dllearner.utilities.datastructures.RDFNodeTuple; +import org.dllearner.utilities.datastructures.StringTuple; + +public class SameCollect { + + String ld; + String db; + + SortedSet<RDFNodeTuple> dbdata; + SortedSet<StringTuple> lddata; + + public SameCollect(String ld, String db) { + super(); + this.ld = ld; + this.db = db; + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-07 08:48:13
|
Revision: 1746 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1746&view=rev Author: jenslehmann Date: 2009-05-07 08:48:00 +0000 (Thu, 07 May 2009) Log Message: ----------- mapping evaluator Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 08:45:50 UTC (rev 1745) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -52,7 +52,7 @@ // chose between nt and dat private static String dbpediaFileFormat = "dat"; - private static File dbpediaFile = new File("log/DBpedia_POIs." + dbpediaFileFormat); + static File dbpediaFile = new File("log/DBpedia_POIs." + dbpediaFileFormat); private static boolean regenerateFile = false; private static File matchingFile = new File("log/DBpedia_GeoData_Links.nt"); @@ -60,7 +60,7 @@ private static double scoreThreshold = 0.8; private static StringDistance distance = new Jaro(); - private static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); + public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); // read in DBpedia ontology such that we perform taxonomy reasoning @@ -257,7 +257,7 @@ fos.close(); } - private static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) throws IOException { + public static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) throws IOException { // 1 degree is about 111 km (depending on the specific point) int distanceThresholdMeters = 1000; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 08:45:50 UTC (rev 1745) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -21,6 +21,11 @@ import java.net.URI; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + /** * A geo location in DBpedia. * @@ -39,6 +44,40 @@ // number of decimals indicates a large object) private int decimalCount; + + /** + * Constructs a DBpedia point using SPARQL. + * @param uri URI of DBpedia resource. + */ + public DBpediaPoint(URI uri) { + super(0,0); + this.uri = uri; + + // construct DBpedia query + String queryStr = "SELECT ?lat, ?long, ?label, ?type WHERE {"; + queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; + queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long ."; + queryStr += "?object rdfs:label ?label . "; + queryStr += "OPTIONAL { <"+uri+" rdf:type ?type . "; + queryStr += "FILTER (!(?type LIKE <http://dbpedia.org/ontology/Resource>)) ."; + queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%>) ."; + queryStr += "} }"; + + SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); + ResultSet rs = query.send(); + classes = new String[] { }; + int count = 0; + + while(rs.hasNext()) { + QuerySolution qs = rs.nextSolution(); + geoLat = qs.getLiteral("lat").getDouble(); + geoLong = qs.getLiteral("long").getDouble(); + label = qs.getLiteral("label").getString(); + classes[count] = qs.get("type").toString(); + count++; + } + } + public DBpediaPoint(URI uri, String label, String[] classes, double geoLat, double geoLong, int decimalCount) { super(geoLat,geoLong); this.uri = uri; Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -0,0 +1,138 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Performs an evaluation of a matching method method by analising it + * on a test set. + * + * @author Jens Lehmann + * + */ +public class Evaluation { + + private int tests; + private int noMatchCount; + private int correctMatchCount; + private int incorrectMatchCount; + private int matchCount; + private double precision; + private double recall; + + // map from DBpedia to LinkedGeoData + public Evaluation(Map<URI,URI> testMatches) throws IOException { + + tests = 0; + noMatchCount = 0; + correctMatchCount = 0; + incorrectMatchCount = 0; + + for(Entry<URI,URI> match : testMatches.entrySet()) { + // find point in DBpedia file: + // approach 1: + // step 1: locate point in DBpedia file + // step 2: read all information about point + // step 3: write a method converting this information into a DBpedia point + + // "problem": might be good to put all relevant DBpedia and GeoData points in + // memory to efficiently evaluate a lot of parameter settings without + // requiring to perform slow HTTP or SPARQL requests + + DBpediaPoint dbpediaPoint = new DBpediaPoint(match.getKey()); + URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + + URI testURI = match.getValue(); + + // no match found + if(matchedURI == null) { + noMatchCount++; + // correct match found + } else if(matchedURI.equals(testURI)) { + correctMatchCount++; + // incorrect match found + } else { + incorrectMatchCount++; + } + + tests++; + } + + matchCount = correctMatchCount + incorrectMatchCount; + // determine proportion of correct matchings + precision = correctMatchCount / (double) matchCount; + // determine proportion of correct matches + recall = correctMatchCount / (double) tests; + } + + public int getCorrectMatchCount() { + return correctMatchCount; + } + + public int getIncorrectMatchCount() { + return incorrectMatchCount; + } + + public int getMatchCount() { + return matchCount; + } + + public int getNoMatchCount() { + return noMatchCount; + } + + public double getPrecision() { + return precision; + } + + public double getRecall() { + return recall; + } + + public int getTests() { + return tests; + } + + public static void main(String args[]) throws IOException { + // test file + String testFile = "log/geodata/owlsameas_en.dat"; + // map for collecting matches + Map<URI,URI> matches = new HashMap<URI,URI>(); + // read file line by line to collect matches + BufferedReader br = new BufferedReader(new FileReader(testFile)); + String line; + while ((line = br.readLine()) != null) { + String[] tmp = line.split(" "); + matches.put(URI.create(tmp[1]), URI.create(tmp[0])); + } + // perform evaluation and print results + Evaluation eval = new Evaluation(matches); + System.out.println("precision: " + eval.getPrecision()); + System.out.println("recall: " + eval.getRecall()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-07 10:12:05
|
Revision: 1747 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1747&view=rev Author: jenslehmann Date: 2009-05-07 10:11:58 +0000 (Thu, 07 May 2009) Log Message: ----------- mapping evaluator ctd. Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 08:48:00 UTC (rev 1746) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 10:11:58 UTC (rev 1747) @@ -60,6 +60,7 @@ private static double scoreThreshold = 0.8; private static StringDistance distance = new Jaro(); +// public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointDBpedia(); public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); @@ -319,6 +320,8 @@ // perform string similarity // (we can use a variety of string matching heuristics) +// System.out.println(label); +// System.out.println(dbpediaPoint); double score = distance.score(label, dbpediaPoint.getLabel()); if(score > highestScore) { highestScore = score; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 08:48:00 UTC (rev 1746) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 10:11:58 UTC (rev 1747) @@ -20,6 +20,8 @@ package org.dllearner.scripts.matching; import java.net.URI; +import java.util.LinkedList; +import java.util.List; import org.dllearner.kb.sparql.SparqlQuery; @@ -49,7 +51,7 @@ * Constructs a DBpedia point using SPARQL. * @param uri URI of DBpedia resource. */ - public DBpediaPoint(URI uri) { + public DBpediaPoint(URI uri) throws Exception { super(0,0); this.uri = uri; @@ -57,8 +59,8 @@ String queryStr = "SELECT ?lat, ?long, ?label, ?type WHERE {"; queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long ."; - queryStr += "?object rdfs:label ?label . "; - queryStr += "OPTIONAL { <"+uri+" rdf:type ?type . "; + queryStr += "<"+uri+"> rdfs:label ?label . "; + queryStr += "OPTIONAL { <"+uri+"> rdf:type ?type . "; queryStr += "FILTER (!(?type LIKE <http://dbpedia.org/ontology/Resource>)) ."; queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%>) ."; queryStr += "} }"; @@ -66,16 +68,23 @@ SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); ResultSet rs = query.send(); classes = new String[] { }; - int count = 0; + List<String> classList = new LinkedList<String>(); + if(!rs.hasNext()) { + throw new Exception("cannot construct point for " + uri + " (latitude/longitude missing?)"); + } + while(rs.hasNext()) { QuerySolution qs = rs.nextSolution(); geoLat = qs.getLiteral("lat").getDouble(); geoLong = qs.getLiteral("long").getDouble(); label = qs.getLiteral("label").getString(); - classes[count] = qs.get("type").toString(); - count++; + if(qs.contains("type")) { + classList.add(qs.get("type").toString()); + } } + + classes = classList.toArray(classes); } public DBpediaPoint(URI uri, String label, String[] classes, double geoLat, double geoLong, int decimalCount) { Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 08:48:00 UTC (rev 1746) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 10:11:58 UTC (rev 1747) @@ -27,6 +27,9 @@ import java.util.Map; import java.util.Map.Entry; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + /** * Performs an evaluation of a matching method method by analising it * on a test set. @@ -44,6 +47,8 @@ private double precision; private double recall; + private static Logger logger = Logger.getLogger(Evaluation.class); + // map from DBpedia to LinkedGeoData public Evaluation(Map<URI,URI> testMatches) throws IOException { @@ -63,7 +68,15 @@ // memory to efficiently evaluate a lot of parameter settings without // requiring to perform slow HTTP or SPARQL requests - DBpediaPoint dbpediaPoint = new DBpediaPoint(match.getKey()); + logger.trace("searching match for " + match.getKey() + "..."); + + DBpediaPoint dbpediaPoint = null; + try { + dbpediaPoint = new DBpediaPoint(match.getKey()); + } catch (Exception e) { + logger.debug(e.getMessage()); + continue; + } URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); URI testURI = match.getValue(); @@ -71,12 +84,15 @@ // no match found if(matchedURI == null) { noMatchCount++; + logger.trace(" ... no match found"); // correct match found } else if(matchedURI.equals(testURI)) { correctMatchCount++; + logger.trace(" ... " + testURI + " correctly detected"); // incorrect match found } else { incorrectMatchCount++; + logger.trace(" ... " + matchedURI + " detected, but " + testURI + " is correct"); } tests++; @@ -118,6 +134,8 @@ } public static void main(String args[]) throws IOException { + + Logger.getRootLogger().setLevel(Level.TRACE); // test file String testFile = "log/geodata/owlsameas_en.dat"; // map for collecting matches @@ -126,8 +144,13 @@ BufferedReader br = new BufferedReader(new FileReader(testFile)); String line; while ((line = br.readLine()) != null) { - String[] tmp = line.split(" "); - matches.put(URI.create(tmp[1]), URI.create(tmp[0])); + String[] tmp = line.split("\t"); +// System.out.println(line); +// for(String test : tmp) { +// System.out.println(test); +// } + + matches.put(URI.create(tmp[1]), URI.create(tmp[0] + "#id")); } // perform evaluation and print results Evaluation eval = new Evaluation(matches); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-07 11:41:55
|
Revision: 1748 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1748&view=rev Author: jenslehmann Date: 2009-05-07 11:41:54 +0000 (Thu, 07 May 2009) Log Message: ----------- mapping evaluator working Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 10:11:58 UTC (rev 1747) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 11:41:54 UTC (rev 1748) @@ -77,7 +77,13 @@ while(rs.hasNext()) { QuerySolution qs = rs.nextSolution(); geoLat = qs.getLiteral("lat").getDouble(); + if(((Double)geoLat).toString().contains("E")) { + geoLat = 0.0; + } geoLong = qs.getLiteral("long").getDouble(); + if(((Double)geoLong).toString().contains("E")) { + geoLong = 0.0; + } label = qs.getLiteral("label").getString(); if(qs.contains("type")) { classList.add(qs.get("type").toString()); Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 10:11:58 UTC (rev 1747) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 11:41:54 UTC (rev 1748) @@ -40,6 +40,7 @@ public class Evaluation { private int tests; + private int discarded; private int noMatchCount; private int correctMatchCount; private int incorrectMatchCount; @@ -53,6 +54,7 @@ public Evaluation(Map<URI,URI> testMatches) throws IOException { tests = 0; + discarded = 0; noMatchCount = 0; correctMatchCount = 0; incorrectMatchCount = 0; @@ -75,6 +77,7 @@ dbpediaPoint = new DBpediaPoint(match.getKey()); } catch (Exception e) { logger.debug(e.getMessage()); + discarded++; continue; } URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); @@ -133,6 +136,10 @@ return tests; } + public int getDiscarded() { + return discarded; + } + public static void main(String args[]) throws IOException { Logger.getRootLogger().setLevel(Level.TRACE); @@ -154,6 +161,7 @@ } // perform evaluation and print results Evaluation eval = new Evaluation(matches); + System.out.println(eval.getTests() + " points tested (" + eval.getDiscarded() + " discarded)"); System.out.println("precision: " + eval.getPrecision()); System.out.println("recall: " + eval.getRecall()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-20 08:50:59
|
Revision: 1762 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1762&view=rev Author: jenslehmann Date: 2009-05-20 08:50:53 +0000 (Wed, 20 May 2009) Log Message: ----------- prepared SPARQL based matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -70,7 +70,7 @@ // true = SPARQL is used for retrieving close points; // false = Triplify spatial extension is used - private static boolean useSparqlForGettingNearbyPoints = false; + private static boolean useSparqlForGettingNearbyPoints = true; public static void main(String[] args) throws IOException { @@ -266,36 +266,87 @@ if(useSparqlForGettingNearbyPoints) { // TODO: convert from meters to lat/long - double distanceThresholdLat = 0.3; - double distanceThresholdLong = 0.3; + double distanceThresholdLat = 0.5; + double distanceThresholdLong = 0.5; + // Triplify: $1= , $2= , $3 = distance in meters + // $box='longitude between CEIL(($2-($3/1000)/abs(cos(radians($1))*111))*10000000) and CEIL(($2+($3/1000)/abs(cos(radians($1))*111))*10000000) + // AND latitude between CEIL(($1-($3/1000/111))*10000000) and CEIL(($1+($3/1000/111))*10000000)'; + // create a box around the point double minLat = dbpediaPoint.getGeoLat() - distanceThresholdLat; double maxLat = dbpediaPoint.getGeoLat() + distanceThresholdLat; double minLong = dbpediaPoint.getGeoLong() - distanceThresholdLong; double maxLong = dbpediaPoint.getGeoLong() + distanceThresholdLong; - // query all points in the box - String queryStr = "select ?point ?lat ?long ?name where { "; + // query all points in the box corresponding to this class + // (we make sure that returned points are in the same POI class) + String queryStr = "select ?point ?lat ?long ?name ?name_en ?name_int where { "; + queryStr += LGDPoint.getSPARQLRestriction(dbpediaPoint.getPoiClass(), "?point"); queryStr += "?point <http://linkedgeodata.org/vocabulary/latitude> ?lat ."; queryStr += "FILTER (xsd:float(?lat) > " + minLat + ") ."; queryStr += "FILTER (xsd:float(?lat) < " + maxLat + ") ."; queryStr += "?point <http://linkedgeodata.org/vocabulary/longitude> ?long ."; queryStr += "FILTER (xsd:float(?long) > " + minLong + ") ."; queryStr += "FILTER (xsd:float(?long) < " + maxLong + ") ."; - queryStr += "?point <http://linkedgeodata.org/vocabulary/name> ?name ."; + queryStr += "?point <http://linkedgeodata.org/vocabulary/name> ?name ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name%25en> ?name_en } ."; + queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary/name_int> ?name_int } ."; queryStr += "}"; SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); ResultSet rs = query.send(); + double highestScore = 0; + String bestURI = null; + String bestLabel = null; while(rs.hasNext()) { -// QuerySolution qs = rs.nextSolution(); + QuerySolution qs = rs.nextSolution(); // measure string similarity and proximity // TODO: incomplete - } - return null; + + // step 1: string similarity + double stringSimilarity; + // from DBpedia we take the full label and an abbreviated version; + // from LGD we take name, name%25en, name, int_name + String dbpediaLabel1 = dbpediaPoint.getLabel(); + String dbpediaLabel2 = dbpediaPoint.getPlainLabel(); + String lgdLabel1 = qs.getLiteral("name").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel1); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel1), stringSimilarity); + if(qs.contains("name_en")) { + String lgdLabel2 = qs.getLiteral("name_en").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel2); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel2), stringSimilarity); + } + if(qs.contains("name_int")) { + String lgdLabel3 = qs.getLiteral("name_int").toString(); + stringSimilarity = distance.score(dbpediaLabel1, lgdLabel3); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel3), stringSimilarity); + } + + // step 2: spatial distance + // ... not yet taken into account ... see spatialDistance() method below + + double score = stringSimilarity; + + if(score > highestScore) { + highestScore = score; + bestURI = qs.getResource("point").getURI(); + bestLabel = lgdLabel1; + } + + } + + if(highestScore > scoreThreshold) { + System.out.println("Match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + return URI.create(bestURI); + } else { + System.out.println("No match: " + highestScore + " " + bestLabel + " (" + bestURI + ")"); + return null; + } + // use Tripliy spatial extension } else { @@ -349,4 +400,24 @@ } } } + + // returns distance between two points in meters + public static double spatialDistance(double lat1, double long1, double lat2, double long2) { +// $distance='ROUND(1000*1.609 * 3956 * 2 * ASIN(SQRT( POWER(SIN(($1 - latitude/10000000) * pi()/180 / 2), 2) + +// COS($1 * pi()/180) * COS(latitude/10000000 * pi()/180) * POWER(SIN(($2 - longitude/10000000) * pi() +// /180 / 2), 2) ) )) AS distance'; +// double distance = 1000 * 1.609 * 3956 * 2 * +// Math.asin(Math.sqrt(Math.pow(Math.sin((lat1 - lat2)/1000000, b))); + + // implementation according to http://www.movable-type.co.uk/scripts/latlong.html + double r = 6371000; // meters + double dLat = Math.toRadians(lat2-lat1); + double dLon = Math.toRadians(long2-long1); + double a = Math.sin(dLat/2) * Math.sin(dLat/2) + + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2)) * + Math.sin(dLon/2) * Math.sin(dLon/2); + double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)); + double distance = r * c; + return distance; + } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -22,6 +22,8 @@ import java.net.URI; import java.util.LinkedList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.dllearner.kb.sparql.SparqlQuery; @@ -35,8 +37,6 @@ * */ public class DBpediaPoint extends Point { - - private URI uri; private String label; @@ -46,13 +46,14 @@ // number of decimals indicates a large object) private int decimalCount; + Pattern pattern = Pattern.compile("\\w+"); /** * Constructs a DBpedia point using SPARQL. * @param uri URI of DBpedia resource. */ public DBpediaPoint(URI uri) throws Exception { - super(0,0); + super(uri, null, 0,0); this.uri = uri; // construct DBpedia query @@ -91,30 +92,35 @@ } classes = classList.toArray(classes); + poiClass = getPOIClass(classes); } public DBpediaPoint(URI uri, String label, String[] classes, double geoLat, double geoLong, int decimalCount) { - super(geoLat,geoLong); - this.uri = uri; + super(uri, null, geoLat,geoLong); this.label = label; this.classes = classes; this.decimalCount = decimalCount; + poiClass = getPOIClass(classes); } /** - * @return the uri - */ - public URI getUri() { - return uri; - } - - /** * @return the label */ public String getLabel() { return label; } + /** + * + * @return Returns only first characters until a special symbol occurs, i.e. instead + * of "Stretton, Derbyshire" it returns "Stretton". + */ + public String getPlainLabel() { + Matcher matcher = pattern.matcher(label); + matcher.find(); + return label.substring(0, matcher.end()); + } + public String[] getClasses() { return classes; } @@ -134,4 +140,13 @@ } return str + ")"; } + + private POIClass getPOIClass(String[] classes) { + for(String clazz : classes) { + if(clazz.equals("http://dbpedia.org/ontology/City")) { + return POIClass.CITY; + } + } + return null; + } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -72,7 +72,7 @@ // memory to efficiently evaluate a lot of parameter settings without // requiring to perform slow HTTP or SPARQL requests - logger.trace("searching match for " + match.getKey() + "..."); +// logger.trace("searching match for " + match.getKey() + "..."); DBpediaPoint dbpediaPoint = null; try { @@ -82,8 +82,17 @@ discarded++; continue; } - URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + URI matchedURI = null; + + if(dbpediaPoint.getPoiClass() == POIClass.CITY) { + logger.trace("searching match for " + match.getKey() + "..."); + matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + } else { + System.out.println("skipping"); + continue; + } + URI testURI = match.getValue(); // no match found Copied: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java (from rev 1760, trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +import java.net.URI; + +/** + * A LinkedGeoData point. + * + * @author Jens Lehmann + * + */ +public class LGDPoint extends Point { + + private double name; + + public LGDPoint(URI uri, double geoLat, double geoLong) { + super(uri, null, geoLat, geoLong); + } + + /** + * @return the name + */ + public double getName() { + return name; + } + + /** + * @param name the name to set + */ + public void setName(double name) { + this.name = name; + } + + public static String getSPARQLRestriction(POIClass poiClass, String variable) { + switch(poiClass) { + case CITY : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"city\" ."; + default: throw new Error("Cannot restrict."); + } + } +} Property changes on: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java ___________________________________________________________________ Added: svn:mergeinfo + Deleted: trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/OSMPoint.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -1,96 +0,0 @@ -/** - * Copyright (C) 2007-2009, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.scripts.matching; - -/** - * @author Jens Lehmann - * - */ -public class OSMPoint { - - private long id; - - private double geoLat; - - private double geoLong; - - private double name; - - public OSMPoint(long id) { - this.id = id; - } - - /** - * @return the geoLat - */ - public double getGeoLat() { - return geoLat; - } - - /** - * @param geoLat the geoLat to set - */ - public void setGeoLat(double geoLat) { - this.geoLat = geoLat; - } - - /** - * @return the geoLong - */ - public double getGeoLong() { - return geoLong; - } - - /** - * @param geoLong the geoLong to set - */ - public void setGeoLong(double geoLong) { - this.geoLong = geoLong; - } - - /** - * @return the name - */ - public double getName() { - return name; - } - - /** - * @param name the name to set - */ - public void setName(double name) { - this.name = name; - } - - /** - * @return the id - */ - public long getId() { - return id; - } - - /** - * @param id the id to set - */ - public void setId(long id) { - this.id = id; - } - -} Added: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +/** + * Contains all types of points of interests (POIs) we are + * interested in. + * + * @author Jens Lehmann + * + */ +public enum POIClass { + + CITY, COUNTRY + +} Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-05-19 21:05:48 UTC (rev 1761) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-05-20 08:50:53 UTC (rev 1762) @@ -19,6 +19,8 @@ */ package org.dllearner.scripts.matching; +import java.net.URI; + /** * A geo location. * @@ -26,12 +28,18 @@ * */ public class Point { - + protected double geoLat; protected double geoLong; - public Point(double geoLat, double geoLong) { + protected URI uri; + + protected POIClass poiClass; + + public Point(URI uri, POIClass poiClass, double geoLat, double geoLong) { + this.uri = uri; + this.poiClass = poiClass; this.geoLat = geoLat; this.geoLong = geoLong; } @@ -44,4 +52,12 @@ return geoLong; } + public URI getUri() { + return uri; + } + + public POIClass getPoiClass() { + return poiClass; + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-25 20:35:01
|
Revision: 1769 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1769&view=rev Author: jenslehmann Date: 2009-05-25 20:34:26 +0000 (Mon, 25 May 2009) Log Message: ----------- adjustments for full matching run Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-25 17:29:33 UTC (rev 1768) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-25 20:34:26 UTC (rev 1769) @@ -30,13 +30,15 @@ import java.net.URLConnection; import java.util.Collection; import java.util.Date; +import java.util.HashMap; import java.util.LinkedList; +import java.util.Map; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.utilities.Files; import com.hp.hpl.jena.query.QuerySolution; @@ -73,6 +75,9 @@ private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); private static SPARQLTasks lgd = new SPARQLTasks(new Cache("cache/lgd/"), geoDataEndpoint); + private static Map<POIClass, Integer> noMatchPerClass = new HashMap<POIClass, Integer>(); + private static Map<POIClass, Integer> matchPerClass = new HashMap<POIClass, Integer>(); + // read in DBpedia ontology such that we perform taxonomy reasoning // private static ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.DBPEDIA_OWL); // private static ClassHierarchy hierarchy = reasoner.getClassHierarchy(); @@ -83,11 +88,18 @@ public static void main(String[] args) throws IOException { + Logger.getRootLogger().setLevel(Level.WARN); + // download all objects having geo-coordinates from DBpedia if necessary if(!dbpediaFile.exists() || regenerateFile) { createDBpediaFile(); } + for(POIClass poiClass : POIClass.values()) { + matchPerClass.put(poiClass, 0); + noMatchPerClass.put(poiClass, 0); + } + Files.clearFile(matchingFile); Files.clearFile(missesFile); FileOutputStream fos = new FileOutputStream(matchingFile, true); @@ -100,6 +112,7 @@ // temporary variables needed while reading in file int itemCount = 0; + int skipCount = 0; URI uri = null; String label = null; String[] classes = null; @@ -111,23 +124,30 @@ if(line.isEmpty()) { DBpediaPoint dp = new DBpediaPoint(uri, label, classes, geoLat, geoLong, decimalCount); + POIClass poiClass = dp.getPoiClass(); - // find match (we assume there is exactly one match) - URI matchURI = findGeoDataMatch(dp); - if(matchURI == null) { - String missStr = dp.toString() + "\n"; - fosMiss.write(missStr.getBytes()); + if(poiClass != null) { + // find match (we assume there is exactly one match) + URI matchURI = findGeoDataMatch(dp); + if(matchURI == null) { + String missStr = dp.toString() + "\n"; + fosMiss.write(missStr.getBytes()); + noMatchPerClass.put(poiClass, noMatchPerClass.get(poiClass)+1); + } else { + String matchStr = "<" + dp.getUri() + "> <http://www.w3.org/2002/07/owl#sameAs> <" + matchURI + "> .\n"; + fos.write(matchStr.getBytes()); + matches++; + matchPerClass.put(poiClass, noMatchPerClass.get(poiClass)+1); + } + counter++; + + if(counter % 100 == 0) { + System.out.println(new Date().toString() + ": " + counter + " points processed. " + matches + " matches found. " + skipCount + " POIs skipped."); + } } else { - String matchStr = "<" + dp.getUri() + "> <http://www.w3.org/2002/07/owl#sameAs> <" + matchURI + "> .\n"; - fos.write(matchStr.getBytes()); - matches++; + skipCount++; } - counter++; - if(counter % 1000 == 0) { - System.out.println(new Date().toString() + ": " + counter + " points processed. " + matches + " matches found."); - } - itemCount = 0; } else { switch(itemCount) { @@ -159,6 +179,21 @@ } br.close(); fos.close(); + + + + for(POIClass poiClass : POIClass.values()) { + System.out.println(); + System.out.println("summary for POI class " + poiClass + ":"); + System.out.println("matches " + matchPerClass.get(poiClass)); + System.out.println("no matches " + noMatchPerClass.get(poiClass)); + } + + System.out.println(""); + System.out.println("Overall summary:"); + System.out.println(skipCount + " POIs skipped (no classification available)"); + System.out.println(counter + " POIs processed"); + System.out.println(matches + " matches found"); } // downloads information about DBpedia into a separate file Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-25 17:29:33 UTC (rev 1768) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-25 20:34:26 UTC (rev 1769) @@ -44,11 +44,11 @@ RAILWAY_STATION (10000), - // 1000 km box - LAKE (1000000000), + // 200 km box (largest lake is the Caspian Sea with 1200 km length) + LAKE (200000), - // 5000 km box - COUNTRY (5000000); + // 2000 km box (Russia has radius 4000 km) + COUNTRY (2000000); private double maxBox; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-26 11:02:27
|
Revision: 1770 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1770&view=rev Author: jenslehmann Date: 2009-05-26 11:02:20 +0000 (Tue, 26 May 2009) Log Message: ----------- added debug output Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-25 20:34:26 UTC (rev 1769) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-26 11:02:20 UTC (rev 1770) @@ -28,6 +28,7 @@ import java.net.URI; import java.net.URL; import java.net.URLConnection; +import java.text.DecimalFormat; import java.util.Collection; import java.util.Date; import java.util.HashMap; @@ -78,6 +79,11 @@ private static Map<POIClass, Integer> noMatchPerClass = new HashMap<POIClass, Integer>(); private static Map<POIClass, Integer> matchPerClass = new HashMap<POIClass, Integer>(); + private static DecimalFormat df = new DecimalFormat(); + private static int skipCount = 0; + private static int counter = 0; + private static int matches = 0; + // read in DBpedia ontology such that we perform taxonomy reasoning // private static ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.DBPEDIA_OWL); // private static ClassHierarchy hierarchy = reasoner.getClassHierarchy(); @@ -107,12 +113,9 @@ // read file point by point BufferedReader br = new BufferedReader(new FileReader(dbpediaFile)); String line; - int counter = 0; - int matches = 0; // temporary variables needed while reading in file int itemCount = 0; - int skipCount = 0; URI uri = null; String label = null; String[] classes = null; @@ -141,8 +144,9 @@ } counter++; - if(counter % 100 == 0) { - System.out.println(new Date().toString() + ": " + counter + " points processed. " + matches + " matches found. " + skipCount + " POIs skipped."); + if(counter % 1000 == 0) { +// System.out.println(new Date().toString() + ": " + counter + " points processed. " + matches + " matches found. " + skipCount + " POIs skipped."); + printSummary(); } } else { skipCount++; @@ -180,20 +184,27 @@ br.close(); fos.close(); + printSummary(); + System.out.println("Finished Successfully."); + } + + private static void printSummary() { + System.out.println("Summary at date " + new Date().toString()); - for(POIClass poiClass : POIClass.values()) { - System.out.println(); - System.out.println("summary for POI class " + poiClass + ":"); - System.out.println("matches " + matchPerClass.get(poiClass)); - System.out.println("no matches " + noMatchPerClass.get(poiClass)); + double per = matchPerClass.get(poiClass)/(double)(matchPerClass.get(poiClass)+noMatchPerClass.get(poiClass)); + System.out.println("POI class " + poiClass + ": " + matchPerClass.get(poiClass) + " matches found, " + df.format(per) + "% of DBpedia POIs matched" ); } - System.out.println(""); - System.out.println("Overall summary:"); - System.out.println(skipCount + " POIs skipped (no classification available)"); - System.out.println(counter + " POIs processed"); - System.out.println(matches + " matches found"); +// System.out.println(""); + System.out.println("Overall:"); + int total = skipCount + matches; + double skipFreq = skipCount/(double)total; + double countFreq = counter/(double)total; + double matchFreq = matches/(double)total; + System.out.println(skipCount + " POIs skipped (cannot be assigned to a POI class) = " + df.format(skipFreq) + "%"); + System.out.println(counter + " POIs processed = " + df.format(countFreq) + "%"); + System.out.println(matches + " matches found = " + df.format(matchFreq) + "%"); } // downloads information about DBpedia into a separate file Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java 2009-05-25 20:34:26 UTC (rev 1769) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LearnOSMClasses.java 2009-05-26 11:02:20 UTC (rev 1770) @@ -54,7 +54,7 @@ public class LearnOSMClasses { private static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); - private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); +// private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); public static void main(String args[]) throws IOException, DataFormatException, LearningProblemUnsupportedException, ComponentInitException { File matchesFile = new File("log/geodata/owlsameas_en.dat"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-26 14:22:19
|
Revision: 1771 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1771&view=rev Author: jenslehmann Date: 2009-05-26 14:22:14 +0000 (Tue, 26 May 2009) Log Message: ----------- support for matching mountains, islands, stadiums, rivers, radio stations, lighthouses collection of stats Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-26 11:02:20 UTC (rev 1770) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-26 14:22:14 UTC (rev 1771) @@ -142,13 +142,15 @@ matches++; matchPerClass.put(poiClass, noMatchPerClass.get(poiClass)+1); } +// System.out.println(poiClass); counter++; if(counter % 1000 == 0) { // System.out.println(new Date().toString() + ": " + counter + " points processed. " + matches + " matches found. " + skipCount + " POIs skipped."); printSummary(); - } + } } else { +// System.out.println(dp.getUri() + " " + dp.getClasses()); skipCount++; } @@ -192,16 +194,17 @@ System.out.println("Summary at date " + new Date().toString()); for(POIClass poiClass : POIClass.values()) { - double per = matchPerClass.get(poiClass)/(double)(matchPerClass.get(poiClass)+noMatchPerClass.get(poiClass)); - System.out.println("POI class " + poiClass + ": " + matchPerClass.get(poiClass) + " matches found, " + df.format(per) + "% of DBpedia POIs matched" ); + int classTests = matchPerClass.get(poiClass)+noMatchPerClass.get(poiClass); + double per = classTests == 0 ? 0 : 100 * matchPerClass.get(poiClass)/(double)(classTests); + System.out.println("POI class " + poiClass + ": " + matchPerClass.get(poiClass) + " matches found, " + df.format(per) + "% match rate" ); } // System.out.println(""); System.out.println("Overall:"); - int total = skipCount + matches; - double skipFreq = skipCount/(double)total; - double countFreq = counter/(double)total; - double matchFreq = matches/(double)total; + int total = skipCount + counter; + double skipFreq = 100*skipCount/(double)total; + double countFreq = 100*counter/(double)total; + double matchFreq = 100*matches/(double)total; System.out.println(skipCount + " POIs skipped (cannot be assigned to a POI class) = " + df.format(skipFreq) + "%"); System.out.println(counter + " POIs processed = " + df.format(countFreq) + "%"); System.out.println(matches + " matches found = " + df.format(matchFreq) + "%"); Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-26 11:02:20 UTC (rev 1770) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-26 14:22:14 UTC (rev 1771) @@ -148,7 +148,7 @@ } // maps classes (in DBpedia ontology or otherwise) to supported POI classes - private POIClass getPOIClass(String[] classes) { + public static POIClass getPOIClass(String[] classes) { for(String clazz : classes) { // System.out.println("class: " + clazz); if(clazz.equals("http://dbpedia.org/ontology/City")) { @@ -169,6 +169,20 @@ return POIClass.AIRPORT; } else if(clazz.equals("http://dbpedia.org/ontology/Station")) { return POIClass.RAILWAY_STATION; + } else if(clazz.equals("http://dbpedia.org/ontology/Stadium")) { + return POIClass.STADIUM; + } else if(clazz.equals("http://dbpedia.org/ontology/Island")) { + return POIClass.ISLAND; + } else if(clazz.equals("http://dbpedia.org/ontology/River")) { + return POIClass.RIVER; + } else if(clazz.equals("http://dbpedia.org/ontology/Bridge")) { + return POIClass.BRIDGE; + } else if(clazz.equals("http://dbpedia.org/ontology/Mountain")) { + return POIClass.MOUNTAIN; + } else if(clazz.equals("http://umbel.org/umbel/sc/Mountain")) { + return POIClass.MOUNTAIN; + } else if(clazz.equals("http://dbpedia.org/ontology/Lighthouse")) { + return POIClass.LIGHT_HOUSE; } } return null; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-26 11:02:20 UTC (rev 1770) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/LGDPoint.java 2009-05-26 14:22:14 UTC (rev 1771) @@ -58,6 +58,13 @@ case LAKE : return variable + " <http://linkedgeodata.org/vocabulary#natural> \"water\" . "; case COUNTRY : return variable + " <http://linkedgeodata.org/vocabulary#place> \"country\" . "; case RAILWAY_STATION : return variable + " <http://linkedgeodata.org/vocabulary#railway> \"station\" . "; + case ISLAND : return variable + " <http://linkedgeodata.org/vocabulary#place> \"island\" . "; + case STADIUM : return variable + " <http://linkedgeodata.org/vocabulary#leisure> \"stadium\" . "; + case RIVER : return variable + " <http://linkedgeodata.org/vocabulary#waterway> ?something . "; + case BRIDGE : return variable + " <http://linkedgeodata.org/vocabulary#bridge> ?something . "; + case MOUNTAIN : return variable + " <http://linkedgeodata.org/vocabulary#natural> \"peak\" . "; + case RADIO_STATION : return variable + " <http://linkedgeodata.org/vocabulary#amenity> \"studio\" . "; + case LIGHT_HOUSE : return variable + " <http://linkedgeodata.org/vocabulary#man_made> \"lighthouse\" . "; default: throw new Error("Cannot restrict."); } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-26 11:02:20 UTC (rev 1770) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-26 14:22:14 UTC (rev 1771) @@ -47,6 +47,27 @@ // 200 km box (largest lake is the Caspian Sea with 1200 km length) LAKE (200000), + // 1 km box + BRIDGE (1000), + + // 10 km box + MOUNTAIN (10000), + + // 10000 km box (continents?) + ISLAND (10000000), + + // 1 km box + STADIUM (1000), + + // 1000 km box + RIVER (1000000), + + // 1 km box + RADIO_STATION (1000), + + // 1 km box + LIGHT_HOUSE (1000), + // 2000 km box (Russia has radius 4000 km) COUNTRY (2000000); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-28 14:21:44
|
Revision: 1779 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1779&view=rev Author: jenslehmann Date: 2009-05-28 14:21:36 +0000 (Thu, 28 May 2009) Log Message: ----------- matching refined Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-28 12:28:40 UTC (rev 1778) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-28 14:21:36 UTC (rev 1779) @@ -84,7 +84,10 @@ private static int skipCount = 0; private static int counter = 0; private static int matches = 0; + private static Date startDate; + private static final int totalPOICount = 328232; + // read in DBpedia ontology such that we perform taxonomy reasoning // private static ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.DBPEDIA_OWL); // private static ClassHierarchy hierarchy = reasoner.getClassHierarchy(); @@ -124,6 +127,8 @@ Double geoLat = null; Double geoLong = null; + startDate = new Date(); + System.out.println("Start matching process at date " + startDate); while ((line = br.readLine()) != null) { if(line.isEmpty()) { @@ -192,12 +197,13 @@ } private static void printSummary() { - System.out.println("Summary at date " + new Date().toString()); + Date currDate = new Date(); + System.out.println("Summary at date " + currDate.toString()); for(POIClass poiClass : POIClass.values()) { int classTests = matchPerClass.get(poiClass)+noMatchPerClass.get(poiClass); - double per = classTests == 0 ? 0 : 100 * matchPerClass.get(poiClass)/(double)(classTests); - System.out.println("POI class " + poiClass + ": " + matchPerClass.get(poiClass) + " matches found, " + df.format(per) + "% match rate" ); + double per = (classTests == 0) ? 0 : 100 * matchPerClass.get(poiClass)/(double)(classTests); + System.out.println("POI class " + getFixedLengthString(poiClass,15) + ": " + getFixedLengthString(matchPerClass.get(poiClass),5) + " matches found from " + getFixedLengthString(classTests,5) + " POIs = " + df.format(per) + "% match rate" ); } // System.out.println(""); @@ -206,11 +212,31 @@ double skipFreq = 100*skipCount/(double)total; double countFreq = 100*counter/(double)total; double matchFreq = 100*matches/(double)total; + double matchCountFreq = 100*matches/(double)counter; + long diffMs = currDate.getTime() - startDate.getTime(); + long diffMinutes = diffMs / (60 * 1000); + long diffHours = diffMs / (60 * 60 * 1000); + double pointPercentage = total / (double) totalPOICount; + double pointsPerMs = total / (double) diffMs; + double pointsPerHour = 3600 * 1000 * pointsPerMs; + long estimatedMs = totalPOICount * diffMs / total; + Date estimatedDate = new Date(startDate.getTime() + estimatedMs); + System.out.println("algorithm runtime: " + diffHours + " hours " + diffMinutes + " minutes, estimated to finish at " + estimatedDate); + System.out.println(df.format(pointPercentage) + "% of points skipped or processed = " + df.format(pointsPerHour) + " points per hour"); System.out.println(skipCount + " POIs skipped (cannot be assigned to a POI class) = " + df.format(skipFreq) + "%"); System.out.println(counter + " POIs processed = " + df.format(countFreq) + "%"); - System.out.println(matches + " matches found = " + df.format(matchFreq) + "%"); + System.out.println(matches + " matches found = " + df.format(matchCountFreq) + "% of processed POIs, " + df.format(matchFreq) + "% of all POIs"); + System.out.println(); } + private static String getFixedLengthString(Object object, int length) { + String str = object.toString(); + for(int i = str.length(); i < length; i++ ) { + str = " " + str; + } + return str; + } + // downloads information about DBpedia into a separate file private static void createDBpediaFile() throws IOException { @@ -350,7 +376,11 @@ double minLat = dbpediaPoint.getGeoLat()-(distanceThresholdMeters/1000/111); double maxLat = dbpediaPoint.getGeoLat()+(distanceThresholdMeters/1000/111); double minLong = dbpediaPoint.getGeoLong()-(distanceThresholdMeters/1000)/Math.abs(Math.cos(Math.toRadians(dbpediaPoint.getGeoLat()))*111); - double maxLong = dbpediaPoint.getGeoLong()+(distanceThresholdMeters/1000)/Math.abs(Math.cos(Math.toRadians(dbpediaPoint.getGeoLat()))*111); + double maxLong = dbpediaPoint.getGeoLong()+(distanceThresholdMeters/1000)/Math.abs(Math.cos(Math.toRadians(dbpediaPoint.getGeoLat()))*111); + +// System.out.println("lat: " + minLat + " < " + dbpediaPoint.getGeoLat() + " < " + maxLat); +// System.out.println("long: " + minLong + " < " + dbpediaPoint.getGeoLong() + " < " + maxLong); + // query all points in the box corresponding to this class // (we make sure that returned points are in the same POI class) String queryStr = "select ?point ?lat ?long ?name ?name_en ?name_int where { "; @@ -398,7 +428,9 @@ if(qs.contains("name_en")) { String lgdLabel2 = qs.getLiteral("name_en").toString(); stringSimilarity = distance.score(dbpediaLabel1, lgdLabel2); - stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel2), stringSimilarity); + stringSimilarity = Math.max(distance.score(dbpediaLabel2, lgdLabel2), stringSimilarity); + System.out.println(qs.getResource("point").getURI()); + System.exit(0); } if(qs.contains("name_int")) { String lgdLabel3 = qs.getLiteral("name_int").toString(); Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-28 12:28:40 UTC (rev 1778) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-28 14:21:36 UTC (rev 1779) @@ -94,6 +94,7 @@ try { dbpediaPoint = new DBpediaPoint(match.getKey()); } catch (Exception e) { +// System.out.println("discarded: " + match.getKey()); logger.debug(e.getMessage()); discarded++; continue; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-29 07:46:49
|
Revision: 1780 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1780&view=rev Author: jenslehmann Date: 2009-05-29 07:46:41 +0000 (Fri, 29 May 2009) Log Message: ----------- further fine tuning of geo matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-28 14:21:36 UTC (rev 1779) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-29 07:46:41 UTC (rev 1780) @@ -214,9 +214,9 @@ double matchFreq = 100*matches/(double)total; double matchCountFreq = 100*matches/(double)counter; long diffMs = currDate.getTime() - startDate.getTime(); - long diffMinutes = diffMs / (60 * 1000); long diffHours = diffMs / (60 * 60 * 1000); - double pointPercentage = total / (double) totalPOICount; + long diffMinutes = diffMs / (60 * 1000) - diffHours * 60; + double pointPercentage = 100 * total / (double) totalPOICount; double pointsPerMs = total / (double) diffMs; double pointsPerHour = 3600 * 1000 * pointsPerMs; long estimatedMs = totalPOICount * diffMs / total; @@ -396,7 +396,9 @@ queryStr += "OPTIONAL { ?point <http://linkedgeodata.org/vocabulary#name_int> ?name_int } ."; // filter out ways => we assume that it is always better to match a point and not a way // (if there is a way, there should also be a point but not vice versa) -// queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) ."; + // => according to OSM data model, ways do not have longitude/latitude, so we should + // always match nodes and not ways (TODO: discuss with Soeren) + queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) ."; queryStr += "}"; // SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); @@ -408,10 +410,8 @@ String bestLabel = null; while(rs.hasNext()) { QuerySolution qs = rs.nextSolution(); + String lgdURI = qs.getResource("point").toString(); - // measure string similarity and proximity - // TODO: incomplete - // step 1: string similarity double stringSimilarity; // from DBpedia we take the full label and an abbreviated version; @@ -442,18 +442,24 @@ double lat = qs.getLiteral("lat").getDouble(); double lon = qs.getLiteral("long").getDouble(); double distance = spatialDistance(dbpediaPoint.getGeoLat(), dbpediaPoint.getGeoLong(), lat, lon); - double frac = distance / dbpediaPoint.getPoiClass().getMaxBox(); - double distanceScore = Math.pow(frac-1,4); + double frac = Math.min(1,distance / dbpediaPoint.getPoiClass().getMaxBox()); + double distanceScore = Math.pow(frac-1,2); +// System.out.println(dbpediaPoint.getPoiClass().getMaxBox()); +// System.out.println(distance); +// System.out.println(frac); +// System.out.println(distanceScore); +// System.out.println("==============="); + double score = 0.8 * stringSimilarity + 0.2 * distanceScore; // if there is a node and a way, we prefer the node (better representative) - if(qs.getResource("point").toString().contains("/way/")) { - score -= 0.02; - } +// if(lgdURI.contains("/way/")) { +// score -= 0.02; +// } if(score > highestScore) { highestScore = score; - bestURI = qs.getResource("point").getURI(); + bestURI = lgdURI; bestLabel = lgdLabel1; } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-28 14:21:36 UTC (rev 1779) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/POIClass.java 2009-05-29 07:46:41 UTC (rev 1780) @@ -31,8 +31,8 @@ // 50 km box CITY (50000), - // 5 km box - AIRPORT (5000), + // 10 km box + AIRPORT (10000), // 10 km box UNIVERSITY (10000), @@ -53,11 +53,11 @@ // 10 km box MOUNTAIN (10000), - // 10000 km box (continents?) - ISLAND (10000000), + // 1000 km box (continents are not counted as islands in UMBEL and DBpedia ontology) + ISLAND (1000000), - // 1 km box - STADIUM (1000), + // 2 km box + STADIUM (2000), // 1000 km box RIVER (1000000), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-05-29 15:44:52
|
Revision: 1782 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1782&view=rev Author: jenslehmann Date: 2009-05-29 15:44:38 +0000 (Fri, 29 May 2009) Log Message: ----------- matching adjustments and small bug fix Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-29 15:22:25 UTC (rev 1781) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-29 15:44:38 UTC (rev 1782) @@ -135,6 +135,8 @@ DBpediaPoint dp = new DBpediaPoint(uri, label, classes, geoLat, geoLong, decimalCount); POIClass poiClass = dp.getPoiClass(); +// System.out.println("DBpedia Point: " + dp); + if(poiClass != null) { // find match (we assume there is exactly one match) URI matchURI = findGeoDataMatch(dp); @@ -165,7 +167,13 @@ switch(itemCount) { case 0 : uri = URI.create(line); break; case 1 : label = line; break; - case 2 : classes = line.substring(1, line.length()).split(","); break; + case 2 : line = line.substring(1, line.length()-1); // strip brackets + if(line.length()>1) { + classes = line.split(","); + } else { + classes = new String[0]; + } + break; case 3 : geoLat = new Double(line); // we avoid "computerized scientific notation" e.g. 9.722222457639873E-4 @@ -398,7 +406,7 @@ // (if there is a way, there should also be a point but not vice versa) // => according to OSM data model, ways do not have longitude/latitude, so we should // always match nodes and not ways (TODO: discuss with Soeren) - queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) ."; +// queryStr += "FILTER (?point LIKE <http://linkedgeodata.org/triplify/node/%>) ."; queryStr += "}"; // SparqlQuery query = new SparqlQuery(queryStr, geoDataEndpoint); @@ -453,9 +461,9 @@ double score = 0.8 * stringSimilarity + 0.2 * distanceScore; // if there is a node and a way, we prefer the node (better representative) -// if(lgdURI.contains("/way/")) { -// score -= 0.02; -// } + if(lgdURI.contains("/way/")) { + score -= 0.02; + } if(score > highestScore) { highestScore = score; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-29 15:22:25 UTC (rev 1781) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-29 15:44:38 UTC (rev 1782) @@ -140,7 +140,7 @@ @Override public String toString() { - String str = uri + ", \"" + label + "\", " + geoLat + ", " + geoLong + " (classes: "; + String str = uri + ", \"" + label + "\", " + geoLat + ", " + geoLong + " (" + classes.length + " classes: "; for(String clazz : classes) { str += clazz + " "; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |