From: <jen...@us...> - 2009-04-20 17:32:21
|
Revision: 1720 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1720&view=rev Author: jenslehmann Date: 2009-04-20 17:32:06 +0000 (Mon, 20 Apr 2009) Log Message: ----------- continued ontology matching Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-20 15:12:11 UTC (rev 1719) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -22,13 +22,10 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; +import java.io.FileReader; import java.io.IOException; -import java.io.InputStreamReader; import java.net.URI; -import java.net.URL; -import java.net.URLConnection; -import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; @@ -44,9 +41,13 @@ */ public class DBpediaLinkedGeoData { - private static File dbpediaFile = new File("log/DBpedia_POIs.nt"); + // chose between nt and csv + private static String dbpediaFileFormat = "csv"; + private static File dbpediaFile = new File("log/DBpedia_POIs." + dbpediaFileFormat); private static boolean regenerateFile = false; + private static File matchingFile = new File("log/DBpedia_GeoData_Links.nt"); + private static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); @@ -57,56 +58,37 @@ createDBpediaFile(); } + FileOutputStream fos = new FileOutputStream(matchingFile, true); // read file point by point - // for each point: call match method - - System.exit(0); - - // we start from the DBpedia URI and try to find the corresponding - // OSM URI (assuming that each location having coordinates in Wikipedia also - // exists in OSM) - URI dbpediaURI = URI.create("http://dbpedia.org/resource/Auerbachs_Keller"); - - int distanceThresholdMeters = 100; - - // use official DBpedia endpoint (switch to db0 later) - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - SPARQLTasks st = new SPARQLTasks(endpoint); - - // query latitude and longitude - String query = "SELECT ?lat ?long WHERE { "; - query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; - query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long . } LIMIT 1"; - - // perform query and read lat and long from results - ResultSet results = st.queryAsResultSet(query); - QuerySolution qs = results.nextSolution(); - String geoLat = qs.getLiteral("lat").getString(); - String geoLong = qs.getLiteral("long").getString(); - - System.out.println("lat: " + geoLat + ", long: " + geoLong); - - URL linkedGeoDataURL = new URL("http://linkedgeodata.org/triplify/near/"+geoLat+","+geoLong+"/"+distanceThresholdMeters); - - // TODO: replace by SPARQL query - - URLConnection conn = linkedGeoDataURL.openConnection(); - BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); - StringBuffer sb = new StringBuffer(); - String line=""; -// int pointID = 0; - while ((line = rd.readLine()) != null) - { - if(line.contains("Auerbach")) { - System.out.println(line); + BufferedReader br = new BufferedReader(new FileReader(dbpediaFile)); + String line; + int counter = 0; + int matches = 0; + while ((line = br.readLine()) != null) { + + // read line and convert it into an object + String[] parts = line.split(","); + URI uri = URI.create(parts[0]); + String label = parts[1]; + double geoLat = new Double(parts[2]); + double geoLong = new Double(parts[3]); + DBpediaPoint dp = new DBpediaPoint(uri, label, geoLat, geoLong); + + // find match (we assume there is exactly one match) + URI matchURI = findGeoDataMatch(dp); + if(matchURI != null) { + String matchStr = "<" + uri + "> <http://www.w3.org/2002/07/owl#sameAs> <" + matchURI + "> .\n"; + fos.write(matchStr.getBytes()); + matches++; } + counter++; - sb.append(line); + if(counter % 1000 == 0) { + System.out.println(counter + " points processed. " + matches + " matches found."); + } } - rd.close(); - -// System.out.println(sb.toString()); - + br.close(); + fos.close(); } // downloads information about DBpedia into a separate file @@ -140,10 +122,15 @@ String geoLong = qs.getLiteral("long").getString(); String label = qs.getLiteral("label").getString(); - String content = "<" + object + ">" + " <http://www.w3.org/2000/01/rdf-schema#label> \"" + label + "\" .\n"; - content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#lat> \"" + geoLat + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; - content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#long> \"" + geoLong + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; - + String content = ""; + if(dbpediaFileFormat.equals("nt")) { + content += "<" + object + ">" + " <http://www.w3.org/2000/01/rdf-schema#label> \"" + label + "\" .\n"; + content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#lat> \"" + geoLat + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; + content += "<" + object + ">" + " <http://www.w3.org/2003/01/geo/wgs84_pos#long> \"" + geoLong + "\"^^<http://www.w3.org/2001/XMLSchema#float> .\n"; + } else { + content += object + ",\"" + label + "\"," + geoLat + "," + geoLong + "\n"; + } + fos.write(content.getBytes()); counter++; @@ -154,13 +141,56 @@ } while(counter == limit); - fos.close(); - } - private static URI findLinkedGeoDataMatch(DBpediaPoint dbpediaPoint) { + private static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) { + // get all GeoData points close to the given point +// SparqlQuery query = new SparqlQuery("", geoDataEndpoint); + + /* + int distanceThresholdMeters = 100; + + // use official DBpedia endpoint (switch to db0 later) + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + SPARQLTasks st = new SPARQLTasks(endpoint); + + // query latitude and longitude + String query = "SELECT ?lat ?long WHERE { "; + query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; + query += "<" + dbpediaURI + "> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long . } LIMIT 1"; + + // perform query and read lat and long from results + ResultSet results = st.queryAsResultSet(query); + QuerySolution qs = results.nextSolution(); + String geoLat = qs.getLiteral("lat").getString(); + String geoLong = qs.getLiteral("long").getString(); + + System.out.println("lat: " + geoLat + ", long: " + geoLong); + + URL linkedGeoDataURL = new URL("http://linkedgeodata.org/triplify/near/"+geoLat+","+geoLong+"/"+distanceThresholdMeters); + + // TODO: replace by SPARQL query + + URLConnection conn = linkedGeoDataURL.openConnection(); + BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream())); + StringBuffer sb = new StringBuffer(); + String line=""; +// int pointID = 0; + while ((line = rd.readLine()) != null) + { + if(line.contains("Auerbach")) { + System.out.println(line); + } + + sb.append(line); + } + rd.close(); + +// System.out.println(sb.toString()); + + */ return null; } } Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-04-20 15:12:11 UTC (rev 1719) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -19,10 +19,38 @@ */ package org.dllearner.scripts.matching; +import java.net.URI; + /** + * A geo location in DBpedia. + * * @author Jens Lehmann * */ -public class DBpediaPoint { +public class DBpediaPoint extends Point { + private URI uri; + + private String label; + + public DBpediaPoint(URI uri, String label, double geoLat, double geoLong) { + super(geoLat,geoLong); + this.uri = uri; + this.label = label; + } + + /** + * @return the uri + */ + public URI getUri() { + return uri; + } + + /** + * @return the label + */ + public String getLabel() { + return label; + } + } Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Point.java 2009-04-20 17:32:06 UTC (rev 1720) @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +/** + * A geo location. + * + * @author Jens Lehmann + * + */ +public class Point { + + private double geoLat; + + private double geoLong; + + public Point(double geoLat, double geoLong) { + this.geoLat = geoLat; + this.geoLong = geoLong; + } + + public double getGeoLat() { + return geoLat; + } + + public double getGeoLong() { + return geoLong; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |