From: <jen...@us...> - 2009-05-07 08:48:13
|
Revision: 1746 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1746&view=rev Author: jenslehmann Date: 2009-05-07 08:48:00 +0000 (Thu, 07 May 2009) Log Message: ----------- mapping evaluator Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 08:45:50 UTC (rev 1745) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaLinkedGeoData.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -52,7 +52,7 @@ // chose between nt and dat private static String dbpediaFileFormat = "dat"; - private static File dbpediaFile = new File("log/DBpedia_POIs." + dbpediaFileFormat); + static File dbpediaFile = new File("log/DBpedia_POIs." 
+ dbpediaFileFormat); private static boolean regenerateFile = false; private static File matchingFile = new File("log/DBpedia_GeoData_Links.nt"); @@ -60,7 +60,7 @@ private static double scoreThreshold = 0.8; private static StringDistance distance = new Jaro(); - private static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); + public static SparqlEndpoint dbpediaEndpoint = SparqlEndpoint.getEndpointLOCALDBpedia(); private static SparqlEndpoint geoDataEndpoint = SparqlEndpoint.getEndpointLOCALGeoData(); // read in DBpedia ontology such that we perform taxonomy reasoning @@ -257,7 +257,7 @@ fos.close(); } - private static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) throws IOException { + public static URI findGeoDataMatch(DBpediaPoint dbpediaPoint) throws IOException { // 1 degree is about 111 km (depending on the specific point) int distanceThresholdMeters = 1000; Modified: trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 08:45:50 UTC (rev 1745) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/DBpediaPoint.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -21,6 +21,11 @@ import java.net.URI; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + /** * A geo location in DBpedia. * @@ -39,6 +44,40 @@ // number of decimals indicates a large object) private int decimalCount; + + /** + * Constructs a DBpedia point using SPARQL. + * @param uri URI of DBpedia resource. 
+ */ + public DBpediaPoint(URI uri) { + super(0,0); + this.uri = uri; + + // construct DBpedia query + String queryStr = "SELECT ?lat, ?long, ?label, ?type WHERE {"; + queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat ."; + queryStr += "<"+uri+"> <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?long ."; + queryStr += "?object rdfs:label ?label . "; + queryStr += "OPTIONAL { <"+uri+" rdf:type ?type . "; + queryStr += "FILTER (!(?type LIKE <http://dbpedia.org/ontology/Resource>)) ."; + queryStr += "FILTER (?type LIKE <http://dbpedia.org/ontology/%>) ."; + queryStr += "} }"; + + SparqlQuery query = new SparqlQuery(queryStr, DBpediaLinkedGeoData.dbpediaEndpoint); + ResultSet rs = query.send(); + classes = new String[] { }; + int count = 0; + + while(rs.hasNext()) { + QuerySolution qs = rs.nextSolution(); + geoLat = qs.getLiteral("lat").getDouble(); + geoLong = qs.getLiteral("long").getDouble(); + label = qs.getLiteral("label").getString(); + classes[count] = qs.get("type").toString(); + count++; + } + } + public DBpediaPoint(URI uri, String label, String[] classes, double geoLat, double geoLong, int decimalCount) { super(geoLat,geoLong); this.uri = uri; Added: trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/matching/Evaluation.java 2009-05-07 08:48:00 UTC (rev 1746) @@ -0,0 +1,138 @@ +/** + * Copyright (C) 2007-2009, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts.matching; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Performs an evaluation of a matching method by analysing it + * on a test set. + * + * @author Jens Lehmann + * + */ +public class Evaluation { + + private int tests; + private int noMatchCount; + private int correctMatchCount; + private int incorrectMatchCount; + private int matchCount; + private double precision; + private double recall; + + // map from DBpedia to LinkedGeoData + public Evaluation(Map<URI,URI> testMatches) throws IOException { + + tests = 0; + noMatchCount = 0; + correctMatchCount = 0; + incorrectMatchCount = 0; + + for(Entry<URI,URI> match : testMatches.entrySet()) { + // find point in DBpedia file: + // approach 1: + // step 1: locate point in DBpedia file + // step 2: read all information about point + // step 3: write a method converting this information into a DBpedia point + + // "problem": might be good to put all relevant DBpedia and GeoData points in + // memory to efficiently evaluate a lot of parameter settings without + // requiring to perform slow HTTP or SPARQL requests + + DBpediaPoint dbpediaPoint = new DBpediaPoint(match.getKey()); + URI matchedURI = DBpediaLinkedGeoData.findGeoDataMatch(dbpediaPoint); + + URI testURI = match.getValue(); + + // no match found + if(matchedURI == null) { + noMatchCount++; + // correct match found + } else if(matchedURI.equals(testURI)) { +
correctMatchCount++; + // incorrect match found + } else { + incorrectMatchCount++; + } + + tests++; + } + + matchCount = correctMatchCount + incorrectMatchCount; + // determine proportion of correct matchings + precision = correctMatchCount / (double) matchCount; + // determine proportion of correct matches + recall = correctMatchCount / (double) tests; + } + + public int getCorrectMatchCount() { + return correctMatchCount; + } + + public int getIncorrectMatchCount() { + return incorrectMatchCount; + } + + public int getMatchCount() { + return matchCount; + } + + public int getNoMatchCount() { + return noMatchCount; + } + + public double getPrecision() { + return precision; + } + + public double getRecall() { + return recall; + } + + public int getTests() { + return tests; + } + + public static void main(String args[]) throws IOException { + // test file + String testFile = "log/geodata/owlsameas_en.dat"; + // map for collecting matches + Map<URI,URI> matches = new HashMap<URI,URI>(); + // read file line by line to collect matches + BufferedReader br = new BufferedReader(new FileReader(testFile)); + String line; + while ((line = br.readLine()) != null) { + String[] tmp = line.split(" "); + matches.put(URI.create(tmp[1]), URI.create(tmp[0])); + } + // perform evaluation and print results + Evaluation eval = new Evaluation(matches); + System.out.println("precision: " + eval.getPrecision()); + System.out.println("recall: " + eval.getRecall()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |