From: <jen...@us...> - 2008-01-29 19:24:54
|
Revision: 457 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=457&view=rev Author: jenslehmann Date: 2008-01-29 11:24:02 -0800 (Tue, 29 Jan 2008) Log Message: ----------- - started to rewrite Cache such that it can be used for arbitrary SPARQL queries (i.e. not only in the extraction algorithm) - not working yet Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java trunk/src/dl-learner/org/dllearner/kb/sparql/test/JenaQueryToResultSpeedTest.java trunk/src/dl-learner/org/dllearner/kb/sparql/test/TestResultSet.java trunk/src/dl-learner/org/dllearner/server/ClientState.java trunk/src/dl-learner/org/dllearner/server/DLLearnerWS.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/kb/sparql/query/ Copied: trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java (from rev 456, trunk/src/dl-learner/org/dllearner/kb/sparql/query/Cache.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java 2008-01-29 19:24:02 UTC (rev 457) @@ -0,0 +1,254 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.sparql; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; + +import org.apache.log4j.Logger; + +import com.hp.hpl.jena.query.ResultSet; + +/** + * SPARQL query cache to avoid possibly expensive multiple queries. The queries + * and their results are written to files. A cache has an associated cache + * directory where all files are written. + * + * Each SPARQL query and its result is written to one file. The name of this + * file is a hash of the query. The result of the query is written as JSON + * serialisation of the SPARQL XML result, see + * http://www.w3.org/TR/rdf-sparql-json-res/. + * + * Apart from the query and its result, a timestamp of the query is stored. + * After a configurable amount of time, query results are considered outdated. + * If a cached result of a SPARQL query exists, but is too old, the cache + * behaves as if the cached result would not exist. 
+ * + * @author Sebastian Hellmann + * @author Sebastian Knappe + * @author Jens Lehmann + */ +public class Cache implements Serializable { + + private static Logger logger = Logger.getLogger(Cache.class); + + private static final long serialVersionUID = 843308736471742205L; + + // maps hash of a SPARQL queries to JSON representation + // of its results; this + private HashMap<String, String> hm; + + private transient String cacheDir = ""; + private transient String fileEnding = ".cache"; + private long timestamp; + + // specifies after how many seconds a cached result becomes invalid + private long freshnessSeconds = 15 * 24 * 60 * 60; + + /** + * Constructor for the cache itself. + * + * @param cacheDir + * Where the base path to the cache is . + */ + public Cache(String cacheDir) { + this.cacheDir = cacheDir + File.separator; + if (!new File(cacheDir).exists()) { + logger + .info("Created directory: " + cacheDir + " : " + new File(cacheDir).mkdir() + + "."); + } + } + + /** + * constructor for single cache object(one entry) + * + * @param sparqlQuery + * query + * @param content + * that is the sparql query result as xml + */ + private Cache(String sparqlQuery, String content) { + // this.content = c; + // this.sparqlquery = sparql; + this.timestamp = System.currentTimeMillis(); + this.hm = new HashMap<String, String>(); + hm.put(sparqlQuery, content); + } + + private String getHash(String string) { + // calculate md5 hash of the string (code is somewhat + // difficult to read, but there doesn't seem to be a + // single function call in Java for md5 hashing) + MessageDigest md5 = null; + try { + md5 = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + } + md5.reset(); + md5.update(string.getBytes()); + byte[] result = md5.digest(); + + StringBuffer hexString = new StringBuffer(); + for (int i = 0; i < result.length; i++) { + hexString.append(Integer.toHexString(0xFF & result[i])); + } + return 
hexString.toString(); + } + + private String getFilename(String sparqlQuery) { + return getHash(sparqlQuery) + fileEnding; + } + + /** + * Gets the query result for a SPARQL query. + * + * @param sparqlQuery + * SPARQL query to check. + * @return Query result or null if no result has been found or it is + * outdated. + */ + public String get(String sparqlQuery) { + Cache c = readFromFile(getFilename(sparqlQuery)); + if (c == null) + return null; + // System.out.println(" file found"); + if (!c.checkFreshness()) + return null; + // System.out.println("fresh"); + String xml = ""; + try { + xml = c.hm.get(sparqlQuery); + } catch (Exception e) { + return null; + } + return xml; + } + + /** + * @param key + * is the resource, the identifier + * @param sparqlquery + * is the query used as another identifier + * @param content + * is the result of the query + */ + public void put(String sparqlQuery, String content) { + String hash = getHash(sparqlQuery); + Cache c = readFromFile(hash); + if (c == null) { + c = new Cache(sparqlQuery, content); + putIntoFile(hash, c); + } else { + c.hm.put(sparqlQuery, content); + putIntoFile(hash, c); + } + + } + + public void checkFile(String Filename) { + if (!new File(Filename).exists()) { + try { + new File(Filename).createNewFile(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + } + + /** + * puts a cache entry in a file + * + * @param filename + * @param c + */ + protected void putIntoFile(String filename, Cache c) { + try { + // FileWriter fw=new FileWriter(new File(Filename),true); + FileOutputStream fos = new FileOutputStream(filename, false); + ObjectOutputStream o = new ObjectOutputStream(fos); + o.writeObject(c); + fos.flush(); + fos.close(); + } catch (Exception e) { + System.out.println("Not in cache creating: " + filename); + } + } + + /** + * reads a cache entry from a file + * + * @param Filename + * @return cache entry + */ + protected Cache readFromFile(String Filename) { + Cache content = null; + try { + 
 FileInputStream fos = new FileInputStream(Filename); + ObjectInputStream o = new ObjectInputStream(fos); + content = (Cache) o.readObject(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + return content; + } + + private boolean checkFreshness() { + if ((System.currentTimeMillis() - this.timestamp) <= (freshnessSeconds * 1000)) + // fresh + return true; + else + return false; + } + + /** + * Takes a SPARQL query (which has not been evaluated yet) as argument and + * returns a result set. The result set is taken from this cache if the + * query is stored here. Otherwise the query is sent and its result added to + * the cache and returned. Convenience method. + * + * @param query + * The SPARQL query. + * @return Jena result set. + */ + public ResultSet executeSparqlQuery(SparqlQuery query) { + if (hm.containsKey(query.getQueryString())) { + String result = hm.get(query.getQueryString()); + return SparqlQuery.JSONtoResultSet(result); + } else { + query.send(); + return query.getResultSet(); + } + } + +} Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java 2008-01-29 19:24:02 UTC (rev 457) @@ -46,7 +46,6 @@ import org.dllearner.core.dl.KB; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; import org.dllearner.kb.sparql.configuration.SparqlQueryType; -import org.dllearner.kb.sparql.query.SparqlQuery; import org.dllearner.parser.KBParser; import org.dllearner.reasoning.DIGConverter; import org.dllearner.reasoning.JenaOWLDIGConverter; Copied: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java (from rev 456, trunk/src/dl-learner/org/dllearner/kb/sparql/query/SparqlQuery.java) 
=================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-01-29 19:24:02 UTC (rev 457) @@ -0,0 +1,204 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.sparql; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.nio.charset.Charset; +import java.util.Iterator; +import java.util.List; + +import org.apache.log4j.Logger; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFactory; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.sparql.core.ResultBinding; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; + +/** + * Represents one SPARQL query. It includes support for stopping the SPARQL + * query (which may be necessary if a timeout is reached). 
+ * + * @author Jens Lehmann + * + */ +public class SparqlQuery { + + private static Logger logger = Logger.getLogger(SparqlKnowledgeSource.class); + + private boolean isRunning = false; + // TODO: declare as private + protected String queryString; + private QueryEngineHTTP queryExecution; + private SparqlEndpoint endpoint; + // TODO: declare as private + protected ResultSet rs = null; + + /** + * Standard constructor. + * + * @param queryString + * @param endpoint + */ + public SparqlQuery(String queryString, SparqlEndpoint endpoint) { + this.queryString = queryString; + this.endpoint = endpoint; + } + + @Deprecated + public void setIsRunning(boolean running) { + this.isRunning = running; + } + + /** + * Sends a SPARQL query using the Jena library. + */ + public ResultSet send() { + isRunning = true; + logger.info(queryString); + + String service = endpoint.getURL().toString(); + logger.info(endpoint.getURL().toString()); + // Jena access to SPARQL endpoint + queryExecution = new QueryEngineHTTP(service, queryString); + for (String dgu : endpoint.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : endpoint.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + logger.info("query SPARQL server"); + + rs = queryExecution.execSelect(); + logger.info(rs.getResultVars().toString()); + isRunning = false; + return rs; + } + + public void stop() { + queryExecution.abort(); + isRunning = false; + } + + public String getQueryString() { + return queryString; + } + + public ResultSet getResultSet() { + return rs; + } + + public boolean isRunning() { + return isRunning; + } + + public boolean hasCompleted() { + return (rs != null); + } + + /** + * TODO define the format + * + * @return + */ + @Deprecated + @SuppressWarnings( { "unchecked" }) + public String[][] getAsStringArray() { + if (rs == null) + this.send(); + System.out.println("Starting Query"); + List<ResultBinding> l = ResultSetFormatter.toList(rs); + List<String> 
 resultVars = rs.getResultVars(); + String[][] array = new String[l.size()][resultVars.size()]; + Iterator<String> iter = resultVars.iterator(); + int i = 0, j = 0; + + for (ResultBinding resultBinding : l) { + while (iter.hasNext()) { + String varName = (String) iter.next(); + array[i][j] = resultBinding.get(varName).toString(); + j++; + } + iter = resultVars.iterator(); + i++; + j = 0; + } + System.out.println("Query complete"); + return array; + } + + /** + * sends a query and returns XML + * + * @return String xml + */ + public static String getAsXMLString(ResultSet resultSet) { + //if (rs == null) + // this.send(); + return ResultSetFormatter.asXMLString(resultSet); + } + + /** + * sends a query and returns complicated Jena List with ResultBindings + * + * + * @return jena List<ResultBinding> + */ + @Deprecated + @SuppressWarnings( { "unchecked" }) + public List<ResultBinding> getAsList() { + if (rs == null) + this.send(); + return ResultSetFormatter.toList(rs); + } + + /** + * Converts Jena result set to JSON. + * + * @param resultSet The result set to transform. + * @return JSON representation of the result set. + */ + public static String getAsJSON(ResultSet resultSet) { + // if (rs == null) + // this.send(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ResultSetFormatter.outputAsJSON(baos, resultSet); + // possible Jena bug: Jena modifies the result set during + // JSON transformation, so we need to get it back + resultSet = JSONtoResultSet(baos.toString()); + return baos.toString(); + } + + /** + * Converts from JSON to internal Jena format. + * + * @param json + * A JSON representation of a SPARQL query result. + * @return A Jena ResultSet. 
+ */ + public static ResultSet JSONtoResultSet(String json) { + ByteArrayInputStream bais = new ByteArrayInputStream(json + .getBytes(Charset.forName("UTF-8"))); + return ResultSetFactory.fromJSON(bais); + } + +} Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java 2008-01-29 19:24:02 UTC (rev 457) @@ -25,8 +25,6 @@ import java.util.Set; import org.dllearner.kb.sparql.configuration.Configuration; -import org.dllearner.kb.sparql.query.Cache; -import org.dllearner.kb.sparql.query.CachedSparqlQuery; import org.dllearner.utilities.StringTuple; import com.hp.hpl.jena.query.ResultSet; @@ -85,13 +83,12 @@ String sparqlQueryString = sparqlQueryMaker .makeSubjectQueryUsingFilters(uri.toString()); - CachedSparqlQuery csq = new CachedSparqlQuery(configuration - .getSparqlEndpoint(), cache, uri.toString(), sparqlQueryString); +// CachedSparqlQuery csq = new CachedSparqlQuery(configuration +// .getSparqlEndpoint(), cache, uri.toString(), sparqlQueryString); + SparqlQuery query = new SparqlQuery(sparqlQueryString, configuration.getSparqlEndpoint()); + ResultSet rs = cache.executeSparqlQuery(query); - // TODO optimize - ResultSet rs = csq.getAsResultSet(); - List<ResultBinding> l = ResultSetFormatter.toList(rs); p(l.toString()); for (ResultBinding resultBinding : l) { Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java 2008-01-29 19:24:02 UTC (rev 457) @@ -25,7 +25,6 @@ import java.util.Set; import 
org.dllearner.kb.sparql.configuration.Configuration; -import org.dllearner.kb.sparql.query.CachedSparqlQuery; import org.dllearner.utilities.StringTuple; import com.hp.hpl.jena.query.ResultSet; @@ -64,11 +63,9 @@ + " FILTER (!regex(str(?object),'http://xmlns.com/foaf/0.1/'))" + "}"; - CachedSparqlQuery csq = new CachedSparqlQuery(configuration - .getSparqlEndpoint(), cache, uri.toString(), sparqlQueryString); - - // TODO optimize - ResultSet rs = csq.getAsResultSet(); + SparqlQuery query = new SparqlQuery(sparqlQueryString, configuration.getSparqlEndpoint()); + ResultSet rs = cache.executeSparqlQuery(query); + List<ResultBinding> l = ResultSetFormatter.toList(rs); for (ResultBinding resultBinding : l) { Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/test/JenaQueryToResultSpeedTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/test/JenaQueryToResultSpeedTest.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/test/JenaQueryToResultSpeedTest.java 2008-01-29 19:24:02 UTC (rev 457) @@ -19,8 +19,8 @@ */ package org.dllearner.kb.sparql.test; +import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; -import org.dllearner.kb.sparql.query.SparqlQuery; import com.hp.hpl.jena.query.ResultSet; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/test/TestResultSet.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/test/TestResultSet.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/test/TestResultSet.java 2008-01-29 19:24:02 UTC (rev 457) @@ -25,8 +25,8 @@ import java.io.ObjectOutputStream; import java.util.List; +import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; -import org.dllearner.kb.sparql.query.SparqlQuery; import 
com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.sparql.core.ResultBinding; Modified: trunk/src/dl-learner/org/dllearner/server/ClientState.java =================================================================== --- trunk/src/dl-learner/org/dllearner/server/ClientState.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/server/ClientState.java 2008-01-29 19:24:02 UTC (rev 457) @@ -34,7 +34,7 @@ import org.dllearner.core.ReasoningService; import org.dllearner.kb.OWLFile; import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.query.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQuery; /** * Stores the state of a DL-Learner client session. Modified: trunk/src/dl-learner/org/dllearner/server/DLLearnerWS.java =================================================================== --- trunk/src/dl-learner/org/dllearner/server/DLLearnerWS.java 2008-01-29 17:34:13 UTC (rev 456) +++ trunk/src/dl-learner/org/dllearner/server/DLLearnerWS.java 2008-01-29 19:24:02 UTC (rev 457) @@ -47,7 +47,7 @@ import org.dllearner.core.dl.Individual; import org.dllearner.kb.OWLFile; import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.query.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.learningproblems.PosNegDefinitionLP; import org.dllearner.learningproblems.PosNegInclusionLP; import org.dllearner.learningproblems.PosOnlyDefinitionLP; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |