From: <lor...@us...> - 2010-12-13 13:39:59
|
Revision: 2537 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2537&view=rev Author: lorenz_b Date: 2010-12-13 13:39:52 +0000 (Mon, 13 Dec 2010) Log Message: ----------- Some changes for evaluation. Modified Paths: -------------- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/Generalisation.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/EvaluationScript.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QueryFilterScript.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QuerySyntaxFilterScript.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/SingleQueryEvaluationScript.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/datastructures/impl/QueryTreeImpl.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/lgg/LGGGeneratorImpl.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/nbr/strategy/GreedyNBRStrategy.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/Filter.java Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Set; import org.apache.log4j.Logger; import org.dllearner.autosparql.client.exception.SPARQLQueryException; @@ -10,16 +11,18 @@ import org.dllearner.autosparql.server.util.SPARQLEndpointEx; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.sparqlquerygenerator.SPARQLQueryGenerator; +import org.dllearner.sparqlquerygenerator.SPARQLQueryGeneratorCached; import org.dllearner.sparqlquerygenerator.cache.ModelCache; +import org.dllearner.sparqlquerygenerator.cache.QueryTreeCache; import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; -import org.dllearner.sparqlquerygenerator.impl.SPARQLQueryGeneratorImpl; +import org.dllearner.sparqlquerygenerator.impl.SPARQLQueryGeneratorCachedImpl; +import org.dllearner.sparqlquerygenerator.operations.nbr.strategy.BruteForceNBRStrategy; +import org.dllearner.sparqlquerygenerator.operations.nbr.strategy.GreedyNBRStrategy; import org.dllearner.sparqlquerygenerator.util.ModelGenerator; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSetRewindable; import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.sparql.vocabulary.FOAF; import com.hp.hpl.jena.vocabulary.RDFS; @@ -30,6 +33,7 @@ private ExtractionDBCache constructCache; private ModelGenerator modelGen; private ModelCache modelCache; + private QueryTreeCache queryTreeCache; private List<String> posExamples; private List<String> negExamples; @@ -40,6 +44,10 @@ private QueryTree<String> currentQueryTree; + private Set<String> testedQueries; + + private SPARQLQueryGeneratorCached queryGen; + public ExampleFinder(SPARQLEndpointEx endpoint, ExtractionDBCache selectCache, ExtractionDBCache constructCache){ this.endpoint = endpoint; this.selectCache = selectCache; @@ -47,6 +55,11 @@ modelGen = new ModelGenerator(endpoint, new HashSet<String>(endpoint.getPredicateFilters()), constructCache); modelCache = new ModelCache(modelGen); + queryTreeCache = new QueryTreeCache(); + testedQueries = new HashSet<String>(); + + queryGen = new SPARQLQueryGeneratorCachedImpl(new GreedyNBRStrategy()); +// queryGen = new SPARQLQueryGeneratorCachedImpl(new BruteForceNBRStrategy()); } public Example findSimilarExample(List<String> posExamples, @@ -54,23 +67,22 @@ this.posExamples = posExamples; this.negExamples = negExamples; - - QueryTreeGenerator treeGen = new QueryTreeGenerator(constructCache, endpoint, 5000); - List<QueryTree<String>> posExampleTrees = new ArrayList<QueryTree<String>>(); List<QueryTree<String>> negExampleTrees = new ArrayList<QueryTree<String>>(); Model model; + QueryTree<String> queryTree; for(String resource : posExamples){ logger.info("Fetching model for resource: " + resource); model = modelCache.getModel(resource); - logger.info("Statements:\n" + model.listStatements().toList()); - posExampleTrees.add(treeGen.getQueryTree(resource, model)); + queryTree = queryTreeCache.getQueryTree(resource, model); + posExampleTrees.add(queryTree); } for(String resource : negExamples){ logger.info("Fetching model for resource: " + resource); model = modelCache.getModel(resource); - negExampleTrees.add(treeGen.getQueryTree(resource, model)); + queryTree = queryTreeCache.getQueryTree(resource, model); + negExampleTrees.add(queryTree); } if(posExamples.size() == 1 && negExamples.isEmpty()){ @@ -126,11 +138,16 @@ currentQueryTree = genTree; logger.info("Query after generalisation: \n\n" + currentQuery); -// currentQuery = currentQuery + " ORDER BY ?x0 LIMIT 10"; - currentQuery = currentQuery;// + " LIMIT 10"; String result = ""; try { - result = selectCache.executeSelectQuery(endpoint, currentQuery + " LIMIT 10"); + logger.info(tree.getChildren().isEmpty()); + if(testedQueries.contains(currentQuery) && !currentQueryTree.getChildren().isEmpty()){ + return findExampleByGeneralisation(currentQueryTree); + } else { + result = selectCache.executeSelectQuery(endpoint, getLimitedQuery(currentQuery, (posExamples.size()+negExamples.size()+1))); + testedQueries.add(currentQuery); + } + } catch (Exception e) { e.printStackTrace(); throw new SPARQLQueryException(e, encodeHTML(currentQuery)); @@ -142,6 +159,7 @@ while(rs.hasNext()){ qs = rs.next(); uri = qs.getResource("x0").getURI(); + logger.info(uri); if(!posExamples.contains(uri) && !negExamples.contains(uri)){ logger.info("Found new example: " + uri); return getExample(uri); @@ -194,21 +212,26 @@ private Example findExampleByLGG(List<QueryTree<String>> posExamplesTrees, List<QueryTree<String>> negExamplesTrees) throws SPARQLQueryException{ logger.info("USING LGG"); - SPARQLQueryGenerator gen = new SPARQLQueryGeneratorImpl(endpoint.getURL().toString()); if(negExamplesTrees.isEmpty()){ logger.info("No negative examples given. Avoiding big queries by GENERALISATION"); - List<QueryTree<String>> trees = gen.getSPARQLQueryTrees(posExamplesTrees, negExamplesTrees); - return findExampleByGeneralisation(trees.get(0)); + queryGen.getSPARQLQueries(posExamplesTrees); + QueryTree<String> lgg = queryGen.getLastLGG(); + return findExampleByGeneralisation(lgg); } - List<String> queries = gen.getSPARQLQueries(posExamplesTrees, negExamplesTrees); + List<String> queries = queryGen.getSPARQLQueries(posExamplesTrees, negExamplesTrees); for(String query : queries){ + if(testedQueries.contains(query)){ + logger.info("Skipping query because it was already tested before:\n" + query); + continue; + } logger.info("Trying query"); - currentQuery = query;// + " LIMIT 10"; + currentQuery = query; logger.info(query); String result = ""; try { - result = selectCache.executeSelectQuery(endpoint, currentQuery + " LIMIT 10"); + result = selectCache.executeSelectQuery(endpoint, getLimitedQuery(currentQuery, 10)); + testedQueries.add(currentQuery); } catch (Exception e) { e.printStackTrace(); throw new SPARQLQueryException(e, encodeHTML(query)); @@ -228,14 +251,16 @@ } logger.info("None of the queries contained a new example."); logger.info("Changing to Generalisation..."); - return findExampleByGeneralisation(gen.getLastLGG()); + return findExampleByGeneralisation(queryGen.getLastLGG()); } private Example getExample(String uri){ logger.info("Retrieving data for resource " + uri); StringBuilder sb = new StringBuilder(); sb.append("SELECT ?label ?imageURL ?comment WHERE{\n"); + sb.append("OPTIONAL{\n"); sb.append("<").append(uri).append("> <").append(RDFS.label.getURI()).append("> ").append("?label.\n"); + sb.append("}\n"); sb.append("OPTIONAL{\n"); sb.append("<").append(uri).append("> <").append(FOAF.depiction.getURI()).append("> ").append("?imageURL.\n"); sb.append("}\n"); @@ -247,18 +272,23 @@ sb.append("}"); ResultSetRewindable rs = SparqlQuery.convertJSONtoResultSet(selectCache.executeSelectQuery(endpoint, sb.toString())); - QuerySolution qs = rs.next(); - - String label = qs.getLiteral("label").getLexicalForm(); - + String label = uri; String imageURL = ""; - if(qs.getResource("imageURL") != null){ - imageURL = qs.getResource("imageURL").getURI(); - } - String comment = ""; - if(qs.getLiteral("comment") != null){ - comment = qs.getLiteral("comment").getLexicalForm(); + if(rs.hasNext()){ + QuerySolution qs = rs.next(); + + if(qs.getLiteral("label") != null){ + label = qs.getLiteral("label").getLexicalForm(); + } + + if(qs.getResource("imageURL") != null){ + imageURL = qs.getResource("imageURL").getURI(); + } + + if(qs.getLiteral("comment") != null){ + comment = qs.getLiteral("comment").getLexicalForm(); + } } return new Example(uri, label, imageURL, comment); @@ -288,4 +318,8 @@ public String getCurrentQueryHTML(){ return encodeHTML(currentQuery); } + + public String getLimitedQuery(String query, int limit){ + return query + " LIMIT " + limit; + } } Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/Generalisation.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/Generalisation.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/Generalisation.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -1,5 +1,10 @@ package org.dllearner.autosparql.server; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + import org.apache.log4j.Logger; import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; @@ -14,6 +19,10 @@ private int maxEdgeCount = 10; public double pruningFactor = 0.5; + boolean invert = false; + + private List<QueryTree<N>> rest; + public QueryTree<N> generalise(QueryTree<N> queryTree){ QueryTree<N> copy = new QueryTreeImpl<N>(queryTree); @@ -54,17 +63,25 @@ private void pruneTree(QueryTree<N> tree, double limit){ logger.info("Pruning tree:"); - logger.info(tree.getStringRepresentation()); +// logger.info(tree.getStringRepresentation()); + logger.info("Number of triple pattern: " + ((QueryTreeImpl<N>)tree).getTriplePatternCount()); +// logger.info(((QueryTreeImpl<N>)tree).getSPARQLQueryTree().getStringRepresentation()); int childCountBefore = tree.getChildCount(); - for(QueryTree<N> child : tree.getChildren()){ + List<QueryTree<N>> children = new ArrayList<QueryTree<N>>(tree.getChildren()); +// Collections.shuffle(children); + QueryTree<N> child; + for(Iterator<QueryTree<N>> iter = children.iterator(); iter.hasNext(); ){ + child = iter.next(); logger.info("Removing child: " + child); tree.removeChild((QueryTreeImpl<N>) child); - if( (tree.getUserObjectClosure().size() - 1) <= maxEdgeCount + if( (tree.getChildCount()) <= maxEdgeCount && (double)tree.getChildCount()/childCountBefore <= limit){ break; } } + + } private void retainTypeEdges(QueryTree<N> tree){ Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/EvaluationScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/EvaluationScript.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/EvaluationScript.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -4,6 +4,7 @@ import java.net.URL; import java.sql.Connection; import java.sql.DriverManager; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -24,6 +25,7 @@ import org.dllearner.autosparql.server.util.SPARQLEndpointEx; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.sparqlquerygenerator.impl.SPARQLQueryGeneratorImpl; import org.dllearner.sparqlquerygenerator.operations.lgg.LGGGeneratorImpl; import org.dllearner.sparqlquerygenerator.operations.nbr.NBRGeneratorImpl; @@ -31,6 +33,7 @@ import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.jamonapi.MonitorFactory; public class EvaluationScript { @@ -57,10 +60,11 @@ Logger.getLogger(SPARQLQueryGeneratorImpl.class).setLevel(Level.OFF); Logger.getLogger(LGGGeneratorImpl.class).setLevel(Level.OFF); Logger.getLogger(NBRGeneratorImpl.class).setLevel(Level.OFF); - Logger.getLogger(Generalisation.class).setLevel(Level.OFF); + Logger.getLogger(Generalisation.class).setLevel(Level.INFO); SPARQLEndpointEx endpoint = new SPARQLEndpointEx( +// new URL("http://dbpedia.org/sparql"), new URL("http://db0.aksw.org:8999/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList(), @@ -71,11 +75,11 @@ Class.forName("com.mysql.jdbc.Driver"); Connection conn = DriverManager.getConnection("jdbc:mysql://139.18.2.173/dbpedia_queries", "root", "WQPRisDa2"); + PreparedStatement ps = conn.prepareStatement("INSERT INTO evaluation {?,?,?,?,?,?,?,?,?,?}"); - //fetch all queries from table 'tmp', where the number of results is lower than 2000 Statement st = conn.createStatement(); - ResultSet queries = st.executeQuery("SELECT * FROM queries_final WHERE resultCount<2000 ORDER BY resultCount DESC"); + ResultSet queries = st.executeQuery("SELECT * FROM queries_final WHERE resultCount<2000 AND query not like '%filter%' ORDER BY resultCount DESC"); queries.last(); logger.info("Evaluating " + queries.getRow() + " queries."); queries.beforeFirst(); @@ -83,7 +87,6 @@ int id; String query; - QueryEngineHTTP qexec; com.hp.hpl.jena.query.ResultSet rs; SortedSet<String> resources; QuerySolution qs; @@ -91,84 +94,120 @@ List<String> posExamples; List<String> negExamples; //iterate over the queries + int testedCnt = 0; + int learnedCnt = 0; while(queries.next()){ id = queries.getInt("id"); query = queries.getString("query"); logger.info("Evaluating query:\n" + query); + testedCnt++; + MonitorFactory.getTimeMonitor("Query").reset(); + MonitorFactory.getTimeMonitor("LGG").reset(); + MonitorFactory.getTimeMonitor("NBR").reset(); - - //send query to SPARQLEndpoint - qexec = new QueryEngineHTTP(endpoint.getURL().toString(), query); - for (String dgu : endpoint.getDefaultGraphURIs()) { - qexec.addDefaultGraph(dgu); - } - for (String ngu : endpoint.getNamedGraphURIs()) { - qexec.addNamedGraph(ngu); - } - rs = qexec.execSelect(); - - - //put the URIs for the resources in variable var0 into a separate list - resources = new TreeSet<String>(); - while(rs.hasNext()){ - qs = rs.next(); - if(qs.get("var0").isURIResource()){ - resources.add(qs.get("var0").asResource().getURI()); + try { + //send query to SPARQLEndpoint + rs = SparqlQuery.convertJSONtoResultSet(selectQueriesCache.executeSelectQuery(endpoint, query)); + + + //put the URIs for the resources in variable var0 into a separate list + resources = new TreeSet<String>(); + while(rs.hasNext()){ + qs = rs.next(); + if(qs.get("var0").isURIResource()){ + resources.add(qs.get("var0").asResource().getURI()); + } } + logger.info("Query returned " + resources.size() + " results:\n" + resources); + + + //start learning + exampleFinder = new ExampleFinder(endpoint, selectQueriesCache, constructQueriesCache); + posExamples = new ArrayList<String>(); + negExamples = new ArrayList<String>(); + //we choose the first resource in the set as positive example + String posExample = resources.first(); + logger.info("Selected " + posExample + " as first positive example."); + posExamples.add(posExample); + //we ask for the next similar example + String nextExample; + String learnedQuery = ""; + boolean equivalentQueries = false; + do{ + nextExample = exampleFinder.findSimilarExample(posExamples, negExamples).getURI(); + logger.info("Next suggested example is " + nextExample); + //if the example is contained in the resultset of the query, we add it to the positive examples, + //otherwise to the negatives + if(resources.contains(nextExample)){ + posExamples.add(nextExample); + logger.info("Suggested example is considered as positive example."); + } else { + negExamples.add(nextExample); + logger.info("Suggested example is considered as negative example."); + } + if(learnedQuery.equals(exampleFinder.getCurrentQuery())){ + continue; + } + learnedQuery = exampleFinder.getCurrentQuery(); + logger.info("Learned query:\n" + learnedQuery); + equivalentQueries = isEquivalentQuery(resources, learnedQuery, endpoint); + logger.info("Original query and learned query are equivalent: " + equivalentQueries); + } while(!equivalentQueries); + + int posExamplesCount = posExamples.size(); + int negExamplesCount = negExamples.size(); + int examplesCount = posExamplesCount + negExamplesCount; + double queryTime = MonitorFactory.getTimeMonitor("Query").getTotal(); + double lggTime = MonitorFactory.getTimeMonitor("LGG").getTotal(); + double nbrTime = MonitorFactory.getTimeMonitor("NBR").getTotal(); + double totalTime = queryTime + nbrTime + lggTime; + + write2DB(ps, id, query, learnedQuery, + examplesCount, posExamplesCount, negExamplesCount, + totalTime, queryTime, lggTime, nbrTime); + logger.info("Number of examples needed: " + + (posExamples.size() + negExamples.size()) + + "(+" + posExamples.size() + "/-" + negExamples.size() + ")"); + learnedCnt++; + if(testedCnt == 200){ + break; } - logger.info("Query returned " + resources.size() + " results:\n" + resources); - - - //start learning - exampleFinder = new ExampleFinder(endpoint, selectQueriesCache, constructQueriesCache); - posExamples = new ArrayList<String>(); - negExamples = new ArrayList<String>(); - //we choose the first resource in the set as positive example - String posExample = resources.first(); - logger.info("Selected " + posExample + " as first positive example."); - posExamples.add(posExample); - //we ask for the next similar example -// String nextExample = exampleFinder.findSimilarExample(posExamples, negExamples).getURI(); -// logger.info("Next suggested example is " + nextExample); -// //if the example is contained in the resultset of the query, we add it to the positive examples, -// //otherwise to the negatives -// if(resources.contains(nextExample)){ -// posExamples.add(nextExample); -// logger.info("Suggested example is considered as positive example."); -// } else { -// negExamples.add(nextExample); -// logger.info("Suggested example is considered as negative example."); -// } -// nextExample = exampleFinder.findSimilarExample(posExamples, negExamples).getURI(); - - String nextExample; - String learnedQuery; - boolean equivalentQueries = false; - do{ - nextExample = exampleFinder.findSimilarExample(posExamples, negExamples).getURI(); - logger.info("Next suggested example is " + nextExample); - //if the example is contained in the resultset of the query, we add it to the positive examples, - //otherwise to the negatives - if(resources.contains(nextExample)){ - posExamples.add(nextExample); - logger.info("Suggested example is considered as positive example."); - } else { - negExamples.add(nextExample); - logger.info("Suggested example is considered as negative example."); - } - learnedQuery = exampleFinder.getCurrentQuery(); - logger.info("Learned query:\n" + learnedQuery); - equivalentQueries = isEquivalentQuery(resources, learnedQuery, endpoint); - logger.info("Original query and learned query are equivalent: " + equivalentQueries); - } while(!equivalentQueries); - - logger.info("Number of examples needed: " + (posExamples.size() + negExamples.size())); - - break; + } catch (Exception e) { + logger.error("Error while learning query " + id, e); + } } + logger.info("Learned " + learnedCnt + " of " + testedCnt + " queries"); + logger.info("Time to compute LGG(total): " + MonitorFactory.getTimeMonitor("LGG").getTotal()); + logger.info("Time to compute LGG(avg): " + MonitorFactory.getTimeMonitor("LGG").getAvg()); + logger.info("Time to compute LGG(min): " + MonitorFactory.getTimeMonitor("LGG").getMin()); + logger.info("Time to compute LGG(max): " + MonitorFactory.getTimeMonitor("LGG").getMax()); } + private static void write2DB(PreparedStatement ps, + int id, String originalQuery, String learnedQuery, int examplesCount, + int posExamplesCount, int negExamplesCount, double totalTime, + double queryTime, double lggTime, double nbrTime){ + try { + ps.setInt(1, id); + ps.setString(2, originalQuery); + ps.setString(3, learnedQuery); + ps.setInt(4, examplesCount); + ps.setInt(5, posExamplesCount); + ps.setInt(6, negExamplesCount); + ps.setDouble(7, totalTime); + ps.setDouble(8, queryTime); + ps.setDouble(9, lggTime); + ps.setDouble(10, nbrTime); + + ps.executeUpdate(); + } catch (SQLException e) { + e.printStackTrace(); + } + + } + + /** * Check if resultset of the learned query is equivalent to the resultset of the original query * @param originalResources Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QueryFilterScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QueryFilterScript.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QueryFilterScript.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -19,6 +19,7 @@ import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QueryParseException; import com.hp.hpl.jena.sparql.algebra.Algebra; import com.hp.hpl.jena.sparql.algebra.Op; import com.hp.hpl.jena.sparql.core.BasicPattern; @@ -36,7 +37,20 @@ private Connection conn; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + private static final String PREFIXES = "PREFIX owl: <http://www.w3.org/2002/07/owl#> " + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> " + + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " + + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " + + "PREFIX foaf: <http://xmlns.com/foaf/0.1/> " + + "PREFIX dc: <http://purl.org/dc/elements/1.1/> " + + "PREFIX : <http://dbpedia.org/resource/> " + + "PREFIX dbpedia2: <http://dbpedia.org/property/> " + + "PREFIX dbpedia: <http://dbpedia.org/> " + + "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> " + + "PREFIX umbelBus: <http://umbel.org/umbel/sc/Business> " + + "PREFIX umbelCountry: <http://umbel.org/umbel/sc/IndependentCountry>"; + public QueryFilterScript(){ try { Class.forName("com.mysql.jdbc.Driver"); @@ -51,7 +65,8 @@ private void run(){ // countQueryResultSet(); - filterQueriesWithTargetVarNotSubject(); + filterQueriesWithSyntaxError(); +// filterQueriesWithTargetVarNotSubject(); } private void countQueryResultSet(){ @@ -109,12 +124,42 @@ } } + private void filterQueriesWithSyntaxError(){ + try { + Statement st = conn.createStatement(); + PreparedStatement ps = conn.prepareStatement("DELETE FROM queries_final WHERE id = ?"); + ResultSet rs = st.executeQuery("SELECT * FROM queries_final where query not like '%filter%'"); + + int id; + String query; + while(rs.next()){ + id = rs.getInt("id"); + query = rs.getString("query"); + + try { + if(!checkQuerySyntax(query)){ + System.out.println(query); + System.out.println(rs.getInt("resultCount")); + ps.setInt(1, id); +// ps.execute(); + } + } catch (Exception e) { + logger.error("ERROR. An error occured while working with query " + id, e); + } + + } + } catch (SQLException e) { + e.printStackTrace(); + } + + } + private void filterQueriesWithTargetVarNotSubject(){ try { Statement st = conn.createStatement(); - PreparedStatement ps = conn.prepareStatement("DELETE FROM tmp WHERE id = ?"); + PreparedStatement ps = conn.prepareStatement("DELETE FROM queries_final WHERE id = ?"); - ResultSet rs = st.executeQuery("SELECT * FROM tmp"); + ResultSet rs = st.executeQuery("SELECT * FROM queries_final WHERE resultCount<2000 AND query not like '%filter%'"); int id; String query; @@ -170,15 +215,25 @@ return true; } - private boolean checkQuerySyntax(String query) { - Query q = QueryFactory.create(query); - Op op = Algebra.compile(q); - // ... perform checks ... can we fully decide when an algebra expression is not in the target language? - SSE.write(op) ; + private boolean checkQuerySyntax(String queryString) { + queryString = queryString.replaceAll("owl:sameAs", "<http://www.w3.org/2002/07/owl#owl:sameAs>"); + queryString = queryString.replaceAll("skos:subject", "<http://www.w3.org/2004/02/skos/core#subject>"); +// Query q = QueryFactory.create(query); +// Op op = Algebra.compile(q); +// // ... perform checks ... can we fully decide when an algebra expression is not in the target language? +// SSE.write(op) ; + try { + QueryFactory.create(queryString); + } catch (Exception e) { + e.printStackTrace(); + return false; + } return true; } private boolean checkTargetVarIsSubject(String queryString){ + queryString = queryString.replaceAll("owl:sameAs", "<http://www.w3.org/2002/07/owl#owl:sameAs>"); + queryString = queryString.replaceAll("skos:subject", "<http://www.w3.org/2004/02/skos/core#subject>"); try { Query query = QueryFactory.create(queryString); Element queryPattern = query.getQueryPattern(); Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QuerySyntaxFilterScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QuerySyntaxFilterScript.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/QuerySyntaxFilterScript.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -14,7 +14,16 @@ * @param args */ public static void main(String[] args) { - String queryString = "SELECT ?var0 WHERE {<s> <p> ?var0;<p1> <o1>.}"; +// String queryString = "SELECT ?var0 WHERE {<s> <p> ?var0;<p1> <o1>.}"; +// String queryString = "SELECT DISTINCT ?var0, ?var1 WHERE { <http://dbpedia.org/resource/Taylor_Swift> skos:subject ?var0. " + +// "?var0 rdfs:label ?var1 FILTER langMatches( lang(?var1), \"en\" ) }"; + String queryString = "PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX : <http://dbpedia.org/resource/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " + + "PREFIX dbpedia2: <http://dbpedia.org/property/> PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX owl: <http://www.w3.org/2002/07/owl#> " + + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX dbpedia: <http://dbpedia.org/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " + + "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> " + + "SELECT ?var0 ?var1 WHERE { { ?var1 ?var0 :Bohol_Sea .} " + + "FILTER ( ( isURI(?var1) && ( ! regex(?var0, \"^http://dbpedia.org/property/redirect\") ) ) " + + "&& ( ! regex(?var0, \"^http://dbpedia.org/property/disambiguates\") ) ) }"; Query query = QueryFactory.create(queryString); Element queryPattern = query.getQueryPattern(); // System.out.println(queryPattern); Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/SingleQueryEvaluationScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/SingleQueryEvaluationScript.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/evaluation/SingleQueryEvaluationScript.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -55,12 +55,12 @@ Logger.getLogger(SPARQLQueryGeneratorImpl.class).setLevel(Level.INFO); Logger.getLogger(LGGGeneratorImpl.class).setLevel(Level.OFF); Logger.getLogger(NBRGeneratorImpl.class).setLevel(Level.OFF); - Logger.getLogger(Generalisation.class).setLevel(Level.OFF); + Logger.getLogger(Generalisation.class).setLevel(Level.INFO); SPARQLEndpointEx endpoint = new SPARQLEndpointEx( -// new URL("http://dbpedia.org/sparql"), - new URL("http://db0.aksw.org:8999/sparql"), + new URL("http://dbpedia.org/sparql"), +// new URL("http://db0.aksw.org:8999/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList(), null, null, Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/datastructures/impl/QueryTreeImpl.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/datastructures/impl/QueryTreeImpl.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -292,6 +292,49 @@ getUserObjectClosure(this, objects); return objects; } + + public int getTriplePatternCount(){ + return countTriplePattern(this); + } + + private int countTriplePattern(QueryTree<N> tree){ + int cnt = 0; + Object object; + if(!tree.isLeaf()){ + for(QueryTree<N> child : tree.getChildren()){ + object = child.getUserObject(); + boolean objectIsResource = !object.equals("?"); + cnt++; + if(!objectIsResource){ + cnt+=countTriplePattern(child); + } + } + } + return cnt; + } + + public QueryTree<N> getSPARQLQueryTree(){ + return createSPARQLQueryTree(this); + } + + private QueryTree<N> createSPARQLQueryTree(QueryTree<N> tree){ + QueryTree<N> copy = new QueryTreeImpl<N>(tree.getUserObject()); + if(tree.getUserObject().equals("?")){ + for(QueryTree<N> child : tree.getChildren()){ + copy.addChild((QueryTreeImpl<N>) createSPARQLQueryTree(child), tree.getEdge(child)); + } + } +// for(QueryTree<N> child : tree.getChildren()){ +// if(child.getUserObject().equals("?")){ +// copy.addChild((QueryTreeImpl<N>) createSPARQLQueryTree(child), tree.getEdge(child)); +// } else { +// copy.addChild((QueryTreeImpl<N>) child, tree.getEdge(child)); +// } +// +// } + + return copy; + } private void getUserObjectClosure(QueryTree<N> tree, Set<N> bin) { bin.add(tree.getUserObject()); Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/lgg/LGGGeneratorImpl.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/lgg/LGGGeneratorImpl.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -28,6 +28,9 @@ import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + /** * * @author Lorenz Bühmann @@ -68,12 +71,15 @@ if(trees.size() == 1){ return trees.iterator().next(); } + Monitor mon = MonitorFactory.getTimeMonitor("LGG"); + mon.start(); QueryTree<N> lgg = computeLGG(treeList.get(0), treeList.get(1), learnFilters); logger.info("LGG for 1 and 2:\n" + lgg.getStringRepresentation()); for(int i = 2; i < treeList.size(); i++){ lgg = computeLGG(lgg, treeList.get(i), learnFilters); logger.info("LGG for 1-" + (i+1) + ":\n" + lgg.getStringRepresentation()); } + mon.stop(); logger.info("LGG = "); logger.info(lgg.getStringRepresentation()); @@ -82,10 +88,12 @@ } private QueryTree<N> computeLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters){ - logger.debug("Computing LGG for"); - logger.debug(tree1.getStringRepresentation()); - logger.debug("and"); - logger.debug(tree2.getStringRepresentation()); + if(logger.isDebugEnabled()){ + logger.debug("Computing LGG for"); + logger.debug(tree1.getStringRepresentation()); + logger.debug("and"); + logger.debug(tree2.getStringRepresentation()); + } QueryTree<N> lgg = new QueryTreeImpl<N>(tree1.getUserObject()); // if(!lgg.getUserObject().equals(tree2.getUserObject())){ @@ -114,36 +122,46 @@ Set<QueryTreeImpl<N>> addedChildren; QueryTreeImpl<N> lggChild; for(Object edge : tree1.getEdges()){ - logger.debug("Regarding egde: " + edge); + if(logger.isDebugEnabled()){ + logger.debug("Regarding egde: " + edge); + } addedChildren = new HashSet<QueryTreeImpl<N>>(); for(QueryTree<N> child1 : tree1.getChildren(edge)){ for(QueryTree<N> child2 : tree2.getChildren(edge)){ lggChild = (QueryTreeImpl<N>) computeLGG(child1, child2, learnFilters); boolean add = true; for(QueryTreeImpl<N> addedChild : addedChildren){ - logger.debug("Subsumption test"); + if(logger.isDebugEnabled()){ + logger.debug("Subsumption test"); + } if(addedChild.isSubsumedBy(lggChild)){ - logger.debug("Previously added child"); - logger.debug(addedChild.getStringRepresentation()); - logger.debug("is subsumed by"); - logger.debug(lggChild.getStringRepresentation()); - logger.debug("so we can skip adding the LGG"); + if(logger.isDebugEnabled()){ + logger.debug("Previously added child"); + logger.debug(addedChild.getStringRepresentation()); + logger.debug("is subsumed by"); + logger.debug(lggChild.getStringRepresentation()); + logger.debug("so we can skip adding the LGG"); + } add = false; break; } else if(lggChild.isSubsumedBy(addedChild)){ - logger.debug("Computed LGG"); - logger.debug(lggChild.getStringRepresentation()); - logger.debug("is subsumed by previously added child"); - logger.debug(addedChild.getStringRepresentation()); - logger.debug("so we can remove it"); + if(logger.isDebugEnabled()){ + logger.debug("Computed LGG"); + logger.debug(lggChild.getStringRepresentation()); + logger.debug("is subsumed by previously added child"); + logger.debug(addedChild.getStringRepresentation()); + logger.debug("so we can remove it"); + } lgg.removeChild(addedChild); } } if(add){ lgg.addChild(lggChild, edge); addedChildren.add(lggChild); - logger.debug("Adding child"); - logger.debug(lggChild.getStringRepresentation()); + if(logger.isDebugEnabled()){ + logger.debug("Adding child"); + logger.debug(lggChild.getStringRepresentation()); + } } } } Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/nbr/strategy/GreedyNBRStrategy.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/nbr/strategy/GreedyNBRStrategy.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/operations/nbr/strategy/GreedyNBRStrategy.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -29,6 +30,8 @@ import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; +import com.hp.hpl.jena.sparql.function.library.e; + /** * * @author Lorenz Bühmann @@ -65,17 +68,54 @@ // } // } // + System.out.println(printTreeWithValues(nbr, matrix)); + + List<QueryTree<N>> candidates2Remove = new ArrayList<QueryTree<N>>(); + for(Entry<QueryTree<N>, List<Integer>> entry : matrix.entrySet()){ - System.err.println(entry.getValue()); if(sum(entry.getValue()) < negExampleTrees.size()/2.0){ - System.err.println("REMOVE"); - nbr.removeChild((QueryTreeImpl<N>) entry.getKey()); + candidates2Remove.add(entry.getKey()); } } + removeLeafs(nbr, candidates2Remove); return nbr; } + private void removeLeafs(QueryTree<N> nbr, List<QueryTree<N>> candidates2Remove){ + for(Iterator<QueryTree<N>> iter = nbr.getLeafs().iterator(); iter.hasNext();){ + QueryTree<N> leaf = iter.next(); + + if(candidates2Remove.contains(leaf)){ + leaf.getParent().removeChild((QueryTreeImpl<N>) leaf); + } + } + } + + private String printTreeWithValues(QueryTree<N> tree, Map<QueryTree<N>, List<Integer>> matrix){ + int depth = tree.getPathToRoot().size(); + StringBuilder sb = new StringBuilder(); + if(tree.isRoot()){ + sb.append("TREE\n\n"); + } +// ren = ren.replace("\n", "\n" + sb); + sb.append(tree.getUserObject() + "(" +matrix.get(tree) + ")"); + sb.append("\n"); + for (QueryTree<N> child : tree.getChildren()) { + for (int i = 0; i < depth; i++) { + sb.append("\t"); + } + Object edge = tree.getEdge(child); + if (edge != null) { + sb.append(" "); + sb.append(edge); + sb.append(" ---> "); + } + sb.append(printTreeWithValues(child, matrix)); + } + return sb.toString(); + } + private int sum(List<Integer> list){ int sum = 0; for(Integer i : list){ @@ -90,39 +130,58 @@ return Collections.singletonList(computeNBR(posExampleTree, negExampleTrees)); } +// private void checkTree(Map<QueryTree<N>, List<Integer>> matrix, QueryTree<N> posTree, QueryTree<N> negTree, int index){ +// int entry; +// if(!posTree.getUserObject().equals("?") && !posTree.getUserObject().equals(negTree.getUserObject())){ +// entry = 1; +// } else { +// entry = 1; +// for(Object edge : posTree.getEdges()){ +// for(QueryTree<N> child1 : posTree.getChildren(edge)){ +// for(QueryTree<N> child2 : negTree.getChildren(edge)){ +// if(!posTree.getUserObject().equals("?") && child1.getUserObject().equals(child2.getUserObject())){ +// entry = 0;break; +// } +// if(posTree.getUserObject().equals("?")){ +// checkTree(matrix, child1, child2, index); +// } +// } +// } +// } +// Object edge; +// for(QueryTree<N> child1 : posTree.getChildren()){ +// edge = posTree.getEdge(child1); +// for(QueryTree<N> child2 : negTree.getChildren(edge)){ +// +// } +// +// } +// } +// setMatrixEntry(matrix, posTree, index, entry); +// if(entry == 1){ +// for(QueryTree<N> child : posTree.getChildrenClosure()){ +// setMatrixEntry(matrix, child, index, 0); +// } +// } +// } + private void checkTree(Map<QueryTree<N>, List<Integer>> matrix, QueryTree<N> posTree, QueryTree<N> negTree, int index){ - int entry; - if(!posTree.getUserObject().equals("?") && !posTree.getUserObject().equals(negTree.getUserObject())){ + int entry = 1; + Object edge; + for(QueryTree<N> child1 : posTree.getChildren()){ entry = 1; - } else { - entry = 1; - for(Object edge : posTree.getEdges()){ - for(QueryTree<N> child1 : posTree.getChildren(edge)){ - for(QueryTree<N> child2 : negTree.getChildren(edge)){ - if(!posTree.getUserObject().equals("?") && child1.getUserObject().equals(child2.getUserObject())){ - entry = 0;break; - } - if(posTree.getUserObject().equals("?")){ - checkTree(matrix, child1, child2, index); - } - } - } - } - Object edge; - for(QueryTree<N> child1 : posTree.getChildren()){ - edge = posTree.getEdge(child1); - for(QueryTree<N> child2 : negTree.getChildren(edge)){ - - } - - } + edge = posTree.getEdge(child1); + for(QueryTree<N> child2 : negTree.getChildren(edge)){ + if(!child1.getUserObject().equals("?") && child1.getUserObject().equals(child2.getUserObject())){ + entry = 0;checkTree(matrix, child1, child2, index); + } else if(child1.getUserObject().equals("?")){ + entry = 0; + checkTree(matrix, child1, child2, index); + } + } + setMatrixEntry(matrix, child1, index, entry); } - setMatrixEntry(matrix, posTree, index, entry); - if(entry == 1){ - for(QueryTree<N> child : posTree.getChildrenClosure()){ - setMatrixEntry(matrix, child, index, 0); - } - } + } private void setMatrixEntry(Map<QueryTree<N>, List<Integer>> matrix, QueryTree<N> row, int column, int entry){ @@ -134,7 +193,7 @@ try { list.set(column, entry); } catch (IndexOutOfBoundsException e) { - list.add(column, entry); + list.add(entry); } } Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/Filter.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/Filter.java 2010-12-13 13:03:48 UTC (rev 2536) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/Filter.java 2010-12-13 13:39:52 UTC (rev 2537) @@ -54,6 +54,9 @@ filters.add(FOAF.Image.toString()); filters.add(FOAF.surname.toString()); filters.add(FOAF.birthday.toString()); + filters.add(FOAF.name.toString()); + filters.add(FOAF.firstName.toString()); + filters.add(FOAF.givenname.toString()); return filters; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |