You can subscribe to this list here.
2007 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug (120) | Sep (36) | Oct (116) | Nov (17) | Dec (44) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2008 | Jan (143) | Feb (192) | Mar (74) | Apr (84) | May (105) | Jun (64) | Jul (49) | Aug (120) | Sep (159) | Oct (156) | Nov (51) | Dec (28) |
2009 | Jan (17) | Feb (55) | Mar (33) | Apr (57) | May (54) | Jun (28) | Jul (6) | Aug (16) | Sep (38) | Oct (30) | Nov (26) | Dec (52) |
2010 | Jan (7) | Feb (91) | Mar (65) | Apr (2) | May (14) | Jun (25) | Jul (38) | Aug (48) | Sep (80) | Oct (70) | Nov (75) | Dec (77) |
2011 | Jan (68) | Feb (53) | Mar (51) | Apr (35) | May (65) | Jun (101) | Jul (29) | Aug (230) | Sep (95) | Oct (49) | Nov (110) | Dec (63) |
2012 | Jan (41) | Feb (42) | Mar (25) | Apr (46) | May (51) | Jun (44) | Jul (45) | Aug (29) | Sep (12) | Oct (9) | Nov (17) | Dec (2) |
2013 | Jan (12) | Feb (14) | Mar (7) | Apr (16) | May (54) | Jun (27) | Jul (11) | Aug (5) | Sep (85) | Oct (27) | Nov (37) | Dec (32) |
2014 | Jan (8) | Feb (29) | Mar (5) | Apr (3) | May (22) | Jun (3) | Jul (4) | Aug (3) | Sep | Oct | Nov | Dec |
From: <lor...@us...> - 2012-08-23 13:19:14
|
Revision: 3834 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3834&view=rev Author: lorenz_b Date: 2012-08-23 13:19:03 +0000 (Thu, 23 Aug 2012) Log Message: ----------- Started feature to explain score in enrichment algorithms and return pos and neg examples. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2012-08-23 13:13:39 UTC (rev 3833) +++ trunk/components-core/pom.xml 2012-08-23 13:19:03 UTC (rev 3834) @@ -242,7 +242,12 @@ <artifactId>jsexp</artifactId> <version>0.1.0</version> </dependency> - + + <dependency> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + <version>2.8.0</version> + </dependency> </dependencies> <dependencyManagement> <dependencies> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java 2012-08-23 13:13:39 UTC (rev 3833) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/FunctionalObjectPropertyAxiomLearner.java 2012-08-23 13:19:03 UTC (rev 3834) @@ -20,6 +20,11 @@ package org.dllearner.algorithms.properties; import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; import org.dllearner.core.AbstractAxiomLearningAlgorithm; import 
org.dllearner.core.ComponentAnn; @@ -27,12 +32,14 @@ import org.dllearner.core.config.ConfigOption; import org.dllearner.core.config.ObjectPropertyEditor; import org.dllearner.core.owl.FunctionalObjectPropertyAxiom; +import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.SparqlEndpoint; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.hp.hpl.jena.query.ParameterizedSparqlString; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; @@ -51,6 +58,8 @@ public FunctionalObjectPropertyAxiomLearner(SparqlEndpointKS ks){ this.ks = ks; + posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s ?p ?o1. FILTER NOT EXISTS {?s ?p ?o2. FILTER(?o1 != ?o2)} }"); + negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT ?s WHERE {?s ?p ?o1. ?s ?p ?o2. FILTER(?o1 != ?o2)}"); } public ObjectProperty getPropertyToDescribe() { @@ -59,6 +68,8 @@ public void setPropertyToDescribe(ObjectProperty propertyToDescribe) { this.propertyToDescribe = propertyToDescribe; + posExamplesQueryTemplate.setIri("p", propertyToDescribe.getURI().toString()); + negExamplesQueryTemplate.setIri("p", propertyToDescribe.getURI().toString()); } @Override @@ -86,36 +97,36 @@ } private void runSPARQL1_0_Mode() { - Model model = ModelFactory.createDefaultModel(); + workingModel = ModelFactory.createDefaultModel(); int limit = 1000; int offset = 0; String baseQuery = "CONSTRUCT {?s <%s> ?o.} WHERE {?s <%s> ?o} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), propertyToDescribe.getName(), limit, offset); Model newModel = executeConstructQuery(query); while(!terminationCriteriaSatisfied() && newModel.size() != 0){System.out.println(query); - model.add(newModel); + workingModel.add(newModel); // get number of instances of s with <s p o> query = 
String.format( "SELECT (COUNT(DISTINCT ?s) AS ?all) WHERE {?s <%s> ?o.}", propertyToDescribe.getName()); - ResultSet rs = executeSelectQuery(query, model); + ResultSet rs = executeSelectQuery(query, workingModel); QuerySolution qs; int all = 1; while (rs.hasNext()) { qs = rs.next(); all = qs.getLiteral("all").getInt(); } - System.out.println(all); + // get number of instances of s with <s p o> <s p o1> where o != o1 query = "SELECT (COUNT(DISTINCT ?s) AS ?functional) WHERE {?s <%s> ?o1. FILTER NOT EXISTS {?s <%s> ?o2. FILTER(?o1 != ?o2)} }"; query = query.replace("%s", propertyToDescribe.getURI().toString()); - rs = executeSelectQuery(query, model); + rs = executeSelectQuery(query, workingModel); int functional = 1; while (rs.hasNext()) { qs = rs.next(); functional = qs.getLiteral("functional").getInt(); } - System.out.println(functional); + if (all > 0) { currentlyBestAxioms.clear(); currentlyBestAxioms.add(new EvaluatedAxiom( @@ -155,13 +166,21 @@ } public static void main(String[] args) throws Exception{ - FunctionalObjectPropertyAxiomLearner l = new FunctionalObjectPropertyAxiomLearner(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW())); - l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/wikiPageExternalLink")); - l.setMaxExecutionTimeInSeconds(10); -// l.setForceSPARQL_1_0_Mode(true); + FunctionalObjectPropertyAxiomLearner l = new FunctionalObjectPropertyAxiomLearner(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia())); + l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/league")); + l.setMaxExecutionTimeInSeconds(20); + l.setForceSPARQL_1_0_Mode(true); l.init(); l.start(); - System.out.println(l.getCurrentlyBestEvaluatedAxioms(5)); + List<EvaluatedAxiom> axioms = l.getCurrentlyBestEvaluatedAxioms(5); + System.out.println(axioms); + + for(EvaluatedAxiom axiom : axioms){ + printSubset(l.getPositiveExamples(axiom), 10); + printSubset(l.getNegativeExamples(axiom), 10); + l.explainScore(axiom); + } 
+ } } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java 2012-08-23 13:19:03 UTC (rev 3834) @@ -0,0 +1,221 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package org.dllearner.algorithms.properties; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.SortedSet; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.SimpleLayout; +import org.dllearner.core.AbstractAxiomLearningAlgorithm; +import org.dllearner.core.ComponentAnn; +import org.dllearner.core.EvaluatedAxiom; +import org.dllearner.core.config.ConfigOption; +import org.dllearner.core.config.ObjectPropertyEditor; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.ObjectPropertyDomainAxiom; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.reasoning.SPARQLReasoner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.hp.hpl.jena.query.ParameterizedSparqlString; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; + +@ComponentAnn(name="objectproperty domain axiom learner", shortName="opldomain", version=0.1) +public class ObjectPropertyDomainAxiomLearner2 extends AbstractAxiomLearningAlgorithm { + + private static final Logger logger = LoggerFactory.getLogger(ObjectPropertyDomainAxiomLearner2.class); + + private Map<Individual, SortedSet<Description>> individual2Types; + + + @ConfigOption(name="propertyToDescribe", description="", propertyEditorClass=ObjectPropertyEditor.class) + private ObjectProperty 
propertyToDescribe; + + public ObjectPropertyDomainAxiomLearner2(SparqlEndpointKS ks){ + this.ks = ks; + super.posExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s a ?type}"); + super.negExamplesQueryTemplate = new ParameterizedSparqlString("SELECT DISTINCT ?s WHERE {?s ?p ?o. FILTER NOT EXISTS{?s a ?type}}"); + + } + + public ObjectProperty getPropertyToDescribe() { + return propertyToDescribe; + } + + public void setPropertyToDescribe(ObjectProperty propertyToDescribe) { + this.propertyToDescribe = propertyToDescribe; +// negExamplesQueryTemplate.clearParams(); +// posExamplesQueryTemplate.clearParams(); + } + + @Override + public void start() { + iterativeQueryTemplate.setIri("p", propertyToDescribe.getName()); + logger.info("Start learning..."); + startTime = System.currentTimeMillis(); + fetchedRows = 0; + currentlyBestAxioms = new ArrayList<EvaluatedAxiom>(); + + if(returnOnlyNewAxioms){ + //get existing domains + Description existingDomain = reasoner.getDomain(propertyToDescribe); + if(existingDomain != null){ + existingAxioms.add(new ObjectPropertyDomainAxiom(propertyToDescribe, existingDomain)); + if(reasoner.isPrepared()){ + if(reasoner.getClassHierarchy().contains(existingDomain)){ + for(Description sup : reasoner.getClassHierarchy().getSuperClasses(existingDomain)){ + existingAxioms.add(new ObjectPropertyDomainAxiom(propertyToDescribe, existingDomain)); + logger.info("Existing domain(inferred): " + sup); + } + } + + } + } + } + + runSPARQL1_0_Mode(); + logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); + } + + private void runSPARQL1_0_Mode() { + workingModel = ModelFactory.createDefaultModel(); + int limit = 1000; + int offset = 0; + String baseQuery = "CONSTRUCT {?s a ?type.} WHERE {?s <%s> ?o. 
?s a ?type.} LIMIT %d OFFSET %d"; + String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + Model newModel = executeConstructQuery(query); + while(!terminationCriteriaSatisfied() && newModel.size() != 0){ + workingModel.add(newModel); + // get number of distinct subjects + query = "SELECT (COUNT(DISTINCT ?s) AS ?all) WHERE {?s a ?type.}"; + ResultSet rs = executeSelectQuery(query, workingModel); + QuerySolution qs; + int all = 1; + while (rs.hasNext()) { + qs = rs.next(); + all = qs.getLiteral("all").getInt(); + } + + // get class and number of instances + query = "SELECT ?type (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a ?type.} GROUP BY ?type"; + rs = executeSelectQuery(query, workingModel); + + if (all > 0) { + currentlyBestAxioms.clear(); + while(rs.hasNext()){ + qs = rs.next(); + Resource type = qs.get("type").asResource(); + //omit owl:Thing as trivial domain + if(type.equals(OWL.Thing)){ + continue; + } + currentlyBestAxioms.add(new EvaluatedAxiom( + new ObjectPropertyDomainAxiom(propertyToDescribe, new NamedClass(type.getURI())), + computeScore(all, qs.get("cnt").asLiteral().getInt()))); + } + + } + offset += limit; + query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); + newModel = executeConstructQuery(query); + fillWithInference(newModel); + } + } + + private void fillWithInference(Model model){ + Model additionalModel = ModelFactory.createDefaultModel(); + if(reasoner.isPrepared()){ + for(StmtIterator iter = model.listStatements(null, RDF.type, (RDFNode)null); iter.hasNext();){ + Statement st = iter.next(); + Description cls = new NamedClass(st.getObject().asResource().getURI()); + if(reasoner.getClassHierarchy().contains(cls)){ + for(Description sup : reasoner.getClassHierarchy().getSuperClasses(cls)){ + additionalModel.add(st.getSubject(), st.getPredicate(), model.createResource(sup.toString())); + } + } + } + } + model.add(additionalModel); + } + + @Override + protected SortedSet<Individual> 
getPositiveExamples(EvaluatedAxiom evAxiom) { + ObjectPropertyDomainAxiom axiom = (ObjectPropertyDomainAxiom) evAxiom.getAxiom(); + posExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return super.getPositiveExamples(evAxiom); + } + + @Override + protected SortedSet<Individual> getNegativeExamples(EvaluatedAxiom evAxiom) { + ObjectPropertyDomainAxiom axiom = (ObjectPropertyDomainAxiom) evAxiom.getAxiom(); + negExamplesQueryTemplate.setIri("type", axiom.getDomain().toString()); + return super.getNegativeExamples(evAxiom); + } + + public static void main(String[] args) throws Exception{ + org.apache.log4j.Logger.getRootLogger().addAppender(new ConsoleAppender(new SimpleLayout())); + org.apache.log4j.Logger.getRootLogger().setLevel(Level.INFO); + org.apache.log4j.Logger.getLogger(DataPropertyDomainAxiomLearner.class).setLevel(Level.INFO); + + SparqlEndpointKS ks = new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpediaLiveAKSW()); + + SPARQLReasoner reasoner = new SPARQLReasoner(ks); + reasoner.prepareSubsumptionHierarchy(); + + + ObjectPropertyDomainAxiomLearner2 l = new ObjectPropertyDomainAxiomLearner2(ks); + l.setReasoner(reasoner); + l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/league")); + l.setMaxFetchedRows(20000); + l.setMaxExecutionTimeInSeconds(20); + l.addFilterNamespace("http://dbpedia.org/ontology/"); +// l.setReturnOnlyNewAxioms(true); + l.init(); + l.start(); + + List<EvaluatedAxiom> axioms = l.getCurrentlyBestEvaluatedAxioms(10, 0.3); + System.out.println(axioms); + for(EvaluatedAxiom axiom : axioms){ + printSubset(l.getPositiveExamples(axiom), 10); + printSubset(l.getNegativeExamples(axiom), 10); + l.explainScore(axiom); + } + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 
13:13:39 UTC (rev 3833) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 13:19:03 UTC (rev 3834) @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -37,6 +38,7 @@ import org.dllearner.core.owl.Axiom; import org.dllearner.core.owl.ClassHierarchy; import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.NamedClass; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; @@ -116,6 +118,11 @@ protected ParameterizedSparqlString iterativeQueryTemplate; + protected Model workingModel; + protected ParameterizedSparqlString posExamplesQueryTemplate; + protected ParameterizedSparqlString negExamplesQueryTemplate; + + public AbstractAxiomLearningAlgorithm() { existingAxioms = new TreeSet<Axiom>(new AxiomComparator()); } @@ -255,7 +262,7 @@ } - protected Model executeConstructQuery(String query) { + protected Model executeConstructQuery(String query) {System.out.println(query); logger.debug("Sending query\n{} ...", query); if(ks.isRemote()){ SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint(); @@ -266,7 +273,12 @@ queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs()); try { Model model = queryExecution.execConstruct(); + fetchedRows += model.size(); timeout = false; + if(model.size() == 0){ + fullDataLoaded = true; + } + return model; } catch (QueryExceptionHTTP e) { if(e.getCause() instanceof SocketTimeoutException){ @@ -277,8 +289,13 @@ return ModelFactory.createDefaultModel(); } } else { - QueryExecution qexec = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel()); - return qexec.execConstruct(); + QueryExecution queryExecution = QueryExecutionFactory.create(query, 
((LocalModelBasedSparqlEndpointKS)ks).getModel()); + Model model = queryExecution.execConstruct(); + fetchedRows += model.size(); + if(model.size() == 0){ + fullDataLoaded = true; + } + return model; } } @@ -456,6 +473,67 @@ filterNamespaces.add(namespace); } + protected SortedSet<Individual> getPositiveExamples(EvaluatedAxiom axiom){ + if(workingModel != null){ + SortedSet<Individual> posExamples = new TreeSet<Individual>(); + + ResultSet rs = executeSelectQuery(posExamplesQueryTemplate.toString(), workingModel); + while(rs.hasNext()){ + posExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + + return posExamples; + } else { + throw new UnsupportedOperationException("Getting positive examples is not possible."); + } + } + + protected SortedSet<Individual> getNegativeExamples(EvaluatedAxiom axiom){ + if(workingModel != null){ + SortedSet<Individual> negExamples = new TreeSet<Individual>(); + + ResultSet rs = executeSelectQuery(negExamplesQueryTemplate.toString(), workingModel); + while(rs.hasNext()){ + negExamples.add(new Individual(rs.next().get("s").asResource().getURI())); + } + + return negExamples; + } else { + throw new UnsupportedOperationException("Getting negative examples is not possible."); + } + } + + protected void explainScore(EvaluatedAxiom evAxiom){ + int posExampleCnt = getPositiveExamples(evAxiom).size(); + int negExampleCnt = getNegativeExamples(evAxiom).size(); + int total = posExampleCnt + negExampleCnt; + StringBuilder sb = new StringBuilder(); + String lb = "\n"; + sb.append("######################################").append(lb); + sb.append("Explanation:").append(lb); + sb.append("Score(").append(evAxiom.getAxiom()).append(") = ").append(evAxiom.getScore().getAccuracy()).append(lb); + sb.append("Total number of resources:\t").append(total).append(lb); + sb.append("Number of positive examples:\t").append(posExampleCnt).append(lb); + sb.append("Number of negative examples:\t").append(negExampleCnt).append(lb); + 
sb.append("Complete data processed:\t").append(fullDataLoaded).append(lb); + sb.append("######################################"); + System.out.println(sb.toString()); + } + + protected static <E> void printSubset(Collection<E> collection, int maxSize){ + StringBuffer sb = new StringBuffer(); + int i = 0; + Iterator<E> iter = collection.iterator(); + while(iter.hasNext() && i < maxSize){ + sb.append(iter.next().toString()).append(", "); + i++; + } + if(iter.hasNext()){ + sb.append("...(").append(collection.size()-i).append(" more)"); + } + System.out.println(sb.toString()); + } + protected <K,T extends Set<V>, V> void addToMap(Map<K, T> map, K key, V value ){ T values = map.get(key); if(values == null){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-23 13:13:51
|
Revision: 3833 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3833&view=rev Author: lorenz_b Date: 2012-08-23 13:13:39 +0000 (Thu, 23 Aug 2012) Log Message: ----------- Set lexicon in learning algorithm for Oxford knowledgebase. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 12:55:25 UTC (rev 3832) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 13:13:39 UTC (rev 3833) @@ -864,6 +864,8 @@ learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); + learner.setUseDomainRangeRestriction(false); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-23 12:55:31
|
Revision: 3832 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3832&view=rev Author: kirdie Date: 2012-08-23 12:55:25 +0000 (Thu, 23 Aug 2012) Log Message: ----------- added querytestdata class. Added Paths: ----------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-08-23 12:55:25 UTC (rev 3832) @@ -0,0 +1,209 @@ +/** Helper Class for SPARQLTemplateBasedLearner3Test that encapsulates questions, their learned SPARQL queries and the answers of those SPARQL queries. + * Also provides methods for serialization and import/export in the QALD benchmark XML format.**/ +package org.dllearner.algorithm.tbsl.learning; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.PrintWriter; +import java.io.Serializable; +import java.io.StringWriter; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentSkipListMap; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test.LearnStatus; +import 
org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.w3c.dom.DOMException; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +class QueryTestData implements Serializable +{ + public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); + public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); + private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; + + private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); + + /** Saves the test data to a binary file to a default location overwriting the last save. Uses serialization. **/ + public synchronized void save() + { + try + { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(persistancePath))); + oos.writeObject(this); + oos.close(); + } catch(IOException e) {throw new RuntimeException(e);} + } + + /** Loads the test data written by save(). 
**/ + public static QueryTestData load() throws FileNotFoundException, IOException + { + try + { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(persistancePath))); + QueryTestData testData = (QueryTestData) ois.readObject(); + ois.close(); + return testData; + } + catch (ClassNotFoundException e){throw new RuntimeException(e);} + } + + public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) + { + if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} + for(int i:id2Query.keySet()) + { + Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache); + id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) + if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} + else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} + } + return this; + } + + /** reads test data from a QALD2 benchmark XML file, including questions, queries and answers. + * each question needs to have a query but not necessarily an answer. + * @param file a QALD benchmark XML file + * @return the test data read from the XML file */ + public static QueryTestData readQaldXml(final File file) + { + QueryTestData testData = new QueryTestData(); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(file); + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + int id; + + for(int i = 0; i < questionNodes.getLength(); i++) + { + if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove later? 
+ String question; + String query; + Set<String> answers = new HashSet<String>(); + Element questionNode = (Element) questionNodes.item(i); + //read question ID + id = Integer.valueOf(questionNode.getAttribute("id")); + //Read question + question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + //Read SPARQL query + query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); + // //Read answers + // answers = new HashSet<String>(); + // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); + // for(int j = 0; j < aswersNodes.getLength(); j++){ + // Element answerNode = (Element) aswersNodes.item(j); + // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); + // } + + if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) + { + testData.id2Question.put(id, question); + testData.id2Query.put(id, query); + Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); + if(answersElement!=null) + { + NodeList answerElements = answersElement.getElementsByTagName("answer"); + for(int j=0; j<answerElements.getLength();j++) + { + String answer = ((Element)answerElements.item(j)).getTextContent(); + answers.add(answer); + } + testData.id2Answers.put(id, answers); + } + } + // question2Answers.put(question, answers); + + } + } catch (DOMException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return testData; + } + + /** write the test data to a QALD2 benchmark XML file, including questions, queries and answers. + * each question needs to have a query but not necessarily an answer. 
+ * @param file a QALD benchmark XML file **/ + public void writeQaldXml(final File file) + { + // see http://www.genedavis.com/library/xml/java_dom_xml_creation.jsp + try + { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.newDocument(); + Element root = doc.createElement("dataset"); + doc.appendChild(root); + + for(Integer i:id2Question.keySet()) + { + Element questionElement = doc.createElement("question"); + questionElement.setAttribute("id", i.toString()); + questionElement.setAttribute("answertype", "resource"); + root.appendChild(questionElement); + Element stringElement = doc.createElement("string"); + stringElement.setTextContent(id2Question.get(i)); + questionElement.appendChild(stringElement); + String query = id2Query.get(i); + if(query!=null) + { + Element queryElement = doc.createElement("query"); +// queryElement.setTextContent(query); + queryElement.appendChild(doc.createCDATASection(query)); + questionElement.appendChild(queryElement); + } + Collection<String> answers = id2Answers.get(i); + if(answers!=null) + { + for(String answer: answers) + { + Element answerElement = doc.createElement("answer"); + answerElement.setTextContent(answer); + questionElement.appendChild(answerElement); + } + } + } + //set up a transformer + TransformerFactory transfac = TransformerFactory.newInstance(); + Transformer trans = transfac.newTransformer(); + trans.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + trans.setOutputProperty(OutputKeys.INDENT, "yes"); + + + //create string from xml tree + PrintWriter sw = new PrintWriter(file); + StreamResult result = new StreamResult(sw); + DOMSource source = new DOMSource(doc); + trans.transform(source, result); + } + catch (Exception e) {throw new RuntimeException(e);} + } + +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development 
site. |
From: <ki...@us...> - 2012-08-23 12:50:07
|
Revision: 3831 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3831&view=rev Author: kirdie Date: 2012-08-23 12:49:55 +0000 (Thu, 23 Aug 2012) Log Message: ----------- more oxford test code. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -460,7 +460,7 @@ T values = map.get(key); if(values == null){ try { - values = (T) values.getClass().newInstance(); + values = (T) value.getClass().newInstance(); } catch (InstantiationException e) { e.printStackTrace(); } catch (IllegalAccessException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -93,119 +93,119 @@ import com.jamonapi.MonitorFactory; public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - + enum Mode{ BEST_QUERY, BEST_NON_EMPTY_QUERY } - + private Mode mode = 
Mode.BEST_QUERY; - + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); - + private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; private int maxTestedQueriesPerTemplate = 50; private int maxQueryExecutionTimeInSeconds; private int maxTestedQueries = 200; private int maxIndexResults; - - private SparqlEndpoint endpoint; - private Model model; - + + private SparqlEndpoint endpoint = null; + private Model model = null; + private ExtractionDBCache cache = new ExtractionDBCache("cache"); - + private Index resourcesIndex; private Index classesIndex; private Index propertiesIndex; - + private Index datatypePropertiesIndex; private Index objectPropertiesIndex; - + private MappingBasedIndex mappingIndex; - - private Templator templateGenerator; + + private Templator templateGenerator = null; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; private WordNet wordNet; - + private String question; private int learnedPos = -1; - + private Set<Template> templates; private Map<Template, Collection<? 
extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; - + private Collection<WeightedQuery> sparqlQueryCandidates; private SortedSet<WeightedQuery> learnedSPARQLQueries; private SortedSet<WeightedQuery> generatedQueries; - + private SPARQLReasoner reasoner; - + private String currentlyExecutedQuery; - + private boolean dropZeroScoredQueries = true; private boolean useManualMappingsIfExistOnly = true; - + private boolean multiThreaded = true; - + private String [] grammarFiles = new String[]{"tbsl/lexicon/english.lex"}; - + private PopularityMap popularityMap; - + private Set<String> relevantKeywords; - + private boolean useDomainRangeRestriction = true; - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); } - + public SPARQLTemplateBasedLearner2(Knowledgebase knowledgebase){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index){ this(endpoint, index, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger){ 
this(endpoint, index, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, WordNet wordNet){ this(endpoint, index, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet){ this(endpoint, index, index, index, posTagger, wordNet, new Options(), new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index index, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(endpoint, index, index, index, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.endpoint = endpoint; this.resourcesIndex = resourcesIndex; @@ -214,9 +214,9 @@ this.posTagger = posTagger; this.wordNet = wordNet; this.cache = cache; - + 
setOptions(options); - + if(propertiesIndex instanceof SPARQLPropertiesIndex){ if(propertiesIndex instanceof VirtuosoPropertiesIndex){ datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); @@ -231,29 +231,29 @@ } reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger){ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, new WordNet(), new Options()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, WordNet wordNet){ this(model, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger(), wordNet, new Options()); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } - + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) { this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); setMappingIndex(mappingBasedIndex); } - + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.model = model; this.resourcesIndex = resourcesIndex; @@ -262,9 +262,9 @@ this.posTagger = posTagger; this.wordNet = wordNet; this.cache = cache; - + setOptions(options); - + 
if(propertiesIndex instanceof SPARQLPropertiesIndex){ if(propertiesIndex instanceof VirtuosoPropertiesIndex){ datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); @@ -279,25 +279,27 @@ } reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); } - - public void setGrammarFiles(String[] grammarFiles){ + + public void setGrammarFiles(String[] grammarFiles) + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} templateGenerator.setGrammarFiles(grammarFiles); } - + @Override public void init() throws ComponentInitException { - templateGenerator = new Templator(posTagger, wordNet, grammarFiles); - lemmatizer = new LingPipeLemmatizer(); + templateGenerator = new Templator(posTagger, wordNet, grammarFiles); + lemmatizer = new LingPipeLemmatizer(); } - + public void setMappingIndex(MappingBasedIndex mappingIndex) { this.mappingIndex = mappingIndex; } - + public void setCache(ExtractionDBCache cache) { this.cache = cache; } - + public void setKnowledgebase(Knowledgebase knowledgebase){ this.endpoint = knowledgebase.getEndpoint(); this.resourcesIndex = knowledgebase.getResourceIndex(); @@ -318,28 +320,28 @@ } reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); } - + public void setUseDomainRangeRestriction(boolean useDomainRangeRestriction) { this.useDomainRangeRestriction = useDomainRangeRestriction; } - + /* * Only for Evaluation useful. 
*/ public void setUseIdealTagger(boolean value){ templateGenerator.setUNTAGGED_INPUT(!value); } - + private void setOptions(Options options){ maxIndexResults = Integer.parseInt(options.get("solr.query.limit", "10")); - + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); - + String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); System.setProperty("wordnet.database.dir", wordnetPath); @@ -347,20 +349,20 @@ public void setEndpoint(SparqlEndpoint endpoint){ this.endpoint = endpoint; - + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); reasoner.setCache(cache); reasoner.prepareSubsumptionHierarchy(); } - + public void setQuestion(String question){ this.question = question; } - + public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ this.useRemoteEndpointValidation = useRemoteEndpointValidation; } - + public int getMaxQueryExecutionTimeInSeconds() { return maxQueryExecutionTimeInSeconds; } @@ -383,11 +385,11 @@ slot2URI = new HashMap<Slot, List<String>>(); relevantKeywords = new HashSet<String>(); currentlyExecutedQuery = null; - -// templateMon.reset(); -// sparqlMon.reset(); + + // templateMon.reset(); + // sparqlMon.reset(); } - + public void learnSPARQLQueries() throws NoTemplateFoundException{ reset(); //generate SPARQL query templates @@ -403,13 +405,13 @@ relevantKeywords.addAll(templateGenerator.getUnknownWords()); if(templates.isEmpty()){ throw new 
NoTemplateFoundException(); - + } logger.debug("Templates:"); for(Template t : templates){ logger.debug(t); } - + //get the weighted query candidates generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<WeightedQuery>(); @@ -422,7 +424,7 @@ } i++; } - + if(mode == Mode.BEST_QUERY){ double bestScore = -1; for(WeightedQuery candidate : generatedQueries){ @@ -439,15 +441,15 @@ if(useRemoteEndpointValidation){ //on remote endpoint validateAgainstRemoteEndpoint(sparqlQueryCandidates); } else {//on local model - + } } } - + public SortedSet<WeightedQuery> getGeneratedQueries() { return generatedQueries; } - + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); int max = Math.min(topN, generatedQueries.size()); @@ -459,28 +461,28 @@ } return topNQueries; } - + public Set<Template> getTemplates(){ return templates; } - + public List<String> getGeneratedSPARQLQueries(){ List<String> queries = new ArrayList<String>(); for(WeightedQuery wQ : sparqlQueryCandidates){ queries.add(wQ.getQuery().toString()); } - + return queries; } - + public Map<Template, Collection<? 
extends Query>> getTemplates2SPARQLQueries(){ return template2Queries; } - + public Map<Slot, List<String>> getSlot2URIs(){ return slot2URI; } - + private void normProminenceValues(Set<Allocation> allocations){ double min = 0; double max = 0; @@ -497,25 +499,25 @@ a.setProminence(prominence); } } - + private void computeScore(Set<Allocation> allocations){ double alpha = 0.8; double beta = 1 - alpha; - + for(Allocation a : allocations){ double score = alpha * a.getSimilarity() + beta * a.getProminence(); a.setScore(score); } - + } - + public Set<String> getRelevantKeywords(){ return relevantKeywords; } - + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.debug("Generating SPARQL query candidates..."); - + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @Override @@ -528,23 +530,22 @@ } }); slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - - + + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - + Set<Allocation> allocations; - + for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - System.err.println(QueryFactory.create(t.getQuery().toString(), Syntax.syntaxSPARQL_11)); + logger.info("Processing template:\n" + t.toString()); allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); - + long startTime = System.currentTimeMillis(); - + for (Slot slot : t.getSlots()) { if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); @@ -552,7 +553,7 @@ list.add(submit); } } - + for 
(Future<Map<Slot, SortedSet<Allocation>>> future : list) { try { Map<Slot, SortedSet<Allocation>> result = future.get(); @@ -564,10 +565,10 @@ e.printStackTrace(); } } - + executor.shutdown(); - - + + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ @@ -575,7 +576,7 @@ slot2Allocations2.put(slot, allocations); } slot2Allocations.put(slot, allocations); - + //for tests add the property URI with http://dbpedia.org/property/ namespace //TODO should be replaced by usage of a separate SOLR index Set<Allocation> tmp = new HashSet<Allocation>(); @@ -590,11 +591,11 @@ allocations.addAll(tmp); }*/ logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); - + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); Query cleanQuery = t.getQuery(); queries.add(new WeightedQuery(cleanQuery)); - + Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); List<Slot> sortedSlots = new ArrayList<Slot>(); Set<Slot> classSlots = new HashSet<Slot>(); @@ -628,125 +629,125 @@ queries.addAll(tmp); tmp.clear(); } - + for(Slot slot : sortedSlots){ if(!slot2Allocations.get(slot).isEmpty()){ for(Allocation a : slot2Allocations.get(slot)){ for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> 
superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); -// System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); + Query q = new Query(query.getQuery()); + + boolean drop = false; + if(useDomainRangeRestriction){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + String subjectVar = triple.getVariable().getName(); + // System.out.println(triple); + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ + // System.out.println(typeTriple); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); + if(range instanceof NamedClass){ + 
superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); } else { - for(Description nc : domain.getChildren()){ + for(Description nc : range.getChildren()){ superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); + allRanges.addAll(superClasses); } } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); Description type = new NamedClass(typeURI); superClasses = reasoner.getSuperClasses(type); allTypes.addAll(superClasses); allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - + + if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ + drop = true; + } + } + } else { + drop = true; + } + + } + for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); + // System.out.println(a); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(domain instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); + } else { + for(Description nc : domain.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allDomains.addAll(superClasses); } } + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); + String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); + + 
if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ + drop = true; + } else { + + } } } } } - - if(!drop){ - if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } + } + + if(!drop){ + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); } else { q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } - } else { - 
q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } } - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); } - - + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + w.addAllocations(query.getAllocations()); + w.addAllocation(a); + tmp.add(w); + } + + } } //lower queries with FILTER-REGEX @@ -755,7 +756,7 @@ wQ.setScore(wQ.getScore() - 0.01); } } - + queries.clear(); queries.addAll(tmp);//System.out.println(tmp); tmp.clear(); @@ -776,9 +777,9 @@ } } } - + } - + } else { if(slot.getSlotType() == SlotType.SYMPROPERTY){ for(WeightedQuery wQ : queries){ @@ -800,50 +801,50 @@ List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); for(SPARQL_Triple typeTriple : typeTriples){ String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); -// List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); -// for(Entry<String, Integer> property : mostFrequentProperties){ -// wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); -// wQ.setScore(wQ.getScore() + 0.1); -// } + // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); + // for(Entry<String, Integer> property : mostFrequentProperties){ + // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); + // wQ.setScore(wQ.getScore() + 0.1); + // } } - + } } } } -// else if(slot.getSlotType() == SlotType.CLASS){ -// String token = slot.getWords().get(0); -// if(slot.getToken().contains("house")){ -// String regexToken = 
token.replace("houses", "").replace("house", "").trim(); -// try { -// Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); -// SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); -// if(alloc != null && !alloc.isEmpty()){ -// String uri = alloc.first().getUri(); -// for(WeightedQuery query : queries){ -// Query q = query.getQuery(); -// for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ -// SPARQL_Term subject = triple.getVariable(); -// SPARQL_Term object = new SPARQL_Term("desc"); -// object.setIsVariable(true); -// object.setIsURI(false); -// q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); -// q.addFilter(new SPARQL_Filter(new SPARQL_Pair( -// object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); -// } -// q.replaceVarWithURI(slot.getAnchor(), uri); -// -// } -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// } -// } - - + // else if(slot.getSlotType() == SlotType.CLASS){ + // String token = slot.getWords().get(0); + // if(slot.getToken().contains("house")){ + // String regexToken = token.replace("houses", "").replace("house", "").trim(); + // try { + // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + // if(alloc != null && !alloc.isEmpty()){ + // String uri = alloc.first().getUri(); + // for(WeightedQuery query : queries){ + // Query q = query.getQuery(); + // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + // SPARQL_Term subject = triple.getVariable(); + // SPARQL_Term object = new SPARQL_Term("desc"); + // object.setIsVariable(true); + // object.setIsURI(false); + // q.addCondition(new SPARQL_Triple(subject, new 
SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + // } + // q.replaceVarWithURI(slot.getAnchor(), uri); + // + // } + // } + // } catch (Exception e) { + // e.printStackTrace(); + // } + // } + // } + + } - + } for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); @@ -852,9 +853,10 @@ iterator.remove(); } } else { + if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); wQ.setScore(wQ.getScore()/t.getSlots().size()); } - + } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); @@ -866,7 +868,7 @@ logger.debug("...done in "); return allQueries; } - + private double getProminenceValue(String uri, SlotType type){ Integer popularity = null; if(popularityMap != null){ @@ -890,7 +892,7 @@ query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; } query = String.format(query, uri); - + ResultSet rs = executeSelect(query); QuerySolution qs; String projectionVar; @@ -903,20 +905,20 @@ if(popularity == null){ popularity = Integer.valueOf(0); } - - -// if(cnt == 0){ -// return 0; -// } -// return Math.log(cnt); + + + // if(cnt == 0){ + // return 0; + // } + // return Math.log(cnt); return popularity; } - + public void setPopularityMap(PopularityMap popularityMap) { this.popularityMap = popularityMap; } - - + + private List<String> pruneList(List<String> words){ List<String> prunedList = new ArrayList<String>(); for(String w1 : words){ @@ -934,13 +936,13 @@ } } logger.info("Pruned list: " + prunedList); -// return getLemmatizedWords(words); + // return getLemmatizedWords(words); return prunedList; } - + private List<String> getLemmatizedWords(List<String> words){ logger.info("Pruning word list " + words + "..."); -// mon.start(); + // mon.start(); List<String> pruned = new ArrayList<String>(); for(String word : words){ //currently only stem 
single words @@ -952,15 +954,15 @@ pruned.add(lemWord); } } - + } -// mon.stop(); -// logger.info("Done in " + mon.getLastValue() + "ms."); + // mon.stop(); + // logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Pruned list: " + pruned); return pruned; } - - + + private Index getIndexBySlotType(Slot slot){ Index index = null; SlotType type = slot.getSlotType(); @@ -977,12 +979,12 @@ } return index; } - + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); validate(queries, queryType); } - + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ logger.debug("Testing candidate SPARQL queries on remote endpoint..."); sparqlMon.start(); @@ -995,7 +997,7 @@ com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); ResultSet rs = executeSelect(q.toString()); - + results = new ArrayList<String>(); QuerySolution qs; String projectionVar; @@ -1007,7 +1009,7 @@ } else if(qs.get(projectionVar).isURIResource()){ results.add(qs.get(projectionVar).asResource().getURI()); } - + } if(!results.isEmpty()){ try{ @@ -1029,7 +1031,7 @@ } catch (Exception e) { e.printStackTrace(); } - + } } else if(queryType == SPARQL_QueryType.ASK){ for(WeightedQuery query : queries){ @@ -1037,31 +1039,39 @@ logger.debug("Testing query:\n" + query); boolean result = executeAskQuery(query.getQuery().toString()); learnedSPARQLQueries.add(query); -// if(stopIfQueryResultNotEmpty && result){ -// return; -// } + // if(stopIfQueryResultNotEmpty && result){ + // return; + // } if(stopIfQueryResultNotEmpty){ return; } logger.debug("Result: " + result); } } - + sparqlMon.stop(); logger.debug("Done in " + sparqlMon.getLastValue() + "ms."); } - - private boolean executeAskQuery(String query){ - currentlyExecutedQuery = query; - QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); - 
for(String uri : endpoint.getDefaultGraphURIs()){ - qe.addDefaultGraph(uri); + + private boolean executeAskQuery(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); + currentlyExecutedQuery = query; + + boolean ret; + if (model == null) + { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + ret = qe.execAsk(); } - boolean ret = qe.execAsk(); + else {ret = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execAsk();} return ret; } - - private ResultSet executeSelect(String query) { + + private ResultSet executeSelect(String query) + { + if(query==null) throw new NullPointerException("Parameter query == null"); currentlyExecutedQuery = query; ResultSet rs; if (model == null) { @@ -1076,14 +1086,14 @@ rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model) .execSelect(); } - + return rs; } - + public String getCurrentlyExecutedQuery() { return currentlyExecutedQuery; } - + public int getLearnedPosition() { if(learnedPos >= 0){ return learnedPos+1; @@ -1112,7 +1122,7 @@ return null; } } - + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { return learnedSPARQLQueries; } @@ -1126,13 +1136,13 @@ @Override public void setLearningProblem(LearningProblem learningProblem) { // TODO Auto-generated method stub - + } - + class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ - + private Slot slot; - + public SlotProcessor(Slot slot) { this.slot = slot; } @@ -1143,13 +1153,13 @@ result.put(slot, computeAllocations(slot)); return result; } - + private SortedSet<Allocation> computeAllocations(Slot slot){ logger.debug("Computing allocations for slot: " + slot); SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - + Index index = getIndexBySlotType(slot); - + IndexResultSet rs; for(String word : slot.getWords()){ rs = new IndexResultSet(); @@ 
-1178,33 +1188,33 @@ rs.add(index.getResourcesWithScores(word, 20)); } } - - + + for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); -// //get the labels of the redirects and compute the highest similarity -// if(slot.getSlotType() == SlotType.RESOURCE){ -// Set<String> labels = getRedirectLabels(item.getUri()); -// for(String label : labels){ -// double tmp = Similarity.getSimilarity(word, label); -// if(tmp > similarity){ -// similarity = tmp; -// } -// } -// } + // //get the labels of the redirects and compute the highest similarity + // if(slot.getSlotType() == SlotType.RESOURCE){ + // Set<String> labels = getRedirectLabels(item.getUri()); + // for(String label : labels){ + // double tmp = Similarity.getSimilarity(word, label); + // if(tmp > similarity){ + // similarity = tmp; + // } + // } + // } double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } - + } - + normProminenceValues(allocations); - + computeScore(allocations); logger.debug("Found " + allocations.size() + " allocations for slot " + slot); return new TreeSet<Allocation>(allocations); } - + private Index getIndexBySlotType(Slot slot){ Index index = null; SlotType type = slot.getSlotType(); @@ -1221,13 +1231,15 @@ } return index; } - + } - - public String getTaggedInput(){ + + public String getTaggedInput() + { + if(templateGenerator==null) {throw new AssertionError("Learner not initialized. 
Please call init();");} return templateGenerator.getTaggedInput(); } - + private boolean isDatatypeProperty(String uri){ Boolean isDatatypeProperty = null; if(mappingIndex != null){ @@ -1239,7 +1251,7 @@ } return isDatatypeProperty; } - + /** * @param args * @throws NoTemplateFoundException @@ -1253,20 +1265,20 @@ Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); - + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - + String question = "What is the highest mountain?"; - + learner.setQuestion(question); learner.learnSPARQLQueries(); System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); System.out.println(learner.getLearnedPosition()); - + } - + } Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -127,7 +127,7 @@ return irs; } - private ResultSet executeSelect(String query){System.out.println(query); + private ResultSet executeSelect(String query){ ResultSet rs; if(model == null){ if(cache == null){ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- 
trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 08:40:43 UTC (rev 3830) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 12:49:55 UTC (rev 3831) @@ -4,10 +4,8 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; @@ -15,7 +13,6 @@ import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; -import java.net.URL; import java.net.URLDecoder; import java.text.DateFormat; import java.util.Collection; @@ -61,17 +58,13 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLClassesIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.VirtuosoClassesIndex; -import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.ini4j.Options; -import org.junit.*; -import org.w3c.dom.DOMException; +import org.junit.Before; +import org.junit.Test; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -111,16 +104,16 @@ {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} - @Test public void testOxford() throws IOException + @Test public void generateXMLOxford() throws IOException 
{ Model m = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; - for(String line=in.readLine();line!=null;) + for(String line;(line=in.readLine())!=null;) { j++; - if(j>5) break; + if(j>1) break; if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} } in.close(); @@ -133,7 +126,8 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + testData.writeQaldXml(new File("log/test.xml")); } private Model loadOxfordModel() @@ -250,7 +244,7 @@ generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); } - QueryTestData referenceTestData = readQueries(updatedReferenceXML); + QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); @@ -463,7 +457,7 @@ } } - private static class LearnStatus implements Serializable + public static class LearnStatus implements Serializable { public enum Type {OK, TIMEOUT, NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY,NO_QUERY_LEARNED,EXCEPTION} @@ -693,14 +687,13 @@ // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; - private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); +// private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); 
private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; - private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} @@ -732,7 +725,7 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); @@ -741,131 +734,7 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - private static class QueryTestData implements Serializable - { - public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); - public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); - public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); - public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); - - private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); - - public synchronized void write() - { - try - { - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(persistancePath))); - oos.writeObject(this); - 
oos.close(); - } catch(IOException e) {throw new RuntimeException(e);} - } - - public static QueryTestData read() throws FileNotFoundException, IOException - { - try - { - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(persistancePath))); - QueryTestData testData = (QueryTestData) ois.readObject(); - ois.close(); - return testData; - } - catch (ClassNotFoundException e){throw new RuntimeException(e);} - } - - public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) - { - if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} - for(int i:id2Query.keySet()) - { - Set<String> uris = getUris(endpoint, id2Query.get(i),cache); - id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) - if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} - else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} - } - return this; - } - - - } - - private QueryTestData readQueries(final File file) - { - QueryTestData testData = new QueryTestData(); - try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(file); - doc.getDocumentElement().normalize(); - NodeList questionNodes = doc.getElementsByTagName("question"); - int id; - - for(int i = 0; i < questionNodes.getLength(); i++) - { - if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove - String question; - String query; - Set<String> answers = new HashSet<String>(); - Element questionNode = (Element) questionNodes.item(i); - //read question ID - id = Integer.valueOf(questionNode.getAttribute("id")); - //Read question - question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); - //Read SPARQL query - query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); - // //Read answers 
- // answers = new HashSet<String>(); - // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); - // for(int j = 0; j < aswersNodes.getLength(); j++){ - // Element answerNode = (Element) aswersNodes.item(j); - // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); - // } - - if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) - { - testData.id2Question.put(id, question); - testData.id2Query.put(id, query); - Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); - if(answersElement!=null) - { - NodeList answerElements = answersElement.getElementsByTagName("answer"); - for(int j=0; j<answerElements.getLength();j++) - { - String answer = ((Element)answerElements.item(j)).getTextContent(); - answers.add(answer); - } - testData.id2Answers.put(id, answers); - } - } - // question2Answers.put(question, answers); - - } - } catch (DOMException e) { - e.printStackTrace(); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - } catch (SAXException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - // StringBuilder sb = new StringBuilder(); - // for(Entry<Integer, String> e : id2Question.entrySet()){ - // sb.append(e.getKey()+ ": " + extractSentence(e.getValue()) + "\n"); - // } - // try { - // BufferedWriter out = new BufferedWriter(new FileWriter("questions.txt")); - // out.write(sb.toString()); - // out.close(); - // } - // catch (IOException e) - // { - // System.out.println("Exception "); - // - // } - return testData; - } - - protected static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) + public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) { if(query==null) {throw new AssertionError("query is null");} if(endpoint==null) {throw new 
AssertionError("endpoint is null");} @@ -969,8 +838,7 @@ {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} static private final WordNet wordnet = new WordNet(); - static private final Options options = new Options(); - private final boolean pretagged; + static private final Options options = new Options(); private final SPARQLTemplateBasedLearner2 learner; public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase,boolean pretagged) @@ -978,8 +846,8 @@ this.question=question; this.id=id; this.testData=testData; - this.pretagged=pretagged; learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); + try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) @@ -987,20 +855,15 @@ this.question=question; this.id=id; this.testData=testData; - this.pretagged=pretagged; MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); - try { - learner.init(); - } catch (ComponentInitException e) { - e.printStackTrace(); - } + try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |
From: <lor...@us...> - 2012-08-23 05:27:59
|
Revision: 3829 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3829&view=rev Author: lorenz_b Date: 2012-08-23 05:27:53 +0000 (Thu, 23 Aug 2012) Log Message: ----------- Added method to configure cache time. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-08-22 13:41:04 UTC (rev 3828) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/ExtractionDBCache.java 2012-08-23 05:27:53 UTC (rev 3829) @@ -110,6 +110,10 @@ } } + public void setFreshnessInMilliseconds(long freshnessInMilliseconds) { + this.freshnessInMilliseconds = freshnessInMilliseconds; + } + public Model executeConstructQuery(SparqlEndpoint endpoint, String query) throws SQLException, UnsupportedEncodingException { return executeConstructQuery(endpoint, query, maxExecutionTimeInSeconds); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-22 13:41:14
|
Revision: 3828 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3828&view=rev Author: lorenz_b Date: 2012-08-22 13:41:04 +0000 (Wed, 22 Aug 2012) Log Message: ----------- Added missing init method in UNIT test. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-22 13:40:11 UTC (rev 3827) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-22 13:41:04 UTC (rev 3828) @@ -178,10 +178,12 @@ { // see http://jena.apache.org/documentation/javadoc/jena/com/hp/hpl/jena/rdf/model/Model.html#read%28java.io.InputStream,%20java.lang.String,%20java.lang.String%29 String ending = s.substring(s.lastIndexOf('.')+1, s.length()); - String type = (ending.equals("ttl")||ending.equals("nt"))?"N3":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); + String type = (ending.equals("ttl")||ending.equals("nt"))?"TURTLE":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( - try{m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} - catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} + try{ +// m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} + 
m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } // test("Oxford evaluation questions", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), // SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache); @@ -994,6 +996,11 @@ ); learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); + try { + learner.init(); + } catch (ComponentInitException e) { + e.printStackTrace(); + } } @@ -1024,7 +1031,7 @@ } catch(Exception e) { - logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + logger.error(String.format("Exception for question %d \"%s\": %s",id, question,e.getLocalizedMessage())); e.printStackTrace(); return LearnStatus.exceptionStatus(e); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-22 13:40:22
|
Revision: 3827 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3827&view=rev Author: lorenz_b Date: 2012-08-22 13:40:11 +0000 (Wed, 22 Aug 2012) Log Message: ----------- Added missing init method in UNIT test. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-22 13:15:47 UTC (rev 3826) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-22 13:40:11 UTC (rev 3827) @@ -47,7 +47,6 @@ import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper; import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; @@ -68,6 +67,7 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.ObjectProperty; import org.dllearner.core.owl.Thing; +import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -76,14 +76,19 @@ import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; +import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; 
+import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.vocabulary.RDFS; +import com.hp.hpl.jena.sparql.expr.ExprAggregator; +import com.hp.hpl.jena.sparql.expr.ExprVar; +import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; +import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -272,6 +277,7 @@ datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; } + reasoner = new SPARQLReasoner(new LocalModelBasedSparqlEndpointKS(ModelFactory.createOntologyModel(OntModelSpec.RDFS_MEM, model)), cache); } public void setGrammarFiles(String[] grammarFiles){ @@ -529,7 +535,8 @@ Set<Allocation> allocations; for(Template t : templates){ - logger.debug("Processing template:\n" + t.toString()); + logger.info("Processing template:\n" + t.toString()); + System.err.println(QueryFactory.create(t.getQuery().toString(), Syntax.syntaxSPARQL_11)); allocations = new TreeSet<Allocation>(); boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); @@ -859,19 +866,6 @@ logger.debug("...done in "); return allQueries; } - - private Set<String> getRedirectLabels(String uri){ - Set<String> labels = new HashSet<String>(); - String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. ?s <%s> ?label.}", uri, RDFS.label.getURI()); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - labels.add(qs.getLiteral("label").getLexicalForm()); - - } - return labels; - } private double getProminenceValue(String uri, SlotType type){ Integer popularity = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-22 13:15:57
|
Revision: 3826 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3826&view=rev Author: kirdie Date: 2012-08-22 13:15:47 +0000 (Wed, 22 Aug 2012) Log Message: ----------- added oxford test but there are still errors thrown by the learner (lorenz please look at this). Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-13 06:15:24 UTC (rev 3825) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-22 13:15:47 UTC (rev 3826) @@ -22,7 +22,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; @@ -56,6 +55,7 @@ import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; @@ -243,6 +243,12 @@ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) + { + this(model, new SPARQLIndex(model),new 
SPARQLIndex(model),new SPARQLIndex(model),posTagger); + setMappingIndex(mappingBasedIndex); + } + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.model = model; this.resourcesIndex = resourcesIndex; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-13 06:15:24 UTC (rev 3825) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-22 13:15:47 UTC (rev 3826) @@ -1,17 +1,21 @@ package org.dllearner.algorithm.tbsl.learning; import static org.junit.Assert.fail; +import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URL; import java.net.URLDecoder; import java.text.DateFormat; import java.util.Collection; @@ -26,6 +30,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.Stack; +import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutionException; @@ -56,6 +61,10 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLClassesIndex; +import 
org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -70,6 +79,8 @@ import cern.colt.Arrays; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; @@ -91,30 +102,106 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test -{ - private static final boolean PRETAGGED = true; - +{ private static final File evaluationFolder = new File("cache/evaluation"); + private static final boolean DBPEDIA_PRETAGGED = true; + private static final boolean OXFORD_PRETAGGED = false; /*@Test*/ public void testDBpedia() throws Exception {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} - //@Test public void testOxford() {test(new File(""),"");} - @Test public void justTestTheLastWorkingOnesDBpedia() throws Exception + @Test public void testOxford() throws IOException + { + Model m = loadOxfordModel(); + List<String> questions = new LinkedList<String>(); + BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); + int j=0; + for(String line=in.readLine();line!=null;) + { + j++; + if(j>5) break; + if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} + } + in.close(); + SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); + 
Iterator<String> it = questions.iterator(); + for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} + MappingBasedIndex mappingIndex= new MappingBasedIndex( + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); + generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + } + + private Model loadOxfordModel() + { + // load it into a model because we can and it's faster and doesn't rely on endpoint availability + // the files are located in the paper svn under question-answering-iswc-2012/data + // ls *ttl | xargs -I @ echo \"@\", + final String[] rdf = { + "abbeys-sales-triple.ttl", + "andrewsonline-sales-triple.ttl", + "anker-sales-triple.ttl", + "bairstoweves-sales-triple.ttl", + "ballards-sales-triple.ttl", + "breckon-sales-triple.ttl", + "buckellandballard-sales-triple.ttl", + "carterjonas-sales.ttl", + "churchgribben-salse-triple.ttl", + "findaproperty-sales-triple.ttl", + "johnwood-sales-triple.ttl", + "martinco-letting-triples.ttl", + "scottfraser-letting-triples.ttl", + "scottfraser-sales-triples.ttl", + "scottsymonds-sales-triple.ttl", + "scrivenerandreinger-sales-triple.ttl", + "sequencehome-sales-triple.ttl", + "teampro-sales.ttl", + "thomasmerrifield-sales-triples.ttl", + "wwagency-letting-triple_with-XSD.ttl", + "wwagency-sales-triple_with-XSD.ttl", + // ls links/*ttl | xargs -I @ echo \"@\", + "links/allNear.ttl", + "links/all_walking_distance.ttl", + "links/lgd_data.ttl", + // ls schema/* | xargs -I @ echo \"@\", + "schema/goodRelations.owl", + "schema/LGD-Dump-110406-Ontology.nt", + "schema/ontology.ttl", + 
"schema/vCard.owl" + }; + Model m = ModelFactory.createDefaultModel(); + for(final String s:rdf) + { + // see http://jena.apache.org/documentation/javadoc/jena/com/hp/hpl/jena/rdf/model/Model.html#read%28java.io.InputStream,%20java.lang.String,%20java.lang.String%29 + String ending = s.substring(s.lastIndexOf('.')+1, s.length()); + String type = (ending.equals("ttl")||ending.equals("nt"))?"N3":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); + // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( + try{m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} + } + // test("Oxford evaluation questions", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + // SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache); + return m; + } + + /*@Test*/ public void justTestTheLastWorkingOnesDBpedia() throws Exception { SortedMap<Long,Evaluation> evaluations; - + if((evaluations=Evaluation.read()).isEmpty()) { testDBpedia(); evaluations=Evaluation.read(); } - + Evaluation latestEvaluation = evaluations.get(evaluations.lastKey()); for(String question: latestEvaluation.correctlyAnsweredQuestions) { - LearnStatus status = new LearnQueryCallable(question, 0,new QueryTestData() , dbpediaLiveKnowledgebase).call(); + LearnStatus status = new LearnQueryCallable(question, 0,new QueryTestData() , dbpediaLiveKnowledgebase,DBPEDIA_PRETAGGED).call(); if(status.type!=LearnStatus.Type.OK) {fail("Failed with question \""+question+"\", query status: "+status);} } } @@ -165,7 +252,7 @@ logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData 
learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); learnedTestData.generateAnswers(endpoint,cache); @@ -414,10 +501,16 @@ } // enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED;} - /** - * @return the test data containing those of the given questions for which queries were found and the results of the queries + /** + * @param id2Question + * @param kb either the kb or both the model and the index can be null. if the kb is null the model and index are used, else the kb is used. + * @param model can be null if the kb is not null + * @param index can be null if the kb is not null + * @return the test data containing those of the given questions for which queries were found and the results of the queries + * @throws MalformedURLException + * @throws ComponentInitException */ - private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb,Model model, MappingBasedIndex index,boolean pretagged) { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- @@ -435,7 +528,8 @@ for(int i: id2Question.keySet()) {//if(i != 78)continue; - futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb))); + if(kb!=null) {futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb,pretagged)));} + else {futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, 
testData,model,index,pretagged)));} } for(int i: id2Question.keySet()) {//if(i != 78)continue; @@ -688,6 +782,8 @@ } return this; } + + } private QueryTestData readQueries(final File file) @@ -840,58 +936,74 @@ // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); // } - // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) - // { - // URL url; - // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} - // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // + // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); + // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); // - // SPARQLIndex resourcesIndex = new 
VirtuosoResourcesIndex(endpoint, cache); - // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); - // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); - // MappingBasedIndex mappingIndex= new MappingBasedIndex( - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() - // ); - // - // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); - // return kb; - // } + // return kb; + // } private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; // private final String endpoint; private final int id; private final QueryTestData testData; - private final Knowledgebase knowledgeBase; - static private final PartOfSpeechTagger posTagger = PRETAGGED? 
null: new SynchronizedStanfordPartOfSpeechTagger(); + static private class POSTaggerHolder + {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} + static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); + private final boolean pretagged; + private final SPARQLTemplateBasedLearner2 learner; + public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase,boolean pretagged) + { + this.question=question; + this.id=id; + this.testData=testData; + this.pretagged=pretagged; + learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); + } - public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) + public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) { this.question=question; this.id=id; - this.knowledgeBase=knowledgeBase; this.testData=testData; + this.pretagged=pretagged; + MappingBasedIndex mappingIndex= new MappingBasedIndex( + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); + + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); } + @Override public LearnStatus call() { logger.trace("learning question: "+question); try { // learn query - // TODO: change to knowledgebase parameter - SPARQLTemplateBasedLearner3 learner = new 
SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); - // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - learner.init(); - learner.setUseIdealTagger(true); learner.setQuestion(question); learner.learnSPARQLQueries(); String learnedQuery = learner.getBestSPARQLQuery(); @@ -1170,4 +1282,4 @@ // } private static ResultSet executeSelect(SparqlEndpoint endpoint, String query, ExtractionDBCache cache){return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));} -} \ No newline at end of file +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-13 06:15:32
|
Revision: 3825 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3825&view=rev Author: lorenz_b Date: 2012-08-13 06:15:24 +0000 (Mon, 13 Aug 2012) Log Message: ----------- Testing new learning. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-13 06:12:58 UTC (rev 3824) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-13 06:15:24 UTC (rev 3825) @@ -10,7 +10,9 @@ import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URLDecoder; import java.text.DateFormat; import java.util.Collection; import java.util.Collections; @@ -50,6 +52,7 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.common.index.HierarchicalIndex; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; @@ -95,7 +98,7 @@ /*@Test*/ public void testDBpedia() throws Exception {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), - SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache);} + SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} //@Test public void testOxford() {test(new File(""),"");} @Test public void justTestTheLastWorkingOnesDBpedia() throws Exception @@ 
-428,18 +431,18 @@ Map<Integer,Future<LearnStatus>> futures = new HashMap<Integer,Future<LearnStatus>>(); // List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); - ExecutorService service = Executors.newFixedThreadPool(10); + ExecutorService service = Executors.newFixedThreadPool(1); for(int i: id2Question.keySet()) - { + {//if(i != 78)continue; futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb))); } for(int i: id2Question.keySet()) - { + {//if(i != 78)continue; String question = id2Question.get(i); try { - testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.SECONDS)); + testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.MINUTES)); } catch (InterruptedException e) { @@ -613,7 +616,9 @@ // resourcesIndex.setSortField("pagerank"); Index classesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); - + SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); + boa_propertiesIndex.setSortField("boa-score"); + propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); MappingBasedIndex mappingIndex= new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), @@ -786,7 +791,7 @@ if(node!=null&&node.isResource()) { String uri=node.asResource().getURI(); - uris.add(uri); + uris.add(urlDecode(uri)); } else // there is no variable "uri" { @@ -799,7 +804,7 @@ { variable = "?"+varName; String uri=node2.asResource().getURI(); - uris.add(uri); + uris.add(urlDecode(uri)); continue resultsetloop; } } @@ -808,6 +813,17 @@ } return uris; } + + private static String urlDecode(String url){ + String decodedURL = null; + try { + decodedURL = 
URLDecoder.decode(url, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return decodedURL; + } // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} @@ -871,7 +887,7 @@ { // learn query // TODO: change to knowledgebase parameter - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + SPARQLTemplateBasedLearner3 learner = new SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); learner.init(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-13 06:13:04
|
Revision: 3824 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3824&view=rev Author: lorenz_b Date: 2012-08-13 06:12:58 +0000 (Mon, 13 Aug 2012) Log Message: ----------- Continued new learning. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2012-08-13 06:12:17 UTC (rev 3823) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SPARQLEndpointMetrics.java 2012-08-13 06:12:58 UTC (rev 3824) @@ -34,6 +34,8 @@ public SPARQLEndpointMetrics(SparqlEndpoint endpoint, ExtractionDBCache cache) { this.endpoint = endpoint; this.cache = cache; + cache.setFreshnessInMilliseconds(31536000000l); + cache.setMaxExecutionTimeInSeconds(30); this.reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); } @@ -56,7 +58,7 @@ if(coOccurenceCnt > 0 && classOccurenceCnt > 0 && propertyOccurenceCnt > 0){ pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); } - log.info(String.format("PMI(%s, %s) = %f", prop, cls, pmi)); + log.debug(String.format("PMI(%s, %s) = %f", prop, cls, pmi)); return pmi; } @@ -78,7 +80,7 @@ if(coOccurenceCnt > 0 && classOccurenceCnt > 0 && propertyOccurenceCnt > 0){ pmi = Math.log( (coOccurenceCnt * total) / (classOccurenceCnt * propertyOccurenceCnt) ); } - log.info(String.format("PMI(%s, %s) = %f", cls, prop, pmi)); + log.debug(String.format("PMI(%s, %s) = %f", cls, prop, pmi)); return pmi; } @@ -100,7 +102,7 @@ if(coOccurenceCnt > 0 && subjectOccurenceCnt > 0 && objectOccurenceCnt > 0){ pmi = Math.log( (coOccurenceCnt * total) / (subjectOccurenceCnt * objectOccurenceCnt) ); } - log.info(String.format("PMI(%s, %s) = %f", subject, object, pmi)); + 
log.debug(String.format("PMI(%s, %s) = %f", subject, object, pmi)); return pmi; } @@ -152,6 +154,7 @@ * @return */ public int getOccurencesSubjectPredicate(NamedClass cls, Property prop){ + log.trace(String.format("Computing number of occurences as subject and predicate for [%s, %s]", cls.getName(), prop.getName())); String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int cnt = rs.next().getLiteral("cnt").getInt(); @@ -164,6 +167,7 @@ * @return */ public int getOccurencesPredicateObject(Property prop, NamedClass cls){ + log.trace(String.format("Computing number of occurences as predicate and object for [%s, %s]", prop.getName(), cls.getName())); String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?o a <%s>. ?s <%s> ?o}", cls.getName(), prop.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int cnt = rs.next().getLiteral("cnt").getInt(); @@ -176,6 +180,7 @@ * @return */ public int getOccurencesSubjectObject(NamedClass subject, NamedClass object){ + log.trace(String.format("Computing number of occurences as subject and object for [%s, %s]", subject.getName(), object.getName())); String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s a <%s>. ?s ?p ?o. ?o a <%s>}", subject.getName(), object.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int cnt = rs.next().getLiteral("cnt").getInt(); @@ -188,6 +193,7 @@ * @return */ public int getOccurencesInSubjectPosition(NamedClass cls){ + log.trace(String.format("Computing number of occurences in subject position for %s", cls.getName())); String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>. 
?s ?p ?o.}", cls.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); @@ -200,6 +206,7 @@ * @return */ public int getOccurencesInObjectPosition(NamedClass cls){ + log.trace(String.format("Computing number of occurences in object position for %s", cls.getName())); String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?o a <%s>. ?s ?p ?o.}", cls.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); @@ -212,6 +219,7 @@ * @return */ public int getOccurences(Property prop){ + log.trace(String.format("Computing number of occurences as predicate for %s", prop.getName())); String query = String.format("SELECT (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o}", prop.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int propOccurenceCnt = rs.next().getLiteral("cnt").getInt(); @@ -225,6 +233,7 @@ * @return */ public int getOccurences(NamedClass cls){ + log.trace(String.format("Computing number of occurences in subject or object position for %s", cls.getName())); String query = String.format("SELECT (COUNT(?s) AS ?cnt) WHERE {?s a <%s>.{?s ?p1 ?o1.} UNION {?o2 ?p2 ?s} }", cls.getName()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); int classOccurenceCnt = rs.next().getLiteral("cnt").getInt(); @@ -256,17 +265,18 @@ } public double getGoodness(NamedClass subject, ObjectProperty predicate, NamedClass object){ - + log.info(String.format("Computing goodness of [%s, %s, %s]", subject.getName(), predicate.getName(), object.getName())); double pmi_subject_predicate = getDirectedPMI(subject, predicate); double pmi_preciate_object = getDirectedPMI(predicate, object); double pmi_subject_object = getPMI(subject, object); double goodness = pmi_subject_predicate + 
pmi_preciate_object + 2*pmi_subject_object; - + log.info(String.format("Goodness of [%s, %s, %s]=%f", subject.getName(), predicate.getName(), object.getName(), Double.valueOf(goodness))); return goodness; } public double getGoodness(Individual subject, ObjectProperty predicate, NamedClass object){ + log.info(String.format("Computing goodness of [%s, %s, %s]", subject.getName(), predicate.getName(), object.getName())); //this is independent of the subject types double pmi_preciate_object = getDirectedPMI(predicate, object); @@ -275,6 +285,7 @@ //TODO inference Set<NamedClass> types = reasoner.getTypes(subject); for(NamedClass type : types){ + if(!type.getName().startsWith("http://dbpedia.org/ontology/"))continue; double pmi_subject_predicate = getDirectedPMI(type, predicate); double pmi_subject_object = getPMI(type, object); double tmpGoodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object; @@ -282,10 +293,12 @@ goodness = tmpGoodness; } } + log.info(String.format("Goodness of [%s, %s, %s]=%f", subject.getName(), predicate.getName(), object.getName(), Double.valueOf(goodness))); return goodness; } public double getGoodness(NamedClass subject, ObjectProperty predicate, Individual object){ + log.info(String.format("Computing goodness of [%s, %s, %s]", subject.getName(), predicate.getName(), object.getName())); //this is independent of the object types double pmi_subject_predicate = getDirectedPMI(subject, predicate); @@ -294,6 +307,7 @@ //TODO inference Set<NamedClass> types = reasoner.getTypes(object); for(NamedClass type : types){ + if(!type.getName().startsWith("http://dbpedia.org/ontology/"))continue; double pmi_preciate_object = getDirectedPMI(predicate, type); double pmi_subject_object = getPMI(subject, type); double tmpGoodness = pmi_subject_predicate + pmi_preciate_object + 2*pmi_subject_object; @@ -301,6 +315,7 @@ goodness = tmpGoodness; } } + log.info(String.format("Goodness of [%s, %s, %s]=%f", subject.getName(), predicate.getName(), 
object.getName(), Double.valueOf(goodness))); return goodness; } @@ -380,7 +395,7 @@ public static void main(String[] args) { SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/cache2"); + ExtractionDBCache cache = new ExtractionDBCache("/opt/tbsl/dbpedia_pmi_cache"); String NS = "http://dbpedia.org/ontology/"; String NS_Res = "http://dbpedia.org/resource/"; @@ -398,6 +413,7 @@ Individual danBrowne = new Individual(NS_Res + "Dan_Browne"); SPARQLEndpointMetrics pmiGen = new SPARQLEndpointMetrics(endpoint, cache); + System.out.println(pmiGen.getPMI(new NamedClass(NS + "River"), new NamedClass(NS + "Film"))); pmiGen.precompute(Arrays.asList(new String[]{"http://dbpedia.org/ontology/"})); System.out.println(pmiGen.getDirectedPMI(pAuthor, person)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-13 06:12:23
|
Revision: 3823 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3823&view=rev Author: lorenz_b Date: 2012-08-13 06:12:17 +0000 (Mon, 13 Aug 2012) Log Message: ----------- Continued new learning. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-08-10 13:51:26 UTC (rev 3822) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-08-13 06:12:17 UTC (rev 3823) @@ -52,6 +52,7 @@ import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper; import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; +import org.dllearner.common.index.HierarchicalIndex; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; @@ -80,6 +81,8 @@ import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; import org.semanticweb.HermiT.Configuration.DirectBlockingType; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -119,6 +122,8 @@ private ExtractionDBCache cache = new ExtractionDBCache("cache"); + private SimpleIRIShortFormProvider iriSfp = new SimpleIRIShortFormProvider(); + private Index resourcesIndex; private Index classesIndex; private Index propertiesIndex; @@ -167,14 +172,17 @@ public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ 
this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options); + setMappingIndex(knowledgebase.getMappingIndex()); } public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), posTagger, wordNet, options, cache); + setMappingIndex(knowledgebase.getMappingIndex()); } public SPARQLTemplateBasedLearner3(Knowledgebase knowledgebase){ this(knowledgebase.getEndpoint(), knowledgebase.getResourceIndex(), knowledgebase.getClassIndex(),knowledgebase.getPropertyIndex(), new StanfordPartOfSpeechTagger(), new WordNet(), new Options()); + setMappingIndex(knowledgebase.getMappingIndex()); } public SPARQLTemplateBasedLearner3(SparqlEndpoint endpoint, Index index){ @@ -630,7 +638,7 @@ } } - SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, new ExtractionDBCache("/opt/tbsl/cache2")); + SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, new ExtractionDBCache("/opt/tbsl/dbpedia_pmi_cache")); for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); Query q = wQ.getQuery(); @@ -647,9 +655,10 @@ types.add(typeTriple.getValue().getName().replace(">", "").replace("<", "")); } for(String type : types){ - metrics.getGoodness(new NamedClass(type), + double goodness = metrics.getGoodness(new NamedClass(type), new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), new Individual(object.getName().replace(">", "").replace("<", ""))); + wQ.setScore(wQ.getScore()+goodness); } } else if(object.isVariable() && !subject.isVariable()){ String varName = triple.getVariable().getName(); @@ -658,9 +667,10 @@ types.add(typeTriple.getValue().getName().replace(">", 
"").replace("<", "")); } for(String type : types){ - metrics.getGoodness(new Individual(subject.getName().replace(">", "").replace("<", "")), + double goodness = metrics.getGoodness(new Individual(subject.getName().replace(">", "").replace("<", "")), new ObjectProperty(predicate.getName().replace(">", "").replace("<", "")), new NamedClass(type)); + wQ.setScore(wQ.getScore()+goodness); } } } @@ -942,7 +952,11 @@ for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); + String label = item.getLabel(); + if(label == null){ + label = iriSfp.getShortForm(IRI.create(item.getUri())); + } + double similarity = Similarity.getSimilarity(word, label); // //get the labels of the redirects and compute the highest similarity // if(slot.getSlotType() == SlotType.RESOURCE){ // Set<String> labels = getRedirectLabels(item.getUri()); @@ -1010,9 +1024,13 @@ */ public static void main(String[] args) throws Exception { SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + SOLRIndex resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + resourcesIndex.setPrimarySearchField("label"); Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + SOLRIndex boa_propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/boa_fact_detail"); + boa_propertiesIndex.setSortField("boa-score"); + propertiesIndex = new HierarchicalIndex(boa_propertiesIndex, propertiesIndex); SPARQLTemplateBasedLearner3 learner = new SPARQLTemplateBasedLearner3(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-10 13:51:34
|
Revision: 3822 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3822&view=rev Author: kirdie Date: 2012-08-10 13:51:26 +0000 (Fri, 10 Aug 2012) Log Message: ----------- finished junit test. added a small and fast one for just checking the last working queries. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 15:12:57 UTC (rev 3821) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-10 13:51:26 UTC (rev 3822) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.learning; +import static org.junit.Assert.fail; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -57,14 +58,13 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.ini4j.Options; -import org.junit.Before; -import org.junit.Test; import org.junit.*; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import cern.colt.Arrays; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -89,38 +89,38 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { + private static final boolean PRETAGGED = true; + private static final File evaluationFolder = new File("cache/evaluation"); - @Test public void testDBpedia() throws Exception - {test("QALD 2 Benchmark", new 
File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + /*@Test*/ public void testDBpedia() throws Exception + {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache);} //@Test public void testOxford() {test(new File(""),"");} - public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException - { - final boolean EVALUATE = true; - if(EVALUATE) + @Test public void justTestTheLastWorkingOnesDBpedia() throws Exception + { + SortedMap<Long,Evaluation> evaluations; + + if((evaluations=Evaluation.read()).isEmpty()) { - String dir = "cache/"+getClass().getSimpleName()+"/"; + testDBpedia(); + evaluations=Evaluation.read(); + } + + Evaluation latestEvaluation = evaluations.get(evaluations.lastKey()); + for(String question: latestEvaluation.correctlyAnsweredQuestions) + { + LearnStatus status = new LearnQueryCallable(question, 0,new QueryTestData() , dbpediaLiveKnowledgebase).call(); + if(status.type!=LearnStatus.Type.OK) {fail("Failed with question \""+question+"\", query status: "+status);} + } + } - new File(dir).mkdirs(); - File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - if(!updatedReferenceXML.exists()) - { - logger.info("Generating updated reference for "+title); - generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); - } + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + { + 
generateTestDataIfNecessaryAndEvaluateAndWrite(title,referenceXML,endpoint,cache); + generateHTML(title); - QueryTestData referenceTestData = readQueries(updatedReferenceXML); - logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); - - QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); - Evaluation evaluation = evaluate(referenceTestData, learnedTestData); - logger.info(evaluation); - evaluation.write(); - } - generateHTML(); - // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} /* { logger.info("Comparing updated reference test data with learned test data:"); @@ -146,6 +146,33 @@ learnedTestData.write();*/ } + private void generateTestDataIfNecessaryAndEvaluateAndWrite(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException + { + String dir = "cache/"+getClass().getSimpleName()+"/"; + + new File(dir).mkdirs(); + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + } + + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); + + long startLearning = System.currentTimeMillis(); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase); + long endLearning = System.currentTimeMillis(); + logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); + learnedTestData.generateAnswers(endpoint,cache); + long endGeneratingAnswers = 
System.currentTimeMillis(); + logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); + } + /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -186,7 +213,7 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 4L; + private static final long serialVersionUID = 5L; final QueryTestData testData; final QueryTestData referenceData; int numberOfQuestions = 0; @@ -333,7 +360,7 @@ } @Override public String toString() - { + { StringBuilder sb = new StringBuilder(); if(!aMinusB.isEmpty()) sb.append("questions a/b: "+aMinusB+" ("+aMinusB.size()+" elements)\n"); if(!bMinusA.isEmpty()) sb.append("questions b/a: "+bMinusA+" ("+bMinusA.size()+" elements)\n"); @@ -344,8 +371,46 @@ } } - enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED} - + private static class LearnStatus implements Serializable + { + public enum Type {OK, TIMEOUT, NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY,NO_QUERY_LEARNED,EXCEPTION} + + public final Type type; + + private static final long serialVersionUID = 1L; + public static final LearnStatus OK = new LearnStatus(Type.OK,null); + public static final LearnStatus TIMEOUT = new LearnStatus(Type.TIMEOUT,null); + public static final LearnStatus NO_TEMPLATE_FOUND = new LearnStatus(Type.NO_TEMPLATE_FOUND,null); + public static final LearnStatus QUERY_RESULT_EMPTY = new LearnStatus(Type.QUERY_RESULT_EMPTY,null); + public static final LearnStatus NO_QUERY_LEARNED = new LearnStatus(Type.NO_QUERY_LEARNED,null); + + public final Exception exception; + + private LearnStatus(Type type, Exception exception) {this.type=type;this.exception = exception;} + + public static 
LearnStatus exceptionStatus(Exception cause) + { + if (cause == null) throw new NullPointerException(); + return new LearnStatus(Type.EXCEPTION,cause); + } + + @Override public String toString() + { + switch(type) + { + case OK: return "OK"; + case TIMEOUT: return "timeout"; + case NO_TEMPLATE_FOUND: return "no template found"; + case QUERY_RESULT_EMPTY:return "query result empty"; + case NO_QUERY_LEARNED: return "no query learned"; + case EXCEPTION: return "<summary>Exception: <details>"+Arrays.toString(exception.getStackTrace())+"</details></summary>"; + default: throw new RuntimeException("switch type not handled"); + } + } + + } + // enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED;} + /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ @@ -536,6 +601,7 @@ static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; + private static final int MAXIMUM_QUESTIONS = Integer.MAX_VALUE; // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} @@ -567,7 +633,7 @@ Logger.getLogger(Parser.class).setLevel(Level.WARN); Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); - logger.setLevel(Level.TRACE); // TODO: remove when finishing implementation of this class + logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); @@ -632,7 +698,7 @@ for(int i = 0; i < 
questionNodes.getLength(); i++) { - // if(i>3) break; // TODO: remove + if(i>=MAXIMUM_QUESTIONS) break; // TODO: remove String question; String query; Set<String> answers = new HashSet<String>(); @@ -785,7 +851,7 @@ private final QueryTestData testData; private final Knowledgebase knowledgeBase; - static private final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger(); + static private final PartOfSpeechTagger posTagger = PRETAGGED? null: new SynchronizedStanfordPartOfSpeechTagger(); static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); @@ -832,7 +898,7 @@ { logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); e.printStackTrace(); - return LearnStatus.EXCEPTION; + return LearnStatus.exceptionStatus(e); } return LearnStatus.OK; } @@ -917,9 +983,9 @@ { Integer id = question2Id.get(question); if(id==null) {System.err.println(question);continue;} - out.println( - "<tr><td>"+question+"</td>"+ - "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + out.println( + "<tr><td>"+question+"</td>"+ + "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); } } out.println("</table>\n</body>\n</html>"); @@ -935,7 +1001,7 @@ try { PrintWriter out = new PrintWriter(link); - out.println("<html>"); + out.println("<!DOCTYPE html><html>"); out.println("<head><style type='text/css'>"); out.println(".added {text-color:green;}"); out.println(".added li {list-style: none;margin-left: 0;padding-left: -2em;text-indent: -2em;color:darkgreen;}"); @@ -956,10 +1022,10 @@ return "<a href='"+link.getAbsolutePath()+"'>change</a>"; } - static void generateHTML() + static void generateHTML(String title) { StringBuilder sb = new StringBuilder(); - sb.append("<html>\n<body>\n<table style='width:100%'>\n"); + sb.append("<!DOCTYPE html><html><head><title>"+title+"</title></head>\n<body>\n<table style='width:100%'>\n"); SortedMap<Long,Evaluation> evaluations = 
Evaluation.read(); // SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); // timestampsDescending.addAll(evaluations.keySet()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-09 15:13:08
|
Revision: 3821 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3821&view=rev Author: kirdie Date: 2012-08-09 15:12:57 +0000 (Thu, 09 Aug 2012) Log Message: ----------- corrected a faulty reference query in the qald2 dbpedia train benchmark. also extended the junit test for sparqltemplatedbasedlearner2. Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml =================================================================== (Binary files differ) Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml =================================================================== (Binary files differ) Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 12:47:29 UTC (rev 3820) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 15:12:57 UTC (rev 3821) @@ -11,7 +11,6 @@ import java.io.Serializable; import java.net.MalformedURLException; import java.text.DateFormat; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -26,11 +25,12 @@ import java.util.Stack; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import 
java.util.concurrent.Future; -import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -59,6 +59,7 @@ import org.ini4j.Options; import org.junit.Before; import org.junit.Test; +import org.junit.*; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -84,34 +85,40 @@ * logging output is also wrote to the file log/#classname. * @author Konrad Höffner * **/ + +// problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { - private static final File evaluationFolder = new File("log/evaluation"); + private static final File evaluationFolder = new File("cache/evaluation"); @Test public void testDBpedia() throws Exception - {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()), + {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache);} //@Test public void testOxford() {test(new File(""),"");} public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { -// String dir = "cache/"+getClass().getSimpleName()+"/"; -// -// new File(dir).mkdirs(); -// File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); -// if(!updatedReferenceXML.exists()) -// { -// logger.info("Generating updated reference for "+title); -// generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); -// } -// -// 
QueryTestData referenceTestData = readQueries(updatedReferenceXML); -// logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); -// -// QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); -// Evaluation evaluation = evaluate(referenceTestData, learnedTestData); -// logger.info(evaluation); -// evaluation.write(); + final boolean EVALUATE = true; + if(EVALUATE) + { + String dir = "cache/"+getClass().getSimpleName()+"/"; + + new File(dir).mkdirs(); + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + } + + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); + + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); + } generateHTML(); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -146,7 +153,7 @@ private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) { // Diff d = diffTestData(reference,testData); - Evaluation evaluation = new Evaluation(suspect); + Evaluation evaluation = new Evaluation(suspect,reference); evaluation.numberOfQuestions = reference.id2Question.keySet().size(); for(int i: reference.id2Question.keySet()) @@ -170,8 +177,8 @@ else { evaluation.incorrectlyAnsweredQuestions.add(question); - logger.debug("learned queries differing: "+referenceQuery+"\n"+suspectQuery); - logger.debug("learned answers differing: 
"+reference.id2Answers.get(i)+"\n"+suspect.id2Answers.get(i)); + logger.debug("learned queries differing. reference query:\n"+referenceQuery+"\nsuspect query:\n"+suspectQuery); + logger.debug("learned answers differing: reference answers:\n"+reference.id2Answers.get(i)+"\nsuspect answers:\n"+suspect.id2Answers.get(i)); } } return evaluation; @@ -179,8 +186,9 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 2L; + private static final long serialVersionUID = 4L; final QueryTestData testData; + final QueryTestData referenceData; int numberOfQuestions = 0; int numberOfAnsweredQuestions = 0; int numberOfCorrectAnswers = 0; @@ -188,9 +196,9 @@ double recall = 0; final Set<String> unansweredQuestions = new HashSet<String>(); final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); - final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); - public Evaluation(QueryTestData testData) {this.testData = testData;} + public Evaluation(QueryTestData testData,QueryTestData referenceData) {this.testData = testData;this.referenceData = referenceData;} void computePrecisionAndRecall() // we have at maximum one answer set per question { @@ -336,10 +344,12 @@ } } + enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED} + /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- @@ -349,25 +359,42 @@ // 
---------------------------------------------------------- // int successes = 0; - // List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); - List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); + // List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + Map<Integer,Future<LearnStatus>> futures = new HashMap<Integer,Future<LearnStatus>>(); + + // List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); ExecutorService service = Executors.newFixedThreadPool(10); for(int i: id2Question.keySet()) { - Callable c = Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)); - FutureTask task = new FutureTask(c); - todo.add(task); + futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb))); } - List<Future> futures = new LinkedList<Future>(); - for(FutureTask task : todo) + for(int i: id2Question.keySet()) { - futures.add(service.submit(task)); - } - for(Future future:futures) try {future.get(30, TimeUnit.SECONDS);} catch (Exception e) {logger.warn("Timeout while generating test data.");} + String question = id2Question.get(i); + try + { + testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.SECONDS)); + } + catch (InterruptedException e) + { + // logger.warn("Timeout while generating test data for question "+id2Question.get(i)+"."); + // testData.id2LearnStatus.put(i, LearnStatus.TIMEOUT); + throw new RuntimeException("question= "+question,e); + } + catch (ExecutionException e) + { + throw new RuntimeException("question="+question,e); + } + catch (TimeoutException e) + { + logger.warn("Timeout while generating test data for question "+question+"."); + testData.id2LearnStatus.put(i, LearnStatus.TIMEOUT); + } + } service.shutdown(); -// try{service.awaitTermination(10, TimeUnit.MINUTES);} catch (InterruptedException e) {throw new RuntimeException("Timeout while generating test data.");} - + // 
try{service.awaitTermination(10, TimeUnit.MINUTES);} catch (InterruptedException e) {throw new RuntimeException("Timeout while generating test data.");} + // try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} // logger.debug("generating query for question \""+question+"\", id "+i); // long start = System.currentTimeMillis(); @@ -552,6 +579,7 @@ public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); + public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); @@ -580,7 +608,13 @@ public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) { if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} - for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i),cache));} + for(int i:id2Query.keySet()) + { + Set<String> uris = getUris(endpoint, id2Query.get(i),cache); + id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) + if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} + else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} + } return this; } } @@ -743,7 +777,7 @@ // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); // return kb; // } - private static class LearnQueryRunnable implements Runnable + private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; // private final String endpoint; @@ -756,7 +790,7 @@ 
static private final Options options = new Options(); - public LearnQueryRunnable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) + public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) { this.question=question; this.id=id; @@ -764,7 +798,7 @@ this.testData=testData; } - @Override public void run() + @Override public LearnStatus call() { logger.trace("learning question: "+question); try @@ -772,16 +806,19 @@ // learn query // TODO: change to knowledgebase parameter SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); learner.init(); + learner.setUseIdealTagger(true); learner.setQuestion(question); learner.learnSPARQLQueries(); - String learnedQuery = learner.getBestSPARQLQuery(); + String learnedQuery = learner.getBestSPARQLQuery(); + testData.id2Question.put(id, question); if(learnedQuery!=null&&!learnedQuery.isEmpty()) - { - testData.id2Question.put(id, question); + { testData.id2Query.put(id, learnedQuery); } + else {return LearnStatus.NO_QUERY_LEARNED;} logger.trace("learned query for question "+question+": "+learnedQuery); // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); @@ -789,12 +826,15 @@ catch(NoTemplateFoundException e) { logger.warn(String.format("no template found for question \"%s\"",question)); + return LearnStatus.NO_TEMPLATE_FOUND; } catch(Exception e) { logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - e.printStackTrace(); - } + e.printStackTrace(); + return LearnStatus.EXCEPTION; + } + return LearnStatus.OK; } } @@ -813,7 +853,7 @@ { Set<String> removedStrings = new HashSet<String>(from); removedStrings.removeAll(to); - sb.append("<ul>"); + sb.append("<ul 
class='removed'>"); for(String removed: removedStrings) {sb.append("<li>"+removed+"</li>\n");} sb.append("</ul>\n"); @@ -821,11 +861,27 @@ return sb.toString(); } - private static String escapePre(String s) {return s.replace("<", "&lt;").replace(">", "&rt;");} - + private static String escapePre(String s) {return s.replace("<", "&lt;").replace(">", "&gt;");} + + private static String getAnswerHTMLList(String[] answers) + { + StringBuilder sbAnswers = new StringBuilder(); + final int MAX = 10; + for(int i=0;i<answers.length;i++) + { + if(i>=MAX) + { + sbAnswers.append("["+(answers.length-i+1)+" more...]"); + break; + } + sbAnswers.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); + } + return sbAnswers.toString(); + } + /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. * Also creates and links to a file which contains the questions.*/ - private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean htmlAndIncludeQueriesAndAnswers, Evaluation evaluation) + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable, Evaluation evaluation) { final StringBuilder sb = new StringBuilder(); sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); @@ -836,34 +892,38 @@ try { PrintWriter out = new PrintWriter(link); - Map<String,Integer> question2Id = new HashMap<String,Integer>(); - for(Integer i: evaluation.testData.id2Question.keySet()) {question2Id.put(evaluation.testData.id2Question.get(i),i);} - if(htmlAndIncludeQueriesAndAnswers) + final Map<String,Integer> question2Id = new HashMap<String,Integer>(); + // only the reference data
contains entries for questions without answers + for(Integer i: evaluation.referenceData.id2Question.keySet()) {question2Id.put(evaluation.referenceData.id2Question.get(i),i);} + out.println("<!DOCTYPE html><html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); + if(queriesAvailable) { - out.println("<html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); - out.println("<tr><th>Question</th><th>Query</th><th>Answers</th></tr>"); + out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th></tr>"); for(String question: questions) { - Integer id = question2Id.get(question); - String[] answers = evaluation.testData.id2Answers.get(id).toArray(new String[0]); - StringBuilder sb2 = new StringBuilder(); - final int MAX = 10; - for(int i=0;i<answers.length;i++) - { - if(i>=MAX) - { - sb2.append("["+(answers.length-i+1)+" more...]"); - break; - } - sb2.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); - } - out.println("<tr><td>"+question+"</td><td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td><td><ul><code><pre>"+escapePre(sb2.toString())+"</pre></code></ul></td></tr>"); - } - - out.println("</table>\n</body>\n</html>"); + Integer id = question2Id.get(question); + if(evaluation.testData.id2Answers.get(id)==null) {System.err.println(question);continue;} + out.println( + "<tr><td>"+question+"</td>"+ + "<td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td>"+ + "<td><code><pre>"+escapePre(evaluation.referenceData.id2Query.get(id))+"</pre></code></td>"+ + "<td><ul>"+getAnswerHTMLList(evaluation.testData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ + "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td></tr>"); + } } else - {for(String question: questions) 
{out.println(question);}} - out.close(); + { + out.println("<tr><th>Question</th><th>Error Type</th></tr>"); + for(String question: questions) + { + Integer id = question2Id.get(question); + if(id==null) {System.err.println(question);continue;} + out.println( + "<tr><td>"+question+"</td>"+ + "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + } + } + out.println("</table>\n</body>\n</html>"); + out.close(); } catch (Exception e){throw new RuntimeException(e);} @@ -885,7 +945,7 @@ out.println("</style></head>"); out.println("<body>"); - out.println(diffHTML("Correctly Answered Questions", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); + out.println(diffHTML("Correctly Answered Questions (precision and recall = 1)", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); out.println(diffHTML("Incorrectly Answered Questions", from.incorrectlyAnsweredQuestions, to.incorrectlyAnsweredQuestions)); out.println(diffHTML("Unanswered Questions", from.unansweredQuestions, to.unansweredQuestions)); out.println("</body>\n</html>"); @@ -928,7 +988,7 @@ sb2.append("<div style='width:100%;height:1em;border:solid 1px;'>"); sb2.append(createColoredColumn(new File(folder,"correctly_answered.html"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions,true,e)); sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.html"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions,true,e)); - sb2.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,e)); + sb2.append(createColoredColumn(new File(folder,"unanswered.html"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,e)); sb2.append("<span style='width:1000px;'></span>"); sb2.append("</td></tr>\n"); last = e; This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-09 12:47:35
|
Revision: 3820 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3820&view=rev Author: lorenz_b Date: 2012-08-09 12:47:29 +0000 (Thu, 09 Aug 2012) Log Message: ----------- Got rid of annoying NPE. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -787,11 +787,11 @@ List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); for(SPARQL_Triple typeTriple : typeTriples){ String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); - for(Entry<String, Integer> property : mostFrequentProperties){ - wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); - wQ.setScore(wQ.getScore() + 0.1); - } +// List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); +// for(Entry<String, Integer> property : mostFrequentProperties){ +// wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); +// wQ.setScore(wQ.getScore() + 0.1); +// } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -51,7 +51,8 @@ if (clashing != null && s.type.equals(clashing)) { for (SPARQL_Triple triple : query.conditions) { if (triple.property.toString().equals("?"+s.anchor)) { - if (triple.value.toString().equals("?"+var)) return null; + if (triple.value.toString().equals("?"+var)) + return null; } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-09 10:46:16 UTC (rev 3819) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-08-09 12:47:29 UTC (rev 3820) @@ -382,8 +382,9 @@ try { Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); if (temp == null) {continue;} - temp = temp.checkandrefine(); + if (USE_WORDNET) { // find WordNet synonyms This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-09 10:46:22
|
Revision: 3819 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3819&view=rev Author: kirdie Date: 2012-08-09 10:46:16 +0000 (Thu, 09 Aug 2012) Log Message: ----------- html is escaped now. timeout for queries. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-08 12:41:16 UTC (rev 3818) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 10:46:16 UTC (rev 3819) @@ -28,7 +28,9 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -93,23 +95,23 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - String dir = "cache/"+getClass().getSimpleName()+"/"; - - new File(dir).mkdirs(); - File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - if(!updatedReferenceXML.exists()) - { - logger.info("Generating updated reference for "+title); - generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); - } - - QueryTestData referenceTestData = readQueries(updatedReferenceXML); - logger.info(title+" subset loaded with 
"+referenceTestData.id2Question.size()+" questions."); - - QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); - Evaluation evaluation = evaluate(referenceTestData, learnedTestData); - logger.info(evaluation); - evaluation.write(); +// String dir = "cache/"+getClass().getSimpleName()+"/"; +// +// new File(dir).mkdirs(); +// File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); +// if(!updatedReferenceXML.exists()) +// { +// logger.info("Generating updated reference for "+title); +// generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); +// } +// +// QueryTestData referenceTestData = readQueries(updatedReferenceXML); +// logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); +// +// QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); +// Evaluation evaluation = evaluate(referenceTestData, learnedTestData); +// logger.info(evaluation); +// evaluation.write(); generateHTML(); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -354,13 +356,18 @@ for(int i: id2Question.keySet()) { Callable c = Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)); - FutureTask task = new FutureTask(c); + FutureTask task = new FutureTask(c); todo.add(task); } + List<Future> futures = new LinkedList<Future>(); for(FutureTask task : todo) { - service.execute(task); + futures.add(service.submit(task)); } + for(Future future:futures) try {future.get(30, TimeUnit.SECONDS);} catch (Exception e) {logger.warn("Timeout while generating test data.");} + service.shutdown(); +// try{service.awaitTermination(10, TimeUnit.MINUTES);} catch (InterruptedException e) {throw new RuntimeException("Timeout while generating test data.");} + // try{service.invokeAll(todo);} 
catch (InterruptedException e) {throw new RuntimeException(e);} // logger.debug("generating query for question \""+question+"\", id "+i); // long start = System.currentTimeMillis(); @@ -488,7 +495,7 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://dbpedia.org/sparql"; + private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); @@ -814,6 +821,8 @@ return sb.toString(); } + private static String escapePre(String s) {return s.replace("<", "&lt;").replace(">", "&rt;");} + /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. * Also creates and links to a file which contains the questions.*/ private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean htmlAndIncludeQueriesAndAnswers, Evaluation evaluation) @@ -848,7 +857,7 @@ } sb2.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); } - out.println("<tr><td>"+question+"</td><td>"+evaluation.testData.id2Query.get(id)+"</td><td><ul>"+sb2.toString()+"</ul></td></tr>"); + out.println("<tr><td>"+question+"</td><td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td><td><ul><code><pre>"+escapePre(sb2.toString())+"</pre></code></ul></td></tr>"); } out.println("</table>\n</body>\n</html>"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-08 12:41:25
|
Revision: 3818 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3818&view=rev Author: lorenz_b Date: 2012-08-08 12:41:16 +0000 (Wed, 08 Aug 2012) Log Message: ----------- Added converter from OWLOntologyChange to SPARUL. Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/utilities/SPARULTranslator.java Added: trunk/components-core/src/main/java/org/dllearner/utilities/SPARULTranslator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/SPARULTranslator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/SPARULTranslator.java 2012-08-08 12:41:16 UTC (rev 3818) @@ -0,0 +1,83 @@ +package org.dllearner.utilities; + +import java.util.List; + +import org.coode.owlapi.rdf.model.AbstractTranslator; +import org.coode.owlapi.rdf.model.RDFLiteralNode; +import org.coode.owlapi.rdf.model.RDFNode; +import org.coode.owlapi.rdf.model.RDFResourceNode; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyChange; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.model.RemoveAxiom; + +public class SPARULTranslator extends AbstractTranslator<RDFNode, RDFResourceNode, RDFResourceNode, RDFLiteralNode> { + + private StringBuilder sb; + + public SPARULTranslator(OWLOntologyManager manager, OWLOntology ontology, + boolean useStrongTyping) { + super(manager, ontology, useStrongTyping); + // TODO Auto-generated constructor stub + } + + public String translate(OWLOntologyChange change){ + sb = new StringBuilder(); + sb.append(change instanceof RemoveAxiom ? 
"DELETE DATA" : "INSERT DATA"); + sb.append("{"); + change.getAxiom().accept(this); + sb.append("}"); + sb.append("\n"); + + return sb.toString(); + } + + public String translate(List<OWLOntologyChange> changes){ + sb = new StringBuilder(); + for(OWLOntologyChange change : changes){ + sb.append(change instanceof RemoveAxiom ? "DELETE DATA" : "INSERT DATA"); + sb.append("{"); + change.getAxiom().accept(this); + sb.append("}"); + sb.append("\n"); + } + + return sb.toString(); + } + + @Override + protected void addTriple(RDFResourceNode subject, RDFResourceNode pred, + RDFNode object) { + sb.append(subject).append(" ").append(pred).append(" ").append(object).append("\n"); + + } + + @Override + protected RDFResourceNode getAnonymousNode(Object key) { + return new RDFResourceNode(System.identityHashCode(key)); + } + + @Override + protected RDFResourceNode getPredicateNode(IRI iri) { + return new RDFResourceNode(iri); + } + + @Override + protected RDFResourceNode getResourceNode(IRI iri) { + return new RDFResourceNode(iri); + } + + @Override + protected RDFLiteralNode getLiteralNode(OWLLiteral literal) { + if(literal.getDatatype() != null){ + return new RDFLiteralNode(literal.toString(), literal.getDatatype().getIRI()); + } else { + return new RDFLiteralNode(literal.toString(), literal.getLang()); + } + + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-08 12:31:45
|
Revision: 3817 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3817&view=rev Author: lorenz_b Date: 2012-08-08 12:31:39 +0000 (Wed, 08 Aug 2012) Log Message: ----------- Added more information to return JSON. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java Modified: trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-08 10:29:35 UTC (rev 3816) +++ trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-08 12:31:39 UTC (rev 3817) @@ -75,6 +75,7 @@ import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.reasoning.SPARQLReasoner; import org.dllearner.utilities.Helper; +import org.dllearner.utilities.SPARULTranslator; import org.dllearner.utilities.datastructures.Datastructures; import org.dllearner.utilities.datastructures.SetManipulation; import org.dllearner.utilities.datastructures.SortedSetTuple; @@ -82,12 +83,16 @@ import org.dllearner.utilities.owl.OWLAPIConverter; import org.json.JSONArray; import org.json.simple.JSONObject; +import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.io.OWLObjectRenderer; +import org.semanticweb.owlapi.model.AddAxiom; import org.semanticweb.owlapi.model.AxiomType; -import org.semanticweb.owlapi.util.DefaultPrefixManager; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; -import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxPrefixNameShortFormProvider; public class EnrichmentServlet extends HttpServlet { @@ -99,6 +104,9 @@ private static 
final List<String> entityTypes = Arrays.asList(new String[]{"class", "objectproperty", "dataproperty"}); private static String validAxiomTypes = ""; + + private SPARULTranslator sparul; + private OWLOntology ont; static { axiomType2Class = new DualHashBidiMap<AxiomType, Class<? extends LearningAlgorithm>>(); @@ -128,13 +136,16 @@ objectPropertyAlgorithms = new LinkedList<Class<? extends LearningAlgorithm>>(); objectPropertyAlgorithms.add(DisjointObjectPropertyAxiomLearner.class); objectPropertyAlgorithms.add(EquivalentObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(SubObjectPropertyOfAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyDomainAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyRangeAxiomLearner.class); objectPropertyAlgorithms.add(FunctionalObjectPropertyAxiomLearner.class); objectPropertyAlgorithms.add(InverseFunctionalObjectPropertyAxiomLearner.class); - objectPropertyAlgorithms.add(ObjectPropertyDomainAxiomLearner.class); - objectPropertyAlgorithms.add(ObjectPropertyRangeAxiomLearner.class); - objectPropertyAlgorithms.add(SubObjectPropertyOfAxiomLearner.class); objectPropertyAlgorithms.add(SymmetricObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(AsymmetricObjectPropertyAxiomLearner.class); objectPropertyAlgorithms.add(TransitiveObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(ReflexiveObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(IrreflexiveObjectPropertyAxiomLearner.class); dataPropertyAlgorithms = new LinkedList<Class<? 
extends LearningAlgorithm>>(); dataPropertyAlgorithms.add(DisjointDataPropertyAxiomLearner.class); @@ -160,6 +171,17 @@ private String cacheDir; + public EnrichmentServlet() { + OWLOntologyManager man = OWLManager.createOWLOntologyManager(); + OWLOntology ont = null; + try { + ont = man.createOntology(); + } catch (OWLOntologyCreationException e1) { + e1.printStackTrace(); + } + sparul = new SPARULTranslator(man, ont, false); + } + @Override public void init() throws ServletException { super.init(); @@ -257,7 +279,8 @@ List<Future<JSONObject>> list = new ArrayList<Future<JSONObject>>(); final OWLObjectRenderer renderer = new ManchesterOWLSyntaxOWLObjectRendererImpl(); - renderer.setShortFormProvider(new ManchesterOWLSyntaxPrefixNameShortFormProvider(new DefaultPrefixManager())); +// renderer.setShortFormProvider(new ManchesterOWLSyntaxPrefixNameShortFormProvider(new DefaultPrefixManager())); + for (final AxiomType axiomType : executableAxiomTypes) { @@ -270,7 +293,10 @@ List<EvaluatedAxiom> axioms = getEvaluatedAxioms(ks, reasoner, entity, axiomType, maxExecutionTimeInSeconds, threshold, maxNrOfReturnedAxioms, useInference); for(EvaluatedAxiom ax : axioms){ JSONObject axiomObject = new JSONObject(); - axiomObject.put("axiom", renderer.render(OWLAPIConverter.getOWLAPIAxiom(ax.getAxiom()))); + OWLAxiom axiom = OWLAPIConverter.getOWLAPIAxiom(ax.getAxiom()); + axiomObject.put("axiom", axiom); + axiomObject.put("axiom_rendered", renderer.render(axiom)); + axiomObject.put("axiom_sparul", getSPARUL(axiom)); axiomObject.put("confidence", ax.getScore().getAccuracy()); axiomArray.put(axiomObject); } @@ -317,6 +343,10 @@ pw.close(); } + private String getSPARUL(OWLAxiom axiom){ + return sparul.translate(new AddAxiom(ont, axiom)); + } + private boolean oneOf(String value, String... 
possibleValues){ for(String v : possibleValues){ if(v.equals(value)){ @@ -519,7 +549,7 @@ public static void main(String[] args) { String s = ""; SortedSet<String> types = new TreeSet<String>(); - for(AxiomType t : getAxiomTypes("dataproperty")){ + for(AxiomType t : getAxiomTypes("objectproperty")){ s += "\"" + t.getName() + "\""; s+= ", "; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-08 10:29:42
|
Revision: 3816 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3816&view=rev Author: kirdie Date: 2012-08-08 10:29:35 +0000 (Wed, 08 Aug 2012) Log Message: ----------- more log statements in junit test. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-02 14:15:26 UTC (rev 3815) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-08 10:29:35 UTC (rev 3816) @@ -28,6 +28,7 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.FutureTask; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -92,23 +93,23 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - // String dir = "cache/"+getClass().getSimpleName()+"/"; - // - // new File(dir).mkdirs(); - // File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - // if(!updatedReferenceXML.exists()) - // { - // logger.info("Generating updated reference for "+title); - // generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); - // } - // - // QueryTestData referenceTestData = readQueries(updatedReferenceXML); - // logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); - // - // 
QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); - // Evaluation evaluation = evaluate(referenceTestData, learnedTestData); - // logger.info(evaluation); - // evaluation.write(); + String dir = "cache/"+getClass().getSimpleName()+"/"; + + new File(dir).mkdirs(); + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + } + + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); + + QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); generateHTML(); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -346,13 +347,21 @@ // ---------------------------------------------------------- // int successes = 0; - List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + // List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); ExecutorService service = Executors.newFixedThreadPool(10); for(int i: id2Question.keySet()) - {todo.add(Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)));} - - try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} + { + Callable c = Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)); + FutureTask task = new FutureTask(c); + todo.add(task); + } + for(FutureTask task : todo) + { + 
service.execute(task); + } + // try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} // logger.debug("generating query for question \""+question+"\", id "+i); // long start = System.currentTimeMillis(); // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); @@ -409,11 +418,13 @@ for(int i = 0; i < questionNodes.getLength(); i++) { Element questionNode = (Element) questionNodes.item(i); - //keep the id to aid comparison between original and updated files + //keep the id to aid comparison between original and updated files id = Integer.valueOf(questionNode.getAttribute("id")); //Read question question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + + logger.trace("id "+id+", question: "+question); //Read SPARQL query query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); // //Read answers @@ -477,7 +488,7 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://dbpedia.org/sparql"; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); @@ -522,7 +533,7 @@ Logger.getLogger(Parser.class).setLevel(Level.WARN); Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); - logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class + logger.setLevel(Level.TRACE); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); // oxfordEndpoint = new SparqlEndpoint(new 
URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); @@ -580,7 +591,7 @@ for(int i = 0; i < questionNodes.getLength(); i++) { - if(i>3) break; // TODO: remove + // if(i>3) break; // TODO: remove String question; String query; Set<String> answers = new HashSet<String>(); @@ -807,11 +818,11 @@ * Also creates and links to a file which contains the questions.*/ private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean htmlAndIncludeQueriesAndAnswers, Evaluation evaluation) { - final StringBuilder sb = new StringBuilder(); - sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); - sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); - sb.append("</a>"); - + final StringBuilder sb = new StringBuilder(); + sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); + sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); + sb.append("</a>"); + // link.getParentFile().mkdirs(); try { @@ -839,7 +850,7 @@ } out.println("<tr><td>"+question+"</td><td>"+evaluation.testData.id2Query.get(id)+"</td><td><ul>"+sb2.toString()+"</ul></td></tr>"); } - + out.println("</table>\n</body>\n</html>"); } else {for(String question: questions) {out.println(question);}} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-02 14:15:37
|
Revision: 3815 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3815&view=rev Author: kirdie Date: 2012-08-02 14:15:26 +0000 (Thu, 02 Aug 2012) Log Message: ----------- updated JUnit test. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-02 12:16:03 UTC (rev 3814) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-02 14:15:26 UTC (rev 3815) @@ -1,6 +1,5 @@ package org.dllearner.algorithm.tbsl.learning; -import static org.junit.Assert.fail; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -16,15 +15,15 @@ import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.SortedMap; -import java.util.SortedSet; import java.util.Stack; -import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutorService; @@ -37,7 +36,6 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import jjtraveler.Fail; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -66,7 +64,6 @@ import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; -import 
com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; /** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. @@ -95,26 +92,26 @@ public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { -// String dir = "cache/"+getClass().getSimpleName()+"/"; -// -// new File(dir).mkdirs(); -// File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); -// if(!updatedReferenceXML.exists()) -// { -// logger.info("Generating updated reference for "+title); -// generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); -// } -// -// QueryTestData referenceTestData = readQueries(updatedReferenceXML); -// logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); -// -// QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); -// Evaluation evaluation = evaluate(referenceTestData, learnedTestData); -// logger.info(evaluation); -// evaluation.write(); + // String dir = "cache/"+getClass().getSimpleName()+"/"; + // + // new File(dir).mkdirs(); + // File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + // if(!updatedReferenceXML.exists()) + // { + // logger.info("Generating updated reference for "+title); + // generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + // } + // + // QueryTestData referenceTestData = readQueries(updatedReferenceXML); + // logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); + // + // QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); + // Evaluation evaluation = 
evaluate(referenceTestData, learnedTestData); + // logger.info(evaluation); + // evaluation.write(); generateHTML(); -// if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} + // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} /* { logger.info("Comparing updated reference test data with learned test data:"); Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); @@ -146,7 +143,7 @@ private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) { // Diff d = diffTestData(reference,testData); - Evaluation evaluation = new Evaluation(); + Evaluation evaluation = new Evaluation(suspect); evaluation.numberOfQuestions = reference.id2Question.keySet().size(); for(int i: reference.id2Question.keySet()) @@ -179,7 +176,8 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; + final QueryTestData testData; int numberOfQuestions = 0; int numberOfAnsweredQuestions = 0; int numberOfCorrectAnswers = 0; @@ -189,6 +187,8 @@ final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + public Evaluation(QueryTestData testData) {this.testData = testData;} + void computePrecisionAndRecall() // we have at maximum one answer set per question { precision = numberOfCorrectAnswers / numberOfAnsweredQuestions; @@ -580,6 +580,7 @@ for(int i = 0; i < questionNodes.getLength(); i++) { + if(i>3) break; // TODO: remove String question; String query; Set<String> answers = new HashSet<String>(); @@ -786,31 +787,62 @@ { Set<String> addedStrings = new HashSet<String>(to); addedStrings.removeAll(from); - - for(String added: addedStrings) {sb.append("<span style='color:green'>"+added+"</span></br>\n");} + sb.append("<ul class='added'>"); + for(String added: 
addedStrings) {sb.append("<li>"+added+"</li>\n");} + sb.append("</ul>\n"); } sb.append('\n'); { Set<String> removedStrings = new HashSet<String>(from); removedStrings.removeAll(to); - for(String removed: removedStrings) {sb.append("<span style='color:red'>"+removed+"</span></br>\n");} + sb.append("<ul>"); + for(String removed: removedStrings) {sb.append("<li>"+removed+"</li>\n");} + sb.append("</ul>\n"); + } return sb.toString(); } /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. * Also creates and links to a file which contains the questions.*/ - private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal) - { - final StringBuilder sb = new StringBuilder(); - sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); - sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); - sb.append("</a>"); + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean htmlAndIncludeQueriesAndAnswers, Evaluation evaluation) + { + final StringBuilder sb = new StringBuilder(); + sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); + sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); + sb.append("</a>"); + // link.getParentFile().mkdirs(); try { PrintWriter out = new PrintWriter(link); - for(String question: questions) {out.println(question);} + Map<String,Integer> question2Id = new HashMap<String,Integer>(); + for(Integer i: evaluation.testData.id2Question.keySet()) {question2Id.put(evaluation.testData.id2Question.get(i),i);} + 
if(htmlAndIncludeQueriesAndAnswers) + { + out.println("<html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); + out.println("<tr><th>Question</th><th>Query</th><th>Answers</th></tr>"); + for(String question: questions) + { + Integer id = question2Id.get(question); + String[] answers = evaluation.testData.id2Answers.get(id).toArray(new String[0]); + StringBuilder sb2 = new StringBuilder(); + final int MAX = 10; + for(int i=0;i<answers.length;i++) + { + if(i>=MAX) + { + sb2.append("["+(answers.length-i+1)+" more...]"); + break; + } + sb2.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); + } + out.println("<tr><td>"+question+"</td><td>"+evaluation.testData.id2Query.get(id)+"</td><td><ul>"+sb2.toString()+"</ul></td></tr>"); + } + + out.println("</table>\n</body>\n</html>"); + } else + {for(String question: questions) {out.println(question);}} out.close(); } catch (Exception e){throw new RuntimeException(e);} @@ -823,7 +855,16 @@ try { PrintWriter out = new PrintWriter(link); - out.println("<html>\n<body>\n"); + out.println("<html>"); + out.println("<head><style type='text/css'>"); + out.println(".added {text-color:green;}"); + out.println(".added li {list-style: none;margin-left: 0;padding-left: -2em;text-indent: -2em;color:darkgreen;}"); + out.println(".added li:before {content: '+ ';}"); + out.println(".removed li {list-style: none;margin-left: 0;padding-left: -2em;text-indent: -2em;color:darkred;}"); + out.println(".removed li:before {content: '- ';}"); + + out.println("</style></head>"); + out.println("<body>"); out.println(diffHTML("Correctly Answered Questions", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); out.println(diffHTML("Incorrectly Answered Questions", from.incorrectlyAnsweredQuestions, to.incorrectlyAnsweredQuestions)); out.println(diffHTML("Unanswered Questions", from.unansweredQuestions, to.unansweredQuestions)); @@ -840,12 +881,12 @@ 
StringBuilder sb = new StringBuilder(); sb.append("<html>\n<body>\n<table style='width:100%'>\n"); SortedMap<Long,Evaluation> evaluations = Evaluation.read(); -// SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); -// timestampsDescending.addAll(evaluations.keySet()); + // SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); + // timestampsDescending.addAll(evaluations.keySet()); Evaluation last = null; - + Stack<String> stack = new Stack<String>(); // show reverse chronological order (we can't iterate in reverse order because of the diffs of the evaluations) - + for(long timestamp: evaluations.keySet()) { StringBuilder sb2 = new StringBuilder(); @@ -863,11 +904,11 @@ if(last.equals(e)) {/*sb2.append("no change");*/} else {sb2.append(createChangeHTML(new File(folder,"change.html"),last,e));} } - sb2.append("</td><td width='100%'>"); + sb2.append("</td><td width='100%'>"); sb2.append("<div style='width:100%;height:1em;border:solid 1px;'>"); - sb2.append(createColoredColumn(new File(folder,"correctly_answered.txt"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions)); - sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.txt"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions)); - sb2.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions)); + sb2.append(createColoredColumn(new File(folder,"correctly_answered.html"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions,true,e)); + sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.html"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions,true,e)); + sb2.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, 
e.numberOfQuestions,false,e)); sb2.append("<span style='width:1000px;'></span>"); sb2.append("</td></tr>\n"); last = e; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-08-02 12:16:14
|
Revision: 3814 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3814&view=rev Author: lorenz_b Date: 2012-08-02 12:16:03 +0000 (Thu, 02 Aug 2012) Log Message: ----------- Updated URL parameters. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java Modified: trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-01 11:06:36 UTC (rev 3813) +++ trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-08-02 12:16:03 UTC (rev 3814) @@ -11,6 +11,8 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -46,21 +48,37 @@ import org.dllearner.algorithms.properties.SymmetricObjectPropertyAxiomLearner; import org.dllearner.algorithms.properties.TransitiveObjectPropertyAxiomLearner; import org.dllearner.core.AbstractAxiomLearningAlgorithm; +import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.AnnComponentManager; import org.dllearner.core.AxiomLearningAlgorithm; import org.dllearner.core.ComponentInitException; import org.dllearner.core.EvaluatedAxiom; +import org.dllearner.core.EvaluatedDescription; import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.Score; import org.dllearner.core.config.ConfigHelper; +import org.dllearner.core.owl.Axiom; import org.dllearner.core.owl.DatatypeProperty; import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.EquivalentClassesAxiom; +import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.SubClassAxiom; import 
org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlKnowledgeSource; +import org.dllearner.learningproblems.ClassLearningProblem; +import org.dllearner.learningproblems.Heuristics.HeuristicType; +import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.datastructures.Datastructures; +import org.dllearner.utilities.datastructures.SetManipulation; +import org.dllearner.utilities.datastructures.SortedSetTuple; +import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2; import org.dllearner.utilities.owl.OWLAPIConverter; import org.json.JSONArray; import org.json.simple.JSONObject; @@ -139,34 +157,42 @@ private static final int DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS = 10; private static final int DEFAULT_MAX_NR_OF_RETURNED_AXIOMS = 10; private static final double DEFAULT_THRESHOLD = 0.75; + + private String cacheDir; + + @Override + public void init() throws ServletException { + super.init(); + cacheDir = getServletContext().getRealPath("cache"); + } @Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { long timeStamp = System.currentTimeMillis(); - String endpointURL = req.getParameter("endpoint"); + String endpointURL = req.getParameter("endpoint_url"); if (endpointURL == null) { throw new IllegalStateException("Missing parameter: endpoint"); } - String graphURI = req.getParameter("graph"); + String graphURI = req.getParameter("default_graph_uri"); SparqlEndpoint endpoint = new SparqlEndpoint(new URL(endpointURL), Collections.singletonList(graphURI), Collections.<String> emptyList()); - final boolean useInference = req.getParameter("useInference") == null ? 
false : Boolean.valueOf(req - .getParameter("useInference")); + final boolean useInference = req.getParameter("use_inference") == null ? false : Boolean.valueOf(req + .getParameter("use_inference")); final int maxNrOfReturnedAxioms = req.getParameter("maxNrOfReturnedAxioms") == null ? DEFAULT_MAX_NR_OF_RETURNED_AXIOMS : Integer.parseInt(req.getParameter("maxNrOfReturnedAxioms")); final int maxExecutionTimeInSeconds = req.getParameter("maxExecutionTimeInSeconds") == null ? DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS : Integer.parseInt(req.getParameter("maxExecutionTimeInSeconds")); final double threshold = req.getParameter("threshold") == null ? DEFAULT_THRESHOLD : Double.parseDouble(req.getParameter("threshold")); - String resourceURI = req.getParameter("resource"); + String resourceURI = req.getParameter("resource_uri"); if (resourceURI == null) { - throw new IllegalStateException("Missing parameter: resourceURI"); + throw new IllegalStateException("Missing parameter: resource_uri"); } - String axiomTypeStrings[] = req.getParameterValues("axiomTypes"); + String axiomTypeStrings[] = req.getParameterValues("axiom_types"); if (axiomTypeStrings == null) { - throw new IllegalStateException("Missing parameter: axiomTypes"); + throw new IllegalStateException("Missing parameter: axiom_types"); } axiomTypeStrings = axiomTypeStrings[0].split(","); @@ -181,7 +207,7 @@ } SPARQLTasks st = new SPARQLTasks(endpoint); - String entityType = req.getParameter("entityType"); + String entityType = req.getParameter("entity_type"); final Entity entity; if(entityType != null){ if(oneOf(entityType, entityTypes)){ @@ -217,7 +243,7 @@ ks.setSupportsSPARQL_1_1(supportsSPARQL_1_1); final SPARQLReasoner reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); - reasoner.setCache(new ExtractionDBCache("cache")); + reasoner.setCache(new ExtractionDBCache(cacheDir)); if (useInference && !reasoner.isPrepared()) { System.out.print("Precomputing subsumption hierarchy ... 
"); long startTime = System.currentTimeMillis(); @@ -248,7 +274,8 @@ axiomObject.put("confidence", ax.getScore().getAccuracy()); axiomArray.put(axiomObject); } - result.put(axiomType, axiomArray); + result.put("axiom_type", axiomType); + result.put("axioms", axiomArray); return result; } @@ -271,7 +298,7 @@ executor.shutdown(); - + resp.setContentType("application/json"); PrintWriter pw = resp.getWriter(); JSONObject finalResult = new JSONObject(); finalResult.put("result", result); @@ -282,7 +309,11 @@ finalResult.put("resource uri", resourceURI); finalResult.put("entity type", entityType); finalResult.put("omitted axiom types", omittedAxiomTypes); - pw.print(finalResult.toJSONString()); + String resultString = finalResult.toJSONString(); + if(req.getParameter("jsonp_callback") != null){ + resultString = req.getParameter("jsonp_callback") + "(" + resultString + ")"; + } + pw.print(resultString); pw.close(); } @@ -319,40 +350,124 @@ private List<EvaluatedAxiom> applyLearningAlgorithm(Class<? 
extends LearningAlgorithm> algorithmClass, SparqlEndpointKS ks, SPARQLReasoner reasoner, Entity entity, int maxExecutionTimeInSeconds, double threshold, int maxNrOfReturnedAxioms) throws ComponentInitException { - AxiomLearningAlgorithm learner = null; - try { - learner = (AxiomLearningAlgorithm) algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance(ks); - } catch (Exception e) { - e.printStackTrace(); - } - if (classAlgorithms.contains(algorithmClass)) { - ConfigHelper.configure(learner, "classToDescribe", entity); + List<EvaluatedAxiom> learnedAxioms = null; + if(algorithmClass == CELOE.class){ + learnedAxioms = applyCELOE(ks, (NamedClass) entity, true, false, threshold); } else { - ConfigHelper.configure(learner, "propertyToDescribe", entity); - } - ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); - // if(reasoner != null){ - ((AbstractAxiomLearningAlgorithm) learner).setReasoner(reasoner); - // } - learner.init(); - String algName = AnnComponentManager.getName(learner); - System.out.print("Applying " + algName + " on " + entity + " ... 
"); - long startTime = System.currentTimeMillis(); - try { - learner.start(); - } catch (Exception e) { - if (e.getCause() instanceof SocketTimeoutException) { - System.out.println("Query timed out (endpoint possibly too slow)."); - } else { + AxiomLearningAlgorithm learner = null; + try { + + learner = (AxiomLearningAlgorithm) algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance(ks); + } catch (Exception e) { e.printStackTrace(); } + if (classAlgorithms.contains(algorithmClass)) { + ConfigHelper.configure(learner, "classToDescribe", entity); + } else { + ConfigHelper.configure(learner, "propertyToDescribe", entity); + } + ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); + // if(reasoner != null){ + ((AbstractAxiomLearningAlgorithm) learner).setReasoner(reasoner); + // } + learner.init(); + String algName = AnnComponentManager.getName(learner); + System.out.print("Applying " + algName + " on " + entity + " ... "); + long startTime = System.currentTimeMillis(); + try { + learner.start(); + } catch (Exception e) { + if (e.getCause() instanceof SocketTimeoutException) { + System.out.println("Query timed out (endpoint possibly too slow)."); + } else { + e.printStackTrace(); + } + } + long runtime = System.currentTimeMillis() - startTime; + System.out.println("done in " + runtime + " ms"); + learnedAxioms = learner.getCurrentlyBestEvaluatedAxioms(maxNrOfReturnedAxioms, threshold); } - long runtime = System.currentTimeMillis() - startTime; - System.out.println("done in " + runtime + " ms"); - List<EvaluatedAxiom> learnedAxioms = learner.getCurrentlyBestEvaluatedAxioms(maxNrOfReturnedAxioms, threshold); + return learnedAxioms; } + + private List<EvaluatedAxiom> applyCELOE(SparqlEndpointKS ks, NamedClass nc, boolean equivalence, boolean reuseKnowledgeSource, double threshold) throws ComponentInitException { + // get instances of class as positive examples + SPARQLReasoner sr = new SPARQLReasoner(ks); + 
SortedSet<Individual> posExamples = sr.getIndividuals(nc, 20); + if(posExamples.isEmpty()){ + System.out.println("Skipping CELOE because class " + nc.toString() + " is empty."); + return Collections.emptyList(); + } + SortedSet<String> posExStr = Helper.getStringSet(posExamples); + + // use own implementation of negative example finder + long startTime = System.currentTimeMillis(); + System.out.print("finding negatives ... "); + AutomaticNegativeExampleFinderSPARQL2 finder = new AutomaticNegativeExampleFinderSPARQL2(ks.getEndpoint()); + SortedSet<String> negExStr = finder.getNegativeExamples(nc.getName(), posExStr); + negExStr = SetManipulation.fuzzyShrink(negExStr, 20); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negExStr); + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(posExamples, negExamples); + long runTime = System.currentTimeMillis() - startTime; + System.out.println("done (" + negExStr.size()+ " examples fround in " + runTime + " ms)"); + + SparqlKnowledgeSource ks2; + AbstractReasonerComponent rc; + ks2 = new SparqlKnowledgeSource(); + ks2.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + ks2.setUrl(ks.getEndpoint().getURL()); + ks2.setDefaultGraphURIs(new TreeSet<String>(ks.getEndpoint().getDefaultGraphURIs())); + ks2.setUseLits(false); + ks2.setUseCacheDatabase(true); + ks2.setCacheDir(cacheDir); + ks2.setRecursionDepth(2); + ks2.setCloseAfterRecursion(true); + ks2.setDissolveBlankNodes(false); + ks2.setSaveExtractedFragment(true); + startTime = System.currentTimeMillis(); + System.out.print("getting knowledge base fragment ... 
"); + ks2.init(); + runTime = System.currentTimeMillis() - startTime; + System.out.println("done in " + runTime + " ms"); + rc = new FastInstanceChecker(ks2); + rc.init(); + + ClassLearningProblem lp = new ClassLearningProblem(rc); + lp.setClassToDescribe(nc); + lp.setEquivalence(equivalence); + lp.setHeuristic(HeuristicType.FMEASURE); + lp.setUseApproximations(false); + lp.setMaxExecutionTimeInSeconds(10); + lp.init(); + + CELOE la = new CELOE(lp, rc); + la.setMaxExecutionTimeInSeconds(10); + la.setNoisePercentage(25); + la.init(); + startTime = System.currentTimeMillis(); + System.out.print("running CELOE (for " + (equivalence ? "equivalent classes" : "sub classes") + ") ... "); + la.start(); + runTime = System.currentTimeMillis() - startTime; + System.out.println("done in " + runTime + " ms"); + + // convert the result to axioms (to make it compatible with the other algorithms) + List<? extends EvaluatedDescription> learnedDescriptions = la.getCurrentlyBestEvaluatedDescriptions(threshold); + List<EvaluatedAxiom> learnedAxioms = new LinkedList<EvaluatedAxiom>(); + for(EvaluatedDescription learnedDescription : learnedDescriptions) { + Axiom axiom; + if(equivalence) { + axiom = new EquivalentClassesAxiom(nc, learnedDescription.getDescription()); + } else { + axiom = new SubClassAxiom(nc, learnedDescription.getDescription()); + } + Score score = lp.computeScore(learnedDescription.getDescription()); + learnedAxioms.add(new EvaluatedAxiom(axiom, score)); + } + return learnedAxioms; + } + private Entity getEntity(String resourceURI, String entityType, SparqlEndpoint endpoint) { Entity entity = null; if (entityType.equals("class")) { @@ -380,7 +495,7 @@ return entityType; } - public Collection<AxiomType> getAxiomTypes(String entityType){ + public static Collection<AxiomType> getAxiomTypes(String entityType){ List<AxiomType> types = new ArrayList<AxiomType>(); List<Class<? 
extends LearningAlgorithm>> algorithms = null; @@ -401,5 +516,15 @@ return types; } + public static void main(String[] args) { + String s = ""; + SortedSet<String> types = new TreeSet<String>(); + for(AxiomType t : getAxiomTypes("dataproperty")){ + s += "\"" + t.getName() + "\""; + s+= ", "; + } + System.out.println(s); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-01 11:06:43
|
Revision: 3813 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3813&view=rev Author: kirdie Date: 2012-08-01 11:06:36 +0000 (Wed, 01 Aug 2012) Log Message: ----------- Junit test finished. generates file /log/SPARQLTemplateBasedLearner3Test.html. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-31 16:14:04 UTC (rev 3812) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-01 11:06:36 UTC (rev 3813) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.learning; +import static org.junit.Assert.fail; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -12,7 +13,6 @@ import java.net.MalformedURLException; import java.text.DateFormat; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -23,6 +23,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; +import java.util.Stack; import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; @@ -36,6 +37,7 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import jjtraveler.Fail; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -53,11 +55,9 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; -import 
org.eclipse.jdt.annotation.NonNull; import org.ini4j.Options; import org.junit.Before; import org.junit.Test; -import org.openjena.atlas.logging.Log; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -89,28 +89,32 @@ private static final File evaluationFolder = new File("log/evaluation"); @Test public void testDBpedia() throws Exception - {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} + {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()), + SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache);} //@Test public void testOxford() {test(new File(""),"");} - public void test(String title, final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - String dir = "cache/"+getClass().getSimpleName()+"/"; - new File(dir).mkdirs(); - File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - if(!updatedReferenceXML.exists()) - { - logger.info("Generating updated reference for "+title); - generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint); - } +// String dir = "cache/"+getClass().getSimpleName()+"/"; +// +// new File(dir).mkdirs(); +// File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); +// if(!updatedReferenceXML.exists()) +// { +// logger.info("Generating updated reference for "+title); +// generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); +// } +// +// QueryTestData referenceTestData = 
readQueries(updatedReferenceXML); +// logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); +// +// QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); +// Evaluation evaluation = evaluate(referenceTestData, learnedTestData); +// logger.info(evaluation); +// evaluation.write(); + generateHTML(); - QueryTestData referenceTestData = readQueries(updatedReferenceXML); - logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); - - QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, endpoint).generateAnswers(endpoint); - Evaluation evaluation = evaluate(referenceTestData, learnedTestData); - logger.info(evaluation); - evaluation.write(); - generateHTML(); +// if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} /* { logger.info("Comparing updated reference test data with learned test data:"); Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); @@ -253,51 +257,31 @@ return evaluation; } catch (Exception e){throw new RuntimeException(e);} - } - } + } - /** - * @param savedTestData - * @param newTestData - * @return - */ - private static Diff diffTestData(QueryTestData reference, QueryTestData newData) - { - // if(d.id2Question.size()!=e.id2Question.size()) - // logger.info("comparing test data a against b. 
number of questions: "+reference.id2Question.size()+" vs "+newData.id2Question.size()); - // if(reference.id2Question.size()!=newData.id2Question.size()) - // { - // logger.info("questions a: "+reference.id2Question.keySet()); - // logger.info("questions b: "+newData.id2Question.keySet()); - // } - Diff diff = new Diff(); - diff.aMinusB.addAll(reference.id2Question.keySet()); - diff.aMinusB.removeAll(newData.id2Question.keySet()); - - diff.bMinusA.addAll(newData.id2Question.keySet()); - diff.bMinusA.removeAll(reference.id2Question.keySet()); - - diff.intersection.addAll(reference.id2Question.keySet()); - diff.intersection.retainAll(newData.id2Question.keySet()); - - for(int i: diff.intersection) + @Override public boolean equals(Object obj) { - // the questions are the same - we don't care about the answer - if(reference.id2Question.get(i).equals(newData.id2Question.get(i))) - - if(reference.id2Answers.containsKey(i)&&!reference.id2Answers.get(i).equals(newData.id2Answers.get(i))) - { - // logger.info("different answers:"); - // logger.info("a: "+reference.id2Answers.get(i)); - // logger.info("b: "+newData.id2Answers.get(i)); - diff.differentAnswers.add(i); - } + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + Evaluation other = (Evaluation) obj; + if (correctlyAnsweredQuestions == null) + { + if (other.correctlyAnsweredQuestions != null) return false; + } + else if (!correctlyAnsweredQuestions.equals(other.correctlyAnsweredQuestions)) return false; + if (incorrectlyAnsweredQuestions == null) + { + if (other.incorrectlyAnsweredQuestions != null) return false; + } + else if (!incorrectlyAnsweredQuestions.equals(other.incorrectlyAnsweredQuestions)) return false; + if (unansweredQuestions == null) + { + if (other.unansweredQuestions != null) return false; + } + else if (!unansweredQuestions.equals(other.unansweredQuestions)) return false; + return true; } - // if(!eMinusD.isEmpty()) 
logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); - - - // TODO Auto-generated method stub - return diff; } public static class Diff @@ -307,6 +291,36 @@ final Set<Integer> intersection = new HashSet<Integer>(); final Set<Integer> differentAnswers = new HashSet<Integer>(); + public Diff(QueryTestData reference, QueryTestData newData) + { + // if(d.id2Question.size()!=e.id2Question.size()) + // logger.info("comparing test data a against b. number of questions: "+reference.id2Question.size()+" vs "+newData.id2Question.size()); + // if(reference.id2Question.size()!=newData.id2Question.size()) + // { + // logger.info("questions a: "+reference.id2Question.keySet()); + // logger.info("questions b: "+newData.id2Question.keySet()); + // } + aMinusB.addAll(reference.id2Question.keySet()); + aMinusB.removeAll(newData.id2Question.keySet()); + + bMinusA.addAll(newData.id2Question.keySet()); + bMinusA.removeAll(reference.id2Question.keySet()); + + intersection.addAll(reference.id2Question.keySet()); + intersection.retainAll(newData.id2Question.keySet()); + + for(int i: intersection) + { + // the questions are the same - we don't care about the answer + if(reference.id2Question.get(i).equals(newData.id2Question.get(i))) + + if(reference.id2Answers.containsKey(i)&&!reference.id2Answers.get(i).equals(newData.id2Answers.get(i))) + { + differentAnswers.add(i); + } + } + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -322,7 +336,7 @@ /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,String endpoint) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException { QueryTestData testData = new QueryTestData(); // -- only create 
the learner parameters once to save time -- @@ -336,7 +350,7 @@ ExecutorService service = Executors.newFixedThreadPool(10); for(int i: id2Question.keySet()) - {todo.add(Executors.callable(new LearnQueryRunnable(id2Question.get(i),i,endpoint, testData)));} + {todo.add(Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)));} try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} // logger.debug("generating query for question \""+question+"\", id "+i); @@ -375,7 +389,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedXML(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -412,7 +426,7 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) { - Set<String> uris = getUris(endpoint, query); + Set<String> uris = getUris(endpoint, query,cache); if(!uris.isEmpty()) { // remove reference answers of the benchmark because they are obtained from an other endpoint @@ -478,7 +492,7 @@ static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; - private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} + // private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} private static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) @@ -545,10 +559,10 @@ catch (ClassNotFoundException e){throw new RuntimeException(e);} } - public QueryTestData generateAnswers(String endpoint) + public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) { if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} - for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i)));} + for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i),cache));} return this; } } @@ -571,8 +585,7 @@ Set<String> answers = new HashSet<String>(); Element questionNode = (Element) questionNodes.item(i); //read question ID - id = Integer.valueOf(questionNode.getAttribute("id")); - if(id>5) continue; // TODO: remove + id = Integer.valueOf(questionNode.getAttribute("id")); //Read question question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); //Read SPARQL query @@ -630,15 +643,16 @@ return testData; } - protected static Set<String> getUris(final String endpoint, final String query) + protected static Set<String> getUris(final SparqlEndpoint endpoint, final String query, 
ExtractionDBCache cache) { if(query==null) {throw new AssertionError("query is null");} if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); - QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); + // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); ResultSet rs; - try{rs = qe.execSelect();} + // try{rs = qe.execSelect();} + try{rs = executeSelect(endpoint, query, cache);} catch(QueryExceptionHTTP e) { logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); @@ -713,19 +727,21 @@ private static class LearnQueryRunnable implements Runnable { private final String question; - private final String endpoint; + // private final String endpoint; private final int id; private final QueryTestData testData; + private final Knowledgebase knowledgeBase; static private final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger(); static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); - public LearnQueryRunnable(String question, int id,String endpoint, QueryTestData testData) + + public LearnQueryRunnable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) { this.question=question; this.id=id; - this.endpoint=endpoint; + this.knowledgeBase=knowledgeBase; this.testData=testData; } @@ -735,12 +751,13 @@ try { // learn query - SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + // TODO: change to knowledgebase parameter + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); // SPARQLTemplateBasedLearner2 
dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - dbpediaLiveLearner.init(); - dbpediaLiveLearner.setQuestion(question); - dbpediaLiveLearner.learnSPARQLQueries(); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + learner.init(); + learner.setQuestion(question); + learner.learnSPARQLQueries(); + String learnedQuery = learner.getBestSPARQLQuery(); if(learnedQuery!=null&&!learnedQuery.isEmpty()) { testData.id2Question.put(id, question); @@ -762,6 +779,24 @@ } } + public static String diffHTML(String title, Set<String> from, Set<String> to) + { + StringBuilder sb = new StringBuilder(); + sb.append("<h3>"+title+"</h3>"); + { + Set<String> addedStrings = new HashSet<String>(to); + addedStrings.removeAll(from); + + for(String added: addedStrings) {sb.append("<span style='color:green'>"+added+"</span></br>\n");} + } + sb.append('\n'); + { + Set<String> removedStrings = new HashSet<String>(from); + removedStrings.removeAll(to); + for(String removed: removedStrings) {sb.append("<span style='color:red'>"+removed+"</span></br>\n");} + } + return sb.toString(); + } /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. 
* Also creates and links to a file which contains the questions.*/ @@ -783,34 +818,64 @@ return sb.toString(); } + static String createChangeHTML(File link, Evaluation from, Evaluation to) + { + try + { + PrintWriter out = new PrintWriter(link); + out.println("<html>\n<body>\n"); + out.println(diffHTML("Correctly Answered Questions", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); + out.println(diffHTML("Incorrectly Answered Questions", from.incorrectlyAnsweredQuestions, to.incorrectlyAnsweredQuestions)); + out.println(diffHTML("Unanswered Questions", from.unansweredQuestions, to.unansweredQuestions)); + out.println("</body>\n</html>"); + out.close(); + } + catch (Exception e){throw new RuntimeException(e);} + + return "<a href='"+link.getAbsolutePath()+"'>change</a>"; + } + static void generateHTML() { StringBuilder sb = new StringBuilder(); - sb.append("<html><body><table style='width:100%'>"); + sb.append("<html>\n<body>\n<table style='width:100%'>\n"); SortedMap<Long,Evaluation> evaluations = Evaluation.read(); - SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); - timestampsDescending.addAll(evaluations.keySet()); - for(long timestamp: timestampsDescending) +// SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); +// timestampsDescending.addAll(evaluations.keySet()); + Evaluation last = null; + + Stack<String> stack = new Stack<String>(); // show reverse chronological order (we can't iterate in reverse order because of the diffs of the evaluations) + + for(long timestamp: evaluations.keySet()) { + StringBuilder sb2 = new StringBuilder(); try { File folder = new File("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+"/"+timestamp); folder.mkdirs(); Evaluation e = evaluations.get(timestamp); - sb.append("<tr><td style='white-space: nowrap'>"); + sb2.append("<tr><td style='white-space: nowrap'>"); Date date = new Date(timestamp); - 
sb.append(DateFormat.getInstance().format(date)); - sb.append("</td><td width='100%'>"); - sb.append("<div style='width:100%;height:1em;border:solid 1px;'>"); - sb.append(createColoredColumn(new File(folder,"correctly_answered.txt"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions)); - sb.append(createColoredColumn(new File(folder,"incorrectly_answered.txt"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions)); - sb.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions)); - sb.append("<span style='width:1000px;'></span>"); - sb.append("</td></tr>"); + sb2.append(DateFormat.getInstance().format(date)); + sb2.append("</td><td style='white-space: nowrap'>"); + if(last!=null) + { + if(last.equals(e)) {/*sb2.append("no change");*/} + else {sb2.append(createChangeHTML(new File(folder,"change.html"),last,e));} + } + sb2.append("</td><td width='100%'>"); + sb2.append("<div style='width:100%;height:1em;border:solid 1px;'>"); + sb2.append(createColoredColumn(new File(folder,"correctly_answered.txt"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions)); + sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.txt"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions)); + sb2.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions)); + sb2.append("<span style='width:1000px;'></span>"); + sb2.append("</td></tr>\n"); + last = e; + stack.push(sb2.toString()); } catch(Exception e) {logger.warn("error with evaluation from timestamp "+timestamp,e);} } - - sb.append("</table></body></html>"); + while(!stack.isEmpty()) {sb.append(stack.pop());} + sb.append("</table>\n</body>\n</html>"); try { PrintWriter out = new 
PrintWriter("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+".html"); @@ -900,4 +965,6 @@ // // //fail("Not yet implemented"); // } + + private static ResultSet executeSelect(SparqlEndpoint endpoint, String query, ExtractionDBCache cache){return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));} } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-31 16:14:16
|
Revision: 3812 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3812&view=rev Author: kirdie Date: 2012-07-31 16:14:04 +0000 (Tue, 31 Jul 2012) Log Message: ----------- SPARQLTemplateBasedLearner3Test now also saves an evaluation history and creates an html file that displays it. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 10:36:11 UTC (rev 3811) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 16:14:04 UTC (rev 3812) @@ -1,10 +1,6 @@ package org.dllearner.algorithm.tbsl.nlp; -public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger { - - @Override - public synchronized String tag(String sentence) { - return super.tag(sentence); - } - -} +public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger +{ + @Override public synchronized String tag(String sentence) {return super.tag(sentence);} +} \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-31 10:36:11 UTC (rev 3811) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-31 16:14:04 UTC (rev 3812) @@ -7,16 +7,27 
@@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.io.PrintWriter; import java.io.Serializable; import java.net.MalformedURLException; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.SortedMap; -import java.util.TreeMap; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -25,14 +36,13 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import net.sf.oval.constraint.AssertTrue; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.SynchronizedStanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; @@ -43,10 +53,11 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.eclipse.jdt.annotation.NonNull; import org.ini4j.Options; import org.junit.Before; import org.junit.Test; -import static 
org.junit.Assert.*; +import org.openjena.atlas.logging.Log; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -56,6 +67,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; /** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. * The qald2 endpoint is not used because it may not always be available. @@ -74,42 +86,174 @@ * **/ public class SPARQLTemplateBasedLearner3Test { + private static final File evaluationFolder = new File("log/evaluation"); + @Test public void testDBpedia() throws Exception - {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} + {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} //@Test public void testOxford() {test(new File(""),"");} - public void test(final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - if(!updatedReferenceXML.exists()) {generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint);} + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint); + } - logger.debug("Reading updated reference test data"); 
QueryTestData referenceTestData = readQueries(updatedReferenceXML); - QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); - logger.info("Comparing updated reference test data a with learned test data b:"); - Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); - logger.info(queryTestDataDiff); + QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, endpoint).generateAnswers(endpoint); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); + generateHTML(); + /* { + logger.info("Comparing updated reference test data with learned test data:"); + Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); + logger.info(queryTestDataDiff); + } + logger.info("Comparing learned test data with old learned test data"); - logger.info("Comparing learned test data with old learned test data"); + try{ + QueryTestData oldLearnedTestData = QueryTestData.read(); + Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); + logger.info(queryTestDataDiff2); + // assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, + // queryTestDataDiff2.aMinusB.isEmpty()); + assertTrue("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, + queryTestDataDiff2.differentAnswers.isEmpty()); - try{ - QueryTestData oldLearnedTestData = QueryTestData.read(); - Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); - logger.info(queryTestDataDiff); -// assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, -// queryTestDataDiff2.aMinusB.isEmpty()); - assertFalse("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, - 
queryTestDataDiff2.differentAnswers.isEmpty()); - + } + catch(IOException e) + { + logger.info("Old test data not loadable, creating it and exiting."); + } + learnedTestData.write();*/ + } + + /** evaluates a data set against a reference. + * @param reference the test data assumed to be correct. needs to contain the answers for all queries. + * @param suspect the test data to compare with the reference. + * if a query for a question does not match and the answers are not provided or don't match as well then the question is marked as incorrectly answered.*/ + private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) + { + // Diff d = diffTestData(reference,testData); + Evaluation evaluation = new Evaluation(); + evaluation.numberOfQuestions = reference.id2Question.keySet().size(); + + for(int i: reference.id2Question.keySet()) + { + String question = reference.id2Question.get(i); + if(!suspect.id2Query.containsKey(i)) + { + evaluation.unansweredQuestions.add(question); + continue; + } + evaluation.numberOfAnsweredQuestions++; + + String referenceQuery = reference.id2Query.get(i); + String suspectQuery = suspect.id2Query.get(i); + // reference is required to contain answers for every key so we shouldn't get NPEs here (even though it could be the empty set but that shouldn't happen because only questions with nonempty answers are included in the updated reference) + if(referenceQuery.equals(suspectQuery)||reference.id2Answers.get(i).equals(suspect.id2Answers.get(i))) + { + evaluation.correctlyAnsweredQuestions.add(question); + evaluation.numberOfCorrectAnswers++; + } + else + { + evaluation.incorrectlyAnsweredQuestions.add(question); + logger.debug("learned queries differing: "+referenceQuery+"\n"+suspectQuery); + logger.debug("learned answers differing: "+reference.id2Answers.get(i)+"\n"+suspect.id2Answers.get(i)); + } } - catch(IOException e) + return evaluation; + } + + static class Evaluation implements Serializable + { + private static 
final long serialVersionUID = 1L; + int numberOfQuestions = 0; + int numberOfAnsweredQuestions = 0; + int numberOfCorrectAnswers = 0; + double precision = 0; + double recall = 0; + final Set<String> unansweredQuestions = new HashSet<String>(); + final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); + final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + + void computePrecisionAndRecall() // we have at maximum one answer set per question { - logger.info("Old test data not loadable, creating it and exiting."); - learnedTestData.write(); + precision = numberOfCorrectAnswers / numberOfAnsweredQuestions; + recall = numberOfCorrectAnswers / numberOfQuestions; } + + @Override public String toString() + { + StringBuffer sb = new StringBuffer(); + sb.append(numberOfAnsweredQuestions+" of "+numberOfQuestions+" questions answered, "); + sb.append(numberOfCorrectAnswers+" correct answers."); + sb.append("precision: "+precision+", recall: "+recall+"\n"); + sb.append("Detailed List: "); + sb.append(toHTML()); + return sb.toString(); + } + + public String toHTML() + { + StringBuffer sb = new StringBuffer(); + sb.append(htmlDetailsList("Unanswered Questions",unansweredQuestions)); + sb.append(htmlDetailsList("Wrongly Answered Questions",incorrectlyAnsweredQuestions)); + sb.append(htmlDetailsList("Correctly Answered Questions",correctlyAnsweredQuestions)); + return sb.toString(); + } + + public static String htmlDetailsList(/*@NonNull*/ String summary,/*@NonNull*/ Collection<String> elements) + { + if(elements.isEmpty()) {return "<p>"+summary+": none</p>";} + + StringBuffer sb = new StringBuffer(); + sb.append("<p><details>\n<summary>"+summary+"</summary>\n<ul>"); + for(String element: elements) + sb.append("<li>"+element+"</li>"); + sb.append("</ul>\n</details></p>"); + return sb.toString(); + } + + public synchronized void write() + { + evaluationFolder.mkdirs(); + try + { + ObjectOutputStream oos = new ObjectOutputStream(new 
FileOutputStream(new File(evaluationFolder,String.valueOf(System.currentTimeMillis())))); + oos.writeObject(this); + oos.close(); + } catch(IOException e) {throw new RuntimeException(e);} + } + + public static SortedMap<Long,Evaluation> read() + { + SortedMap<Long,Evaluation> evaluations = new ConcurrentSkipListMap<Long,Evaluation>(); + evaluationFolder.mkdirs(); + File[] files = evaluationFolder.listFiles(); + for(int i=0;i<files.length;i++) {evaluations.put(Long.valueOf(files[i].getName()),read(files[i]));} + return evaluations; + } + + private static Evaluation read(File file) + { + try + { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + Evaluation evaluation = (Evaluation) ois.readObject(); + ois.close(); + return evaluation; + } + catch (Exception e){throw new RuntimeException(e);} + } } /** @@ -117,23 +261,37 @@ * @param newTestData * @return */ - private static Diff diffTestData(QueryTestData a, QueryTestData b) + private static Diff diffTestData(QueryTestData reference, QueryTestData newData) { // if(d.id2Question.size()!=e.id2Question.size()) - {logger.info("comparing test data a against b. number of questions: "+a.id2Question.size()+" vs "+b.id2Question.size());} + // logger.info("comparing test data a against b. 
number of questions: "+reference.id2Question.size()+" vs "+newData.id2Question.size()); + // if(reference.id2Question.size()!=newData.id2Question.size()) + // { + // logger.info("questions a: "+reference.id2Question.keySet()); + // logger.info("questions b: "+newData.id2Question.keySet()); + // } Diff diff = new Diff(); - diff.aMinusB.addAll(a.id2Question.keySet()); - diff.aMinusB.removeAll(b.id2Question.keySet()); + diff.aMinusB.addAll(reference.id2Question.keySet()); + diff.aMinusB.removeAll(newData.id2Question.keySet()); - diff.bMinusA.addAll(b.id2Question.keySet()); - diff.bMinusA.removeAll(a.id2Question.keySet()); + diff.bMinusA.addAll(newData.id2Question.keySet()); + diff.bMinusA.removeAll(reference.id2Question.keySet()); - diff.intersection.addAll(a.id2Question.keySet()); - diff.intersection.retainAll(b.id2Question.keySet()); + diff.intersection.addAll(reference.id2Question.keySet()); + diff.intersection.retainAll(newData.id2Question.keySet()); for(int i: diff.intersection) { - if(a.id2Answers.containsKey(i)&&!a.id2Answers.get(i).equals(b.id2Answers.get(i))) {diff.differentAnswers.add(i);} + // the questions are the same - we don't care about the answer + if(reference.id2Question.get(i).equals(newData.id2Question.get(i))) + + if(reference.id2Answers.containsKey(i)&&!reference.id2Answers.get(i).equals(newData.id2Answers.get(i))) + { + // logger.info("different answers:"); + // logger.info("a: "+reference.id2Answers.get(i)); + // logger.info("b: "+newData.id2Answers.get(i)); + diff.differentAnswers.add(i); + } } // if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); @@ -155,49 +313,57 @@ if(!aMinusB.isEmpty()) sb.append("questions a/b: "+aMinusB+" ("+aMinusB.size()+" elements)\n"); if(!bMinusA.isEmpty()) sb.append("questions b/a: "+bMinusA+" ("+bMinusA.size()+" elements)\n"); if(!intersection.isEmpty()) sb.append("questions intersection: "+intersection+" ("+intersection.size()+" elements)\n"); - 
if(!differentAnswers.isEmpty()) sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n"); - return sb.substring(0, sb.length()-2); // remove last \n + if(!differentAnswers.isEmpty()) {sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n");} + else {sb.append("all answers are equal\n");} + return sb.substring(0, sb.length()-1); // remove last \n } } /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateTestData(SortedMap<Integer, String> id2Question) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,String endpoint) throws MalformedURLException, ComponentInitException { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- - PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); - WordNet wordnet = new WordNet(); - Options options = new Options(); + // PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + // WordNet wordnet = new WordNet(); + // Options options = new Options(); // ---------------------------------------------------------- - int successes = 0; - for(int i:id2Question.keySet()) - { - String question = id2Question.get(i); - logger.debug("generating query for question \""+question+"\", id "+i); - long start = System.currentTimeMillis(); - SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); + // int successes = 0; - dbpediaLiveLearner.init(); - dbpediaLiveLearner.setQuestion(question); + List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + ExecutorService service = Executors.newFixedThreadPool(10); - try{dbpediaLiveLearner.learnSPARQLQueries();} - 
catch(NoTemplateFoundException e) {continue;} - catch(Exception e) {logger.error("Error processing question "+question,e);continue;} - successes++; - testData.id2Question.put(i, question); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - testData.id2Query.put(i, learnedQuery); - // generate answers - // getUris(endpoint, learnedQuery); + for(int i: id2Question.keySet()) + {todo.add(Executors.callable(new LearnQueryRunnable(id2Question.get(i),i,endpoint, testData)));} - long end = System.currentTimeMillis(); - logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} + // logger.debug("generating query for question \""+question+"\", id "+i); + // long start = System.currentTimeMillis(); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); + // // dbpediaLiveLearner.setUseIdealTagger(true); // TODO: use this or not? 
+ // dbpediaLiveLearner.init(); + // dbpediaLiveLearner.setQuestion(question); + // + // try{dbpediaLiveLearner.learnSPARQLQueries();} + // catch(NoTemplateFoundException e) {continue;} + // catch(NullPointerException e) {continue;} + //catch(Exception e) {logger.error("Error processing question """+question,e);continue;} + // successes++; + // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + // if(learnedQuery==null) {continue;} + // + // testData.id2Question.put(i, question); + // testData.id2Query.put(i, learnedQuery); + // try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));} + // catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());} - } - logger.info(String.format("Successfully learned queries for %d of %d questions.",successes,id2Question.size())); - // TODO Auto-generated method stub + long end = System.currentTimeMillis(); + // logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + + + // logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size())); return testData; } @@ -305,7 +471,7 @@ // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); - private final ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); @@ -313,41 +479,9 @@ //static SparqlEndpoint oxfordEndpoint; private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} - // private ResultSet executeOxfordSelect(String query){return 
SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} - // @Test public void benchmarkCreateOxfordKnowledgeBase() - // { - // long start = System.currentTimeMillis(); - // for(int i=0;i<1000;i++) - // { - // createOxfordKnowledgebase(oxfordCache); - // } - // long end = System.currentTimeMillis(); - // long diff = end-start; - // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); - // } - // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) - // { - // URL url; - // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} - // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - // - // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); - // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); - // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); - // MappingBasedIndex mappingIndex= new MappingBasedIndex( - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() - // ); - // - // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); - // return kb; - // } - - private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) + private static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { SOLRIndex resourcesIndex = new 
SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); resourcesIndex.setPrimarySearchField("label"); @@ -372,22 +506,24 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); - logger.setLevel(Level.ALL); // TODO: remove when finishing implementation of this class + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); + // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); + logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); + // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } private static class QueryTestData implements Serializable { - public SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); - public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); - public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); - public void write() + public synchronized void write() { try { @@ -408,12 +544,18 @@ } catch (ClassNotFoundException e){throw new RuntimeException(e);} } + + 
public QueryTestData generateAnswers(String endpoint) + { + if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} + for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i)));} + return this; + } } private QueryTestData readQueries(final File file) { QueryTestData testData = new QueryTestData(); - logger.info("Reading file containing queries and answers..."); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); @@ -430,6 +572,7 @@ Element questionNode = (Element) questionNodes.item(i); //read question ID id = Integer.valueOf(questionNode.getAttribute("id")); + if(id>5) continue; // TODO: remove //Read question question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); //Read SPARQL query @@ -445,7 +588,7 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) { testData.id2Question.put(id, question); - testData.id2Query.put(id, query); + testData.id2Query.put(id, query); Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); if(answersElement!=null) { @@ -484,16 +627,23 @@ // System.out.println("Exception "); // // } - logger.info("Done."); return testData; } - private Set<String> getUris(String endpoint, String query) - { + protected static Set<String> getUris(final String endpoint, final String query) + { + if(query==null) {throw new AssertionError("query is null");} + if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); - ResultSet rs = qe.execSelect(); + ResultSet rs; + try{rs = qe.execSelect();} + catch(QueryExceptionHTTP e) + { + logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); + return Collections.<String>emptySet(); + } String variable = "?uri"; resultsetloop: while(rs.hasNext()) @@ -520,81 +670,161 @@ continue resultsetloop; } } - return Collections.<String>emptySet(); // we didn't a resource for the first query solution - give up and don't look in the others + if(uris.isEmpty()) {return Collections.<String>emptySet();} // we didn't a resource for the first query solution - give up and don't look in the others } } return uris; } - // private class TestQueryThread implements Runnable + + // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + + // @Test public void benchmarkCreateOxfordKnowledgeBase() // { - // private String question; - // private String referenceQuery; - // - // public TestQueryThread(String question, String referenceQuery) + // long start = 
System.currentTimeMillis(); + // for(int i=0;i<1000;i++) // { - // this.question=question; - // this.referenceQuery=referenceQuery; + // createOxfordKnowledgebase(oxfordCache); // } - // // String referenceQuery = id2Query.get(i); - // // String question = id2Question.get(i); - // @Override public void run() - // { + // long end = System.currentTimeMillis(); + // long diff = end-start; + // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); + // } + + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); // - // logger.trace("question: "+question); + // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); // - // // TODO: check for query isomorphism and leave out result comparison if possible - // // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) 
- // // get the answers for the gold standard query - // logger.trace("reference query: "+referenceQuery); - // - // try - // { - // Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); - // - // // learn query - // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - // dbpediaLiveLearner.init(); - // dbpediaLiveLearner.setQuestion(question); - // dbpediaLiveLearner.learnSPARQLQueries(); - // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - // - // logger.trace(learnedQuery); - // - // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); - // - // logger.trace("referenced uris: "+referenceURIs); - // logger.trace("learned uris: "+learnedURIs); - // - // boolean correctMatch = referenceURIs.equals(learnedURIs); - // logger.trace(correctMatch?"matches":"doesn't match"); - //// if(correctMatch) {synchronized(this) {correctMatches++;}} - // } - // catch(NoTemplateFoundException e) - // { - // synchronized(this) {numberOfNoTemplateFoundExceptions++;} - // logger.warn(String.format("no template found for question \"%s\"",question)); - // } - // catch(Exception e) - // { - // synchronized(this) {numberOfOtherExceptions++;} - // logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - // e.printStackTrace(); - // // maybe the exception has corrupted the learner? 
better create a new one - // // - // } - // // get the answers for the learned query - // // compare gold standard query and learned query answers - // } - // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + // return kb; // } - - private void updateFile(File originalFile, File updatedFile, String endpoint) + private static class LearnQueryRunnable implements Runnable { + private final String question; + private final String endpoint; + private final int id; + private final QueryTestData testData; + static private final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger(); + static private final WordNet wordnet = new WordNet(); + static private final Options options = new Options(); + public LearnQueryRunnable(String question, int id,String endpoint, QueryTestData testData) + { + this.question=question; + this.id=id; + this.endpoint=endpoint; + this.testData=testData; + } + + @Override public void run() + { + logger.trace("learning question: "+question); + try + { + // learn query + SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + dbpediaLiveLearner.init(); + dbpediaLiveLearner.setQuestion(question); + dbpediaLiveLearner.learnSPARQLQueries(); + String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + if(learnedQuery!=null&&!learnedQuery.isEmpty()) + { + testData.id2Question.put(id, question); + testData.id2Query.put(id, learnedQuery); + } + logger.trace("learned query for question "+question+": "+learnedQuery); + + // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); + } + catch(NoTemplateFoundException e) + { + logger.warn(String.format("no template found 
for question \"%s\"",question)); + } + catch(Exception e) + { + logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + e.printStackTrace(); + } + } } + + /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. + * Also creates and links to a file which contains the questions.*/ + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal) + { + final StringBuilder sb = new StringBuilder(); + sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); + sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); + sb.append("</a>"); + // link.getParentFile().mkdirs(); + try + { + PrintWriter out = new PrintWriter(link); + for(String question: questions) {out.println(question);} + out.close(); + } + catch (Exception e){throw new RuntimeException(e);} + + return sb.toString(); + } + + static void generateHTML() + { + StringBuilder sb = new StringBuilder(); + sb.append("<html><body><table style='width:100%'>"); + SortedMap<Long,Evaluation> evaluations = Evaluation.read(); + SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); + timestampsDescending.addAll(evaluations.keySet()); + for(long timestamp: timestampsDescending) + { + try + { + File folder = new File("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+"/"+timestamp); + folder.mkdirs(); + Evaluation e = evaluations.get(timestamp); + sb.append("<tr><td style='white-space: nowrap'>"); + Date date = new Date(timestamp); + sb.append(DateFormat.getInstance().format(date)); + sb.append("</td><td width='100%'>"); + sb.append("<div style='width:100%;height:1em;border:solid 1px;'>"); + sb.append(createColoredColumn(new 
File(folder,"correctly_answered.txt"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions)); + sb.append(createColoredColumn(new File(folder,"incorrectly_answered.txt"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions)); + sb.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions)); + sb.append("<span style='width:1000px;'></span>"); + sb.append("</td></tr>"); + } catch(Exception e) {logger.warn("error with evaluation from timestamp "+timestamp,e);} + } + + sb.append("</table></body></html>"); + try + { + PrintWriter out = new PrintWriter("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+".html"); + out.println(sb.toString()); + out.close(); + } + catch (Exception e){throw new RuntimeException(e);} + } + // private void updateFile(File originalFile, File updatedFile, String endpoint) + // { + // + // + // } + // private void test(File file) throws MalformedURLException, InterruptedException // { // SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-31 10:36:17
|
Revision: 3811 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3811&view=rev Author: lorenz_b Date: 2012-07-31 10:36:11 +0000 (Tue, 31 Jul 2012) Log Message: ----------- Added synchronized POS tagger. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-30 13:54:13 UTC (rev 3810) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-31 10:36:11 UTC (rev 3811) @@ -630,7 +630,7 @@ } } - SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, cache); + SPARQLEndpointMetrics metrics = new SPARQLEndpointMetrics(endpoint, new ExtractionDBCache("/opt/tbsl/cache2")); for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { WeightedQuery wQ = iterator.next(); Query q = wQ.getQuery(); @@ -1009,7 +1009,7 @@ * @throws InvalidFileFormatException */ public static void main(String[] args) throws Exception { - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 
=================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 10:36:11 UTC (rev 3811) @@ -0,0 +1,10 @@ +package org.dllearner.algorithm.tbsl.nlp; + +public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger { + + @Override + public synchronized String tag(String sentence) { + return super.tag(sentence); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-30 13:54:20
|
Revision: 3810 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3810&view=rev Author: lorenz_b Date: 2012-07-30 13:54:13 +0000 (Mon, 30 Jul 2012) Log Message: ----------- Added servlet class for enrichment. Modified Paths: -------------- trunk/interfaces/pom.xml trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java trunk/interfaces/src/main/webapp/WEB-INF/web.xml trunk/interfaces/src/test/java/org/dllearner/test/junit/ExampleTests.java Added Paths: ----------- trunk/interfaces/.lastUsedExample trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java Added: trunk/interfaces/.lastUsedExample =================================================================== --- trunk/interfaces/.lastUsedExample (rev 0) +++ trunk/interfaces/.lastUsedExample 2012-07-30 13:54:13 UTC (rev 3810) @@ -0,0 +1 @@ +na \ No newline at end of file Modified: trunk/interfaces/pom.xml =================================================================== --- trunk/interfaces/pom.xml 2012-07-28 19:33:29 UTC (rev 3809) +++ trunk/interfaces/pom.xml 2012-07-30 13:54:13 UTC (rev 3810) @@ -344,6 +344,13 @@ <extended>true</extended> <logTimeZone>GMT</logTimeZone> </requestLog> + <connectors> + <connector implementation="org.mortbay.jetty.nio.SelectChannelConnector"> + <port>9099</port> + <maxIdleTime>60000</maxIdleTime> + </connector> + </connectors> + </configuration> <!--execution is used for testing --> <!--executions> <execution> <id>start-jetty</id> <phase>pre-integration-test</phase> Modified: trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2012-07-28 19:33:29 UTC (rev 3809) +++ trunk/interfaces/src/main/java/org/dllearner/cli/CLI.java 2012-07-30 13:54:13 UTC (rev 3810) @@ -91,6 +91,8 @@ ApplicationContextBuilder builder = new DefaultApplicationContextBuilder(); context = 
builder.buildApplicationContext(configuration,springConfigResources); + + knowledgeSource = context.getBean(KnowledgeSource.class); } } Added: trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java (rev 0) +++ trunk/interfaces/src/main/java/org/dllearner/server/EnrichmentServlet.java 2012-07-30 13:54:13 UTC (rev 3810) @@ -0,0 +1,405 @@ +package org.dllearner.server; + +import java.io.IOException; +import java.io.PrintWriter; +import java.net.SocketTimeoutException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.collections15.BidiMap; +import org.apache.commons.collections15.bidimap.DualHashBidiMap; +import org.dllearner.algorithms.DisjointClassesLearner; +import org.dllearner.algorithms.SimpleSubclassLearner; +import org.dllearner.algorithms.celoe.CELOE; +import org.dllearner.algorithms.properties.AsymmetricObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.DataPropertyDomainAxiomLearner; +import org.dllearner.algorithms.properties.DataPropertyRangeAxiomLearner; +import org.dllearner.algorithms.properties.DisjointDataPropertyAxiomLearner; +import org.dllearner.algorithms.properties.DisjointObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.EquivalentDataPropertyAxiomLearner; +import 
org.dllearner.algorithms.properties.EquivalentObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.FunctionalDataPropertyAxiomLearner; +import org.dllearner.algorithms.properties.FunctionalObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.InverseFunctionalObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.IrreflexiveObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.ObjectPropertyDomainAxiomLearner; +import org.dllearner.algorithms.properties.ObjectPropertyRangeAxiomLearner; +import org.dllearner.algorithms.properties.ReflexiveObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.SubDataPropertyOfAxiomLearner; +import org.dllearner.algorithms.properties.SubObjectPropertyOfAxiomLearner; +import org.dllearner.algorithms.properties.SymmetricObjectPropertyAxiomLearner; +import org.dllearner.algorithms.properties.TransitiveObjectPropertyAxiomLearner; +import org.dllearner.core.AbstractAxiomLearningAlgorithm; +import org.dllearner.core.AnnComponentManager; +import org.dllearner.core.AxiomLearningAlgorithm; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.EvaluatedAxiom; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.config.ConfigHelper; +import org.dllearner.core.owl.DatatypeProperty; +import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.json.JSONArray; +import org.json.simple.JSONObject; +import org.semanticweb.owlapi.io.OWLObjectRenderer; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.util.DefaultPrefixManager; + 
+import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; +import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxPrefixNameShortFormProvider; + +public class EnrichmentServlet extends HttpServlet { + + private static List<Class<? extends LearningAlgorithm>> objectPropertyAlgorithms; + private static List<Class<? extends LearningAlgorithm>> dataPropertyAlgorithms; + private static List<Class<? extends LearningAlgorithm>> classAlgorithms; + private static BidiMap<AxiomType, Class<? extends LearningAlgorithm>> axiomType2Class; + + private static final List<String> entityTypes = Arrays.asList(new String[]{"class", "objectproperty", "dataproperty"}); + + private static String validAxiomTypes = ""; + + static { + axiomType2Class = new DualHashBidiMap<AxiomType, Class<? extends LearningAlgorithm>>(); + axiomType2Class.put(AxiomType.SUBCLASS_OF, SimpleSubclassLearner.class); + axiomType2Class.put(AxiomType.EQUIVALENT_CLASSES, CELOE.class); + axiomType2Class.put(AxiomType.DISJOINT_CLASSES, DisjointClassesLearner.class); + axiomType2Class.put(AxiomType.SUB_OBJECT_PROPERTY, SubObjectPropertyOfAxiomLearner.class); + axiomType2Class.put(AxiomType.EQUIVALENT_OBJECT_PROPERTIES, EquivalentObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.DISJOINT_OBJECT_PROPERTIES, DisjointObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.OBJECT_PROPERTY_DOMAIN, ObjectPropertyDomainAxiomLearner.class); + axiomType2Class.put(AxiomType.OBJECT_PROPERTY_RANGE, ObjectPropertyRangeAxiomLearner.class); + axiomType2Class.put(AxiomType.FUNCTIONAL_OBJECT_PROPERTY, FunctionalObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.INVERSE_FUNCTIONAL_OBJECT_PROPERTY, + InverseFunctionalObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.REFLEXIVE_OBJECT_PROPERTY, ReflexiveObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.IRREFLEXIVE_OBJECT_PROPERTY, 
IrreflexiveObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.SYMMETRIC_OBJECT_PROPERTY, SymmetricObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.ASYMMETRIC_OBJECT_PROPERTY, AsymmetricObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.TRANSITIVE_OBJECT_PROPERTY, TransitiveObjectPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.SUB_DATA_PROPERTY, SubDataPropertyOfAxiomLearner.class); + axiomType2Class.put(AxiomType.EQUIVALENT_DATA_PROPERTIES, EquivalentDataPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.DISJOINT_DATA_PROPERTIES, DisjointDataPropertyAxiomLearner.class); + axiomType2Class.put(AxiomType.DATA_PROPERTY_DOMAIN, DataPropertyDomainAxiomLearner.class); + axiomType2Class.put(AxiomType.DATA_PROPERTY_RANGE, DataPropertyRangeAxiomLearner.class); + axiomType2Class.put(AxiomType.FUNCTIONAL_DATA_PROPERTY, FunctionalDataPropertyAxiomLearner.class); + + objectPropertyAlgorithms = new LinkedList<Class<? extends LearningAlgorithm>>(); + objectPropertyAlgorithms.add(DisjointObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(EquivalentObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(FunctionalObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(InverseFunctionalObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyDomainAxiomLearner.class); + objectPropertyAlgorithms.add(ObjectPropertyRangeAxiomLearner.class); + objectPropertyAlgorithms.add(SubObjectPropertyOfAxiomLearner.class); + objectPropertyAlgorithms.add(SymmetricObjectPropertyAxiomLearner.class); + objectPropertyAlgorithms.add(TransitiveObjectPropertyAxiomLearner.class); + + dataPropertyAlgorithms = new LinkedList<Class<? 
extends LearningAlgorithm>>(); + dataPropertyAlgorithms.add(DisjointDataPropertyAxiomLearner.class); + dataPropertyAlgorithms.add(EquivalentDataPropertyAxiomLearner.class); + dataPropertyAlgorithms.add(FunctionalDataPropertyAxiomLearner.class); + dataPropertyAlgorithms.add(DataPropertyDomainAxiomLearner.class); + dataPropertyAlgorithms.add(DataPropertyRangeAxiomLearner.class); + dataPropertyAlgorithms.add(SubDataPropertyOfAxiomLearner.class); + + classAlgorithms = new LinkedList<Class<? extends LearningAlgorithm>>(); + classAlgorithms.add(DisjointClassesLearner.class); + classAlgorithms.add(SimpleSubclassLearner.class); + classAlgorithms.add(CELOE.class); + + for (AxiomType type : AxiomType.AXIOM_TYPES) { + validAxiomTypes += type.getName() + ", "; + } + } + + private static final int DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS = 10; + private static final int DEFAULT_MAX_NR_OF_RETURNED_AXIOMS = 10; + private static final double DEFAULT_THRESHOLD = 0.75; + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + long timeStamp = System.currentTimeMillis(); + String endpointURL = req.getParameter("endpoint"); + if (endpointURL == null) { + throw new IllegalStateException("Missing parameter: endpoint"); + } + String graphURI = req.getParameter("graph"); + + SparqlEndpoint endpoint = new SparqlEndpoint(new URL(endpointURL), Collections.singletonList(graphURI), + Collections.<String> emptyList()); + + final boolean useInference = req.getParameter("useInference") == null ? false : Boolean.valueOf(req + .getParameter("useInference")); + + final int maxNrOfReturnedAxioms = req.getParameter("maxNrOfReturnedAxioms") == null ? DEFAULT_MAX_NR_OF_RETURNED_AXIOMS : Integer.parseInt(req.getParameter("maxNrOfReturnedAxioms")); + final int maxExecutionTimeInSeconds = req.getParameter("maxExecutionTimeInSeconds") == null ? 
DEFAULT_MAX_EXECUTION_TIME_IN_SECONDS : Integer.parseInt(req.getParameter("maxExecutionTimeInSeconds")); + final double threshold = req.getParameter("threshold") == null ? DEFAULT_THRESHOLD : Double.parseDouble(req.getParameter("threshold")); + + String resourceURI = req.getParameter("resource"); + if (resourceURI == null) { + throw new IllegalStateException("Missing parameter: resourceURI"); + } + + String axiomTypeStrings[] = req.getParameterValues("axiomTypes"); + if (axiomTypeStrings == null) { + throw new IllegalStateException("Missing parameter: axiomTypes"); + } + axiomTypeStrings = axiomTypeStrings[0].split(","); + + Collection<AxiomType> requestedAxiomTypes = new HashSet<AxiomType>(); + for (String typeStr : axiomTypeStrings) { + AxiomType type = AxiomType.getAxiomType(typeStr.trim()); + if (type == null) { + throw new IllegalStateException("Illegal axiom type: " + typeStr + ". Please use one of " + validAxiomTypes); + } else { + requestedAxiomTypes.add(type); + } + } + + SPARQLTasks st = new SPARQLTasks(endpoint); + String entityType = req.getParameter("entityType"); + final Entity entity; + if(entityType != null){ + if(oneOf(entityType, entityTypes)){ + entity = getEntity(resourceURI, entityType, endpoint); + } else { + throw new IllegalStateException("Illegal entity type: " + entityType + ". 
Please use one of " + entityTypes); + } + + } else { + entity = st.guessResourceType(resourceURI, true); + entityType = getEntityType(entity); + } + + Collection<AxiomType> executableAxiomTypes = new HashSet<AxiomType>(); + Collection<AxiomType> omittedAxiomTypes = new HashSet<AxiomType>(); + Collection<AxiomType> possibleAxiomTypes = getAxiomTypes(entityType); + for(AxiomType type : requestedAxiomTypes){ + if(possibleAxiomTypes.contains(type)){ + executableAxiomTypes.add(type); + } else { + omittedAxiomTypes.add(type); + } + } + + final SparqlEndpointKS ks = new SparqlEndpointKS(endpoint); + try { + ks.init(); + } catch (ComponentInitException e) { + e.printStackTrace(); + } + // check if endpoint supports SPARQL 1.1 + boolean supportsSPARQL_1_1 = st.supportsSPARQL_1_1(); + ks.setSupportsSPARQL_1_1(supportsSPARQL_1_1); + + final SPARQLReasoner reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + reasoner.setCache(new ExtractionDBCache("cache")); + if (useInference && !reasoner.isPrepared()) { + System.out.print("Precomputing subsumption hierarchy ... 
"); + long startTime = System.currentTimeMillis(); + reasoner.prepareSubsumptionHierarchy(); + System.out.println("done in " + (System.currentTimeMillis() - startTime) + " ms"); + } + + JSONArray result = new JSONArray(); + + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + List<Future<JSONObject>> list = new ArrayList<Future<JSONObject>>(); + + final OWLObjectRenderer renderer = new ManchesterOWLSyntaxOWLObjectRendererImpl(); + renderer.setShortFormProvider(new ManchesterOWLSyntaxPrefixNameShortFormProvider(new DefaultPrefixManager())); + + + for (final AxiomType axiomType : executableAxiomTypes) { + Callable<JSONObject> worker = new Callable<JSONObject>() { + + @Override + public JSONObject call() throws Exception { + JSONObject result = new JSONObject(); + JSONArray axiomArray = new JSONArray(); + List<EvaluatedAxiom> axioms = getEvaluatedAxioms(ks, reasoner, entity, axiomType, maxExecutionTimeInSeconds, threshold, maxNrOfReturnedAxioms, useInference); + for(EvaluatedAxiom ax : axioms){ + JSONObject axiomObject = new JSONObject(); + axiomObject.put("axiom", renderer.render(OWLAPIConverter.getOWLAPIAxiom(ax.getAxiom()))); + axiomObject.put("confidence", ax.getScore().getAccuracy()); + axiomArray.put(axiomObject); + } + result.put(axiomType, axiomArray); + return result; + } + + }; + Future<JSONObject> submit = executor.submit(worker); + list.add(submit); + } + + + for (Future<JSONObject> future : list) { + try { + JSONObject array = future.get(); + result.put(array); + } catch (InterruptedException e) { + e.printStackTrace(); + } catch (ExecutionException e) { + e.printStackTrace(); + } + } + + executor.shutdown(); + + + PrintWriter pw = resp.getWriter(); + JSONObject finalResult = new JSONObject(); + finalResult.put("result", result); + finalResult.put("timestamp", timeStamp); + finalResult.put("execution time", System.currentTimeMillis()-timeStamp); + finalResult.put("endpoint url", endpointURL); + 
finalResult.put("graph", graphURI); + finalResult.put("resource uri", resourceURI); + finalResult.put("entity type", entityType); + finalResult.put("omitted axiom types", omittedAxiomTypes); + pw.print(finalResult.toJSONString()); + pw.close(); + } + + private boolean oneOf(String value, String... possibleValues){ + for(String v : possibleValues){ + if(v.equals(value)){ + return true; + } + } + return false; + } + + private boolean oneOf(String value, Collection<String> possibleValues){ + for(String v : possibleValues){ + if(v.equals(value)){ + return true; + } + } + return false; + } + + private List<EvaluatedAxiom> getEvaluatedAxioms(SparqlEndpointKS endpoint, SPARQLReasoner reasoner, + Entity entity, AxiomType axiomType, int maxExecutionTimeInSeconds, + double threshold, int maxNrOfReturnedAxioms, boolean useInference) { + List<EvaluatedAxiom> learnedAxioms = new ArrayList<EvaluatedAxiom>(); + try { + learnedAxioms = applyLearningAlgorithm(axiomType2Class.get(axiomType), endpoint, reasoner, entity, maxExecutionTimeInSeconds, threshold, maxNrOfReturnedAxioms); + } catch (ComponentInitException e) { + e.printStackTrace(); + } + return learnedAxioms; + } + + private List<EvaluatedAxiom> applyLearningAlgorithm(Class<? 
extends LearningAlgorithm> algorithmClass, + SparqlEndpointKS ks, SPARQLReasoner reasoner, Entity entity, int maxExecutionTimeInSeconds, double threshold, int maxNrOfReturnedAxioms) + throws ComponentInitException { + AxiomLearningAlgorithm learner = null; + try { + learner = (AxiomLearningAlgorithm) algorithmClass.getConstructor(SparqlEndpointKS.class).newInstance(ks); + } catch (Exception e) { + e.printStackTrace(); + } + if (classAlgorithms.contains(algorithmClass)) { + ConfigHelper.configure(learner, "classToDescribe", entity); + } else { + ConfigHelper.configure(learner, "propertyToDescribe", entity); + } + ConfigHelper.configure(learner, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); + // if(reasoner != null){ + ((AbstractAxiomLearningAlgorithm) learner).setReasoner(reasoner); + // } + learner.init(); + String algName = AnnComponentManager.getName(learner); + System.out.print("Applying " + algName + " on " + entity + " ... "); + long startTime = System.currentTimeMillis(); + try { + learner.start(); + } catch (Exception e) { + if (e.getCause() instanceof SocketTimeoutException) { + System.out.println("Query timed out (endpoint possibly too slow)."); + } else { + e.printStackTrace(); + } + } + long runtime = System.currentTimeMillis() - startTime; + System.out.println("done in " + runtime + " ms"); + List<EvaluatedAxiom> learnedAxioms = learner.getCurrentlyBestEvaluatedAxioms(maxNrOfReturnedAxioms, threshold); + return learnedAxioms; + } + + private Entity getEntity(String resourceURI, String entityType, SparqlEndpoint endpoint) { + Entity entity = null; + if (entityType.equals("class")) { + entity = new NamedClass(resourceURI); + } else if (entityType.equals("objectproperty")) { + entity = new ObjectProperty(resourceURI); + } else if (entityType.equals("dataproperty")) { + entity = new DatatypeProperty(resourceURI); + } else { + SPARQLTasks st = new SPARQLTasks(endpoint); + entity = st.guessResourceType(resourceURI, true); + } + return entity; + } + 
+ private String getEntityType(Entity entity) { + String entityType = null; + if(entity instanceof NamedClass){ + entityType = "class"; + } else if(entity instanceof ObjectProperty){ + entityType = "objectproperty"; + } else if(entity instanceof ObjectProperty){ + entityType = "dataproperty"; + } + return entityType; + } + + public Collection<AxiomType> getAxiomTypes(String entityType){ + List<AxiomType> types = new ArrayList<AxiomType>(); + + List<Class<? extends LearningAlgorithm>> algorithms = null; + if(entityType.equals("class")){ + algorithms = classAlgorithms; + } else if(entityType.equals("objectproperty")){ + algorithms = objectPropertyAlgorithms; + } else if(entityType.equals("dataproperty")){ + algorithms = dataPropertyAlgorithms; + } + + if(algorithms != null){ + for(Class<? extends LearningAlgorithm> alg : algorithms){ + types.add(axiomType2Class.getKey(alg)); + } + } + + return types; + } + + +} Modified: trunk/interfaces/src/main/webapp/WEB-INF/web.xml =================================================================== --- trunk/interfaces/src/main/webapp/WEB-INF/web.xml 2012-07-28 19:33:29 UTC (rev 3809) +++ trunk/interfaces/src/main/webapp/WEB-INF/web.xml 2012-07-30 13:54:13 UTC (rev 3810) @@ -25,5 +25,14 @@ <servlet-name>NKEGeizhals</servlet-name> <url-pattern>/NKEGeizhals</url-pattern> </servlet-mapping> + + <servlet> + <servlet-name>Enrichment</servlet-name> + <servlet-class>org.dllearner.server.EnrichmentServlet</servlet-class> + </servlet> + <servlet-mapping> + <servlet-name>Enrichment</servlet-name> + <url-pattern>/Enrichment</url-pattern> + </servlet-mapping> </web-app> Modified: trunk/interfaces/src/test/java/org/dllearner/test/junit/ExampleTests.java =================================================================== --- trunk/interfaces/src/test/java/org/dllearner/test/junit/ExampleTests.java 2012-07-28 19:33:29 UTC (rev 3809) +++ trunk/interfaces/src/test/java/org/dllearner/test/junit/ExampleTests.java 2012-07-30 13:54:13 UTC (rev 3810) 
@@ -24,13 +24,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Date; -import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; -import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; @@ -41,9 +39,7 @@ import org.dllearner.algorithms.gp.GP; import org.dllearner.cli.CLI; import org.dllearner.cli.QuickStart; -import org.dllearner.cli.Start; import org.dllearner.core.AbstractCELA; -import org.dllearner.core.ClassExpressionLearningAlgorithm; import org.dllearner.core.ComponentInitException; import org.dllearner.core.ComponentManager; import org.dllearner.core.LearningAlgorithm; @@ -153,6 +149,7 @@ break; } } + if(sparql == 2 && conf.contains("sparql")) ignored = true; if(ignored) { System.out.println("Skipping " + conf + " (is on ignore list)."); } else { @@ -163,6 +160,7 @@ // start example CLI start = new CLI(new File(conf)); start.init(); + // System.out.println("algorithm: " + start.getLearningAlgorithm()); boolean isSparql = start.getKnowledgeSource() instanceof SparqlKnowledgeSource; // boolean isSparql = false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-28 19:33:35
|
Revision: 3809 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3809&view=rev Author: lorenz_b Date: 2012-07-28 19:33:29 +0000 (Sat, 28 Jul 2012) Log Message: ----------- Small modification in constructor Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java 2012-07-28 19:27:20 UTC (rev 3808) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java 2012-07-28 19:33:29 UTC (rev 3809) @@ -571,7 +571,7 @@ Logger pelletLogger = Logger.getLogger("org.mindswap.pellet"); pelletLogger.setLevel(Level.WARN); - if(reasoner != null){ + if(reasoner == null){ reasoner = PelletReasonerFactory.getInstance().createNonBufferingReasoner(ontology); } classifier = PelletIncremantalReasonerFactory.getInstance().createReasoner(reasoner); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |