From: <lor...@us...> - 2011-07-21 11:36:59
Revision: 2954
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2954&view=rev
Author:   lorenz_b
Date:     2011-07-21 11:36:47 +0000 (Thu, 21 Jul 2011)

Log Message:
-----------
Added configuration options via properties file for tbsl.

Modified Paths:
--------------
    trunk/components-ext/pom.xml
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java
    trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties
    trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java
    trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java
    trunk/components-ext/src/main/resources/tbsl/tbsl.properties
    trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java

Modified: trunk/components-ext/pom.xml
===================================================================
--- trunk/components-ext/pom.xml	2011-07-20 07:53:29 UTC (rev 2953)
+++ trunk/components-ext/pom.xml	2011-07-21 11:36:47 UTC (rev 2954)
@@ -132,6 +132,11 @@
 			<version>1.0.14</version>
 		</dependency>
 
+		<dependency>
+			<groupId>org.ini4j</groupId>
+			<artifactId>ini4j</artifactId>
+			<version>0.5.2</version>
+		</dependency>
 	</dependencies>
 	<build>
 		<plugins>

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java	(rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java	2011-07-21 11:36:47 UTC (rev 2954)
@@ -0,0 +1,262 @@
+package org.dllearner.algorithm.qtl.filters;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
+
+
+import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric;
+import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler;
+import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein;
+import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance;
+
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.rdf.model.Statement;
+import com.hp.hpl.jena.util.iterator.Filter;
+
+public class QuestionBasedStatementFilter2 extends Filter<Statement> {
+
+	private Set<String> questionWords;
+
+	private AbstractStringMetric qGramMetric;
+	private AbstractStringMetric levensteinMetric;
+	private AbstractStringMetric jaroWinklerMetric;
+	private I_Sub substringMetric;
+
+	private double threshold = 0.4;
+
+	private int topK = 3;
+	private double topKSumThreshold = 0.8;
+
+	private Map<Statement, Double> statement2Similarity = new HashMap<Statement, Double>();
+
+	private Map<RDFNode, Boolean> cache = new HashMap<RDFNode, Boolean>();
+
+	private Map<Statement, String> statement2TokenMap = new HashMap<Statement, String>();
+
+	private Map<String, String> 
resource2TokenMap = new HashMap<String, String>(); + + int cnt = 0; + + public QuestionBasedStatementFilter2(Set<String> questionWords){ + this.questionWords = questionWords; + qGramMetric = new QGramsDistance(); + levensteinMetric = new Levenshtein(); + jaroWinklerMetric = new JaroWinkler(); + substringMetric = new I_Sub(); + + } + + private boolean isSimiliar2QuestionWord(String s, Statement st){ + for(String word : questionWords){ + if(areSimiliar(word, s, st)){ + statement2TokenMap.put(st, word); + resource2TokenMap.put(s, word); + return true; + } + } + return isSimilarWithSubstringMetrik(s, st); + } + + private boolean areSimiliar(String s1, String s2, Statement st){ + return (qGramMetric.getSimilarity(s1, s2) >= threshold) || + (levensteinMetric.getSimilarity(s1, s2) >= threshold); + } + + private boolean isSimilarWithSubstringMetrik(String s, Statement st){ + SortedSet<Double> values = new TreeSet<Double>(Collections.reverseOrder()); + for(String word : questionWords){ + double v = substringMetric.score(word, s, true); + if(v >= threshold){statement2TokenMap.put(st, word);resource2TokenMap.put(s, word); + return true; + } else { + values.add(Double.valueOf(v)); + } + } + double sum = 0; + for(Double v : getTopK(values)){ + if(v >= 0){ + sum += v; + } + + } + if(sum >= topKSumThreshold){ + statement2TokenMap.put(st, "ALL"); + } + return sum >= topKSumThreshold; + } + + private Set<Double> getTopK(SortedSet<Double> values){ + Set<Double> top = new HashSet<Double>(); + int k = 0; + for(Double v : values){ + if(k == topK){ + break; + } + top.add(v); + k++; + } + return top; + } + + + private String getFragment(String uri){ + int i = uri.lastIndexOf("#"); + if(i > 0){ + return uri.substring(i+1); + } else { + return uri.substring(uri.lastIndexOf("/")+1); + } + } + + @Override + public boolean accept(Statement s) { + Boolean similarPredicate = cache.get(s.getPredicate()); + Boolean similarObject = cache.get(s.getObject()); + if(similarPredicate != null && similarObject != null){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + String token = resource2TokenMap.get(object); + if(token != null){ + statement2TokenMap.put(s, token); + } else { + token = resource2TokenMap.get(getFragment(s.getPredicate().getURI())); + if( token != null){ + statement2TokenMap.put(s, token); + } + } + + + return similarPredicate || similarObject; + } else if(similarPredicate == null && similarObject != null){ + if(similarObject){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + String token = resource2TokenMap.get(object); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + String predicate = getFragment(s.getPredicate().getURI()); + if (isSimiliar2QuestionWord(predicate, s)){ + cache.put(s.getPredicate(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getPredicate(), Boolean.valueOf(false)); + return false; + } + } + } else if(similarPredicate != null && similarObject == null){ + if(similarPredicate){ + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + 
} + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + String token = resource2TokenMap.get(object); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + cache.put(s.getObject(), Boolean.valueOf(false)); + } + + String token = resource2TokenMap.get(getFragment(s.getPredicate().getURI())); + if( token != null){ + statement2TokenMap.put(s, token); + } + return true; + } else { + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getObject(), Boolean.valueOf(false)); + return false; + } + } + } else { + String object = null; + if(s.getObject().isURIResource()){ + object = getFragment(s.getObject().asResource().getURI()); + } else if(s.getObject().isLiteral()){ + object = s.getObject().asLiteral().getLexicalForm(); + } + if(isSimiliar2QuestionWord(object, s)){ + cache.put(s.getObject(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getObject(), Boolean.valueOf(false)); + } + + String predicate = getFragment(s.getPredicate().getURI()); + if (isSimiliar2QuestionWord(predicate, s)){ + cache.put(s.getPredicate(), Boolean.valueOf(true)); + return true; + } else { + cache.put(s.getPredicate(), Boolean.valueOf(false)); + } + return false; + } + } + +// @Override +// public boolean accept(Statement s) { +// String predicate = s.getPredicate().getURI().substring(s.getPredicate().getURI().lastIndexOf("/")); +// String object = null; +// if(s.getObject().isURIResource()){ +// object = s.getObject().asResource().getURI(); +// object = getFragment(s.getObject().asResource().getURI()); +// } else if(s.getObject().isLiteral()){ +// object = s.getObject().asLiteral().getLexicalForm(); +// } +// return isSimiliar2QuestionWord(predicate, s) || isSimiliar2QuestionWord(object, s); +// } + + public void setThreshold(double threshold){ + this.threshold = threshold; + } + + public double getThreshold(){ + return threshold; + } + + public Set<Statement> getStatementsBelowThreshold(double threshold){ + Set<Statement> statements = new HashSet<Statement>(); + for(Entry<Statement, Double> entry : statement2Similarity.entrySet()){ + if(entry.getValue().doubleValue() < threshold){ + statements.add(entry.getKey()); + } + } + return statements; + } + + public Map<Statement, String> getStatement2TokenMap() { + return statement2TokenMap; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/filters/QuestionBasedStatementFilter2.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,320 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. 
+ * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.algorithm.qtl.impl; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.dllearner.algorithm.qtl.QueryTreeFactory; +import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import org.dllearner.algorithm.qtl.filters.Filter; +import org.dllearner.algorithm.qtl.filters.Filters; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter2; +import org.dllearner.algorithm.qtl.filters.ZeroFilter; + +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Selector; +import com.hp.hpl.jena.rdf.model.SimpleSelector; +import com.hp.hpl.jena.rdf.model.Statement; + +/** + * + * @author Lorenz Bühmann + * + */ +public class QueryTreeFactoryImpl2 implements QueryTreeFactory<String> { + + private int nodeId; + private Comparator<Statement> comparator; + private Set<String> predicateFilters; + + private Filter predicateFilter = new ZeroFilter(); + private Filter objectFilter = new ZeroFilter(); + private Selector statementSelector = new SimpleSelector(); + private com.hp.hpl.jena.util.iterator.Filter<Statement> keepFilter; + + public QueryTreeFactoryImpl2(){ + comparator = new StatementComparator(); + predicateFilters = new HashSet<String>(Filters.getAllFilterProperties()); + } + + public void setPredicateFilter(Filter filter){ + this.predicateFilter = filter; + } + + public void setObjectFilter(Filter filter){ + this.objectFilter = filter; + } + + @Override + public void setStatementSelector(Selector selector) { + this.statementSelector = selector; + + } + + @Override + public void setStatementFilter(com.hp.hpl.jena.util.iterator.Filter<Statement> statementFilter) { + this.keepFilter = statementFilter; + + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example, Model model) { + if(keepFilter == null){ + return createTree(model.getResource(example), model); + } else { + return createTreeOptimized(model.getResource(example), model); + } + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example, Model model, int maxEdges) { + if(keepFilter == null){ + return createTree(model.getResource(example), model); + } else { + return createTreeOptimized(model.getResource(example), model, maxEdges); + } + } + + @Override + public QueryTreeImpl<String> getQueryTree(Resource example, Model model) { + return createTree(example, model); + } + + @Override + public QueryTreeImpl<String> getQueryTree(String example) { + return new 
QueryTreeImpl<String>(example); + } + + private QueryTreeImpl<String> createTreeOptimized(Resource s, Model model, int maxEdges){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + fillMap(s, model, resource2Statements, null); + + QuestionBasedStatementFilter filter = (QuestionBasedStatementFilter)keepFilter; + Set<Statement> statements; + int diff = valueCount(resource2Statements) - maxEdges; + main:while(diff > 0){ + double oldThreshold = filter.getThreshold(); + statements = filter.getStatementsBelowThreshold(oldThreshold+0.1); + for(SortedSet<Statement> set : resource2Statements.values()){ + for(Statement st : statements){ + if(set.remove(st)){ + diff--; + if(diff == 0){ + break main; + } + } + } + } + } + + + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private int valueCount(SortedMap<String, SortedSet<Statement>> map){ + int cnt = 0; + for(SortedSet<Statement> statements : map.values()){ + cnt += statements.size(); + } + return cnt; + } + + private QueryTreeImpl<String> createTreeOptimized(Resource s, Model model){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + fillMap(s, model, resource2Statements, null); + + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private void fillMap(Resource s, Model model, SortedMap<String, SortedSet<Statement>> resource2Statements, String oldSimilarToken){ + Iterator<Statement> it = model.listStatements(s, null, (RDFNode)null).filterKeep(keepFilter); + Statement st; + SortedSet<Statement> statements; + while(it.hasNext()){ + st = it.next(); + String newSimilarToken = ((QuestionBasedStatementFilter2)keepFilter).getStatement2TokenMap().get(st); + System.out.println(st); + System.out.println(newSimilarToken); + if(!newSimilarToken.equals(oldSimilarToken) || newSimilarToken.equals("ALL")){ + statements = resource2Statements.get(st.getSubject().toString()); + if(statements == null){ + statements = new TreeSet<Statement>(comparator); + resource2Statements.put(st.getSubject().toString(), statements); + } + statements.add(st); + if(st.getObject().isURIResource() && !resource2Statements.containsKey(st.getObject().asResource().getURI())){ + fillMap(st.getObject().asResource(), model, resource2Statements, newSimilarToken); + } + } + + } + } + + private QueryTreeImpl<String> createTree(Resource s, Model model){ + nodeId = 0; + SortedMap<String, SortedSet<Statement>> resource2Statements = new TreeMap<String, SortedSet<Statement>>(); + + Statement st; + SortedSet<Statement> statements; + Iterator<Statement> it = model.listStatements(statementSelector); + while(it.hasNext()){ + st = it.next(); + statements = resource2Statements.get(st.getSubject().toString()); + if(statements == null){ + statements = new TreeSet<Statement>(comparator); + resource2Statements.put(st.getSubject().toString(), statements); + } + statements.add(st); + } + QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); + fillTree(tree, resource2Statements); + + tree.setUserObject("?"); + return tree; + } + + private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements){ + tree.setId(nodeId++); + if(resource2Statements.containsKey(tree.getUserObject())){ + 
QueryTreeImpl<String> subTree; + Property predicate; + RDFNode object; + for(Statement st : resource2Statements.get(tree.getUserObject())){ + predicate = st.getPredicate(); + object = st.getObject(); + if(!predicateFilter.isRelevantResource(predicate.getURI())){ + continue; + } + if(predicateFilters.contains(st.getPredicate().toString())){ + continue; + } + if(object.isLiteral()){ + Literal lit = st.getLiteral(); + String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); + StringBuilder sb = new StringBuilder(); + sb.append("\"").append(escapedLit).append("\""); + if(lit.getDatatypeURI() != null){ + sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + } + if(!lit.getLanguage().isEmpty()){ + sb.append("@").append(lit.getLanguage()); + } + subTree = new QueryTreeImpl<String>(sb.toString()); +// subTree = new QueryTreeImpl<String>(lit.toString()); + subTree.setId(nodeId++); + subTree.setLiteralNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ + if(tree.getUserObjectPathToRoot().size() < 3 && + !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + fillTree(subTree, resource2Statements); + } + } + } + } + } + + class StatementComparator implements Comparator<Statement>{ + + @Override + public int compare(Statement s1, Statement s2) { +// if(s1.getPredicate() == null && s2.getPredicate() == null){ +// return 0; +// } +// return s1.getPredicate().toString().compareTo(s2.getPredicate().toString()) +// + s1.getObject().toString().compareTo(s2.getObject().toString()); + if(s1.getPredicate() == null && s2.getPredicate() == null){ + return 0; + } + + if(s1.getPredicate().toString().compareTo(s2.getPredicate().toString()) == 0){ + return s1.getObject().toString().compareTo(s2.getObject().toString()); + } else { + return s1.getPredicate().toString().compareTo(s2.getPredicate().toString()); + } + + } + + + + } + + public static String encode(String s) { + char [] htmlChars = s.toCharArray(); + StringBuffer encodedHtml = new StringBuffer(); + for (int i=0; i<htmlChars.length; i++) { + switch(htmlChars[i]) { + case '<': + encodedHtml.append("<"); + break; + case '>': + encodedHtml.append(">"); + break; + case '&': + encodedHtml.append("&"); + break; + case '\'': + encodedHtml.append("'"); + break; + case '"': + encodedHtml.append("""); + break; + case '\\': + encodedHtml.append("\"); + break; + case (char)133: + encodedHtml.append("…"); + break; + default: + encodedHtml.append(htmlChars[i]); + break; + } + } + return encodedHtml.toString(); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl2.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,40 @@ +package org.dllearner.algorithm.qtl.operations; + +import java.util.ArrayList; +import java.util.List; + +import com.hp.hpl.jena.graph.Triple; +import 
com.hp.hpl.jena.sparql.algebra.Op; +import com.hp.hpl.jena.sparql.algebra.OpVisitorBase; +import com.hp.hpl.jena.sparql.algebra.op.OpBGP; +import com.hp.hpl.jena.sparql.algebra.op.OpFilter; +import com.hp.hpl.jena.sparql.algebra.op.OpProject; +import com.hp.hpl.jena.sparql.algebra.op.OpTriple; + +public class FilterVisitor extends OpVisitorBase { + + private List<Op> ops = new ArrayList<Op>(); + + + + @Override + public void visit(OpProject opProject) { + opProject.getSubOp().visit(this) ; + } + + @Override + public void visit(OpBGP opBGP) { + for (Triple t : opBGP.getPattern()){ + if(t.getObject().isURI()){ + System.out.println(t.getObject().toString()); + } + } + } + + @Override + public void visit(OpFilter opFilter) { + // TODO Auto-generated method stub + super.visit(opFilter); + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/FilterVisitor.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -1,5 +1,9 @@ package org.dllearner.algorithm.tbsl.learning; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -28,12 +32,15 @@ import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.util.Prefixes; import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.ini4j.InvalidFileFormatException; +import org.ini4j.Options; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; @@ -53,17 +60,18 @@ LUCENE, SIMILARITY, NONE } + private static final String OPTIONS_FILE = "tbsl/tbsl.properties"; + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); - private Monitor mon = MonitorFactory.getTimeMonitor("stbl"); + private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - private static final int TOP_K = 5; - private static final String SOLR_SERVER_URL = "http://139.18.2.173:8080/apache-solr-3.1.0"; private static final int RECURSION_DEPTH = 2; - private Ranking ranking = Ranking.SIMILARITY; - private boolean useRemoteEndpointValidation = true; - private boolean stopIfQueryResultNotEmpty = true; - private int maxTestedQueriesPerTemplate = 25; + private Ranking ranking; + private boolean useRemoteEndpointValidation; + private boolean stopIfQueryResultNotEmpty; + private int maxTestedQueriesPerTemplate; + private int maxQueryExecutionTimeInSeconds; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -92,35 +100,59 @@ private Lemmatizer 
lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); - private int maxQueryExecutionTimeInSeconds = 20; - - public SPARQLTemplateBasedLearner(){ - resource_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_resources"); - resource_index.setHitsPerPage(TOP_K); - class_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_classes"); - class_index.setHitsPerPage(TOP_K); - property_index = new SolrSearch(SOLR_SERVER_URL + "/dbpedia_properties"); - property_index.setHitsPerPage(TOP_K); + try { + init(new Options(this.getClass().getClassLoader().getResourceAsStream(OPTIONS_FILE))); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } Set<String> predicateFilters = new HashSet<String>(); predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - prefixMap = new HashMap<String, String>(); - prefixMap.put(RDF.getURI(), "rdf"); - prefixMap.put(RDFS.getURI(), "rdfs"); - prefixMap.put("http://dbpedia.org/ontology/", "dbo"); - prefixMap.put("http://dbpedia.org/property/", "dbp"); - prefixMap.put("http://dbpedia.org/resource/", "dbr"); - prefixMap.put(FOAF.getURI(), "foaf"); - prefixMap.put("http://dbpedia.org/class/yago/", "yago"); + prefixMap = Prefixes.getPrefixes(); modelGenenerator = new ModelGenerator(endpoint, predicateFilters); templateGenerator = new Templator(); + } + + public SPARQLTemplateBasedLearner(String optionsFile){ + try { + init(new Options(new FileReader(new File(optionsFile)))); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public SPARQLTemplateBasedLearner(Options options){ + init(options); + } + + private void init(Options options){ + String resourcesIndexUrl = options.fetch("solr.resources.url"); + resource_index = new SolrSearch(resourcesIndexUrl); + String classesIndexUrl = options.fetch("solr.classes.url"); + class_index = new SolrSearch(classesIndexUrl); + + String propertiesIndexUrl = options.fetch("solr.properties.url"); + property_index = new SolrSearch(propertiesIndexUrl); + + maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + + ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); + useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? 
true : false; + stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); + maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); } public void setEndpoint(SparqlEndpoint endpoint){ @@ -475,7 +507,8 @@ tmp = new TreeSet<String>(new StringSimilarityComparator(word)); uris = uriCache.get(word); if(uris == null){ - uris = index.getResources("label:\"" + word + "\"~0.7"); +// uris = index.getResources("label:\"" + word + "\"~0.7"); + uris = index.getResources("label:" + word + "~0.5"); uriCache.put(word, uris); } tmp.addAll(uris); @@ -711,7 +744,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who developed the video game World of Warcraft?"; + String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -46,6 +46,7 @@ params.set("rows", hitsPerPage); params.set("start", offset); response = server.query(params); + SolrDocumentList docList = response.getResults(); lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.HashMap; +import java.util.Map; + +import com.hp.hpl.jena.sparql.vocabulary.FOAF; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; + + +public class Prefixes { + + private static Map<String, String> prefixes = new HashMap<String, String>(); + + static { + prefixes.put(RDF.getURI(), "rdf"); + prefixes.put(RDFS.getURI(), "rdfs"); + prefixes.put("http://dbpedia.org/ontology/", "dbo"); + prefixes.put("http://dbpedia.org/property/", "dbp"); + prefixes.put("http://dbpedia.org/resource/", "dbr"); + prefixes.put(FOAF.getURI(), "foaf"); + prefixes.put("http://dbpedia.org/class/yago/", "yago"); + } + + public static Map<String, String> getPrefixes(){ + return prefixes; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/Prefixes.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties 2011-07-20 
07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties 2011-07-21 11:36:47 UTC (rev 2954) @@ -4,7 +4,7 @@ # validate against remote endpoint if 'true', otherwise load a model and validate against it useRemoteEndpointValidation=true # number of tested SPARQL queries per template -maxTestedQueriesPerTemplate=25 +maxTestedQueriesPerTemplate=150 # max execution time for a SPARQL query before canceled -maxQueryExecutionTimeInSeconds=20 +maxQueryExecutionTimeInSeconds=40 Added: trunk/components-ext/src/main/resources/tbsl/tbsl.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/tbsl.properties (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/tbsl.properties 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,16 @@ +solr.server.url = http://139.18.2.173:8080/apache-solr-3.3.0 +solr.classes.url = ${solr.server.url}/dbpedia_classes +solr.resources.url = ${solr.server.url}/dbpedia_resources +solr.properties.url = ${solr.server.url}/dbpedia_properties +solr.query.limit = 20 + +sparql.endpoint.url = http://live.dbpedia.org/sparql +sparql.endpoint.defaultGraph = http://dbpedia.org +sparql.query.maxExecutionTimeInSeconds = 20 + +!remote | local +learning.validationType = remote +learning.stopAfterFirstNonEmptyQueryResult = true +learning.maxTestedQueriesPerTemplate = 20 +!similarity | lucene | none +learning.ranking = similarity Property changes on: trunk/components-ext/src/main/resources/tbsl/tbsl.properties ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/GeneralisationTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -1,7 +1,13 @@ package org.dllearner.algorithm.qtl; +import java.util.Arrays; +import java.util.HashSet; + import org.dllearner.algorithm.qtl.datastructures.QueryTree; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; +import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter2; import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl; +import org.dllearner.algorithm.qtl.impl.QueryTreeFactoryImpl2; import org.dllearner.algorithm.qtl.operations.Generalisation; import org.junit.Test; @@ -22,7 +28,7 @@ @Test public void generalisationTest1(){ - String resource = "http://dbpedia.org/resource/Leipzig"; + String resource = "http://dbpedia.org/resource/Chelsea_F.C."; Generalisation<String> gen = new Generalisation<String>(); Model model = getModelForExample(resource, maxModelSizePerExample); @@ -31,8 +37,35 @@ QueryTree<String> genTree = gen.generalise(tree); String query = genTree.toSPARQLQueryString(); System.out.println(query); + System.out.println(tree.toQuery()); } + @Test + public void generalisationTest2(){ +// String resource = "http://dbpedia.org/resource/Interview_with_the_Vampire:_The_Vampire_Chronicles"; + String resource = "http://dbpedia.org/resource/Arsenal_F.C."; + + Generalisation<String> gen = new Generalisation<String>(); + Model model = getModelForExample(resource, maxModelSizePerExample); + QueryTreeFactory<String> treeFactory = new QueryTreeFactoryImpl2(); + QuestionBasedStatementFilter2 filter = new 
QuestionBasedStatementFilter2(new HashSet( +// Arrays.asList(new String[]{"film", "starring", "Brad Pitt"}))); + Arrays.asList(new String[]{"soccer club", "Premier League", "manager", "France"}))); + filter.setThreshold(0.6); + treeFactory.setStatementFilter(filter); + QueryTree<String> tree = treeFactory.getQueryTree(resource, model); + System.out.println(tree.getStringRepresentation()); + + QueryTreeFactory<String> treeFactory2 = new QueryTreeFactoryImpl(); + QuestionBasedStatementFilter filter2 = new QuestionBasedStatementFilter(new HashSet( +// Arrays.asList(new String[]{"film", "starring", "Brad Pitt"}))); + Arrays.asList(new String[]{"soccer club", "Premier League", "manager", "France"}))); + filter2.setThreshold(0.6); + treeFactory2.setStatementFilter(filter2); + QueryTree<String> tree2 = treeFactory2.getQueryTree(resource, model); + System.out.println(tree2.getStringRepresentation()); + } + private Model getModelForExample(String example, int maxSize){ Query query = makeConstructQuery(example, LIMIT, 0); QueryExecution qexec = QueryExecutionFactory.sparqlService(ENDPOINT_URL, query); @@ -63,15 +96,15 @@ sb.append("WHERE {\n"); sb.append("<").append(example).append("> ").append("?p0 ").append("?o0").append(".\n"); for(int i = 1; i < RECURSION_DEPTH; i++){ - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + sb.append("OPTIONAL{?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".}\n"); } sb.append("FILTER (!regex (?p0, \"http://dbpedia.org/property/wikiPage\") && !regex(?p1, \"http://dbpedia.org/property/wikiPage\"))"); sb.append("}\n"); - sb.append("ORDER BY "); - for(int i = 0; i < RECURSION_DEPTH; i++){ - sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); - } +// sb.append("ORDER BY "); +// for(int i = 0; i < RECURSION_DEPTH; i++){ +// sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); +// } sb.append("\n"); sb.append("LIMIT ").append(limit).append("\n"); sb.append("OFFSET ").append(offset); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2011-07-20 07:53:29 UTC (rev 2953) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -73,8 +73,10 @@ tree.dump(); System.out.println("-----------------------------"); cnt++; + System.out.println(((QueryTreeImpl<String>)tree).toQuery()); } + LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); QueryTree<String> lgg = lggGenerator.getLGG(posExampleTrees); Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java 2011-07-21 11:36:47 UTC (rev 2954) @@ -0,0 +1,57 @@ +package org.dllearner.algorithm.qtl; + +import java.util.HashSet; +import java.util.List; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.qtl.exception.EmptyLGGException; +import org.dllearner.algorithm.qtl.exception.NegativeTreeCoverageExecption; +import org.dllearner.algorithm.qtl.exception.TimeOutException; +import 
org.dllearner.algorithm.qtl.filters.QuestionBasedQueryTreeFilterAggressive;
+import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter;
+import org.dllearner.algorithm.qtl.operations.NBR;
+import org.dllearner.algorithm.qtl.operations.PostLGG;
+import org.dllearner.algorithm.qtl.util.SPARQLEndpointEx;
+import org.dllearner.kb.sparql.ExtractionDBCache;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+
+import scala.actors.threadpool.Arrays;
+
+public class QTLTest {
+
+	public static void main(String[] args) throws EmptyLGGException, NegativeTreeCoverageExecption, TimeOutException {
+		Logger.getLogger(NBR.class).setLevel(Level.DEBUG);
+		Logger.getLogger(PostLGG.class).setLevel(Level.DEBUG);
+		List<String> predicateFilters = Arrays.asList(new String[]{"http://dbpedia.org/ontology/wikiPageWikiLink",
+				"http://dbpedia.org/ontology/wikiPageExternalLink", "http://dbpedia.org/property/wikiPageUsesTemplate"});
+		SPARQLEndpointEx endpoint = new SPARQLEndpointEx(SparqlEndpoint.getEndpointDBpediaLiveAKSW(), "", "", new HashSet<String>(predicateFilters));
+
+		QTL qtl = new QTL(endpoint, new ExtractionDBCache("cache"));
+
+//		List<String> relevantWords = Arrays.asList(new String[]{"film", "star", "Brad Pitt"});
+//		List<String> posExamples = Arrays.asList(new String[]{
+//				"http://dbpedia.org/resource/Interview_with_the_Vampire:_The_Vampire_Chronicles",
+//				"http://dbpedia.org/resource/Megamind"});
+//		List<String> negExamples = Arrays.asList(new String[]{"http://dbpedia.org/resource/Shukriya:_Till_Death_Do_Us_Apart"});
+
+		List<String> relevantWords = Arrays.asList(new String[]{"soccer club", "Premier League"});
+		List<String> posExamples = Arrays.asList(new String[]{
+				"http://dbpedia.org/resource/Arsenal_F.C.",
+				"http://dbpedia.org/resource/Chelsea_F.C."});
+		List<String> negExamples = Arrays.asList(new String[]{});
+
+		QuestionBasedStatementFilter stmtFilter = new QuestionBasedStatementFilter(new HashSet<String>(relevantWords));
+		qtl.addStatementFilter(stmtFilter);
+
+		QuestionBasedQueryTreeFilterAggressive treeFilter = new QuestionBasedQueryTreeFilterAggressive(new HashSet<String>(relevantWords));
+		qtl.addQueryTreeFilter(treeFilter);
+
+
+
+
+		String suggestion = qtl.getQuestion(posExamples, negExamples);
+		System.out.println(suggestion);
+	}
+
+}

Property changes on: trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/QTLTest.java
___________________________________________________________________
Added: svn:mime-type
   + text/plain
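
For illustration, the configuration path added in this commit can be exercised roughly as follows. This is a minimal sketch based only on what the diff above shows (the SPARQLTemplateBasedLearner(Options) constructor, ini4j's Options with fetch()/get(), setEndpoint(), and the keys in tbsl/tbsl.properties); the file name "my-tbsl.properties" is a placeholder, and the no-argument constructor already loads the bundled tbsl/tbsl.properties from the classpath.

    import java.io.File;
    import java.io.FileReader;

    import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner;
    import org.dllearner.kb.sparql.SparqlEndpoint;
    import org.ini4j.Options;

    public class TbslConfigExample {

        public static void main(String[] args) throws Exception {
            // Load a custom properties file; "my-tbsl.properties" is a placeholder
            // following the format of the committed tbsl/tbsl.properties.
            Options options = new Options(new FileReader(new File("my-tbsl.properties")));

            // fetch() resolves ${...} references such as ${solr.server.url};
            // get() returns a raw value with an optional default, as used in init().
            String resourceIndexUrl = options.fetch("solr.resources.url");
            int maxQueryTime = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20"));
            System.out.println(resourceIndexUrl + " (query timeout: " + maxQueryTime + "s)");

            // Build the learner from the loaded options; the endpoint is still set
            // programmatically, as in the class's own main() method.
            SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(options);
            learner.setEndpoint(SparqlEndpoint.getEndpointDBpediaLiveAKSW());
        }
    }

Keeping the Solr core URLs, SPARQL timeout and ranking strategy in tbsl/tbsl.properties replaces the hard-coded SOLR_SERVER_URL and TOP_K constants removed from SPARQLTemplateBasedLearner in this revision, so these values can be changed without recompiling.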