From: <lor...@us...> - 2012-05-10 13:31:36
|
Revision: 3701 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3701&view=rev Author: lorenz_b Date: 2012-05-10 13:31:25 +0000 (Thu, 10 May 2012) Log Message: ----------- Added preliminary support for FILTERs. Added LGG test for Oxford dataset. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -26,7 +26,9 @@ import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import com.hp.hpl.jena.datatypes.RDFDatatype; import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.rdf.model.Literal; /** * @@ -129,8 +131,14 @@ String toSPARQLQueryString(boolean filtered); + Query toSPARQLQuery(); + int getTriplePatternCount(); Query toQuery(); + RDFDatatype getDatatype(); + + List<Literal> getLiterals(); + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -22,10 +22,13 @@ import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -33,15 +36,21 @@ import java.util.TreeSet; import java.util.regex.Pattern; +import javax.xml.bind.DatatypeConverter; + import org.dllearner.algorithm.qtl.datastructures.NodeRenderer; import org.dllearner.algorithm.qtl.datastructures.QueryTree; import org.dllearner.algorithm.qtl.filters.Filters; import com.hp.hpl.jena.datatypes.BaseDatatype; +import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.sparql.syntax.ElementGroup; import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; @@ -72,6 +81,8 @@ private boolean isLiteralNode = false; private boolean isResourceNode = false; + private List<Literal> literals = new ArrayList<Literal>(); + public QueryTreeImpl(N userObject) { this.userObject = userObject; @@ -80,7 +91,13 @@ edge2ChildrenMap = new HashMap<String, List<QueryTree<N>>>(); toStringRenderer = new NodeRenderer<N>() { public String render(QueryTree<N> object) { - return object.toString() + "(" + object.getId() + ")"; + String label = object.toString() + "(" + object.getId() + ")"; + if(object.isLiteralNode()){ + if(!object.getLiterals().isEmpty()){ + label += "Values: " + object.getLiterals(); + } + } + return label; } }; } @@ -672,6 +689,10 @@ return true; } + 
@Override + public Query toSPARQLQuery() { + return QueryFactory.create(toSPARQLQueryString(), Syntax.syntaxARQ); + } @Override public String toSPARQLQueryString() { @@ -680,8 +701,12 @@ } cnt = 0; StringBuilder sb = new StringBuilder(); - sb.append("SELECT ?x0 WHERE {\n"); - buildSPARQLQueryString(this, sb, false); + sb.append("SELECT DISTINCT ?x0 WHERE {\n"); + List<String> filters = new ArrayList<String>(); + buildSPARQLQueryString(this, sb, false, filters); + for(String filter : filters){ + sb.append(filter).append("\n"); + } sb.append("}"); return sb.toString(); } @@ -693,16 +718,23 @@ } cnt = 0; StringBuilder sb = new StringBuilder(); - sb.append("SELECT ?x0 WHERE {\n"); - buildSPARQLQueryString(this, sb, filtered); + List<String> filters = new ArrayList<String>(); + sb.append("SELECT DISTINCT ?x0 WHERE {\n"); + buildSPARQLQueryString(this, sb, filtered, filters); + for(String filter : filters){ + sb.append(filter).append("\n"); + } sb.append("}"); return sb.toString(); } - private void buildSPARQLQueryString(QueryTree<N> tree, StringBuilder sb, boolean filtered){ + private void buildSPARQLQueryString(QueryTree<N> tree, StringBuilder sb, boolean filtered, List<String> filters){ Object subject = null; if(tree.getUserObject().equals("?")){ subject = "?x" + cnt++; + if(tree.isLiteralNode() && !tree.getLiterals().isEmpty()){ + filters.add(getFilter(subject.toString(), tree.getLiterals())); + } } else { subject = "<" + tree.getUserObject() + ">"; } @@ -725,12 +757,61 @@ } sb.append(subject).append(" <").append(predicate).append("> ").append(object).append(".\n"); if(!objectIsResource){ - buildSPARQLQueryString(child, sb, filtered); + buildSPARQLQueryString(child, sb, filtered, filters); } } } } + private String getFilter(String varName, List<Literal> literals){ + String filter = "FILTER("; + + Literal min = getMin(literals); + filter += varName + ">=\"" + min.getLexicalForm() + "\"^^<" + min.getDatatypeURI() + ">"; + + filter += " && "; + + Literal max = 
getMax(literals); + filter += varName + "<=\"" + max.getLexicalForm() + "\"^^<" + min.getDatatypeURI() + ">"; + + filter += ")"; + return filter; + } + + private Literal getMin(List<Literal> literals){ + Iterator<Literal> iter = literals.iterator(); + Literal min = iter.next(); + Literal l; + while(iter.hasNext()){ + l = iter.next(); + if(l.getDatatype() == XSDDatatype.XSDinteger){ + min = (l.getInt() < min.getInt()) ? l : min; + } else if(l.getDatatype() == XSDDatatype.XSDdouble){ + min = (l.getDouble() < min.getDouble()) ? l : min; + } else if(l.getDatatype() == XSDDatatype.XSDdate){ + min = (DatatypeConverter.parseDate(l.getLexicalForm()).compareTo(DatatypeConverter.parseDate(min.getLexicalForm())) == -1) ? l : min; + } + } + return min; + } + + private Literal getMax(List<Literal> literals){ + Iterator<Literal> iter = literals.iterator(); + Literal max = iter.next(); + Literal l; + while(iter.hasNext()){ + l = iter.next(); + if(l.getDatatype() == XSDDatatype.XSDinteger){ + max = (l.getInt() > max.getInt()) ? l : max; + } else if(l.getDatatype() == XSDDatatype.XSDdouble){ + max = (l.getDouble() > max.getDouble()) ? l : max; + } else if(l.getDatatype() == XSDDatatype.XSDdate){ + max = (DatatypeConverter.parseDate(l.getLexicalForm()).compareTo(DatatypeConverter.parseDate(max.getLexicalForm())) == 1) ? 
l : max; + } + } + return max; + } + public Query toQuery(){ Query query = QueryFactory.make(); query.setQuerySelectType(); @@ -797,5 +878,29 @@ return triples; } + public void addLiteral(Literal l){ + literals.add(l); + } + + public List<Literal> getLiterals() { + return literals; + } + + public void addLiterals(Collection<Literal> literals) { + this.literals.addAll(literals); + } + + public RDFDatatype getDatatype(){ + if(isLiteralNode){ + if(!literals.isEmpty()){ + return literals.get(0).getDatatype(); + } else { + return null; + } + } else { + throw new UnsupportedOperationException("Node ist not a literal"); + } + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -35,6 +35,8 @@ import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; import org.dllearner.algorithm.qtl.filters.ZeroFilter; +import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Property; @@ -60,6 +62,8 @@ private Selector statementSelector = new SimpleSelector(); private com.hp.hpl.jena.util.iterator.Filter<Statement> keepFilter; + private int maxDepth = 3; + public QueryTreeFactoryImpl(){ comparator = new StatementComparator(); predicateFilters = new HashSet<String>(Filters.getAllFilterProperties()); @@ -139,7 +143,8 @@ QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; @@ -160,7 
+165,8 @@ fillMap(s, model, resource2Statements); QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; @@ -178,7 +184,7 @@ resource2Statements.put(st.getSubject().toString(), statements); } statements.add(st); - if(st.getObject().isURIResource() && !resource2Statements.containsKey(st.getObject().asResource().getURI())){ + if((st.getObject().isResource()) && !resource2Statements.containsKey(st.getObject().toString())){ fillMap(st.getObject().asResource(), model, resource2Statements); } } @@ -201,54 +207,73 @@ statements.add(st); } QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; } - private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements){ - tree.setId(nodeId++); - if(resource2Statements.containsKey(tree.getUserObject())){ - QueryTreeImpl<String> subTree; - Property predicate; - RDFNode object; - for(Statement st : resource2Statements.get(tree.getUserObject())){ - predicate = st.getPredicate(); - object = st.getObject(); - if(!predicateFilter.isRelevantResource(predicate.getURI())){ - continue; - } - if(predicateFilters.contains(st.getPredicate().toString())){ - continue; - } - if(object.isLiteral()){ - Literal lit = st.getLiteral(); - String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); - StringBuilder sb = new StringBuilder(); - sb.append("\"").append(escapedLit).append("\""); - if(lit.getDatatypeURI() != null){ - sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements, int depth){ + depth++; + tree.setId(nodeId++); + if(resource2Statements.containsKey(tree.getUserObject())){ 
+ QueryTreeImpl<String> subTree; + Property predicate; + RDFNode object; + for(Statement st : resource2Statements.get(tree.getUserObject())){ + predicate = st.getPredicate(); + object = st.getObject(); + if(!predicateFilter.isRelevantResource(predicate.getURI())){ + continue; } - if(!lit.getLanguage().isEmpty()){ - sb.append("@").append(lit.getLanguage()); + if(predicateFilters.contains(st.getPredicate().toString())){ + continue; } - subTree = new QueryTreeImpl<String>(sb.toString()); -// subTree = new QueryTreeImpl<String>(lit.toString()); - subTree.setId(nodeId++); - subTree.setLiteralNode(true); - tree.addChild(subTree, st.getPredicate().toString()); - } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ - if(tree.getUserObjectPathToRoot().size() < 3 && - !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ - subTree = new QueryTreeImpl<String>(st.getObject().toString()); - subTree.setResourceNode(true); + if(object.isLiteral()){ + Literal lit = st.getLiteral(); + String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); + StringBuilder sb = new StringBuilder(); + sb.append("\"").append(escapedLit).append("\""); + if(lit.getDatatypeURI() != null){ + sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + } + if(!lit.getLanguage().isEmpty()){ + sb.append("@").append(lit.getLanguage()); + } + subTree = new QueryTreeImpl<String>(sb.toString()); +// subTree = new QueryTreeImpl<String>(lit.toString()); + subTree.setId(nodeId++); + subTree.setLiteralNode(true); + if(lit.getDatatype() == XSDDatatype.XSDinteger || lit.getDatatype() == XSDDatatype.XSDdouble || lit.getDatatype() == XSDDatatype.XSDdate){ + subTree.addLiteral(lit); + } tree.addChild(subTree, st.getPredicate().toString()); - fillTree(subTree, resource2Statements); + } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ + if(object.asResource().isAnon()){ + System.out.println(object); + } + 
if(!tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + if(depth < maxDepth){ + fillTree(subTree, resource2Statements, depth); + } + + } + } else if(object.isAnon()){ + if(depth < maxDepth && + !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + fillTree(subTree, resource2Statements, depth); + } } } } - } + depth--; } class StatementComparator implements Comparator<Statement>{ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -29,6 +29,7 @@ import org.dllearner.algorithm.qtl.datastructures.QueryTree; import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import com.hp.hpl.jena.datatypes.RDFDatatype; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -145,6 +146,15 @@ lgg.setUserObject((N)"?"); } + if(tree1.isLiteralNode() && tree2.isLiteralNode()){ + RDFDatatype d1 = tree1.getDatatype(); + RDFDatatype d2 = tree2.getDatatype(); + if(d1 != null && d2 != null && d1 == d2){ + ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree1).getLiterals()); + ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree2).getLiterals()); + } + } + Set<QueryTreeImpl<N>> addedChildren; QueryTreeImpl<N> lggChild; for(Object edge : new TreeSet<Object>(tree1.getEdges())){ Modified: 
trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -19,7 +19,11 @@ */ package org.dllearner.algorithm.qtl; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -39,14 +43,23 @@ import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.junit.Assert; import org.junit.Test; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.sparql.util.ModelUtils; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; @@ -61,6 +74,64 @@ private static final Logger logger = Logger.getLogger(LGGTest.class); +// @Test + public void testOxfordData(){ + Model model = ModelFactory.createOntologyModel(); + int depth = 3; + try { + model.read(new FileInputStream(new 
File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); + System.out.println(model.size()); + model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/finders.ttl")), "http://diadem.cs.ox.ac.uk/ontologies/real-estate#", "TURTLE"); + System.out.println(model.size()); +// model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/martinco.ttl")), null, "TURTLE"); +// System.out.println(model.size()); +// model.write(new FileOutputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/all.ttl")), "TURTLE", null); +// model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/all.ttl")), null, "TURTLE"); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + +// for(Statement s : model.listStatements().toList()){ +// System.out.println(s); +// } +// +// ResultSet rs1 = QueryExecutionFactory.create("SELECT * WHERE {?s <http://diadem.cs.ox.ac.uk/ontologies/real-estate#rooms> ?o. 
?o ?p ?o1}", model).execSelect(); +// System.out.println(ResultSetFormatter.asText(rs1)); + + ConciseBoundedDescriptionGenerator cbd = new ConciseBoundedDescriptionGeneratorImpl(model); + QueryTreeFactory<String> qtf = new QueryTreeFactoryImpl(); + + List<String> posExamples = Arrays.asList("http://diadem.cs.ox.ac.uk/ontologies/real-estate#inst004", + "http://diadem.cs.ox.ac.uk/ontologies/real-estate#inst005"); + + List<QueryTree<String>> trees = new ArrayList<QueryTree<String>>(); + + //get the trees for the positive examples of depth 3 + QueryTree<String> tree; + for(String ex : posExamples){ + tree = qtf.getQueryTree(ex, cbd.getConciseBoundedDescription(ex, depth)); + trees.add(tree); + System.out.println(tree.getStringRepresentation()); + } + + //compute the LGG + LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>(); + QueryTree<String> lgg = lggGen.getLGG(trees); + System.out.println("LGG:\n" + lgg.getStringRepresentation()); + Query q = lgg.toSPARQLQuery(); + System.out.println("Query:\n" + q); + + //run the SPARQL query against the data - should return at least the positive examples + List<String> result = new ArrayList<String>(); + ResultSet rs = QueryExecutionFactory.create(q, model).execSelect(); + while(rs.hasNext()){ + result.add(rs.next().getResource("x0").getURI()); + } + System.out.println(result); + Assert.assertTrue(result.containsAll(posExamples)); + + } + @Test public void testLGGWithDBpediaExample(){ QueryTreeFactory<String> factory = new QueryTreeFactoryImpl(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |