From: <lor...@us...> - 2011-01-13 16:09:27
|
Revision: 2611 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2611&view=rev Author: lorenz_b Date: 2011-01-13 16:09:20 +0000 (Thu, 13 Jan 2011) Log Message: ----------- Continued optimized NBR algorithm. Modified Paths: -------------- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/NBR.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/PostLGG.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/SPARQLEndpointEx.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java Added Paths: ----------- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/QueryTreeConverter.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/TreeHelper.java Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/NBR.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/NBR.java 2011-01-12 15:26:27 UTC (rev 2610) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/NBR.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -17,17 +17,18 @@ import org.apache.log4j.Logger; import org.dllearner.autosparql.client.model.Example; import org.dllearner.autosparql.server.QueryTreeChange.ChangeType; +import org.dllearner.autosparql.server.util.QueryTreeConverter; import org.dllearner.autosparql.server.util.SPARQLEndpointEx; +import org.dllearner.autosparql.server.util.TreeHelper; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.sparqlquerygenerator.QueryTreeFactory; +import org.dllearner.sparqlquerygenerator.cache.ModelCache; +import org.dllearner.sparqlquerygenerator.cache.QueryTreeCache; import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; -import org.dllearner.sparqlquerygenerator.impl.QueryTreeFactoryImpl; import org.dllearner.sparqlquerygenerator.util.Filter; import org.dllearner.sparqlquerygenerator.util.ModelGenerator; -import org.dllearner.sparqlquerygenerator.util.ModelGenerator.Strategy; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSetRewindable; @@ -36,9 +37,11 @@ public class NBR<N> { private ExtractionDBCache cache; - private SparqlEndpoint endpoint; - private QueryTreeFactory<String> treeFactory; + private SPARQLEndpointEx endpoint; private ModelGenerator modelGen; + private ModelCache modelCache; + private QueryTreeCache treeCache; + private String query; private int limit; @@ -46,16 +49,19 @@ private QueryTree<N> lgg; private QueryTree<N> postLGG; + + private LastQueryTreeChangeComparator comparator = new LastQueryTreeChangeComparator(); private static final Logger logger = Logger.getLogger(NBR.class); - public NBR(SparqlEndpoint endpoint, ExtractionDBCache cache){ + public NBR(SPARQLEndpointEx endpoint, ExtractionDBCache cache){ this.endpoint = endpoint; this.cache = cache; modelGen = new ModelGenerator(endpoint, new HashSet<String>(((SPARQLEndpointEx)endpoint).getPredicateFilters()), cache); - treeFactory = new QueryTreeFactoryImpl(); + modelCache = new ModelCache(modelGen); + treeCache = new QueryTreeCache(); } public Example makeNBR(List<String> resources, QueryTree<N> lgg, List<QueryTree<N>> negTrees){ @@ -288,13 +294,13 @@ this.lgg = lgg; logger.info("Computing next question..."); postLGG = getFilteredTree(lgg); - PostLGG<N> postGen = new PostLGG<N>(); + PostLGG<N> postGen = new PostLGG<N>((SPARQLEndpointEx) endpoint); postGen.simplifyTree(postLGG, negTrees); // logger.debug("Starting generalisation with tree:\n" + postLGG.getStringRepresentation()); limit = knownResources.size(); List<GeneralisedQueryTree<N>> queue = getAllowedGeneralisations(new GeneralisedQueryTree<N>(postLGG)); logger.debug(getQueueLogInfo(queue)); -// logger.debug("New: " + getAllowedGeneralisationsSortedByMatrix(new GeneralisedQueryTree<N>(postLGG))); + logger.debug("New: " + getAllowedGeneralisationsSortedByMatrix(new GeneralisedQueryTree<N>(postLGG), negTrees)); GeneralisedQueryTree<N> tree1; QueryTree<N> tree2; @@ -307,11 +313,13 @@ logger.debug("Selecting first tree from queue"); tree1 = queue.remove(0); tmp = tree1; + if(logger.isDebugEnabled()){ logger.debug("Changes: " + tmp.getChanges()); } queryTree = tmp.getQueryTree(); boolean coversNegTree = coversNegativeTree(tmp.getQueryTree(), negTrees); + neededGeneralisations.add(tmp.getQueryTree()); logger.debug("covers negative tree: " + coversNegTree); while(!coversNegTree){ gens = getAllowedGeneralisationsSorted(tmp); @@ -368,8 +376,8 @@ } private QueryTree<N> getQueryTree(String resource){ - Model model = modelGen.createModel(resource, Strategy.CHUNKS, 2); - QueryTree<String> tree = treeFactory.getQueryTree(resource, model); + Model model = modelCache.getModel(resource); + QueryTree<String> tree = treeCache.getQueryTree(resource, model); return getFilteredTree((QueryTree<N>) tree); } @@ -465,23 +473,64 @@ return nodes; } - private List<QueryTreeChange> getAllowedGeneralisationsSortedByMatrix(GeneralisedQueryTree<N> tree){ - List<QueryTreeChange> changes = new ArrayList<QueryTreeChange>(); + private List<GeneralisedQueryTree<N>> getAllowedGeneralisationsSortedByMatrix(GeneralisedQueryTree<N> tree, List<QueryTree<N>> negTrees){ + Map<QueryTree<N>, List<Integer>> matrix = createMatrix(tree.getQueryTree(), negTrees); + List<GeneralisedQueryTree<N>> gens = new ArrayList<GeneralisedQueryTree<N>>(); + System.err.println(TreeHelper.getAbbreviatedTreeRepresentation(tree.getQueryTree(), endpoint.getBaseURI(), endpoint.getPrefixes())); + System.err.println(TreeHelper.getAbbreviatedTreeRepresentation(negTrees.get(0), endpoint.getBaseURI(), endpoint.getPrefixes())); + + Map<GeneralisedQueryTree<N>, Integer> genTree2Sum = new HashMap<GeneralisedQueryTree<N>, Integer>(); + + QueryTree<N> queryTree = tree.getQueryTree(); QueryTreeChange lastChange = tree.getLastChange(); + List<QueryTreeChange> changes = tree.getChanges(); + GeneralisedQueryTree<N> genTree; + N label; + Object edge; + QueryTree<N> parent; + boolean isLiteralNode; for(QueryTree<N> node : getPossibleNodes2Change(tree.getQueryTree())){ + label = node.getUserObject(); + parent = node.getParent(); + isLiteralNode = node.isLiteralNode(); + edge = parent.getEdge(node); if(lastChange.getType() == ChangeType.REMOVE_NODE){ if(node.getUserObject().equals("?") && node.getId() < lastChange.getNodeId()){ - changes.add(new QueryTreeChange(node.getId(), ChangeType.REMOVE_NODE)); + int pos = parent.removeChild((QueryTreeImpl<N>) node); + genTree = new GeneralisedQueryTree<N>(new QueryTreeImpl<N>(queryTree)); + genTree.addChanges(changes); + genTree.addChange(new QueryTreeChange(node.getId(), ChangeType.REMOVE_NODE)); + genTree2Sum.put(genTree, sum(matrix.get(node))); + parent.addChild((QueryTreeImpl<N>) node, edge, pos); } } else { if(node.getUserObject().equals("?")){ - changes.add(new QueryTreeChange(node.getId(), ChangeType.REMOVE_NODE)); + int pos = parent.removeChild((QueryTreeImpl<N>) node); + genTree = new GeneralisedQueryTree<N>(new QueryTreeImpl<N>(queryTree)); + genTree.addChanges(changes); + genTree.addChange(new QueryTreeChange(node.getId(), ChangeType.REMOVE_NODE)); + genTree2Sum.put(genTree, sum(matrix.get(node))); + parent.addChild((QueryTreeImpl<N>) node, edge, pos); } else if(lastChange.getNodeId() < node.getId()){ - changes.add(new QueryTreeChange(node.getId(), ChangeType.REPLACE_LABEL)); + node.setUserObject((N) "?"); + node.setVarNode(true); + genTree = new GeneralisedQueryTree<N>(new QueryTreeImpl<N>(queryTree)); + genTree.addChanges(changes); + genTree.addChange(new QueryTreeChange(node.getId(), ChangeType.REPLACE_LABEL)); + genTree2Sum.put(genTree, sum(matrix.get(node))); + node.setUserObject(label); + node.setLiteralNode(isLiteralNode); + node.setResourceNode(!isLiteralNode); } } } - return changes; + List<Entry<GeneralisedQueryTree<N>, Integer>> entries = new ArrayList<Entry<GeneralisedQueryTree<N>,Integer>>(genTree2Sum.entrySet()); + Collections.sort(entries, new NegativeTreeOccurenceComparator()); + for(Entry<GeneralisedQueryTree<N>, Integer> e : entries){ + gens.add(e.getKey()); + System.out.println(e.getKey().getChanges()); + } + return gens; } private List<GeneralisedQueryTree<N>> getAllowedGeneralisationsSorted(GeneralisedQueryTree<N> tree){ @@ -597,6 +646,7 @@ SortedSet<String> resources = new TreeSet<String>(); query = tree.toSPARQLQueryString(); +// logger.info(QueryTreeConverter.getSPARQLQuery(tree, endpoint.getBaseURI(), endpoint.getPrefixes())); query = getDistinctQuery(query); if(logger.isDebugEnabled()){ logger.debug("Testing query\n" + getLimitedQuery(query, limit, offset)); @@ -617,7 +667,7 @@ private String getNewResource(QueryTree<N> tree, List<String> knownResources){ int i = 0; int chunkSize = 10; - SortedSet<String> foundResources = getResources(tree, 10, chunkSize * i); + SortedSet<String> foundResources = getResources(tree, chunkSize, chunkSize * i); foundResources.removeAll(knownResources); QueryTree<N> newTree; while(!foundResources.isEmpty()){ @@ -629,7 +679,7 @@ } } i++; - foundResources = getResources(tree, 10, chunkSize * i); + foundResources = getResources(tree, chunkSize, chunkSize * i); foundResources.removeAll(knownResources); } logger.debug("Found no resource which would modify the LGG"); @@ -864,5 +914,35 @@ } } + + class NegativeTreeOccurenceComparator implements Comparator<Entry<GeneralisedQueryTree<N>,Integer>>{ + @Override + public int compare(Entry<GeneralisedQueryTree<N>, Integer> entry1, + Entry<GeneralisedQueryTree<N>, Integer> entry2) { + int sum1 = entry1.getValue(); + int sum2 = entry2.getValue(); + if(sum1 == sum2){ + QueryTreeChange change1 = entry1.getKey().getLastChange(); + QueryTreeChange change2 = entry2.getKey().getLastChange(); + if(change1.getType()==ChangeType.REPLACE_LABEL){ + if(change2.getType()==ChangeType.REPLACE_LABEL){ + return change1.getNodeId() - change2.getNodeId(); + } else { + return 1; + } + } else { + if(change2.getType()==ChangeType.REPLACE_LABEL){ + return 1; + } else { + return change2.getNodeId() - change1.getNodeId(); + } + } + } else { + return sum2-sum1; + } + } + + } + } Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/PostLGG.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/PostLGG.java 2011-01-12 15:26:27 UTC (rev 2610) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/PostLGG.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -3,8 +3,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import org.apache.log4j.Logger; +import org.dllearner.autosparql.server.util.SPARQLEndpointEx; +import org.dllearner.autosparql.server.util.TreeHelper; import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; @@ -12,11 +15,17 @@ private static final Logger logger = Logger.getLogger(PostLGG.class); + private SPARQLEndpointEx endpoint; + + public PostLGG(SPARQLEndpointEx endpoint){ + this.endpoint = endpoint; + } + public void simplifyTree(QueryTree<N> tree, List<QueryTree<N>> negTrees){ if(logger.isDebugEnabled()){ logger.debug("Making post LGG simplification"); - logger.debug("LGG:\n" + tree.getStringRepresentation()); + logger.debug("LGG:\n" + TreeHelper.getAbbreviatedTreeRepresentation(tree, endpoint.getBaseURI(), endpoint.getPrefixes())); // int i = 1; // for(QueryTree<N> negTree : negTrees){ // logger.debug("Neg tree (" + i++ + "/" + negTrees.size() +"):\n" + negTree.getStringRepresentation()); @@ -48,7 +57,7 @@ } } if(logger.isDebugEnabled()){ - logger.debug("Pruned tree:\n" + tree.getStringRepresentation()); + logger.debug("Pruned tree:\n" + TreeHelper.getAbbreviatedTreeRepresentation(tree, endpoint.getBaseURI(), endpoint.getPrefixes())); } } Added: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/QueryTreeConverter.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/QueryTreeConverter.java (rev 0) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/QueryTreeConverter.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -0,0 +1,92 @@ +package org.dllearner.autosparql.server.util; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; + +public class QueryTreeConverter { + + private static int cnt; + + public static String getSPARQLQuery(QueryTree tree, String baseURI, Map<String, String> prefixes){ + if(tree.getChildCount() == 0){ + return "SELECT ?x0 WHERE {?x0 ?y ?z.}"; + } + cnt = 0; + Map<String, String> usedPrefixes = new HashMap<String, String>(); + StringBuilder sb = new StringBuilder(); + sb.append("SELECT ?x0 WHERE {\n"); + buildSPARQLQueryString(tree, sb, prefixes, usedPrefixes); + sb.append("}"); + + return sb.toString(); + } + + private static void buildSPARQLQueryString(QueryTree tree, StringBuilder sb, + Map<String, String> prefixes, Map<String, String> usedPrefixes){ + String subject = (String) tree.getUserObject(); + if(tree.getUserObject().equals("?")){ + subject = "?x" + cnt++; + } else { + boolean replaced = false; + for(Entry<String, String> entry : prefixes.entrySet()){ + if(subject.startsWith(entry.getValue())){ + replaced = true; + subject = entry.getKey() + ":" + subject.replace(entry.getValue(), ""); + usedPrefixes.put(entry.getKey(), entry.getValue()); + break; + } + } + if(!replaced){ + subject = "<" + subject + ">"; + } + } + String predicate; + String object; + if(!tree.isLeaf()){ + for(Object child : tree.getChildren()){ + predicate = (String) tree.getEdge((QueryTree) child); + + boolean replaced = false; + for(Entry<String, String> entry : prefixes.entrySet()){ + if(predicate.startsWith(entry.getValue())){ + replaced = true; + predicate = entry.getKey() + ":" + predicate.replace(entry.getValue(), ""); + usedPrefixes.put(entry.getKey(), entry.getValue()); + break; + } + } + if(!replaced){ + predicate = "<" + predicate + ">"; + } + + object = (String) ((QueryTree) child).getUserObject(); + boolean objectIsResource = !object.equals("?"); + if(!objectIsResource){ + object = "?x" + cnt; + } else if(((String)object).startsWith("http://")){ + replaced = false; + for(Entry<String, String> entry : prefixes.entrySet()){ + if(object.startsWith(entry.getValue())){ + replaced = true; + object = entry.getKey() + ":" + object.replace(entry.getValue(), ""); + usedPrefixes.put(entry.getKey(), entry.getValue()); + break; + } + } + if(!replaced){ + object = "<" + object + ">"; + } + + } + sb.append(subject).append(" ").append(predicate).append(" ").append(object).append(".\n"); + if(!objectIsResource){ + buildSPARQLQueryString((QueryTree) child, sb, prefixes, usedPrefixes); + } + } + } + } + +} Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/SPARQLEndpointEx.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/SPARQLEndpointEx.java 2011-01-12 15:26:27 UTC (rev 2610) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/SPARQLEndpointEx.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -2,6 +2,7 @@ import java.net.URL; import java.util.List; +import java.util.Map; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -9,6 +10,8 @@ private String label; private List<String> predicateFilters; private String prefix; + private String baseURI; + private Map<String, String> prefixes; public SPARQLEndpointEx(URL u, List<String> defaultGraphURIs, List<String> namedGraphURIs, String label, String prefix, List<String> predicateFilters) { @@ -19,6 +22,24 @@ this.predicateFilters = predicateFilters; } + public SPARQLEndpointEx(URL u, List<String> defaultGraphURIs, + List<String> namedGraphURIs, String label, String baseURI, Map<String, String> prefixes, List<String> predicateFilters) { + super(u, defaultGraphURIs, namedGraphURIs); + + this.label = label; + this.baseURI = baseURI; + this.prefixes = prefixes; + this.predicateFilters = predicateFilters; + } + + public SPARQLEndpointEx(SparqlEndpoint endpoint, String label, String prefix, List<String> predicateFilters) { + super(endpoint.getURL(), endpoint.getDefaultGraphURIs(), endpoint.getNamedGraphURIs()); + + this.label = label; + this.prefix = prefix; + this.predicateFilters = predicateFilters; + } + public String getLabel(){ return label; } @@ -27,6 +48,14 @@ return prefix; } + public String getBaseURI(){ + return baseURI; + } + + public Map<String, String> getPrefixes(){ + return prefixes; + } + public List<String> getPredicateFilters(){ return predicateFilters; } Added: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/TreeHelper.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/TreeHelper.java (rev 0) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/util/TreeHelper.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -0,0 +1,23 @@ +package org.dllearner.autosparql.server.util; + +import java.util.Map; +import java.util.Map.Entry; + +import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; + +public class TreeHelper { + + public static String getAbbreviatedTreeRepresentation(QueryTree tree, + String baseURI, Map<String, String> prefixes) { + String treeString = tree.getStringRepresentation(); + if (baseURI != null) { + treeString = treeString.replace(baseURI, ""); + } + if(prefixes != null){ + for (Entry<String, String> prefix : prefixes.entrySet()) { + treeString = treeString.replace(prefix.getValue(), prefix.getKey()+ ":"); + } + } + return treeString; + } +} Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-01-12 15:26:27 UTC (rev 2610) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-01-13 16:09:20 UTC (rev 2611) @@ -20,13 +20,14 @@ package org.dllearner.sparqlquerygenerator.impl; import java.util.Comparator; +import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.dllearner.sparqlquerygenerator.QueryTreeFactory; import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; @@ -46,9 +47,11 @@ private int nodeId; private Comparator<Statement> comparator; + private Set<String> predicateFilters; public QueryTreeFactoryImpl(){ comparator = new StatementComparator(); + predicateFilters = new HashSet<String>(Filter.getAllFilterProperties()); } @Override @@ -94,7 +97,7 @@ if(resource2Statements.containsKey(tree.getUserObject())){ QueryTreeImpl<String> subTree; for(Statement st : resource2Statements.get(tree.getUserObject())){ - if(Filter.getAllFilterProperties().contains(st.getPredicate().toString())){ + if(predicateFilters.contains(st.getPredicate().toString())){ continue; } if(st.getObject().isLiteral()){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |