From: <ku...@us...> - 2008-08-15 14:51:00
|
Revision: 1092 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1092&view=rev Author: kurzum Date: 2008-08-15 14:50:53 +0000 (Fri, 15 Aug 2008) Log Message: ----------- code improvements Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryMaker.java trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java trunk/src/dl-learner/org/dllearner/kb/old/ Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -89,6 +89,16 @@ @Override public void expandProperties(TupelAquisitor tupelAquisitor, Manipulator manipulator) { } + + @Override + public List<Node> getAllNodesAsList(List<Node> l){ + l.add(this); + for (PropertyNode props : properties) { + l.addAll(props.getB().getAllNodesAsList(l)); + } + + return l; + } /* * (non-Javadoc) Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -30,15 +30,13 @@ */ public class Configuration { - //public SPARQLTasks sparqlTasks; - //private SparqlEndpoint endpoint; - //private SparqlQueryMaker sparqlQueryMaker; private Manipulator manipulator; private TupelAquisitor tupelAquisitor; // the following needs to be moved to // class extraction algorithm or manipulator + private boolean optimizeForDLLearner = true; private int recursiondepth; private boolean getAllSuperClasses = true; private boolean closeAfterRecursion = true; @@ -93,15 +91,12 @@ return tupelAquisitor; } - /* - public Configuration changeQueryType(SparqlQueryMaker sqm) { - // TODO must clone here - return new Configuration(this.endpoint, sqm, this.manipulator, - this.recursiondepth, this.getAllSuperClasses, - this.closeAfterRecursion, this.cacheDir); - }*/ + public boolean isOptimizeForDLLearner() { + return optimizeForDLLearner; + } + Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -27,6 +27,7 @@ import org.apache.log4j.Logger; import org.dllearner.kb.aquisitors.TupelAquisitor; +import org.dllearner.utilities.statistics.SimpleClock; /** * This class is used to extract the information . @@ -36,19 +37,14 @@ public class ExtractionAlgorithm { private Configuration configuration; - //private Manipulators manipulator; - //private int recursionDepth = 1; - // private boolean getAllSuperClasses = true; - // private boolean closeAfterRecursion = true; + private SortedSet<String> alreadyQueriedSuperClasses = new TreeSet<String>(); + + private static Logger logger = Logger .getLogger(ExtractionAlgorithm.class); public ExtractionAlgorithm(Configuration Configuration) { this.configuration = Configuration; - //this.manipulator = Configuration.getManipulator(); - //this.recursionDepth = Configuration.getRecursiondepth(); - // this.getAllSuperClasses = Configuration.isGetAllSuperClasses(); - // this.closeAfterRecursion=Configuration.isCloseAfterRecursion(); } public Node getFirstNode(URI u) { @@ -73,115 +69,106 @@ * @return */ public Node expandNode(URI uri, TupelAquisitor tupelAquisitor) { - //System.out.println(uri.toString()); - //System.out.println(manipulator); - //System.out.println(this.configuration); - long time = System.currentTimeMillis(); + + SimpleClock sc = new SimpleClock(); - Node n = getFirstNode(uri); - logger.info(n); - List<Node> initialNodes = new ArrayList<Node>(); - initialNodes.add(n); - logger.info("StartVector: " + initialNodes); - // n.expand(tsp, this.Manipulator); - // Vector<Node> second= - for (int x = 1; x <= configuration.getRecursiondepth(); x++) { + Node seedNode = getFirstNode(uri); + List<Node> newNodes = new ArrayList<Node>(); + List<Node> collectNodes = new ArrayList<Node>(); + List<Node> tmp = new ArrayList<Node>(); + + + logger.info(seedNode); + newNodes.add(seedNode); + logger.info("Starting Nodes: " + newNodes); - List<Node> tmp = new ArrayList<Node>(); - while (!initialNodes.isEmpty()) { - Node tmpNode = initialNodes.remove(0); - logger.info("Expanding " + tmpNode); - // System.out.println(this.Manipulator); + + for (int x = 0; x < configuration.getRecursiondepth(); x++) { + + sc.reset(); + while (!newNodes.isEmpty()) { + Node nextNode = newNodes.remove(0); + logger.info("Expanding " + nextNode); // these are the new not expanded nodes // the others are saved in connection with the original node - List<Node> tmpNodeList = tmpNode.expand(tupelAquisitor, - configuration.getManipulator()); + tmp.addAll(nextNode.expand(tupelAquisitor, + configuration.getManipulator())); //System.out.println(tmpVec); - tmp.addAll(tmpNodeList); + } - //CAVE: possible error here - initialNodes = tmp; - logger.info("Recursion counter: " + x + " with " + initialNodes.size() - + " Nodes remaining, needed: " - + (System.currentTimeMillis() - time) + "ms"); - time = System.currentTimeMillis(); + collectNodes.addAll(tmp); + newNodes.addAll(tmp); + tmp.clear(); + + logger.info("Recursion counter: " + x + " with " + newNodes.size() + + " Nodes remaining, " + sc.getAndSet("")); } - SortedSet<String> hadAlready = new TreeSet<String>(); - - //p(configuration.toString()); // gets All Class Nodes and expands them further if (configuration.isGetAllSuperClasses()) { - logger.info("Get all superclasses"); - // Set<Node> classes = new TreeSet<Node>(); - List<Node> classes = new ArrayList<Node>(); - List<Node> instances = new ArrayList<Node>(); + expandAllSuperClassesOfANode(collectNodes, tupelAquisitor); + } + + return seedNode; - for (Node one : initialNodes) { - if (one instanceof ClassNode) { - classes.add(one); - } - if (one instanceof InstanceNode) { - instances.add(one); - } + } + + private void expandAllSuperClassesOfANode(List<Node> allNodes, TupelAquisitor tupelAquisitor) { + logger.info("Get all superclasses"); + + + List<Node> classes = new ArrayList<Node>(); + List<Node> instances = new ArrayList<Node>(); + for (Node one : allNodes) { + if (one instanceof ClassNode) { + classes.add(one); } - // System.out.println(instances.size()); - //TODO LinkedData incompatibility - //TupelAquisitor tupelAquisitorClasses = configuration.sparqlTupelAquisitorClasses; - //XXX this should be solved in a better way - tupelAquisitor.setClassMode(true); - if (configuration.isCloseAfterRecursion()) { - while (!instances.isEmpty()) { - logger.trace("Getting classes for remaining instances: " - + instances.size()); - Node next = instances.remove(0); - logger.trace("Getting classes for: " + next); - classes.addAll(next.expand(tupelAquisitor, configuration.getManipulator())); - if (classes.size() >= configuration.getBreakSuperClassesAfter()) { - break; - } - } + if (one instanceof InstanceNode) { + instances.add(one); } - //XXX this should be solved in a better way - tupelAquisitor.setClassMode(false); - - List<Node> tmp = new ArrayList<Node>(); - int i = 0; - while (!classes.isEmpty()) { - logger.trace("Remaining classes: " + classes.size()); - // Iterator<Node> it=classes.iterator(); - // Node next =(Node) it.next(); - // classes.remove(next); - Node next = classes.remove(0); - if (!hadAlready.contains(next.getURI().toString())) { - logger.trace("Getting SuperClass for: " + next); - // System.out.println(hadAlready.size()); - hadAlready.add(next.getURI().toString()); - tmp = next.expand(tupelAquisitor, configuration.getManipulator()); - classes.addAll(tmp); - tmp = new ArrayList<Node>(); - // if(i % 50==0)System.out.println("got "+i+" extra classes, - // max: "+manipulator.breakSuperClassRetrievalAfter); - i++; - if (i >= configuration.getBreakSuperClassesAfter()) { - break; - } - } - // System.out.println("Skipping"); + } + + //TODO LinkedData incompatibility + + tupelAquisitor.setClassMode(true); + if (configuration.isCloseAfterRecursion()) { + while (!instances.isEmpty()) { + logger.trace("Getting classes for remaining instances: " + + instances.size()); + Node next = instances.remove(0); + logger.trace("Getting classes for: " + next); + classes.addAll(next.expand(tupelAquisitor, configuration.getManipulator())); + if (classes.size() >= configuration.getBreakSuperClassesAfter()) { + break; + }//endif + }//endwhile + }//endif + tupelAquisitor.setClassMode(false); + + + + int i = 0; + while (!classes.isEmpty()) { + logger.trace("Remaining classes: " + classes.size()); + Node next = classes.remove(0); + if (!alreadyQueriedSuperClasses.contains(next.getURI().toString())) { + logger.trace("Getting Superclasses for: " + next); + alreadyQueriedSuperClasses.add(next.getURI().toString()); + classes.addAll(next.expand(tupelAquisitor, configuration.getManipulator())); + + if (i > configuration.getBreakSuperClassesAfter()) { + break; + }//endinnerif + i++; + }//endouterif - // if - // (classes.size()>=manipulator.breakSuperClassRetrievalAfter){break;} - - } - // System.out.println((System.currentTimeMillis()-time)+""); - + }//endwhile + if(!configuration.isOptimizeForDLLearner()){ + alreadyQueriedSuperClasses.clear(); } - return n; } - - } Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -25,6 +25,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Logger; import org.dllearner.kb.aquisitors.TupelAquisitor; import org.dllearner.kb.manipulator.Manipulator; import org.dllearner.utilities.datastructures.RDFNodeTuple; @@ -37,6 +38,9 @@ * */ public class InstanceNode extends Node { + + private static Logger logger = Logger + .getLogger(InstanceNode.class); private SortedSet<ClassNode> classes = new TreeSet<ClassNode>(); //SortedSet<StringTuple> datatypes = new TreeSet<StringTuple>(); @@ -88,6 +92,20 @@ return newNodes; } + + @Override + public List<Node> getAllNodesAsList(List<Node> l){ + l.add(this); + logger.trace(this+"\nclasses: "+classes.size()+"\nrelInstances: "+properties.size()); + for (ClassNode clazz : classes) { + l.addAll(clazz.getAllNodesAsList(l)); + } + for (PropertyNode props : properties) { + l.addAll(props.getB().getAllNodesAsList(l)); + } + + return l; + } // gets the types for properties recursively @Override Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -102,47 +102,9 @@ return nt.toString(); } -/* public void addPredicateFilter(String str) { - this.configuration.getSparqlQueryMaker().addPredicateFilter(str); - } -*/ public Configuration getConfiguration() { return configuration; } - /* - * public void calculateSubjects(String label, int limit) { - * System.out.println("SparqlModul: Collecting Subjects"); - * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { - * subjects = oc.getSubjectsFromLabel(label, limit); } catch (IOException e) { - * subjects = new String[1]; subjects[0] = "[Error]Sparql Endpoint could not - * be reached."; } System.out.println("SparqlModul: ****Finished"); } - * - * /** TODO SparqlOntologyCollector needs to be removed @param subject - */ - /* - * public void calculateTriples(String subject) { - * System.out.println("SparqlModul: Collecting Triples"); - * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { - * triples = oc.collectTriples(subject); } catch (IOException e) { triples = - * new String[1]; triples[0] = "[Error]Sparql Endpoint could not be - * reached."; } System.out.println("SparqlModul: ****Finished"); } - */ - /** - * TODO SparqlOntologyCollector needs to be removed - * - * @param concept - */ - - /* - * public void calculateConceptSubjects(String concept) { - * System.out.println("SparqlModul: Collecting Subjects"); - * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { - * conceptSubjects = oc.getSubjectsFromConcept(concept); } catch - * (IOException e) { conceptSubjects = new String[1]; conceptSubjects[0] = - * "[Error]Sparql Endpoint could not be reached."; } - * System.out.println("SparqlModul: ****Finished"); } - */ - } \ No newline at end of file Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -83,6 +83,8 @@ public URI getURI() { return uri; } + + public abstract List<Node> getAllNodesAsList(List<Node> l); public boolean equals(Node n) { if (this.uri.equals(n.uri)) Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -76,6 +76,12 @@ } } + + @Override + public List<Node> getAllNodesAsList(List<Node> l){ + throw new RuntimeException("PropertyNode.getAllNodesAsList() should never be called"); + } + public Node getA() { return a; Deleted: trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -1,207 +0,0 @@ -/** - * Copyright (C) 2007, Sebastian Hellmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.kb.manipulator; - -import java.util.ArrayList; -import java.util.List; -import java.util.SortedSet; - -import org.dllearner.kb.extraction.ClassNode; -import org.dllearner.kb.extraction.InstanceNode; -import org.dllearner.kb.extraction.Node; -import org.dllearner.kb.manipulator.Rule.Months; -import org.dllearner.utilities.datastructures.RDFNodeTuple; -import org.dllearner.utilities.owl.OWLVocabulary; - -/** - * Used to manipulate retrieved tupels, identify blanknodes, etc. - * - * @author Sebastian Hellmann - * - */ -public class Manipulator { - - private List<Rule> rules = new ArrayList<Rule>(); - - private Manipulator() { - } - - public Manipulator(List<Rule> rules) { - for (Rule rule : rules) { - addRule(rule); - } - } - - /** - * this checks for consistency and manipulates the tuples, before they get - * triple - */ - public SortedSet<RDFNodeTuple> manipulate( Node node, SortedSet<RDFNodeTuple> tuples) { - - for (Rule rule : rules) { - tuples = rule.applyRule(node, tuples); - } - return tuples; - } - - - - public static Manipulator getManipulatorByName(String predefinedManipulator) - { - if (predefinedManipulator.equalsIgnoreCase("DBPEDIA-NAVIGATOR")) { - return getDBpediaNavigatorManipulator(); - - } else if(predefinedManipulator.equalsIgnoreCase("DEFAULT")){ - return getDefaultManipulator(); - } - else { - //QUALITY maybe not the best, - return getDefaultManipulator(); - } - } - - public static Manipulator getDBpediaNavigatorManipulator(){ - Manipulator m = new Manipulator(); - m.addRule(new DBPediaNavigatorCityLocatorRule(Months.JANUARY)); - m.addRule(new DBpediaNavigatorOtherRule(Months.DECEMBER)); - return m; - } - - public static Manipulator getDefaultManipulator(){ - Manipulator m = new Manipulator(); - m.addDefaultRules(Months.DECEMBER); - return m; - } - - //HACK -// if(t.a.equals("http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag")) { -// //hackGetLabel(t.b); -// -// } - - // GovTrack hack - // => we convert a string literal to a URI - // => TODO: introduce an option for converting literals for certain - // properties into URIs -// String sp = "http://purl.org/dc/elements/1.1/subject"; -// if(t.a.equals(sp)) { -// System.out.println(t); -// System.exit(0); -// } - - - private void addDefaultRules(Months month){ - - addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_CLASS,ClassNode.class.getCanonicalName() )) ; - addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_THING,InstanceNode.class.getCanonicalName() )) ; - addRule(new TypeFilterRule(month, "", OWLVocabulary.OWL_CLASS, ClassNode.class.getCanonicalName()) ) ; - } - - public synchronized void addRule(Rule newRule){ - rules.add(newRule); - List<Rule> l = new ArrayList<Rule>(); - - for (Months month : Rule.MONTHS) { - for (Rule rule : rules) { - if(rule.month.equals(month)) { - l.add(rule); - } - } - - } - rules = l; - } - - - - /* - private RDFNodeTuple manipulateTuple(String subject, RDFNodeTuple tuple) { - - for (int i = 0; i < replacementRules.size(); i++) { - ReplacementRule replace = replacementRules.get(i); - tuple = replace.applyRule(subject, tuple); - } - return tuple; - }*/ - - /*private String hackGetLabel(String resname){ - String query="" + - "SELECT ?o \n" + - "WHERE { \n" + - "<"+resname+"> "+ " <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o " + - "}"; - - System.out.println(query); - //http://dbtune.org/musicbrainz/sparql?query= - //SELECT ?o WHERE { <http://dbtune.org/musicbrainz/resource/tag/1391> <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o } - SparqlQuery s=new SparqlQuery(query,SparqlEndpoint.EndpointMusicbrainz()); - ResultSet rs=s.send(); - while (rs.hasNext()){ - rs.nextBinding(); - } - //System.out.println("AAA"+s.getAsXMLString(s.send()) ); - return ""; - }*/ - - /*private void replacePredicate(StringTuple t) { - for (StringTuple rep : replacePredicate) { - if (rep.a.equals(t.a)) { - t.a = rep.b; - } - } -} - -private void replaceObject(StringTuple t) { - for (StringTuple rep : replaceObject) { - if (rep.a.equals(t.a)) { - t.a = rep.b; - } - } -}*/ - - - - /* - // remove <rdf:type, owl:class> - // this is done to avoid transformation to owl:subclassof - if (t.a.equals(type) && t.b.equals(classns) - && node instanceof ClassNode) { - toRemove.add(t); - } - - // all with type class - if (t.b.equals(classns) && node instanceof ClassNode) { - toRemove.add(t); - } - - // remove all instances with owl:type thing - if (t.a.equals(type) && t.b.equals(thing) - && node instanceof InstanceNode) { - toRemove.add(t); - } - - } - tuples.removeAll(toRemove); - - return tuples; -} -*/ - -} Copied: trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java (from rev 1084, trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -0,0 +1,152 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.manipulator; + +import java.util.ArrayList; +import java.util.List; +import java.util.SortedSet; + +import org.dllearner.kb.extraction.ClassNode; +import org.dllearner.kb.extraction.InstanceNode; +import org.dllearner.kb.extraction.Node; +import org.dllearner.kb.manipulator.Rule.Months; +import org.dllearner.utilities.datastructures.RDFNodeTuple; +import org.dllearner.utilities.owl.OWLVocabulary; + +/** + * Used to manipulate retrieved tupels, identify blanknodes, etc. + * + * @author Sebastian Hellmann + * + */ +public class Manipulator { + + private List<Rule> rules = new ArrayList<Rule>(); + + private Manipulator() { + } + + public Manipulator(List<Rule> rules) { + for (Rule rule : rules) { + addRule(rule); + } + } + + /** + * this checks for consistency and manipulates the tuples, before they get + * triple + */ + public SortedSet<RDFNodeTuple> manipulate( Node node, SortedSet<RDFNodeTuple> tuples) { + + for (Rule rule : rules) { + tuples = rule.applyRule(node, tuples); + } + return tuples; + } + + + + public static Manipulator getManipulatorByName(String predefinedManipulator) + { + if (predefinedManipulator.equalsIgnoreCase("DBPEDIA-NAVIGATOR")) { + return getDBpediaNavigatorManipulator(); + + } else if(predefinedManipulator.equalsIgnoreCase("DEFAULT")){ + return getDefaultManipulator(); + } + else { + //QUALITY maybe not the best, + return getDefaultManipulator(); + } + } + + public static Manipulator getDBpediaNavigatorManipulator(){ + Manipulator m = new Manipulator(); + m.addRule(new DBPediaNavigatorCityLocatorRule(Months.JANUARY)); + m.addRule(new DBpediaNavigatorOtherRule(Months.DECEMBER)); + return m; + } + + public static Manipulator getDefaultManipulator(){ + Manipulator m = new Manipulator(); + m.addDefaultRules(Months.DECEMBER); + return m; + } + + //HACK +// if(t.a.equals("http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag")) { +// //hackGetLabel(t.b); +// +// } + + // GovTrack hack + // => we convert a string literal to a URI + // => TODO: introduce an option for converting literals for certain + // properties into URIs +// String sp = "http://purl.org/dc/elements/1.1/subject"; +// if(t.a.equals(sp)) { +// System.out.println(t); +// System.exit(0); +// } + + + private void addDefaultRules(Months month){ + + addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_CLASS,ClassNode.class.getCanonicalName() )) ; + addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_THING,InstanceNode.class.getCanonicalName() )) ; + addRule(new TypeFilterRule(month, "", OWLVocabulary.OWL_CLASS, ClassNode.class.getCanonicalName()) ) ; + } + + public synchronized void addRule(Rule newRule){ + rules.add(newRule); + List<Rule> l = new ArrayList<Rule>(); + + for (Months month : Rule.MONTHS) { + for (Rule rule : rules) { + if(rule.month.equals(month)) { + l.add(rule); + } + } + + } + rules = l; + } + + + /*private String hackGetLabel(String resname){ + String query="" + + "SELECT ?o \n" + + "WHERE { \n" + + "<"+resname+"> "+ " <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o " + + "}"; + + System.out.println(query); + //http://dbtune.org/musicbrainz/sparql?query= + //SELECT ?o WHERE { <http://dbtune.org/musicbrainz/resource/tag/1391> <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o } + SparqlQuery s=new SparqlQuery(query,SparqlEndpoint.EndpointMusicbrainz()); + ResultSet rs=s.send(); + while (rs.hasNext()){ + rs.nextBinding(); + } + //System.out.println("AAA"+s.getAsXMLString(s.send()) ); + return ""; + }*/ + +} Property changes on: trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java ___________________________________________________________________ Added: svn:mergeinfo + Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryMaker.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryMaker.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryMaker.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -216,15 +216,25 @@ assembled = false; predicateFilterList.add(newFilter); } + + public void addObjectFilter(String newFilter) { + assembled = false; + objectFilterList.add(newFilter); + } + + public void combineWith(SparqlQueryMaker sqm){ + predicateFilterList.addAll(sqm.predicateFilterList); + objectFilterList.addAll(sqm.objectFilterList); + } public static SparqlQueryMaker getSparqlQueryMakerByName(String name) { if (name.equalsIgnoreCase("YAGO")) - return getYAGOFilter(); + return getAllowYAGOFilter(); else if (name.equalsIgnoreCase("SKOS")) - return getSKOSFilter(); + return getAllowSKOSFilter(); else if (name.equalsIgnoreCase("YAGOSKOS")) - return getYAGOSKOS(); + return getAllowYAGOandSKOSFilter(); else if (name.equalsIgnoreCase("YAGOSPECIALHIERARCHY")) return getYagoSpecialHierarchyFilter(); else if (name.equalsIgnoreCase("TEST")) @@ -234,160 +244,111 @@ else return null; } + + private void addFiltersForDBpediaSKOS() { + addPredicateFilter("http://www.w3.org/2004/02/skos/core"); + addObjectFilter("http://www.w3.org/2004/02/skos/core"); + addObjectFilter("http://dbpedia.org/resource/Category:"); + addObjectFilter("http://dbpedia.org/resource/Template"); + } + private void addFiltersForYago() { + addObjectFilter("http://dbpedia.org/class/yago"); + + } + private void addFiltersForOWLSameAs() { + addPredicateFilter("http://www.w3.org/2002/07/owl#sameAs"); + } + private void addFiltersForFOAF() { + addPredicateFilter("http://xmlns.com/foaf/0.1/"); + addObjectFilter("http://xmlns.com/foaf/0.1/"); + + } + + private void addFiltersForWordNet() { + addObjectFilter("http://www.w3.org/2006/03/wn/wn20/instances/synset"); + + } + private void addFiltersForGeonames() { + addObjectFilter("http://www.geonames.org"); + + } + private void addFiltersForFlickrwrappr() { + addObjectFilter("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); + + } + + private void addFiltersForDBpedia() { + addPredicateFilter("http://dbpedia.org/property/reference"); + addPredicateFilter("http://dbpedia.org/property/website"); + addPredicateFilter("http://dbpedia.org/property/wikipage"); + addPredicateFilter("http://dbpedia.org/property/wikiPageUsesTemplate"); + addPredicateFilter("http://dbpedia.org/property/relatedInstance"); + addPredicateFilter("http://dbpedia.org/property/owner"); + addPredicateFilter("http://dbpedia.org/property/standard"); + addObjectFilter("http://upload.wikimedia.org/wikipedia/commons"); + addObjectFilter("http://upload.wikimedia.org/wikipedia"); + } + + public static SparqlQueryMaker getAllowSKOSFilter() { + SparqlQueryMaker sqm = new SparqlQueryMaker("forbid", new TreeSet<String>(), new TreeSet<String>(), false); + sqm.combineWith(getAllowYAGOandSKOSFilter()); + sqm.addFiltersForYago(); + + sqm.addPredicateFilter("http://www.w3.org/2004/02/skos/core#narrower"); + sqm.addObjectFilter("http://dbpedia.org/resource/Template"); + + return sqm; + } - public static SparqlQueryMaker getYAGOFilter() { - SortedSet<String> pred = new TreeSet<String>(); - pred.add("http://www.w3.org/2004/02/skos/core"); - pred.add("http://www.w3.org/2002/07/owl#sameAs"); - pred.add("http://xmlns.com/foaf/0.1/"); - pred.add("http://dbpedia.org/property/reference"); - pred.add("http://dbpedia.org/property/website"); - pred.add("http://dbpedia.org/property/wikipage"); - pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - pred.add("http://dbpedia.org/property/relatedInstance"); - pred.add("http://dbpedia.org/property/owner"); - pred.add("http://dbpedia.org/property/standard"); - - SortedSet<String> obj = new TreeSet<String>(); - // obj.add("http://dbpedia.org/resource/Category:Wikipedia_"); - // obj.add("http://dbpedia.org/resource/Category:Articles_"); - obj.add("http://dbpedia.org/resource/Category:"); - obj.add("http://dbpedia.org/resource/Template"); - obj.add("http://xmlns.com/foaf/0.1/"); - obj.add("http://upload.wikimedia.org/wikipedia/commons"); - obj.add("http://upload.wikimedia.org/wikipedia"); - obj.add("http://www.geonames.org"); - obj.add("http://www.w3.org/2006/03/wn/wn20/instances/synset"); - obj.add("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); - obj.add("http://www.w3.org/2004/02/skos/core"); - - return new SparqlQueryMaker("forbid", obj, pred, false); + public static SparqlQueryMaker getAllowYAGOFilter() { + SparqlQueryMaker sqm = new SparqlQueryMaker("forbid", new TreeSet<String>(), new TreeSet<String>(), false); + sqm.combineWith(getAllowYAGOandSKOSFilter()); + sqm.addFiltersForDBpediaSKOS(); + return sqm; } public static SparqlQueryMaker getDBpediaNavigatorFilter() { - SortedSet<String> pred = new TreeSet<String>(); - pred.add("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); - pred.add("http://www.w3.org/2000/01/rdf-schema#subClassOf"); - pred.add("http://www.w3.org/2003/01/geo/wgs84_pos#lat"); - pred.add("http://www.w3.org/2003/01/geo/wgs84_pos#long"); + SparqlQueryMaker sqm = new SparqlQueryMaker("allow", new TreeSet<String>(), new TreeSet<String>(), false); + sqm.addPredicateFilter("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + sqm.addPredicateFilter("http://www.w3.org/2000/01/rdf-schema#subClassOf"); + sqm.addPredicateFilter("http://www.w3.org/2003/01/geo/wgs84_pos#lat"); + sqm.addPredicateFilter("http://www.w3.org/2003/01/geo/wgs84_pos#long"); // pred.add("http://dbpedia.org/property/wikipage"); // pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); // pred.add("http://dbpedia.org/property/relatedInstance"); // pred.add("http://dbpedia.org/property/owner"); // pred.add("http://dbpedia.org/property/standard"); - return new SparqlQueryMaker("allow", new TreeSet<String>(), pred, true); + return sqm; } public static SparqlQueryMaker getYagoSpecialHierarchyFilter() { - SortedSet<String> pred = new TreeSet<String>(); - pred.add("http://www.w3.org/2004/02/skos/core"); - pred.add("http://www.w3.org/2002/07/owl#sameAs"); - pred.add("http://xmlns.com/foaf/0.1/"); - - pred.add("http://dbpedia.org/property/reference"); - pred.add("http://dbpedia.org/property/website"); - pred.add("http://dbpedia.org/property/wikipage"); - pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - pred.add("http://dbpedia.org/property/relatedInstance"); - pred.add("http://dbpedia.org/property/monarch"); - - SortedSet<String> obj = new TreeSet<String>(); - obj.add("http://dbpedia.org/resource/Category:Wikipedia_"); - obj.add("http://dbpedia.org/resource/Category:Articles_"); - obj.add("http://dbpedia.org/resource/Template"); - obj.add("http://xmlns.com/foaf/0.1/"); - obj.add("http://upload.wikimedia.org/wikipedia/commons"); - obj.add("http://upload.wikimedia.org/wikipedia"); - obj.add("http://www.geonames.org"); - obj.add("http://www.w3.org/2006/03/wn/wn20/instances/synset"); - obj.add("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); - obj.add("http://www.w3.org/2004/02/skos/core"); - - return new SparqlQueryMaker("forbid", obj, pred, false); + SparqlQueryMaker sqm = new SparqlQueryMaker("forbid", new TreeSet<String>(), new TreeSet<String>(), false); + sqm.combineWith(getAllowYAGOFilter()); + sqm.addPredicateFilter("http://dbpedia.org/property/monarch"); + return sqm; } - public static SparqlQueryMaker getSKOSFilter() { - SortedSet<String> pred = new TreeSet<String>(); - // pred.add("http://www.w3.org/2004/02/skos/core"); - pred.add("http://www.w3.org/2002/07/owl#sameAs"); - pred.add("http://xmlns.com/foaf/0.1/"); - pred.add("http://dbpedia.org/property/reference"); - pred.add("http://dbpedia.org/property/website"); - pred.add("http://dbpedia.org/property/wikipage"); - pred.add("http://www.w3.org/2004/02/skos/core#narrower"); - pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - SortedSet<String> obj = new TreeSet<String>(); - // obj.add("http://dbpedia.org/resource/Category:Wikipedia_"); - // obj.add("http://dbpedia.org/resource/Category:Articles_"); - obj.add("http://xmlns.com/foaf/0.1/"); - obj.add("http://upload.wikimedia.org/wikipedia/commons"); - obj.add("http://upload.wikimedia.org/wikipedia"); + public static SparqlQueryMaker getAllowYAGOandSKOSFilter() { + SparqlQueryMaker sqm = new SparqlQueryMaker("forbid", new TreeSet<String>(), new TreeSet<String>(), false); + sqm.addFiltersForFOAF(); + sqm.addFiltersForDBpedia(); - obj.add("http://www.geonames.org"); - obj.add("http://www.w3.org/2006/03/wn/wn20/instances/synset"); - obj.add("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); - - obj.add("http://dbpedia.org/class/yago"); - obj.add("http://dbpedia.org/resource/Template"); - - return new SparqlQueryMaker("forbid", obj, pred, false); + sqm.addFiltersForGeonames(); + sqm.addFiltersForWordNet(); + sqm.addFiltersForFlickrwrappr(); + sqm.addFiltersForOWLSameAs(); + + sqm.addPredicateFilter("http://www.w3.org/2004/02/skos/core#narrower"); + sqm.addObjectFilter("http://dbpedia.org/resource/Template"); + return sqm; } - public static SparqlQueryMaker getYAGOSKOS() { - SortedSet<String> pred = new TreeSet<String>(); - // pred.add("http://www.w3.org/2004/02/skos/core"); - pred.add("http://www.w3.org/2002/07/owl#sameAs"); - pred.add("http://xmlns.com/foaf/0.1/"); - - pred.add("http://dbpedia.org/property/reference"); - pred.add("http://dbpedia.org/property/website"); - pred.add("http://dbpedia.org/property/wikipage"); - // pred.add("http://www.w3.org/2004/02/skos/core#narrower"); - pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - - SortedSet<String> obj = new TreeSet<String>(); - // obj.add("http://dbpedia.org/resource/Category:Wikipedia_"); - // obj.add("http://dbpedia.org/resource/Category:Articles_"); - obj.add("http://xmlns.com/foaf/0.1/"); - obj.add("http://upload.wikimedia.org/wikipedia/commons"); - obj.add("http://upload.wikimedia.org/wikipedia"); - - obj.add("http://www.geonames.org"); - obj.add("http://www.w3.org/2006/03/wn/wn20/instances/synset"); - obj.add("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); - - // obj.add("http://dbpedia.org/class/yago"); - obj.add("http://dbpedia.org/resource/Template"); - - return new SparqlQueryMaker("forbid", obj, pred, false); - } - public static SparqlQueryMaker test() { - SortedSet<String> pred = new TreeSet<String>(); - pred.add("http://www.w3.org/2004/02/skos/core"); - pred.add("http://www.w3.org/2002/07/owl#sameAs"); - pred.add("http://xmlns.com/foaf/0.1/"); - // pred.add("http://dbpedia.org/property/reference"); - // pred.add("http://dbpedia.org/property/website"); - // pred.add("http://dbpedia.org/property/wikipage"); - pred.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - pred.add("http://dbpedia.org/property/relatedInstance"); - - SortedSet<String> obj = new TreeSet<String>(); - // obj.add("http://dbpedia.org/resource/Category:Wikipedia_"); - // obj.add("http://dbpedia.org/resource/Category:Articles_"); - obj.add("http://dbpedia.org/resource/Category:"); - obj.add("http://dbpedia.org/resource/Template"); - obj.add("http://xmlns.com/foaf/0.1/"); - obj.add("http://upload.wikimedia.org/wikipedia/commons"); - obj.add("http://upload.wikimedia.org/wikipedia"); - obj.add("http://www.geonames.org"); - obj.add("http://www.w3.org/2006/03/wn/wn20/instances/synset"); - obj.add("http://www4.wiwiss.fu-berlin.de/flickrwrappr"); - obj.add("http://www.w3.org/2004/02/skos/core"); - return new SparqlQueryMaker("forbid", obj, pred, false); + SparqlQueryMaker sqm = new SparqlQueryMaker("forbid", new TreeSet<String>(), new TreeSet<String>(), false); + + return sqm; } public static void main(String[] args) { @@ -402,53 +363,5 @@ } - /* - * private String internalFilterAssemblySubject() { - * - * boolean emptyPredicateFilter = getPredicateFilterList().isEmpty(); - * boolean emptyObjectFilter = getObjectFilterList().isEmpty(); - * - * String filterString = ""; if (!isLiterals()) { filterString += - * "(!isLiteral(?object))"; if (!getPredicateFilterList().isEmpty()) { - * filterString += "&&("; } - * } else if (!emptyPredicateFilter) { filterString += "("; } boolean - * firstRun = true; for (String p : getPredicateFilterList()) { filterString += - * lineend; filterString += (firstRun) ? handlePredicate(p).substring(2) : - * handlePredicate(p); firstRun = false; } if (!emptyPredicateFilter) { - * filterString += ")"; } if ((!emptyPredicateFilter || !isLiterals()) && - * !emptyObjectFilter) { filterString += "&&("; }else if - * (!emptyObjectFilter) { filterString += "("; } - * - * firstRun = true; for (String o : getObjectFilterList()) { filterString += - * lineend; filterString += (firstRun) ? handleObject(o).substring(2) : - * handleObject(o) ; firstRun = false; } if (!emptyObjectFilter){ - * filterString += ")"; } - * - * return filterString; } - */ - /* - * private String filterSubject(String ns) { return "&&( - * !regex(str(?subject), '" + ns + "') )"; } - * - * - * private String handlePredicate (String ns) { return (isAllowMode()) ? - * allowPredicate(ns) : filterPredicate(ns) ; } - * - * private String handleObject (String ns) { return (isAllowMode()) ? - * allowObject(ns) : filterObject(ns) ; } - * - * private static String filterPredicate(String ns) { return "&&( - * !regex(str(?predicate), '" + ns + "') )"; } - * - * private static String filterObject(String ns) { return "&&( - * !regex(str(?object), '" + ns + "') )"; } - * - * private static String allowPredicate(String ns) { return "||( - * regex(str(?predicate), '" + ns + "') )"; } - * - * private static String allowObject(String ns) { return "||( - * regex(str(?object), '" + ns + "') )"; } - */ - } Modified: trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java 2008-08-15 13:53:47 UTC (rev 1091) +++ trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java 2008-08-15 14:50:53 UTC (rev 1092) @@ -23,6 +23,10 @@ import java.io.FileWriter; import java.net.URI; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; import org.dllearner.kb.aquisitors.SparqlTupelAquisitor; import org.dllearner.kb.extraction.Configuration; import org.dllearner.kb.extraction.Manager; @@ -38,17 +42,28 @@ * */ public class SparqlExtractionTest { + + private static Logger logger = Logger.getRootLogger(); + public static void main(String[] args) { System.out.println("Start"); +// create logger (a simple logger which outputs + // its messages to the console) + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.setLevel(Level.TRACE); + // String test2 = "http://www.extraction.org/config#dbpediatest"; // String test = "http://www.extraction.org/config#localjoseki"; try { // URI u = new URI(test); Manager m = new Manager(); Configuration conf = new Configuration ( - new SparqlTupelAquisitor(SparqlQueryMaker.getYAGOFilter(), SPARQLTasks.getPredefinedSPARQLTasksWithCache("DBPEDIA")), + new SparqlTupelAquisitor(SparqlQueryMaker.getAllowYAGOFilter(), SPARQLTasks.getPredefinedSPARQLTasksWithCache("DBPEDIA")), Manipulator.getDefaultManipulator(), 1, true, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |