From: <ku...@us...> - 2008-08-19 15:35:27
|
Revision: 1102 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1102&view=rev Author: kurzum Date: 2008-08-19 15:35:22 +0000 (Tue, 19 Aug 2008) Log Message: ----------- working improved aquisitor Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/aquisitors/LinkedDataTupelAquisitor.java trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitor.java trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitorImproved.java trunk/src/dl-learner/org/dllearner/kb/aquisitors/TupelAquisitor.java trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/manipulator/DBpediaNavigatorOtherRule.java trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java trunk/src/dl-learner/org/dllearner/kb/manipulator/TypeFilterRule.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java Modified: trunk/src/dl-learner/org/dllearner/kb/aquisitors/LinkedDataTupelAquisitor.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/aquisitors/LinkedDataTupelAquisitor.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/aquisitors/LinkedDataTupelAquisitor.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -45,12 +45,17 @@ // standard query get a tupels (p,o) for subject s @Override - public SortedSet<RDFNodeTuple> getTupelForResource(String uri) { - + public SortedSet<RDFNodeTuple> retrieveTupel(String uri){ throw new RuntimeException("Not Implemented yet"); - - } + @Override + public SortedSet<RDFNodeTuple> retrieveClassesForInstances(String uri){ + throw new RuntimeException("Not Implemented yet"); + } + @Override + public SortedSet<RDFNodeTuple> retrieveTuplesForClassesOnly(String uri){ + throw new RuntimeException("Not Implemented yet"); + } Modified: trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitor.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitor.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitor.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -34,8 +34,11 @@ */ public class SparqlTupelAquisitor extends TupelAquisitor { + @SuppressWarnings("unused") private static Logger logger = Logger.getLogger(SparqlTupelAquisitor.class); + protected static final String PREDICATE = "predicate"; + protected static final String OBJECT = "object"; protected SparqlQueryMaker sparqlQueryMaker; protected SPARQLTasks sparqlTasks; @@ -48,26 +51,24 @@ this.sparqlTasks = sparqlTasks; } - - // standard query get a tupels (p,o) for subject s @Override - public SortedSet<RDFNodeTuple> getTupelForResource(String uri) { - checkURIforValidity(uri); + public SortedSet<RDFNodeTuple> retrieveTupel(String uri){ + // getQuery + String sparqlQueryString = sparqlQueryMaker.makeSubjectQueryUsingFilters(uri); + return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, PREDICATE, OBJECT); - String pred = "predicate"; - String obj = "object"; - String sparqlQueryString = ""; + } + @Override + public SortedSet<RDFNodeTuple> retrieveClassesForInstances(String uri){ // getQuery - if (classMode) { - sparqlQueryString = sparqlQueryMaker.makeClassQueryUsingFilters(uri); - }else { - sparqlQueryString = sparqlQueryMaker.makeSubjectQueryUsingFilters(uri); - } + String sparqlQueryString = sparqlQueryMaker.makeClassQueryUsingFilters(uri); + return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, PREDICATE, OBJECT); - - return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, pred, obj); - } + @Override + public SortedSet<RDFNodeTuple> retrieveTuplesForClassesOnly(String uri){ + return retrieveTupel(uri); + } Modified: trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitorImproved.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitorImproved.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/aquisitors/SparqlTupelAquisitorImproved.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -54,61 +54,79 @@ this.recursionDepth = recursionDepth; } - - // standard query get a tupels (p,o) for subject s -// standard query get a tupels (p,o) for subject s + @Override - public SortedSet<RDFNodeTuple> getTupelForResource(String uri) { - checkURIforValidity(uri); - String sparqlQueryString = ""; - String pred = "predicate"; - String obj = "object"; + public SortedSet<RDFNodeTuple> retrieveTupel(String uri){ - // getQuery - if (classMode) { - - - sparqlQueryString = sparqlQueryMaker.makeClassQueryUsingFilters(uri); - return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, pred, obj); - } - SortedSet<RDFNodeTuple> cachedSet = resources.get(uri); if(cachedSet!=null) { return cachedSet; } //SortedSet<RDFNodeTuple> tmp = new TreeSet<RDFNodeTuple>(); - sparqlQueryString = sparqlQueryMaker.makeSubjectQueryLevel(uri, recursionDepth); + String sparqlQueryString = sparqlQueryMaker.makeSubjectQueryLevel(uri, recursionDepth); + //System.out.println(sparqlQueryString); ResultSetRewindable rsw= sparqlTasks.queryAsResultSet(sparqlQueryString); @SuppressWarnings("unchecked") List<ResultBinding> l = ResultSetFormatter.toList(rsw); rsw.reset(); + int count = 0; + + for (ResultBinding binding : l) { + count++; + } + int resultsetcount = 0; int i = 0; for (ResultBinding binding : l) { - i=0; - RDFNode nextURI = binding.get(obj+i); - add(uri, new RDFNodeTuple(binding.get(pred+i), nextURI )); - - for (i=1; i < recursionDepth; i++) { - RDFNode tmpURI = binding.get(obj+i); - add(nextURI.toString(), new RDFNodeTuple(binding.get(pred+i),tmpURI)); - logger.trace("For: "+nextURI.toString()+ " added :"+resources.get(nextURI.toString())); - nextURI = tmpURI; - } + i = 0; + RDFNode nextOBJ = binding.get(OBJECT+i); + RDFNode nextPRED = binding.get(PREDICATE+i); + RDFNodeTuple tmptuple = new RDFNodeTuple(nextPRED, nextOBJ ); + add(uri,tmptuple); + boolean cont = !nextOBJ.isLiteral(); + for (i=0; (i < recursionDepth) && cont; i++) { + RDFNode tmpPREDURI = binding.get(PREDICATE+i); + RDFNode tmpOBJURI = binding.get(OBJECT+i); + if(tmpOBJURI==null) { + cont=false; + }else if (tmpOBJURI.isLiteral()) { + tmptuple = new RDFNodeTuple(tmpPREDURI, tmpOBJURI ); + add(nextOBJ.toString(), tmptuple); + logger.trace(tmptuple); + logger.trace("For: "+nextOBJ.toString()+ " added :"+resources.get(nextOBJ.toString())); + cont=false; + }else { + tmptuple = new RDFNodeTuple(tmpPREDURI, tmpOBJURI ); + add(nextOBJ.toString(), tmptuple); + logger.trace(tmptuple); + logger.trace("For: "+nextOBJ.toString()+ " added :"+resources.get(nextOBJ.toString())); + nextOBJ = tmpOBJURI; + cont = true; + } + }//end for + resultsetcount++; } + //System.out.println("original count "+count); + logger.warn("SparqlTupelAquisitor retrieved : "+resultsetcount); if(resultsetcount>999) { logger.warn("SparqlTupelAquisitor retrieved more than 1000 results, there might some be missing"); } - return resources.get(uri); - - //return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, pred, obj); + return ((cachedSet=resources.get(uri))==null)?new TreeSet<RDFNodeTuple>():cachedSet; } + @Override + public SortedSet<RDFNodeTuple> retrieveTuplesForClassesOnly(String uri){ + //getQuery + String sparqlQueryString = sparqlQueryMaker.makeSubjectQueryUsingFilters(uri); + return sparqlTasks.queryAsRDFNodeTuple(sparqlQueryString, PREDICATE, OBJECT); + } + + private void add(String uri, RDFNodeTuple tuple){ SortedSet<RDFNodeTuple> set = resources.get(uri); if(set==null){ Modified: trunk/src/dl-learner/org/dllearner/kb/aquisitors/TupelAquisitor.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/aquisitors/TupelAquisitor.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/aquisitors/TupelAquisitor.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -38,20 +38,41 @@ private static Logger logger = Logger.getLogger(TupelAquisitor.class); + protected final int NORMAL = 0; + protected final int CLASSES_FOR_INSTANCES = 1; + protected final int CLASS_INFORMATION = 2; - protected boolean classMode = false; + protected int mode = 0; private boolean uriDebugCheck = true; - public abstract SortedSet<RDFNodeTuple> getTupelForResource(String uri); + public final SortedSet<RDFNodeTuple> getTupelForResource(String uri){ + checkURIforValidity(uri); + if (mode == NORMAL) { + return retrieveTupel(uri); + } else if(mode == CLASSES_FOR_INSTANCES){ + return retrieveClassesForInstances(uri); + }else if(mode == CLASS_INFORMATION){ + return retrieveTuplesForClassesOnly(uri); + }else{ + throw new RuntimeException("undefined mode in aquisitor"); + } + } + public abstract SortedSet<RDFNodeTuple> retrieveTupel(String uri); + public abstract SortedSet<RDFNodeTuple> retrieveClassesForInstances(String uri); + public abstract SortedSet<RDFNodeTuple> retrieveTuplesForClassesOnly(String uri); - public void setClassMode(boolean classMode) { - this.classMode = classMode; - } + /*private void setMode(int mode) { + this.mode = mode; + }*/ - public boolean isClassMode() { - return classMode; + public int getMode() { + return mode; } + public void setNextTaskToNormal(){mode = NORMAL;} + public void setNextTaskToClassesForInstances(){mode = CLASSES_FOR_INSTANCES;} + public void setNextTaskToClassInformation(){mode = CLASS_INFORMATION;} + protected boolean checkURIforValidity(String uri){ if(uriDebugCheck) return true; try{ Modified: trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -70,6 +70,7 @@ public Node expandNode(String uri, TupelAquisitor tupelAquisitor) { SimpleClock sc = new SimpleClock(); + tupelAquisitor.setNextTaskToNormal(); Node seedNode = getFirstNode(uri); List<Node> newNodes = new ArrayList<Node>(); @@ -131,7 +132,7 @@ //TODO LinkedData incompatibility - tupelAquisitor.setClassMode(true); + tupelAquisitor.setNextTaskToClassesForInstances(); if (configuration.isCloseAfterRecursion()) { while (!instances.isEmpty()) { logger.trace("Getting classes for remaining instances: " @@ -144,12 +145,11 @@ }//endif }//endwhile }//endif - tupelAquisitor.setClassMode(false); - + tupelAquisitor.setNextTaskToClassInformation(); int i = 0; - while (!classes.isEmpty()) { + while (!classes.isEmpty() && false) { logger.trace("Remaining classes: " + classes.size()); Node next = classes.remove(0); if (!alreadyQueriedSuperClasses.contains(next.getURI().toString())) { Modified: trunk/src/dl-learner/org/dllearner/kb/manipulator/DBpediaNavigatorOtherRule.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/manipulator/DBpediaNavigatorOtherRule.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/manipulator/DBpediaNavigatorOtherRule.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -51,7 +51,6 @@ typeTuple = tuple; } - //TODO this doesn't work, because it is unclear what toString() method returns if (tuple.a.toString().equals("http://www.w3.org/2003/01/geo/wgs84_pos#lat") && tuple.b.isLiteral()){ lat = ((Literal) tuple.b).getFloat(); Modified: trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/manipulator/Manipulator.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -23,6 +23,7 @@ import java.util.List; import java.util.SortedSet; +import org.apache.log4j.Logger; import org.dllearner.kb.extraction.ClassNode; import org.dllearner.kb.extraction.InstanceNode; import org.dllearner.kb.extraction.Node; @@ -39,6 +40,7 @@ */ public class Manipulator { + private static Logger logger = Logger.getLogger(Manipulator.class); private List<Rule> rules = new ArrayList<Rule>(); private Manipulator() { @@ -56,9 +58,11 @@ */ public SortedSet<RDFNodeTuple> manipulate( Node node, SortedSet<RDFNodeTuple> tuples) { JamonMonitorLogger.getTimeMonitor(Manipulator.class, "Time for Rules").start(); + logger.warn("before: "+tuples.size()); for (Rule rule : rules) { tuples = rule.applyRule(node, tuples); } + logger.warn("after: "+tuples.size()); JamonMonitorLogger.getTimeMonitor(Manipulator.class, "Time for Rules").stop(); return tuples; } @@ -111,9 +115,13 @@ private void addDefaultRules(Months month){ - addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_CLASS,ClassNode.class.getCanonicalName() )) ; - addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_THING,InstanceNode.class.getCanonicalName() )) ; - addRule(new TypeFilterRule(month, "", OWLVocabulary.OWL_CLASS, ClassNode.class.getCanonicalName()) ) ; + // addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_CLASS,ClassNode.class.getCanonicalName() )) ; + // addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_THING,InstanceNode.class.getCanonicalName() )) ; + // addRule(new TypeFilterRule(month, "", OWLVocabulary.OWL_CLASS, ClassNode.class.getCanonicalName()) ) ; + addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_CLASS,ClassNode.class )) ; + addRule(new TypeFilterRule(month, OWLVocabulary.RDF_TYPE, OWLVocabulary.OWL_THING,InstanceNode.class )) ; + addRule(new TypeFilterRule(month, "", OWLVocabulary.OWL_CLASS, ClassNode.class) ) ; + } public synchronized void addRule(Rule newRule){ Modified: trunk/src/dl-learner/org/dllearner/kb/manipulator/TypeFilterRule.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/manipulator/TypeFilterRule.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/manipulator/TypeFilterRule.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -22,21 +22,24 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Logger; import org.dllearner.kb.extraction.Node; import org.dllearner.utilities.datastructures.RDFNodeTuple; public class TypeFilterRule extends Rule{ + public static Logger logger = Logger.getLogger(TypeFilterRule.class); + String predicateFilter; String objectFilter; - String canonicalClassName; + String classCanonicalName; - public TypeFilterRule(Months month, String predicateFilter, String objectFilter, String canonicalClassName) { + public TypeFilterRule(Months month, String predicateFilter, String objectFilter, Class<? extends Node> clazz) { super(month); this.predicateFilter = predicateFilter; this.objectFilter = objectFilter; - this.canonicalClassName = canonicalClassName; + this.classCanonicalName = clazz.getCanonicalName(); } @@ -45,12 +48,20 @@ public SortedSet<RDFNodeTuple> applyRule(Node subject, SortedSet<RDFNodeTuple> tuples){ SortedSet<RDFNodeTuple> keep = new TreeSet<RDFNodeTuple>(); for (RDFNodeTuple tuple : tuples) { + String a = tuple.a.toString(); + String b = tuple.b.toString(); + //System.out.println(a+b); boolean remove = (tuple.aPartContains(predicateFilter) && - tuple.bPartContains(objectFilter) && - subject.getClass().getCanonicalName().equals(canonicalClassName)); + tuple.bPartContains(objectFilter) && + // QUALITY this might be dead wrong + (classCanonicalName.equalsIgnoreCase(subject.getClass().getCanonicalName())) + ); if(!remove){ keep.add(tuple); + }else{ + logger.warn("Removed: "+subject+"::"+tuple); } + } return keep; } Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -47,6 +47,7 @@ import org.dllearner.core.config.StringTupleListConfigOption; import org.dllearner.core.owl.KB; import org.dllearner.kb.aquisitors.SparqlTupelAquisitor; +import org.dllearner.kb.aquisitors.SparqlTupelAquisitorImproved; import org.dllearner.kb.aquisitors.TupelAquisitor; import org.dllearner.kb.extraction.Configuration; import org.dllearner.kb.extraction.Manager; @@ -410,7 +411,8 @@ public TupelAquisitor getTupelAquisitor() { - return new SparqlTupelAquisitor(getSparqlQueryMaker(), getSPARQLTasks()); + //return new SparqlTupelAquisitor(getSparqlQueryMaker(), getSPARQLTasks()); + return new SparqlTupelAquisitorImproved(getSparqlQueryMaker(), getSPARQLTasks(),recursionDepth); } /* (non-Javadoc) Modified: trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java 2008-08-19 15:23:53 UTC (rev 1101) +++ trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java 2008-08-19 15:35:22 UTC (rev 1102) @@ -26,7 +26,7 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; -import org.dllearner.kb.aquisitors.SparqlTupelAquisitor; +import org.dllearner.kb.aquisitors.SparqlTupelAquisitorImproved; import org.dllearner.kb.extraction.Configuration; import org.dllearner.kb.extraction.Manager; import org.dllearner.kb.manipulator.Manipulator; @@ -34,6 +34,7 @@ import org.dllearner.kb.sparql.SparqlQueryMaker; import org.dllearner.scripts.NT2RDF; import org.dllearner.utilities.JamonMonitorLogger; +import org.dllearner.utilities.statistics.SimpleClock; /** * Test class, uses the whole thing @@ -55,17 +56,19 @@ ConsoleAppender consoleAppender = new ConsoleAppender(layout); logger.removeAllAppenders(); logger.addAppender(consoleAppender); - logger.setLevel(Level.TRACE); + logger.setLevel(Level.INFO); // String test2 = "http://www.extraction.org/config#dbpediatest"; // String test = "http://www.extraction.org/config#localjoseki"; try { // URI u = new URI(test); + int recursionDepth=3; Manager m = new Manager(); Configuration conf = new Configuration ( - new SparqlTupelAquisitor(SparqlQueryMaker.getTestFilter(), SPARQLTasks.getPredefinedSPARQLTasksWithCache("DBPEDIA")), + new SparqlTupelAquisitorImproved(SparqlQueryMaker.getAllowYAGOFilter(), + SPARQLTasks.getPredefinedSPARQLTasksWithCache("DBPEDIA"),recursionDepth), Manipulator.getDefaultManipulator(), - 1, + recursionDepth, true, true, 200 @@ -79,6 +82,7 @@ fw.write(m.extract(u2)); fw.flush(); fw.close(); + NT2RDF.convertNT2RDF(filename); JamonMonitorLogger.printAllSortedByLabel(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |