From: <ku...@us...> - 2008-08-04 08:43:32
|
Revision: 1046 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1046&view=rev Author: kurzum Date: 2008-08-04 08:43:26 +0000 (Mon, 04 Aug 2008) Log Message: ----------- latest changes Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java trunk/src/dl-learner/org/dllearner/scripts/WikipediaCategoryCleaner.java trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/ConceptSPARQLReEvaluator.java trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/WikipediaCategoryTasks.java trunk/src/dl-learner/org/dllearner/utilities/datastructures/SetManipulation.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/learn/LearnSPARQLConfiguration.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/test/FilterTest.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -112,7 +112,7 @@ final SortedSet<String> tmpSet = new TreeSet<String>(); // collect super/subclasses for the depth - for (; (depth != 0) && (!toBeRetrieved.isEmpty()); depth--) { + for (; (depth > 0) && (!toBeRetrieved.isEmpty()); depth--) { // collect super/subclasses for each class in toBeRetrieved // accumulate in tmpSet for (String oneClass : toBeRetrieved) { Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -94,13 +94,14 @@ // Jena access to SPARQL endpoint queryExecution = new QueryEngineHTTP(service, sparqlQueryString); + //System.out.println(sparqlEndpoint.getDefaultGraphURIs()); + for (String dgu : sparqlEndpoint.getDefaultGraphURIs()) { queryExecution.addDefaultGraph(dgu); } for (String ngu : sparqlEndpoint.getNamedGraphURIs()) { queryExecution.addNamedGraph(ngu); } - // TODO remove after overnext Jena release HttpQuery.urlLimit = 3 * 1024; JamonMonitorLogger.getTimeMonitor(SparqlQuery.class, "httpTime") Modified: trunk/src/dl-learner/org/dllearner/scripts/WikipediaCategoryCleaner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/WikipediaCategoryCleaner.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/scripts/WikipediaCategoryCleaner.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -50,6 +50,7 @@ import org.dllearner.utilities.examples.AutomaticPositiveExampleFinderSPARQL; import org.dllearner.utilities.learn.LearnSPARQLConfiguration; import org.dllearner.utilities.learn.LearnSparql; +import org.dllearner.utilities.statistics.SimpleClock; public class WikipediaCategoryCleaner { @@ -67,6 +68,8 @@ private static final boolean DEVELOP = true; public static final int SPARQL_RESULTSET_LIMIT = 500; + + private static final int DEPTH_OF_RDFS = 0; // the 70/30 strategy was abandoned public static double PERCENT_OF_SKOSSET = 1.0; @@ -84,13 +87,16 @@ * @param args */ public static void main(String[] args) { + SimpleClock sc = new SimpleClock(); initLogger(); setup(); logger.info("Start"); - + SortedSet<String> wikipediaCategories = new TreeSet<String>(); + + String test = "http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"; + wikipediaCategories.add(test); test = "http://dbpedia.org/resource/Category:Best_Actor_Academy_Award_winners"; - SortedSet<String> wikipediaCategories = new TreeSet<String>(); wikipediaCategories.add(test); for (String target : wikipediaCategories) { @@ -99,7 +105,7 @@ } - System.out.println("Finished"); + sc.printAndSet("Finished"); // JamonMonitorLogger.printAllSortedByLabel(); } @@ -114,7 +120,7 @@ ConceptSPARQLReEvaluator csparql; wikiTasks = new WikipediaCategoryTasks(sparqlTasks); - csparql = new ConceptSPARQLReEvaluator(sparqlTasks); + csparql = new ConceptSPARQLReEvaluator(sparqlTasks, DEPTH_OF_RDFS, SPARQL_RESULTSET_LIMIT); // PHASE 1 ************* @@ -153,9 +159,9 @@ conceptresults = learn(getConfToRelearn(), currentPOSITIVEex, currentNEGATIVEex); // TODO select concepts - logger.info("reducing concept size before evaluating"); + logger.info("reducing concept size before evaluating from "+conceptresults.size()); conceptresults = selectConcepts(conceptresults); - // reevaluate versus the Endpoint + // reevaluate versus the Endpoint conceptresults = csparql.reevaluateConceptsByLowestRecall( conceptresults, currentPOSITIVEex); @@ -165,9 +171,13 @@ } private static void collectResults(WikipediaCategoryTasks wikiTasks) { - System.out.println(wikiTasks.getFullPositiveSet()); - System.out.println(wikiTasks.getCleanedPositiveSet()); - System.out.println(wikiTasks.getDefinitelyWrongIndividuals()); + //logger.setLevel(Level.DEBUG); + printSet("fullpos", wikiTasks.getFullPositiveSet()); + + printSet("cleanedpos", wikiTasks.getCleanedPositiveSet()); + + printSet("wrongindividuals", wikiTasks.getDefinitelyWrongIndividuals()); + } private static List<EvaluatedDescription> selectConcepts( @@ -287,6 +297,7 @@ // url = "http://dbpedia.openlinksw.com:8890/sparql"; sparqlTasks = new SPARQLTasks(cache, SparqlEndpoint .getEndpointDBpedia()); + } } @@ -308,17 +319,19 @@ logger.addAppender(consoleAppender); logger.addAppender(fileAppender); logger.setLevel(Level.DEBUG); - Logger.getLogger(KnowledgeSource.class).setLevel(Level.WARN); - Logger.getLogger(SparqlKnowledgeSource.class).setLevel(Level.WARN); Logger.getLogger(Manager.class).setLevel(Level.INFO); - Logger.getLogger(ExtractionAlgorithm.class).setLevel(Level.WARN); + Level lwarn = Level.WARN; + Logger.getLogger(KnowledgeSource.class).setLevel(lwarn); + Logger.getLogger(SparqlKnowledgeSource.class).setLevel(lwarn); + + Logger.getLogger(ExtractionAlgorithm.class).setLevel(lwarn); Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class).setLevel( - Level.WARN); + lwarn); Logger.getLogger(AutomaticPositiveExampleFinderSPARQL.class).setLevel( - Level.WARN); - Logger.getLogger(ExampleBasedROLComponent.class).setLevel(Level.WARN); - Logger.getLogger(SparqlQuery.class).setLevel(Level.INFO); - Logger.getLogger(Cache.class).setLevel(Level.INFO); + lwarn); + Logger.getLogger(ExampleBasedROLComponent.class).setLevel(lwarn); + Logger.getLogger(SparqlQuery.class).setLevel(lwarn); + Logger.getLogger(Cache.class).setLevel(lwarn); } Modified: trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/ConceptSPARQLReEvaluator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/ConceptSPARQLReEvaluator.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/ConceptSPARQLReEvaluator.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -99,7 +99,10 @@ // NegAsNeg doesnt exist, because all SortedSet<Individual> NegAsNeg = new TreeSet<Individual>(); - for (EvaluatedDescription ed : descToBeReevaluated) { + // elements are immediately removed from the list to save memory + while (!descToBeReevaluated.isEmpty()) { + EvaluatedDescription ed = descToBeReevaluated.remove(0); + try { instances = retrieveInstances(ed); // PosAsPos @@ -114,8 +117,13 @@ .getIndividualSet(PosAsPos), Helper .getIndividualSet(PosAsNeg), NegAsPos, NegAsNeg)); - PosAsPos.clear(); - PosAsNeg.clear(); + }catch(Exception e){ + logger.warn("ERROR occured, while evaluating, I'm ignoring it : "+e.toString()); + logger.warn("Concept was: "+ed.getDescription().toKBSyntaxString()); + }finally{ + PosAsPos.clear(); + PosAsNeg.clear(); + } } @@ -145,11 +153,11 @@ SortedSet<Individual> NegAsPos = new TreeSet<Individual>(); SortedSet<Individual> NegAsNeg = new TreeSet<Individual>(); - + // elements are immediately removed from the list to save memory while (!descToBeReevaluated.isEmpty()) { EvaluatedDescription ed = descToBeReevaluated.remove(0); - + try { instances = retrieveInstances(ed); // PosAsPos @@ -163,10 +171,13 @@ returnSet.add(new EvaluatedDescription(ed.getDescription(), Helper .getIndividualSet(PosAsPos), Helper .getIndividualSet(PosAsNeg), NegAsPos, NegAsNeg)); - - PosAsPos.clear(); - PosAsNeg.clear(); - + }catch(Exception e){ + logger.warn("ERROR occured, while evaluating, I'm ignoring it :"+e.toString()); + logger.warn("Concept was: "+ed.getDescription().toKBSyntaxString()); + }finally{ + PosAsPos.clear(); + PosAsNeg.clear(); + } } logger.info("finished reevaluating by lowest recall :" + returnSet.size() + " concepts"); Modified: trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/WikipediaCategoryTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/WikipediaCategoryTasks.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/scripts/improveWikipedia/WikipediaCategoryTasks.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -67,8 +67,9 @@ List<EvaluatedDescription> conceptresults, SortedSet<String> posExamples) { - definitelyWrongIndividuals = Helper.getStringSet(conceptresults.get(0) - .getNotCoveredPositives()); + definitelyWrongIndividuals.clear(); + definitelyWrongIndividuals.addAll(Helper.getStringSet(conceptresults.get(0) + .getNotCoveredPositives())); // clean the examples posExamples.removeAll(definitelyWrongIndividuals); @@ -145,7 +146,7 @@ * fullPositiveSet.size()); int neglimit = (int) Math.round(poslimit * negFactor); - posExamples = SetManipulation.fuzzyShrink(fullPositiveSet, poslimit); + posExamples.addAll(SetManipulation.fuzzyShrink(fullPositiveSet, poslimit)); // NEGATIVES Added: trunk/src/dl-learner/org/dllearner/test/FilterTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/FilterTest.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/test/FilterTest.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -0,0 +1,35 @@ +package org.dllearner.test; + +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.statistics.SimpleClock; + +public class FilterTest { + + /** + * @param args + */ + public static void main(String[] args) { + // TODO Auto-generated method stub + String qlong="SELECT * WHERE { <http://dbpedia.org/resource/%22Big%22_Ron> ?predicate ?object. FILTER( (!isLiteral(?object))&&( ( !regex(str(?predicate), 'http://dbpedia.org/property/relatedInstance') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/website') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/owner') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/wikiPageUsesTemplate') ) &&( !regex(str(?predicate), 'http://www.w3.org/2002/07/owl#sameAs') ) &&( !regex(str(?predicate), 'http://xmlns.com/foaf/0.1/') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/standard') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/wikipage') ) &&( !regex(str(?predicate), 'http://dbpedia.org/property/reference') ) &&( !regex(str(?predicate), 'http://www.w3.org/2004/02/skos/core') ))&&( ( !regex(str(?object), 'http://xmlns.com/foaf/0.1/') ) &&( !regex(str(?object), 'http://upload.wikimedia.org/wikipedia') ) &&( !regex(str(?object), 'http://www4.wiwiss.fu-berlin.de/flickrwrappr') ) &&( !regex(str(?object), 'http://dbpedia.org/resource/Template') ) &&( !regex(str(?object), 'http://upload.wikimedia.org/wikipedia/commons') ) &&( !regex(str(?object), 'http://www.w3.org/2006/03/wn/wn20/instances/synset') ) &&( !regex(str(?object), 'http://dbpedia.org/resource/Category:') ) &&( !regex(str(?object), 'http://www.w3.org/2004/02/skos/core') ) &&( !regex(str(?object), 'http://www.geonames.org') ))).}"; + String qshort="SELECT * WHERE { <http://dbpedia.org/resource/%22Big%22_Ron> ?predicate ?object. FILTER (!isLiteral(?object)).}"; + + SimpleClock sc = new SimpleClock(); + SPARQLTasks st = new SPARQLTasks(Cache.getPersistentCache(), SparqlEndpoint.getEndpointDBpedia()); + + for (int i = 0; i < 10; i++) { + st.query(qshort); + } + sc.printAndSet("long "); + + + + for (int i = 0; i < 10; i++) { + st.query(qlong); + } + + sc.printAndSet("short "); + } + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/datastructures/SetManipulation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/datastructures/SetManipulation.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/utilities/datastructures/SetManipulation.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -18,8 +18,9 @@ * @return */ public static SortedSet<String> fuzzyShrink(SortedSet<String> set, int limit) { - if (set.size() <= limit) + if (set.size() <= limit) { return set; + } SortedSet<String> ret = new TreeSet<String>(); Random r = new Random(); double treshold = ((double) limit) / set.size(); @@ -48,8 +49,9 @@ */ public static SortedSet<String> stableShrink(SortedSet<String> set, int limit) { - if (set.size() <= limit) + if (set.size() <= limit) { return set; + } SortedSet<String> ret = new TreeSet<String>(); for (String oneInd : set) { @@ -63,8 +65,9 @@ } /** + * XXX * getFirst n Elements from list. - * + * changes the list!!! * @param list * @param nrElements * @return returns the list shrunken to size. it is an ARRAYLIST now Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -111,8 +111,8 @@ String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n" + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; - this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); - this.fromRelated.removeAll(this.fullPositiveSet); + fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); + fromRelated.removeAll(fullPositiveSet); } @@ -161,7 +161,7 @@ } - this.fromParallelClasses.removeAll(this.fullPositiveSet); + fromParallelClasses.removeAll(fullPositiveSet); logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size()); } @@ -181,27 +181,27 @@ for (String oneSuperClass : superClasses) { logger.debug(oneSuperClass); - this.fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneSuperClass + "\"", resultLimit)); } - this.fromSuperclasses.removeAll(this.fullPositiveSet); + this.fromSuperclasses.removeAll(fullPositiveSet); logger.debug("|-neg Example from superclass: " + fromSuperclasses.size()); } @SuppressWarnings("unused") private void makeNegativeExamplesFromDomain(String role, int resultLimit){ logger.debug("making Negative Examples from Domain of : "+role); - this.fromDomain.addAll(sparqltasks.getDomainInstances(role, resultLimit)); - this.fromDomain.removeAll(this.fullPositiveSet); + fromDomain.addAll(sparqltasks.getDomainInstances(role, resultLimit)); + fromDomain.removeAll(fullPositiveSet); logger.debug("|-neg Example size from Domain: "+this.fromDomain.size()); } @SuppressWarnings("unused") private void makeNegativeExamplesFromRange(String role, int resultLimit){ logger.debug("making Negative Examples from Range of : "+role); - this.fromRange.addAll(sparqltasks.getRangeInstances(role, resultLimit)); - this.fromRange.removeAll(this.fullPositiveSet); - logger.debug("|-neg Example size from Range: "+this.fromRange.size()); + fromRange.addAll(sparqltasks.getRangeInstances(role, resultLimit)); + fromRange.removeAll(fullPositiveSet); + logger.debug("|-neg Example size from Range: "+fromRange.size()); } } Modified: trunk/src/dl-learner/org/dllearner/utilities/learn/LearnSPARQLConfiguration.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/learn/LearnSPARQLConfiguration.java 2008-08-02 18:25:33 UTC (rev 1045) +++ trunk/src/dl-learner/org/dllearner/utilities/learn/LearnSPARQLConfiguration.java 2008-08-04 08:43:26 UTC (rev 1046) @@ -42,6 +42,7 @@ // KNOWLEDGESOURCE cm.applyConfigEntry(ks, "url", sparqlEndpoint.getURL().toString()); + cm.applyConfigEntry(ks, "predefinedEndpoint", "DBPEDIA"); cm.applyConfigEntry(ks, "recursionDepth", recursiondepth); cm.applyConfigEntry(ks, "closeAfterRecursion", closeAfterRecursion); cm.applyConfigEntry(ks, "predefinedFilter", predefinedFilter); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |