From: <ku...@us...> - 2008-05-21 15:07:08
|
Revision: 912 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=912&view=rev Author: kurzum Date: 2008-05-21 07:59:31 -0700 (Wed, 21 May 2008) Log Message: ----------- cleanup Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLExtractionEvaluation.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderRolesSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSKOSSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSPARQLold.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/learn/LearnSparql.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/utilities/JamonTimeLogger.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/examples/SPARQLTasks.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/utilities/datastructures/JenaResultSetConvenience.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java 2008-05-21 14:59:31 UTC (rev 912) @@ -82,6 +82,18 @@ this("cache"); } */ + public static Cache getPersistentCache(){ + Cache c = new Cache("cachePersistant"); + c.setFreshnessInDays(365); + return c; + } + + public static Cache getDefaultCache(){ + Cache c = new Cache("cache"); + + return c; + } + /** * Constructor for the cache itself. * @@ -242,5 +254,9 @@ return json; } } + + public void setFreshnessInDays(int days){ + freshnessSeconds = days * 24 * 60 * 60; + } } Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-21 14:59:31 UTC (rev 912) @@ -112,6 +112,9 @@ return queryString; } + /** + * @return String JSON + */ public String getResult() { return json; } Modified: trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-21 14:59:31 UTC (rev 912) @@ -1,7 +1,8 @@ package org.dllearner.scripts; -import java.util.LinkedList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -10,222 +11,198 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; +import org.dllearner.algorithms.refexamples.ExampleBasedROLComponent; +import org.dllearner.core.ComponentManager; import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.ReasonerComponent; +import org.dllearner.core.ReasoningService; import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.Union; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertRDFS; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; +import org.dllearner.kb.sparql.SparqlKnowledgeSource; +import org.dllearner.learningproblems.PosNegDefinitionLP; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.utilities.datastructures.SetManipulation; -import org.dllearner.utilities.examples.AutomaticExampleFinderSKOSSPARQL; -import org.dllearner.utilities.learn.LearnSparql; -import org.dllearner.utilities.statistics.SimpleClock; +import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL; +import org.dllearner.utilities.examples.AutomaticPositiveExampleFinderSPARQL; +import org.dllearner.utilities.examples.SPARQLTasks; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.ResultSetFormatter; -import com.hp.hpl.jena.sparql.core.ResultBinding; - public class SKOS7030 { - static Cache c; - static SparqlEndpoint se; + private static SPARQLTasks sparqlTasks; + private static Logger logger = Logger.getRootLogger(); + static boolean local = true; + static String url = ""; + //LEARNING + static int recursiondepth=1; + static boolean closeAfterRecursion=true; + static boolean randomizeCache=false; + static double noise=15; + static int maxExecutionTimeInSeconds = 30; + static int guaranteeXgoodDescriptions = 40; + //static int limit=200; - //vars - static boolean useRelated = false; - static boolean useSuperClasses = false; - static boolean useParallelClasses = true; - static int poslimit = 10; - static int neglimit = 20; - static int recursiondepth=1; - static boolean closeAfterRecursion=true; - static boolean randomizeCache=false; - static int resultsize=20; - static double noise=15; - static int limit=200; - static double percentage=0.3; + //examples + static int sparqlResultSize=2000; + static double percentOfSKOSSet=0.7; + static double negfactor=0.3; + SortedSet<String> posExamples = new TreeSet<String>(); + SortedSet<String> fullPositiveSet = new TreeSet<String>(); + SortedSet<String> fullminusposRest = new TreeSet<String>(); + SortedSet<String> negExamples = new TreeSet<String>(); + + + + + /** * @param args */ public static void main(String[] args) { - init(); - //logger.setLevel(Level.TRACE); - Logger.getLogger(KnowledgeSource.class).setLevel(Level.WARN); - //System.out.println(Logger.getLogger(SparqlQuery.class).getLevel()); - SimpleClock sc=new SimpleClock(); + System.out.println("Start"); + initLogger(); + //parameters - se = SparqlEndpoint.EndpointLOCALDBpedia(); -// String t="\"http://dbpedia.org/class/yago/Fiction106367107\""; -// t="(\"http://dbpedia.org/class/yago/HeadOfState110164747\" AND (\"http://dbpedia.org/class/yago/Negotiator110351874\" AND \"http://dbpedia.org/class/yago/Representative110522035\"))"; -// //System.out.println(t); -// //t="\"http://www.w3.org/2004/02/skos/core#subject\""; -// //conceptRewrite(t); -// //getSubClasses(t); -// -// AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); -// try{ -// System.out.println("oneconcept: "+t); -// SortedSet<String> instances = ae.queryConceptAsStringSet(conceptRewrite(t), 200); -// if(instances.size()>=0)System.out.println("size of instances "+instances.size()); -// if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); -// }catch (Exception e) { -// e.printStackTrace(); -// } - SortedSet<String> concepts = new TreeSet<String>(); + if(local){ + url = "http://139.18.2.37:8890/sparql"; + sparqlTasks = new SPARQLTasks(Cache.getPersistentCache(),SparqlEndpoint.EndpointLOCALDBpedia()); + }else{ + url = "http://dbpedia.openlinksw.com:8890/sparql"; + sparqlTasks = new SPARQLTasks(Cache.getPersistentCache(),SparqlEndpoint.EndpointDBpedia()); + } String prim="http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"; String award=("http://dbpedia.org/resource/Category:Best_Actor_Academy_Award_winners"); - System.out.println(DBpediaSKOS(prim)); -// double acc1=0.0; -// for (int i = 0; i < 5; i++) { -// acc1+=DBpediaSKOS(prim); -// } -// System.out.println("accprim"+(acc1/5)); -// -// double acc2=0.0; -// for (int i = 0; i < 5; i++) { -// acc2+=DBpediaSKOS(award); -// } -// System.out.println("accprim"+(acc2/5)); + SKOS7030 s= new SKOS7030(); -// DBpediaSKOS(concepts.first()); -// DBpediaSKOS(concepts.first()); -// concepts.remove(concepts.first()); -// DBpediaSKOS(concepts.first()); -// DBpediaSKOS(concepts.first()); -// concepts.remove(concepts.first()); -// DBpediaSKOS(concepts.first()); -// DBpediaSKOS(concepts.first()); - //algorithm="refinement"; - //roles(); + s.makeExamples(prim, percentOfSKOSSet, negfactor, sparqlResultSize); + //QUALITY s.posExamples + List<Description> conceptresults = s.learn(s.posExamples, s.negExamples); + logger.debug("found nr of concepts: "+conceptresults.size()); - /*System.out.println(Level.DEBUG.getClass()); - System.out.println(Level.toLevel("INFO")); - System.out.println(Level.INFO);*/ - //System.exit(0); + for (Description oneConcept : conceptresults) { + s.evaluate(oneConcept, 1000); + } + + + System.out.println("Finished"); + + } + void evaluate(Description oneConcept, int sparqlResultLimit){ + logger.debug("oneconcept: "+oneConcept); + SortedSet<String> instances = sparqlTasks.retrieveInstancesForConcept(oneConcept.toKBSyntaxString(), sparqlResultLimit); - sc.printAndSet("Finished"); - + System.out.println(fullminusposRest.size()); + System.out.println(instances.size()); + + SortedSet<String> coveredInRest = new TreeSet<String>(fullminusposRest); + coveredInRest.retainAll(instances); + + System.out.println(fullminusposRest.size()); + System.out.println(instances.size()); + System.out.println(coveredInRest.size()); + + + + //SortedSet<String> possibleNewCandidates = new TreeSet<String>(); + //SortedSet<String> notCoveredInTotal = new TreeSet<String>(); + + } - - static double DBpediaSKOS(String concept){ - se = SparqlEndpoint.EndpointLOCALDBpedia(); - //se = SparqlEndpoint.EndpointDBpedia(); - String url = "http://dbpedia.openlinksw.com:8890/sparql"; - url = "http://139.18.2.37:8890/sparql"; + static void DBpediaSKOS(String SKOSConcept){ + //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); - SortedSet<String> posExamples = new TreeSet<String>(); - SortedSet<String> negExamples = new TreeSet<String>(); + //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); //HashMap<String, String> result2 = new HashMap<String, String>(); //System.out.println(concepts.first()); //logger.setLevel(Level.TRACE); - AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); + - ae.initDBpediaSKOS(concept,percentage , useRelated, useParallelClasses); - posExamples = ae.getPosExamples(); - negExamples = ae.getNegExamples(); - for (String string2 : negExamples) { - logger.debug("-"+string2); - } +// LearnSparql ls = new LearnSparql(); +// +// //igno.add(oneConcept.replaceAll("\"", "")); +// +// List<Description> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache,resultsize,noise); +// +// System.out.println("concepts"+conceptresults); +// //System.exit(0); +// +// SortedSet<ResultCompare> res=new TreeSet<ResultCompare>(); +// for (Description oneConcept : conceptresults) { +// try{ +// +// +// int i=0; +// int a=0; +// for (String oneinst : instances) { +// boolean inRest=false; +// boolean inTotal=false; +// for (String onerest : rest) { +// if(onerest.equalsIgnoreCase(oneinst)) +// { i++; inRest=true; break;} +// +// } +// if (inRest){coveredInRest.add(oneinst);}; +// +// for (String onetotal : totalSKOSset) { +// if(onetotal.equalsIgnoreCase(oneinst)) +// { a++; inTotal=true; break;} +// } +// if(!inRest && !inTotal){ +// possibleNewCandidates.add(oneinst); +// } +// } +// +// for (String onetotal : totalSKOSset) { +// boolean mm=false; +// for (String oneinst : instances) { +// if(onetotal.equalsIgnoreCase(oneinst)){ +// mm=true;break; +// } +// +// } +// if(!mm)notCoveredInTotal.add(onetotal); +// +// } +// +// +// +// double accuracy= (double)i/rest.size(); +// double accuracy2= (double)a/totalSKOSset.size(); +// +// logger.debug((new ResultCompare(oneConcept.toKBSyntaxString(),instances,accuracy,accuracy2,instances.size(), +// coveredInRest,possibleNewCandidates,notCoveredInTotal)).toStringFull()); +// +// //if(instances.size()>=0)System.out.println("size of instances "+instances.size()); +// //if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); +// }catch (Exception e) {e.printStackTrace();} +// } - for (String string2 : posExamples) { - logger.debug("+"+string2); - } - SortedSet<String> totalSKOSset= ae.totalSKOSset; - SortedSet<String> rest= ae.rest; - logger.debug(totalSKOSset); - logger.debug(rest); - - - LearnSparql ls = new LearnSparql(); - - //igno.add(oneConcept.replaceAll("\"", "")); - - List<Description> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache,resultsize,noise); - - System.out.println("concepts"+conceptresults); - //System.exit(0); - logger.debug("found nr of concepts:"+conceptresults.size()); - SortedSet<ResultCompare> res=new TreeSet<ResultCompare>(); - for (Description oneConcept : conceptresults) { - try{ - System.out.println("oneconcept: "+oneConcept); - String rewritten = SparqlQueryDescriptionConvertRDFS.conceptRewrite(oneConcept.toKBSyntaxString(), se, c, true); - SortedSet<String> instances = ae.queryConceptAsStringSet(rewritten, 200); - SortedSet<String> coveredInRest = new TreeSet<String>(); - SortedSet<String> possibleNewCandidates = new TreeSet<String>(); - SortedSet<String> notCoveredInTotal = new TreeSet<String>(); - - int i=0; - int a=0; - for (String oneinst : instances) { - boolean inRest=false; - boolean inTotal=false; - for (String onerest : rest) { - if(onerest.equalsIgnoreCase(oneinst)) - { i++; inRest=true; break;} - - } - if (inRest){coveredInRest.add(oneinst);}; - - for (String onetotal : totalSKOSset) { - if(onetotal.equalsIgnoreCase(oneinst)) - { a++; inTotal=true; break;} - } - if(!inRest && !inTotal){ - possibleNewCandidates.add(oneinst); - } - } - - for (String onetotal : totalSKOSset) { - boolean mm=false; - for (String oneinst : instances) { - if(onetotal.equalsIgnoreCase(oneinst)){ - mm=true;break; - } - - } - if(!mm)notCoveredInTotal.add(onetotal); - - } - - - - double accuracy= (double)i/rest.size(); - double accuracy2= (double)a/totalSKOSset.size(); - - logger.debug((new ResultCompare(oneConcept.toKBSyntaxString(),instances,accuracy,accuracy2,instances.size(), - coveredInRest,possibleNewCandidates,notCoveredInTotal)).toStringFull()); - - //if(instances.size()>=0)System.out.println("size of instances "+instances.size()); - //if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); - }catch (Exception e) {e.printStackTrace();} - } - // System.out.println(res.last()); // res.remove(res.last()); // System.out.println(res.last()); @@ -250,7 +227,7 @@ // // } // - return 0.0; +// return 0.0; //System.out.println("AAAAAAAA"); @@ -268,7 +245,7 @@ - public static void init() { + public static void initLogger() { SimpleLayout layout = new SimpleLayout(); // create logger (a simple logger which outputs @@ -283,12 +260,11 @@ logger.addAppender(consoleAppender); logger.addAppender(fileAppender); logger.setLevel(Level.DEBUG); - c = new Cache("cachetemp"); - - + Logger.getLogger(KnowledgeSource.class).setLevel(Level.WARN); + } - public static SortedSet<String> selectDBpediaConcepts(int number){ + /*public static SortedSet<String> selectDBpediaConcepts(int number){ String query = "SELECT DISTINCT ?concept WHERE { \n" + "[] a ?concept .FILTER (regex(str(?concept),'yago'))" + " \n} \n"; //LIMIT "+number+" @@ -298,8 +274,181 @@ ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + }*/ + + public void makeExamples(String SKOSConcept, double percentOfSKOSSet , double negfactor, int sparqlResultSize){ + + //POSITIVES + AutomaticPositiveExampleFinderSPARQL apos = new AutomaticPositiveExampleFinderSPARQL(sparqlTasks); + apos.makePositiveExamplesFromSKOSConcept(SKOSConcept); + fullPositiveSet = apos.getPosExamples(); + + int poslimit=(int)Math.round(percentOfSKOSSet*fullPositiveSet.size()); + int neglimit=(int)Math.round(poslimit*negfactor); + + this.posExamples = SetManipulation.fuzzyShrink(fullPositiveSet,poslimit); + + + + //NEGATIVES + + AutomaticNegativeExampleFinderSPARQL aneg = new AutomaticNegativeExampleFinderSPARQL(fullPositiveSet,sparqlTasks); + + aneg.makeNegativeExamplesFromParallelClasses(posExamples, sparqlResultSize); + SortedSet<String> negativeSet = aneg.getNegativeExamples(neglimit); + + logger.debug("POSITIVE EXAMPLES"); + for (String pos : posExamples) { + logger.debug("+"+pos); + } + + logger.debug("NEGATIVE EXAMPLES"); + for (String negs : negativeSet) { + logger.debug("-"+negs); + } + + + + fullminusposRest = fullPositiveSet; + fullminusposRest.removeAll(posExamples); + + + logger.debug(fullPositiveSet); + logger.debug(fullminusposRest); } + public List<Description> learn(SortedSet<String> posExamples, SortedSet<String> negExamples){ + + SortedSet<String> instances = new TreeSet<String>(); + instances.addAll(posExamples); + instances.addAll(negExamples); + + + ComponentManager cm = ComponentManager.getInstance(); + LearningAlgorithm la = null; + ReasoningService rs = null; + LearningProblem lp = null; + SparqlKnowledgeSource ks =null; + try { + Set<KnowledgeSource> sources = new HashSet<KnowledgeSource>(); + ks = cm.knowledgeSource(SparqlKnowledgeSource.class); + ReasonerComponent r = new FastInstanceChecker(sources); + rs = new ReasoningService(r); + //System.out.println("satisfy: "+rs.isSatisfiable()); + lp = new PosNegDefinitionLP(rs); + ((PosNegLP) lp).setPositiveExamples(SetManipulation.stringToInd(posExamples)); + ((PosNegLP) lp).setNegativeExamples(SetManipulation.stringToInd(negExamples)); + + la = cm.learningAlgorithm(ExampleBasedROLComponent.class, lp, rs); + + logger.debug("start learning"); + + + //KNOWLEDGESOURCE + cm.applyConfigEntry(ks, "instances",instances); + cm.applyConfigEntry(ks, "url",url); + cm.applyConfigEntry(ks, "recursionDepth",recursiondepth); + cm.applyConfigEntry(ks, "closeAfterRecursion",closeAfterRecursion); + cm.applyConfigEntry(ks, "predefinedFilter","YAGO"); + if(local) + cm.applyConfigEntry(ks, "predefinedEndpoint","LOCALDBPEDIA"); + else { + cm.applyConfigEntry(ks, "predefinedEndpoint","DBPEDIA"); + } + if(randomizeCache) + cm.applyConfigEntry(ks, "cacheDir","cache/"+System.currentTimeMillis()+""); + else {cm.applyConfigEntry(ks, "cacheDir",Cache.getDefaultCache());} + + //LEARNINGALGORITHM + cm.applyConfigEntry(la,"useAllConstructor",false); + cm.applyConfigEntry(la,"useExistsConstructor",true); + cm.applyConfigEntry(la,"useCardinalityRestrictions",false); + cm.applyConfigEntry(la,"useNegation",false); + cm.applyConfigEntry(la,"minExecutionTimeInSeconds",0); + cm.applyConfigEntry(la,"maxExecutionTimeInSeconds",maxExecutionTimeInSeconds); + cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",guaranteeXgoodDescriptions); + cm.applyConfigEntry(la,"writeSearchTree",false); + cm.applyConfigEntry(la,"searchTreeFile","log/SKOS.txt"); + cm.applyConfigEntry(la,"replaceSearchTree",true); + cm.applyConfigEntry(la,"noisePercentage",noise); + //cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",999999); + cm.applyConfigEntry(la,"logLevel","TRACE"); + /*if(ignoredConcepts.size()>0) + cm.applyConfigEntry(la,"ignoredConcepts",ignoredConcepts); + */ + + ks.init(); + sources.add(ks); + r.init(); + lp.init(); + la.init(); + + + la.start(); + //Statistics.addTimeCollecting(sc.getTime()); + //Statistics.addTimeLearning(sc.getTime()); + + + return la.getGoodSolutions(); + + }catch (Exception e) {e.printStackTrace();} + return null; + + } + +// String t="\"http://dbpedia.org/class/yago/Fiction106367107\""; +// t="(\"http://dbpedia.org/class/yago/HeadOfState110164747\" AND (\"http://dbpedia.org/class/yago/Negotiator110351874\" AND \"http://dbpedia.org/class/yago/Representative110522035\"))"; +// //System.out.println(t); +// //t="\"http://www.w3.org/2004/02/skos/core#subject\""; +// //conceptRewrite(t); +// //getSubClasses(t); +// +// AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); +// try{ +// System.out.println("oneconcept: "+t); +// SortedSet<String> instances = ae.queryConceptAsStringSet(conceptRewrite(t), 200); +// if(instances.size()>=0)System.out.println("size of instances "+instances.size()); +// if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); +// }catch (Exception e) { +// e.printStackTrace(); +// } + //SortedSet<String> concepts = new TreeSet<String>(); + + + + //System.out.println(DBpediaSKOS(prim)); +// double acc1=0.0; +// for (int i = 0; i < 5; i++) { +// acc1+=DBpediaSKOS(prim); +// } +// System.out.println("accprim"+(acc1/5)); +// +// double acc2=0.0; +// for (int i = 0; i < 5; i++) { +// acc2+=DBpediaSKOS(award); +// } +// System.out.println("accprim"+(acc2/5)); + +// DBpediaSKOS(concepts.first()); +// DBpediaSKOS(concepts.first()); +// concepts.remove(concepts.first()); +// DBpediaSKOS(concepts.first()); +// DBpediaSKOS(concepts.first()); +// concepts.remove(concepts.first()); +// DBpediaSKOS(concepts.first()); +// DBpediaSKOS(concepts.first()); + //algorithm="refinement"; + //roles(); + + /*System.out.println(Level.DEBUG.getClass()); + System.out.println(Level.toLevel("INFO")); + System.out.println(Level.INFO);*/ + //System.exit(0); + + + + + } Modified: trunk/src/dl-learner/org/dllearner/scripts/SPARQLExtractionEvaluation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SPARQLExtractionEvaluation.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/scripts/SPARQLExtractionEvaluation.java 2008-05-21 14:59:31 UTC (rev 912) @@ -14,8 +14,6 @@ import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; -import org.dllearner.utilities.datastructures.SetManipulation; import org.dllearner.utilities.examples.AutomaticExampleFinderSPARQLold; import org.dllearner.utilities.learn.LearnSparql; import org.dllearner.utilities.statistics.SimpleClock; @@ -107,7 +105,7 @@ System.out.println(oneConcept); AutomaticExampleFinderSPARQLold ae= new AutomaticExampleFinderSPARQLold( se); - ae.initDBpedia(oneConcept, useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); + //ae.initDBpedia(oneConcept, useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); posExamples = ae.getPosExamples(); negExamples = ae.getNegExamples(); @@ -183,6 +181,7 @@ } + //FIXME public static SortedSet<String> selectDBpediaConcepts(int number){ String query = "SELECT DISTINCT ?concept WHERE { \n" + "[] a ?concept .FILTER (regex(str(?concept),'yago'))" + @@ -190,8 +189,9 @@ String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + //JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + //return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + return null; } public static SortedSet<String> initConcepts(){ Modified: trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java 2008-05-21 14:59:31 UTC (rev 912) @@ -13,9 +13,6 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlKnowledgeSource; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; -import org.dllearner.utilities.datastructures.SetManipulation; -import org.dllearner.utilities.examples.AutomaticExampleFinderRolesSPARQL; import org.dllearner.utilities.examples.AutomaticExampleFinderSPARQLold; import org.dllearner.utilities.learn.ConfWriter; import org.dllearner.utilities.learn.LearnSparql; @@ -114,12 +111,12 @@ //HashMap<String, String> result2 = new HashMap<String, String>(); //System.out.println(concepts.first()); //logger.setLevel(Level.TRACE); - AutomaticExampleFinderRolesSPARQL ae= new AutomaticExampleFinderRolesSPARQL( se); + //AutomaticExampleFinderRolesSPARQL ae= new AutomaticExampleFinderRolesSPARQL( se); - ae.initDomainRange(roles.first(), poslimit, neglimit); + //ae.initDomainRange(roles.first(), poslimit, neglimit); - posExamples = ae.getPosExamples(); - negExamples = ae.getNegExamples(); + //posExamples = ae.getPosExamples(); + //negExamples = ae.getNegExamples(); System.out.println(posExamples); System.out.println(negExamples); @@ -192,7 +189,7 @@ poslimit=10; neglimit=10; - ae.initDBpedia(concept, useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); + //ae.initDBpedia(concept, useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); posExamples = ae.getPosExamples(); negExamples = ae.getNegExamples(); @@ -286,8 +283,9 @@ String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + //JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + //return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + return null; } Modified: trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java 2008-05-21 14:59:31 UTC (rev 912) @@ -11,12 +11,9 @@ import org.apache.log4j.SimpleLayout; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; +import org.dllearner.utilities.examples.SPARQLTasks; import org.dllearner.utilities.statistics.SimpleClock; -import com.hp.hpl.jena.query.ResultSet; - public class SparqlEndpointTest { private static Logger logger = Logger.getRootLogger(); static Set<String> working = new HashSet<String>(); @@ -65,22 +62,15 @@ SimpleClock sc = new SimpleClock(); try{ - String query ="" + + String SPARQLquery ="" + "SELECT DISTINCT ?c " + "WHERE {[] a ?c }" + "LIMIT 100"; - query ="SELECT DISTINCT ?c WHERE {[] a ?c }LIMIT 100"; - - SparqlQuery s = new SparqlQuery(query,se); + SPARQLquery ="SELECT DISTINCT ?c WHERE {[] a ?c }LIMIT 100"; + int i = new SPARQLTasks(se).queryAsSet(SPARQLquery, "c").size(); - s.send(); - String result = s.getResult(); - ResultSet rs = SparqlQuery.JSONtoResultSet(result); - JenaResultSetConvenience jsr = new JenaResultSetConvenience(rs); - int i = jsr.getStringListForVariable("c").size(); - working.add(sc.getAndSet("endpoint working: "+se.getURL()+" ("+((i==100)?"more than 100 concepts":"about "+i+" concepts")+" )")); }catch (Exception e) {notworking.add(sc.getAndSet("endpoint NOT working: "+se.getURL()));} } Added: trunk/src/dl-learner/org/dllearner/utilities/JamonTimeLogger.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/JamonTimeLogger.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/JamonTimeLogger.java 2008-05-21 14:59:31 UTC (rev 912) @@ -0,0 +1,9 @@ +package org.dllearner.utilities; + +/** + * provides convenience functions for timelogs + * + */ +public class JamonTimeLogger { + +} Deleted: trunk/src/dl-learner/org/dllearner/utilities/datastructures/JenaResultSetConvenience.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/datastructures/JenaResultSetConvenience.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/utilities/datastructures/JenaResultSetConvenience.java 2008-05-21 14:59:31 UTC (rev 912) @@ -1,37 +0,0 @@ -package org.dllearner.utilities.datastructures; - -import java.util.List; -import java.util.SortedSet; -import java.util.TreeSet; - -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.ResultSetFormatter; -import com.hp.hpl.jena.sparql.core.ResultBinding; - -public class JenaResultSetConvenience { - ResultSet rs; - - public JenaResultSetConvenience(ResultSet rs) { - super(); - this.rs = rs; - } - - @SuppressWarnings("unchecked") - public SortedSet<String> getStringListForVariable(String var){ - SortedSet<String> result = new TreeSet<String>(); - - //String s=ResultSetFormatter.asXMLString(this.rs); - List<ResultBinding> l = ResultSetFormatter.toList(this.rs); - - for (ResultBinding resultBinding : l) { - - result.add(resultBinding.get(var).toString()); - - } - - return result; - - } - - -} Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderRolesSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderRolesSPARQL.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderRolesSPARQL.java 2008-05-21 14:59:31 UTC (rev 912) @@ -1,18 +1,12 @@ package org.dllearner.utilities.examples; import java.util.SortedSet; -import java.util.TreeSet; import org.apache.log4j.Logger; import org.dllearner.core.ComponentManager; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; -import org.dllearner.utilities.datastructures.SetManipulation; - -import com.hp.hpl.jena.query.ResultSet; - +// keep this class it still needs to be worked in public class AutomaticExampleFinderRolesSPARQL { private static Logger logger = Logger @@ -24,7 +18,7 @@ private SortedSet<String> negExamples; private int roleLimit=1000; - + /* public AutomaticExampleFinderRolesSPARQL(SparqlEndpoint se){ this.c=new Cache("cachetemp"); this.se=se; @@ -119,7 +113,7 @@ + */ - } Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSKOSSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSKOSSPARQL.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSKOSSPARQL.java 2008-05-21 14:59:31 UTC (rev 912) @@ -7,13 +7,8 @@ import org.dllearner.core.ComponentManager; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; import org.dllearner.utilities.datastructures.SetManipulation; -import com.hp.hpl.jena.query.ResultSet; - public class AutomaticExampleFinderSKOSSPARQL { private static Logger logger = Logger @@ -35,8 +30,9 @@ negExamples = new TreeSet<String>(); } + @Deprecated public void initDBpediaSKOS(String concept, double percent, boolean useRelated,boolean useParallelClasses) { - dbpediaMakePositiveExamplesFromConcept( concept); + //dbpediaMakePositiveExamplesFromConcept( concept); SortedSet<String> keepForClean = new TreeSet<String>(); keepForClean.addAll(this.posExamples); totalSKOSset.addAll(this.posExamples); @@ -55,12 +51,12 @@ logger.debug("rest: "+rest.size()); if(useRelated) { - dbpediaMakeNegativeExamplesFromRelatedInstances(this.posExamples); + //dbpediaMakeNegativeExamplesFromRelatedInstances(this.posExamples); } if(useParallelClasses) { int limit = this.posExamples.size(); - makeNegativeExamplesFromClassesOfInstances(limit); + //makeNegativeExamplesFromClassesOfInstances(limit); } //clean negExamples.removeAll(keepForClean); @@ -98,16 +94,8 @@ - public SortedSet<String> dbpediaGetPosOnly(String concept, int limit){ - dbpediaMakePositiveExamplesFromConcept( concept); - return SetManipulation.fuzzyShrink(this.posExamples, limit); - } - public SortedSet<String> getPosOnly(String concept, int limit){ - makePositiveExamplesFromConcept( concept); - return SetManipulation.fuzzyShrink(this.posExamples, limit); - } - + /* private void dbpediaMakePositiveExamplesFromConcept(String concept){ logger.debug("making Positive Examples from Concept: "+concept); if(concept.contains("http://dbpedia.org/resource/Category:")) { @@ -118,14 +106,15 @@ .getStringListForVariable("subject"); } logger.debug("pos Example size: "+posExamples.size()); - } + }*/ + /* private void makePositiveExamplesFromConcept(String concept){ logger.debug("making Positive Examples from Concept: "+concept); this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) .getStringListForVariable("subject"); logger.debug(" pos Example size: "+posExamples.size()); - } + }*/ @@ -145,60 +134,18 @@ - private void dbpediaMakeNegativeExamplesFromRelatedInstances(SortedSet<String> subject) { - logger.debug("making examples from related instances"); - for (String string : subject) { - dbpediaMakeNegativeExamplesFromRelatedInstances(string); - } - logger.debug(" negExample size: "+negExamples.size()); - } - private void makeNegativeExamplesFromRelatedInstances(SortedSet<String> subject, String namespace) { - logger.debug("making examples from related instances"); - for (String string : subject) { - makeNegativeExamplesFromRelatedInstances(string,namespace); - } - logger.debug(" negExample size: "+negExamples.size()); - } - /** * * @param subject * @return */ - private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { - // SortedSet<String> result = new TreeSet<String>(); - String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" - + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" - + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" - + "}"; - - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - this.negExamples.addAll(rsc.getStringListForVariable("o")); - - - } - private void makeNegativeExamplesFromRelatedInstances(String subject, String namespace) { - // SortedSet<String> result = new TreeSet<String>(); - String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" - + "FILTER (REGEX(str(?o), '"+namespace+"')).\n" - + "}"; - - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - this.negExamples.addAll(rsc.getStringListForVariable("o")); - - - } - + /* private void makeNegativeExamplesFromSuperClasses(String concept) { SortedSet<String> superClasses = new TreeSet<String>(); @@ -213,109 +160,14 @@ this.negExamples.addAll(rsc.getStringListForVariable("subject")); } logger.debug(" neg Example size: "+negExamples.size()); - } + }*/ - private void makeNegativeExamplesFromClassesOfInstances(int limit) { - logger.debug("making neg Examples from parallel classes"); - SortedSet<String> classes = new TreeSet<String>(); - //superClasses.add(concept.replace("\"", "")); - //logger.debug("before"+superClasses); - //superClasses = dbpediaGetSuperClasses( superClasses, 4); - //logger.debug("getting negExamples from "+superClasses.size()+" superclasses"); - JenaResultSetConvenience rsc; - ResultSet rs=null; - for (String instance : posExamples) { - //System.out.println(instance); - rs = getClassesForInstance(instance); - //System.out.println(ResultSetFormatter.asXMLString(rs)); - rsc = new JenaResultSetConvenience(rs); - classes.addAll(rsc.getStringListForVariable("subject")); - //System.out.println(classes); - } - logger.debug("getting negExamples from "+classes.size()+" parallel classes"); - for (String oneClass : classes) { - logger.debug(oneClass); - rsc = new JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); - this.negExamples.addAll(rsc.getStringListForVariable("subject")); - } - logger.debug("neg Example size: "+negExamples.size()); - - } - - private SortedSet<String> getSuperClasses(SortedSet<String> superClasses, int depth) { - SortedSet<String> ret = new TreeSet<String>(); - SortedSet<String> tmpset = new TreeSet<String>(); - ret.addAll(superClasses); - //logger.debug(superClasses); - JenaResultSetConvenience rsc; - - String query = ""; - for (; depth != 0 ; depth--) { - for (String oneSuperClass : superClasses) { - - //tmp = oneSuperClass.replace("\"", ""); - query = "SELECT * WHERE { \n" + "<" + oneSuperClass + "> " - + "<http://www.w3.org/2000/01/rdf-schema#subClassOf> ?superclass. \n" - + "}"; - - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - rsc = new JenaResultSetConvenience(rs); - tmpset.addAll(rsc.getStringListForVariable("superclass")); - - } - ret.addAll(tmpset); - //logger.debug(ret); - superClasses.clear(); - superClasses.addAll(tmpset); - tmpset.clear(); - } - //logger.debug(concept); - //logger.debug(query); - return ret; - } - - - public ResultSet queryConcept(String concept,int limit) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlQuery(concept,limit); - - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println("JSON:\n"+JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - public SortedSet<String> queryConceptAsStringSet(String concept,int limit) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlQuery(concept,limit); - - SparqlQuery sq = new SparqlQuery(query, se); - //System.out.println(query); - String JSON = c.executeSparqlQuery(sq); - //System.out.println("JSON:\n"+JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - return new JenaResultSetConvenience(rs).getStringListForVariable("subject"); - - } - + /* public ResultSet dbpediaQuerySKOSConcept(String SKOSconcept,int limit) { if(limit==0)limit=99999; // @@ -337,31 +189,10 @@ } return rs; - } + }*/ - public ResultSet getClassesForInstance(String instance) { - ResultSet rs = null; - try { - - String query = "SELECT ?subject WHERE { \n " + - "<" + instance + ">"+ - " a " + - "?subject " + - "\n" + - "} LIMIT 200"; - SparqlQuery sq = new SparqlQuery(query, se); - //System.out.println(query); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); + - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - public SortedSet<String> getPosExamples() { return posExamples; } Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSPARQLold.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSPARQLold.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticExampleFinderSPARQLold.java 2008-05-21 14:59:31 UTC (rev 912) @@ -7,13 +7,7 @@ import org.dllearner.core.ComponentManager; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; -import org.dllearner.utilities.datastructures.JenaResultSetConvenience; -import org.dllearner.utilities.datastructures.SetManipulation; -import com.hp.hpl.jena.query.ResultSet; - public class AutomaticExampleFinderSPARQLold { private static Logger logger = Logger @@ -33,7 +27,7 @@ posExamples = new TreeSet<String>(); negExamples = new TreeSet<String>(); } - + /* public void initDBpedia(String concept, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { dbpediaMakePositiveExamplesFromConcept( concept); SortedSet<String> keepForClean = new TreeSet<String>(); @@ -62,43 +56,21 @@ logger.debug("Finished examples for concept: "+concept); } - public void init(String concept, String namespace, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { - makePositiveExamplesFromConcept( concept); - SortedSet<String> keepForClean = new TreeSet<String>(); - keepForClean.addAll(this.posExamples); - this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); - logger.trace("shrinking: pos Example size: "+posExamples.size()); - - if(useRelated) { - makeNegativeExamplesFromRelatedInstances(this.posExamples,namespace); - } - if(useSuperclasses) { - makeNegativeExamplesFromSuperClasses(concept); - } - if(useParallelClasses) { - makeNegativeExamplesFromClassesOfInstances(); - } - //clean - negExamples.removeAll(keepForClean); - logger.trace("neg Example size after cleaning: "+negExamples.size()); - this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); - logger.debug("pos Example size after shrinking: "+posExamples.size()); - logger.debug("neg Example size after shrinking: "+negExamples.size()); - logger.debug("Finished examples for concept: "+concept); - } + */ - + /* public SortedSet<String> dbpediaGetPosOnly(String concept, int limit){ dbpediaMakePositiveExamplesFromConcept( concept); return SetManipulation.fuzzyShrink(this.posExamples, limit); - } + }*/ - public SortedSet<String> getPosOnly(String concept, int limit){ + /*public SortedSet<String> getPosOnly(String concept, int limit){ makePositiveExamplesFromConcept( concept); return SetManipulation.fuzzyShrink(this.posExamples, limit); - } + }*/ + /* private void dbpediaMakePositiveExamplesFromConcept(String concept){ logger.debug("making Positive Examples from Concept: "+concept); if(concept.contains("http://dbpedia.org/resource/Category:")) { @@ -110,16 +82,12 @@ } logger.debug(" pos Example size: "+posExamples.size()); } + */ - private void makePositiveExamplesFromConcept(String concept){ - logger.debug("making Positive Examples from Concept: "+concept); - this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) - .getStringListForVariable("subject"); - logger.debug(" pos Example size: "+posExamples.size()); - } + /*private void makePositiveExamplesFromConcept(String concept){ logger.debug("making Positive Examples from Concept: "+concept); if(concept.contains("http://dbpedia.org/resource/Category:")) { @@ -136,29 +104,11 @@ - private void dbpediaMakeNegativeExamplesFromRelatedInstances(SortedSet<String> subject) { - logger.debug("making examples from related instances"); - for (String string : subject) { - dbpediaMakeNegativeExamplesFromRelatedInstances(string); - } - logger.debug(" negExample size: "+negExamples.size()); - } - private void makeNegativeExamplesFromRelatedInstances(SortedSet<String> subject, String namespace) { - logger.debug("making examples from related instances"); - for (String string : subject) { - makeNegativeExamplesFromRelatedInstances(string,namespace); - } - logger.debug(" negExample size: "+negExamples.size()); - } - /** - * - * @param subject - * @return - */ - private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { + + /*private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { // SortedSet<String> result = new TreeSet<String>(); String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" @@ -172,8 +122,8 @@ this.negExamples.addAll(rsc.getStringListForVariable("o")); - } - + }*/ + /* private void makeNegativeExamplesFromRelatedInstances(String subject, String namespace) { // SortedSet<String> result = new TreeSet<String>(); @@ -187,27 +137,13 @@ this.negExamples.addAll(rsc.getStringListForVariable("o")); - } + }*/ - private void makeNegativeExamplesFromSuperClasses(String concept) { - - SortedSet<String> superClasses = new TreeSet<String>(); - superClasses.add(concept.replace("\"", "")); - //logger.debug("before"+superClasses); - superClasses = getSuperClasses( superClasses, 4); - logger.debug("making neg Examples from "+superClasses.size()+" superclasses"); - JenaResultSetConvenience rsc; - for (String oneSuperClass : superClasses) { - logger.debug(oneSuperClass); - rsc = new JenaResultSetConvenience(queryConcept("\""+oneSuperClass+"\"", limit)); - this.negExamples.addAll(rsc.getStringListForVariable("subject")); - } - logger.debug(" neg Example size: "+negExamples.size()); - } + /* private void makeNegativeExamplesFromClassesOfInstances() { logger.debug("making neg Examples from parallel classes"); SortedSet<String> classes = new TreeSet<String>(); @@ -233,107 +169,13 @@ } logger.debug("neg Example size: "+negExamples.size()); - } + }*/ - private SortedSet<String> getSuperClasses(SortedSet<String> superClasses, int depth) { - SortedSet<String> ret = new TreeSet<String>(); - SortedSet<String> tmpset = new TreeSet<String>(); - ret.addAll(superClasses); - //logger.debug(superClasses); - JenaResultSetConvenience rsc; - - String query = ""; - for (; depth != 0 ; depth--) { - for (String oneSuperClass : superClasses) { - - //tmp = oneSuperClass.replace("\"", ""); - query = "SELECT * WHERE { \n" + "<" + oneSuperClass + "> " - + "<http://www.w3.org/2000/01/rdf-schema#subClassOf> ?superclass. \n" - + "}"; - - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - rsc = new JenaResultSetConvenience(rs); - tmpset.addAll(rsc.getStringListForVariable("superclass")); - - } - ret.addAll(tmpset); - //logger.debug(ret); - superClasses.clear(); - superClasses.addAll(tmpset); - tmpset.clear(); - } - //logger.debug(concept); - //logger.debug(query); - return ret; - } - public ResultSet queryConcept(String concept,int limit) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlQuery(concept,limit); - - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println("JSON:\n"+JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - public ResultSet dbpediaQuerySKOSConcept(String SKOSconcept,int limit) { - if(limit==0)limit=99999; - // - ResultSet rs = null; - try { - - String query = "SELECT * WHERE { \n " + - "?subject " + - "<http://www.w3.org/2004/02/skos/core#subject> " + - "<" + SKOSconcept + "> \n" + - "} LIMIT "+limit; - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - - public ResultSet getClassesForInstance(String instance) { - ResultSet rs = null; - try { - - String query = "SELECT ?subject WHERE { \n " + - "<" + instance + ">"+ - " a " + - "?subject " + - "\n" + - "} LIMIT "+limit; - SparqlQuery sq = new SparqlQuery(query, se); - //System.out.println(query); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - public SortedSet<String> getPosExamples() { return posExamples; } Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-05-21 13:53:46 UTC (rev 911) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-05-21 14:59:31 UTC (rev 912) @@ -1,21 +1,145 @@ package org.dllearner.utilities.examples; import java.util.SortedSet; +import java.util.TreeSet; +import org.apache.log4j.Logger; +import org.dllearner.core.ComponentManager; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.utilities.datastructures.SetManipulation; +import com.hp.hpl.jena.query.ResultSet; + public class AutomaticNegativeExampleFinderSPARQL { + // CHECK + private static Logger logger = Logger.getLogger(ComponentManager.class); + + private SPARQLTasks sparqltasks; + + private SortedSet<String> fullPositiveSet; - private Cache c; - private SparqlEndpoint se; - private SortedSet<String> posExamples; - private SortedSet<String> negExamples; + //static boolean useRelated = false; + private SortedSet<String> fromRelated; + //static boolean useSuperClasses = false; + private SortedSet<String> fromSuperclasses; + //static boolean useParallelClasses = true; + private SortedSet<String> fromParallelClasses; - static boolean useRelated = false; - static boolean useSuperClasses = false; - static boolean useParallelClasses = true; static int poslimit = 10; static int neglimit = 20; + + // CHECK all vars needed + public AutomaticNegativeExampleFinderSPARQL( + SortedSet<String> fullPositiveSet, + SPARQLTasks st) { + super(); + this.fromParallelClasses = new TreeSet<String>(); + this.fromRelated = new TreeSet<String>(); + this.fromSuperclasses = new TreeSet<String>(); + + this.fullPositiveSet = fullPositiveSet; + this.sparqltasks = st; + + } + + + + public SortedSet<String> getNegativeExamples(int neglimit ) { + + SortedSet<String> negatives = new TreeSet<String>(); + negatives.addAll(fromParallelClasses); + negatives.addAll(fromRelated); + negatives.addAll(fromSuperclasses); + logger.debug("neg Example size before shrinking: " + negatives.size()); + negatives = SetManipulation.fuzzyShrink(negatives,neglimit); + logger.debug("neg Example size after shrinking: " + negatives.size()); + return negatives; + } + + // CHECK namespace + public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances, + String namespace) { + logger.debug("making examples from related instances"); + for (String oneInstance : instances) { + makeNegativeExamplesFromRelatedInstances(oneInstance, namespace); + } + logger.debug("|-negExample size from related: " + fromRelated.size()); + } + + private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) { + // SortedSet<String> result = new TreeSet<String>(); + + String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n" + + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; + + this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); + this.fromRelated.removeAll(this.fullPositiveSet); + + } + + // QUALITY: weird function, best removed + public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { + // SortedSet<String> result = new TreeSet<String>(); + + String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" + + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" + + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}"; + + this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o")); + + } + + public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int resultLimit){ + makeNegativeExamplesFromClassesOfInstances(positiveSet, resultLimit); + } + + public void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet, + int resultLimit) { + logger.debug("making neg Examples from parallel classes"); + SortedSet<String> classes = new TreeSet<String>(); + // superClasses.add(concept.replace("\"", "")); + // logger.debug("before"+superClasses); + // superClasses = dbpediaGetSuperClasses( superClasses, 4); + // logger.debug("getting negExamples from "+superClasses.size()+" + // superclasses"); + + for (String instance : positiveSet) { + classes.addAll(sparqltasks.getClassesForInstance(instance, resultLimit)); + } + logger.debug("getting negExamples from " + classes.size() + " parallel classes"); + for (String oneClass : classes) { + logger.debug(oneClass); + // rsc = new + // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); + this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForConcept("\"" + oneClass + + "\"", resultLimit)); + + } + + this.fromParallelClasses.removeAll(this.fullPositiveSet); + logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size()); + + } + + public void makeNegativeExamplesFromSuperClasses(String concept, int resultLimit) { + + concept = concept.replaceAll("\"", ""); + // superClasses.add(concept.replace("\"", "")); + // logger.debug("before"+superClasses); + SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, 4); + logger.debug("making neg Examples from " + superClasses.size() + " superclasses"); + + for (String oneSuperClass : superClasses) { + logger.debug(oneSuperClass); + this.fromSuperclasses.addAll(sparqltasks.retrieveInstancesForConcept("\"" + + oneSuperClass + "\"", resultLimit)); + + } + this.fromSuperclasses.removeAll(this.fullPositiveSet); + logger.debug("|-neg Example from superclass: " + fromSuperclasses.size()); + } + } Added: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java 2008-05-21 14:59:31 UTC (rev 912) @@ -0,0 +1,59 @@ +package org.dllearner.utilities.examples; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.ComponentManager; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +public class AutomaticPositiveExampleFinderSPARQL { + + //CHECK + private static Logger logger = Logger + .getLogger(ComponentManager.class); + + + private SPARQLTasks sparqltasks; + + private SortedSet<String> posExamples; + + public AutomaticPositiveExampleFinderSPARQL(SPARQLTasks st) { + super(); + + this.posExamples = new TreeSet<String>(); + this.sparqltasks = st; + } + + //QUALITY resultsize is not accounted for + public void makePositiveExamplesFromConcept(String conceptKBSyntax){ + logger.debug("making Positive Examples from Concept: "+conceptKBSyntax); + this.posExamples = sparqltasks.retrieveInstancesForConcept(conceptKBSyntax, 0); + logger.debug(" pos Example size: "+posExamples.size()); + } + + + //QUALITY resultsize is not accounted for + public void makePositiveExamplesFromRoleAndObject(String role, String object){ + logger.debug("making Positive Examples from role: "+role+" and object: "+object); + this.posExamples = sparqltasks.retrieveDISTINCTSubjectsForRoleAndObject(role, object, 0); + logger.debug(" pos Example size: "+posExamples.size()); + } + + //QUALITY resultsize is not accounted for + public void makePositiveExamplesFromSKOSConcept(String SKOSConcept){ + logger.debug("making Positive Examples from SKOSConcept: "+SKOSConcept); + this.posExamples = sparqltasks.retrieveInstancesForSKOSConcept(SKOSConcept, 0); + logger.debug(" pos Example size: "+posExamples.size()); + } + + public SortedSet<String> getPosExamples() { + return posExamples; + } + + + + + +} Added: trunk/src/dl-learner/org/dllearner/utilities/examples/SPARQLTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/SPARQLTasks.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/SPARQLTasks.java 2008-05-21 14:59:31 UTC (rev 912) @@ -0,0 +1,216 @@ +package org.dllearner.utilities.examples; + +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.sparql.core.ResultBinding; + +public class SPARQLTasks { + + private Cache c; + private SparqlEndpoint se; + + public SPARQLTasks(Cache c, SparqlEndpoint se) { + super(); + this.c = c; + this.se = se; + } + + public SPARQLTasks( SparqlEndpoint se) { + super(); + this.c = null; + this.se = se; + } + + + + + /** + * QUALITY: doesn't seem optimal, check! + * get all superclasses up to a certain depth + * 1 means direct superclasses + * depth + * @param superClasses + * @param depth + * @return + */ + public SortedSet<String> getSuperClasses(String oneClass, int depth) { + SortedSet<String> superClasses = new TreeSet<String>(); + superClasses.add(oneClass); + SortedSet<String> ret = new TreeSet<String>(); + SortedSet<String> tmpset = new TreeSet<String>(); + //ret.addAll(superClasses); + //logger.debug(superClasses); + + + String SPARQLquery = ""; + for (; depth != 0 ; depth--) { + for (String oneSuperClass : superClasses) { + ... [truncated message content] |