From: <ku...@us...> - 2008-05-16 07:05:12
|
Revision: 872 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=872&view=rev Author: kurzum Date: 2008-05-16 00:05:09 -0700 (Fri, 16 May 2008) Log Message: ----------- Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/core/LearningAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java 2008-05-16 07:05:09 UTC (rev 872) @@ -25,6 +25,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.SortedSet; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -383,6 +384,12 @@ public synchronized List<Description> getBestSolutions(int nrOfSolutions) { return algorithm.getBestSolutions(nrOfSolutions); } + + //HACK + @Override + public SortedSet<String> getBestSolutionsAsKBSyntax(int nrOfSolutions){ + return this.algorithm.getBestSolutionsAsKBSyntax(nrOfSolutions); + } @Override public void stop() { Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-05-16 07:05:09 UTC (rev 872) @@ -48,7 +48,6 @@ import org.dllearner.utilities.ConceptTransformation; import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; -import org.dllearner.utilities.SimpleClock; /** * Implements the example based refinement operator learning @@ -1061,6 +1060,7 @@ List<Description> best = new LinkedList<Description>(); int i=0; for(ExampleBasedNode n : candidatesStable.descendingSet()) { + best.add(n.getConcept()); if(i==nrOfSolutions) return best; @@ -1069,7 +1069,27 @@ return best; } + //HACK + public SortedSet<String> getBestSolutionsAsKBSyntax(int nrOfSolutions){ + SortedSet<String> result = new TreeSet<String>(); + for (Description d : solutions) { + result.add(d.toKBSyntaxString()); + //if(result.size()==nrOfSolutions)break; + } + /*if(result.size()<nrOfSolutions){ + + for(ExampleBasedNode n : candidatesStable.descendingSet()) { + + result.add(n.getConcept().toKBSyntaxString()); + if(result.size()>=nrOfSolutions) + return result; + + } + }*/ + return result; + } + public void printBestSolutions(int nrOfSolutions, boolean showOrderedSolutions){ if(!logger.isTraceEnabled()) return; Modified: trunk/src/dl-learner/org/dllearner/core/LearningAlgorithm.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/LearningAlgorithm.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/core/LearningAlgorithm.java 2008-05-16 07:05:09 UTC (rev 872) @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.LinkedList; import java.util.List; +import java.util.SortedSet; import org.dllearner.core.owl.Description; @@ -63,6 +64,11 @@ return single; } + //HACK + public SortedSet<String> getBestSolutionsAsKBSyntax(int nrOfSolutions){ + return null; + } + /** * Returns all learning problems supported by this component. */ Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Cache.java 2008-05-16 07:05:09 UTC (rev 872) @@ -31,6 +31,7 @@ import java.util.LinkedList; import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; import org.dllearner.utilities.SimpleClock; import org.dllearner.utilities.Statistics; @@ -59,8 +60,9 @@ */ public class Cache implements Serializable { - private static Logger logger = Logger.getLogger(Cache.class); + private static Logger logger = Logger.getLogger(KnowledgeSource.class); + private static final long serialVersionUID = 843308736471742205L; // maps hash of a SPARQL queries to JSON representation Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java 2008-05-16 07:05:09 UTC (rev 872) @@ -24,6 +24,7 @@ import java.util.Vector; import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; import org.dllearner.kb.sparql.configuration.Configuration; import org.dllearner.kb.sparql.datastructure.ClassNode; import org.dllearner.kb.sparql.datastructure.InstanceNode; @@ -42,7 +43,7 @@ // private boolean getAllSuperClasses = true; // private boolean closeAfterRecursion = true; private static Logger logger = Logger - .getLogger(ExtractionAlgorithm.class); + .getLogger(KnowledgeSource.class); public ExtractionAlgorithm(Configuration Configuration) { this.configuration = Configuration; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2008-05-16 07:05:09 UTC (rev 872) @@ -24,6 +24,8 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; import org.dllearner.kb.sparql.configuration.Configuration; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; import org.dllearner.kb.sparql.configuration.SparqlQueryType; @@ -42,7 +44,10 @@ private TypedSparqlQuery typedSparqlQuery; private ExtractionAlgorithm extractionAlgorithm; + private static Logger logger = Logger + .getLogger(KnowledgeSource.class); + public void useConfiguration(SparqlQueryType SparqlQueryType, SparqlEndpoint SparqlEndpoint, Manipulator manipulator, int recursiondepth, boolean getAllSuperClasses, @@ -76,8 +81,10 @@ // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); System.out.println("Start extracting"); SortedSet<String> ret = new TreeSet<String>(); - + int progress=0; for (String one : instances) { + progress++; + logger.info("Progress: "+progress+" of "+instances.size()+" finished"); try { Node n = extractionAlgorithm.expandNode(new URI(one), typedSparqlQuery); Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-16 07:05:09 UTC (rev 872) @@ -24,8 +24,8 @@ import java.nio.charset.Charset; import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; -import org.dllearner.utilities.SimpleClock; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSetFactory; @@ -42,7 +42,7 @@ */ public class SparqlQuery { - private static Logger logger = Logger.getLogger(SparqlKnowledgeSource.class); + private static Logger logger = Logger.getLogger(KnowledgeSource.class); public String extraDebugInfo = ""; private boolean isRunning = false; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java 2008-05-16 07:05:09 UTC (rev 872) @@ -24,6 +24,8 @@ import java.util.List; import java.util.Set; +import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; import org.dllearner.kb.sparql.configuration.Configuration; import org.dllearner.utilities.SimpleClock; import org.dllearner.utilities.StringTuple; @@ -39,6 +41,10 @@ * */ public class TypedSparqlQuery implements TypedSparqlQueryInterface { + + private static Logger logger = Logger.getLogger(KnowledgeSource.class); + + boolean print_flag = false; protected Configuration configuration; private SparqlQueryMaker sparqlQueryMaker; Added: trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-16 07:05:09 UTC (rev 872) @@ -0,0 +1,221 @@ +package org.dllearner.scripts; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.FileAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; +import org.dllearner.utilities.AutomaticExampleFinderSKOSSPARQL; +import org.dllearner.utilities.JenaResultSetConvenience; +import org.dllearner.utilities.LearnSparql; +import org.dllearner.utilities.SetManipulation; +import org.dllearner.utilities.SimpleClock; + +import com.hp.hpl.jena.query.ResultSet; + +public class SKOS7030 { + + static Cache c; + static SparqlEndpoint se; + private static Logger logger = Logger.getRootLogger(); + + static String standardSettings=""; + static String algorithm="refexamples"; + static String standardSettingsRefexamples = + "refexamples.minExecutionTimeInSeconds = 30;\n" + + "refexamples.maxExecutionTimeInSeconds = 30;\n" + + "//refexamples.guaranteeXgoodDescriptions = 10;\n" + + "refexamples.logLevel=\"TRACE\";\n" + + "refexamples.noisePercentage = 0.10;\n" + + "refexamples.writeSearchTree = false;\n" + + "refexamples.searchTreeFile = \"searchTree.txt\";\n" + + "refexamples.replaceSearchTree = true;\n\n" ; + + static String standardSettingsRefinement = + "refinement.minExecutionTimeInSeconds = 30;\n" + + "refinement.maxExecutionTimeInSeconds = 30;\n" + + "//refinement.guaranteeXgoodDescriptions = 10;\n" + + "refinement.logLevel=\"TRACE\";\n" + + "refinement.writeSearchTree = false;\n" + + "refinement.searchTreeFile = \"searchTree.txt\";\n" + + "refinement.replaceSearchTree = true;\n\n" ; + + + + static String standardDBpedia="" + + "sparql.recursionDepth = 1;\n" + + "sparql.predefinedFilter = \"YAGO\";\n" + + "sparql.predefinedEndpoint = \"DBPEDIA\";\n"; + //"sparql.logLevel = \"INFO\";\n"; + + + //vars + static boolean useRelated = false; + static boolean useSuperClasses = false; + static boolean useParallelClasses = true; + static int poslimit = 10; + static int neglimit = 20; + + /** + * @param args + */ + public static void main(String[] args) { + init(); + //logger.setLevel(Level.TRACE); + Logger.getLogger(KnowledgeSource.class).setLevel(Level.INFO); + //System.out.println(Logger.getLogger(SparqlQuery.class).getLevel()); + SimpleClock sc=new SimpleClock(); + + standardSettings=standardSettingsRefexamples+standardDBpedia; + //standardSettings=standardSettingsRefinement+standardDBpedia; + + DBpediaSKOS(); + //algorithm="refinement"; + //roles(); + + /*System.out.println(Level.DEBUG.getClass()); + System.out.println(Level.toLevel("INFO")); + System.out.println(Level.INFO);*/ + //System.exit(0); + + + + sc.printAndSet("Finished"); + + } + + + + static void DBpediaSKOS(){ + se = SparqlEndpoint.EndpointLOCALDBpedia(); + String url = "http://dbpedia.openlinksw.com:8890/sparql"; + url = "http://139.18.2.37:8890/sparql"; + + SortedSet<String> concepts = new TreeSet<String>(); + + concepts.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"); + //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); + //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); + + SortedSet<String> posExamples = new TreeSet<String>(); + SortedSet<String> negExamples = new TreeSet<String>(); + + //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); + //HashMap<String, String> result2 = new HashMap<String, String>(); + //System.out.println(concepts.first()); + //logger.setLevel(Level.TRACE); + String concept=concepts.first(); + + AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); + useRelated = false; + useParallelClasses = true; + int recursiondepth=1; + boolean closeAfterRecursion=true; + boolean randomizeCache=false; + ae.initDBpediaSKOS(concept, 0.1, useRelated, useParallelClasses); + posExamples = ae.getPosExamples(); + negExamples = ae.getNegExamples(); + SortedSet<String> totalSKOSset= ae.totalSKOSset; + SortedSet<String> rest= ae.rest; + + + + LearnSparql ls = new LearnSparql(); + + //igno.add(oneConcept.replaceAll("\"", "")); + + SortedSet<String> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache); + System.out.println(conceptresults); + for (String string : conceptresults) { + SortedSet<String> instances = ae.queryConceptAsStringSet(string, 0); + if(instances.size()>0)System.out.println("size "+instances.size()); + if(instances.size()<=15 && instances.size()>0) System.out.println(instances); + } + + + + //System.out.println("AAAAAAAA"); + //System.exit(0); + //"relearned concept: "; + //cf.writeSPARQL(confname, posExamples, negExamples, url, new TreeSet<String>(),standardSettings,algorithm); + // + + + //Statistics.print(); + } + + + /*************************************************************************** + * *********************OLDCODE String + * conj="(\"http://dbpedia.org/class/yago/Person100007846\" AND + * \"http://dbpedia.org/class/yago/Head110162991\")"; + * + * + * concepts.add("EXISTS \"http://dbpedia.org/property/disambiguates\".TOP"); + * concepts.add("EXISTS + * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); + * concepts.add("EXISTS \"http://dbpedia.org/property/successor\"."+conj); + * //concepts.add("ALL \"http://dbpedia.org/property/disambiguates\".TOP"); + * //concepts.add("ALL + * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); + * concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); + * concepts.add(conj); + * concepts.add("(\"http://dbpedia.org/class/yago/Person100007846\" OR + * \"http://dbpedia.org/class/yago/Head110162991\")"); + * + * //concepts.add("NOT \"http://dbpedia.org/class/yago/Person100007846\""); + * + * for (String kbsyntax : concepts) { + * result.put(kbsyntax,queryConcept(kbsyntax)); } + * System.out.println("************************"); for (String string : + * result.keySet()) { System.out.println("KBSyntayString: "+string); + * System.out.println("Query:\n"+result.get(string).hasNext()); + * System.out.println("************************"); } + **************************************************************************/ + + + + + + public static void init() { + + SimpleLayout layout = new SimpleLayout(); + // create logger (a simple logger which outputs + // its messages to the console) + FileAppender fileAppender =null; ; + try{ + fileAppender = new FileAppender(layout,"the_log.txt",false); + }catch (Exception e) {e.printStackTrace();} + + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.addAppender(fileAppender); + logger.setLevel(Level.DEBUG); + c = new Cache("cachetemp"); + + + } + + public static SortedSet<String> selectDBpediaConcepts(int number){ + String query = "SELECT DISTINCT ?concept WHERE { \n" + + "[] a ?concept .FILTER (regex(str(?concept),'yago'))" + + " \n} \n"; //LIMIT "+number+" + + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + return SetManipulation.fuzzyShrink(rsc.getStringListForVariable("concept"),number); + } + + + +} Added: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java 2008-05-16 07:05:09 UTC (rev 872) @@ -0,0 +1,408 @@ +package org.dllearner.utilities; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.ComponentManager; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; + +import com.hp.hpl.jena.query.ResultSet; + +public class AutomaticExampleFinderSKOSSPARQL { + + private static Logger logger = Logger + .getLogger(ComponentManager.class); + + private Cache c; + private SparqlEndpoint se; + private SortedSet<String> posExamples= new TreeSet<String>();; + private SortedSet<String> negExamples= new TreeSet<String>();; + public SortedSet<String> totalSKOSset= new TreeSet<String>();; + public SortedSet<String> rest= new TreeSet<String>();; + private int limit=2000; + + + public AutomaticExampleFinderSKOSSPARQL(SparqlEndpoint se){ + this.c=new Cache("cachetemp"); + this.se=se; + posExamples = new TreeSet<String>(); + negExamples = new TreeSet<String>(); + } + + public void initDBpediaSKOS(String concept, double percent, boolean useRelated,boolean useParallelClasses) { + dbpediaMakePositiveExamplesFromConcept( concept); + SortedSet<String> keepForClean = new TreeSet<String>(); + keepForClean.addAll(this.posExamples); + totalSKOSset.addAll(this.posExamples); + rest.addAll(totalSKOSset); + int poslimit=(int)Math.round(percent*totalSKOSset.size()); + int neglimit=2*poslimit; + while (this.posExamples.size()>poslimit) { + this.posExamples.remove(posExamples.last()); + } + //this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); + + rest.removeAll(this.posExamples); + + logger.debug("pos Example size: "+posExamples.size()); + logger.debug("totalSKOSset: "+totalSKOSset.size()); + logger.debug("rest: "+rest.size()); + + if(useRelated) { + dbpediaMakeNegativeExamplesFromRelatedInstances(this.posExamples); + } + + if(useParallelClasses) { + int limit = this.posExamples.size(); + makeNegativeExamplesFromClassesOfInstances(limit); + } + //clean + negExamples.removeAll(keepForClean); + logger.debug("neg Example size after cleaning: "+negExamples.size()); + this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); + logger.debug("pos Example size after shrinking: "+posExamples.size()); + logger.debug("neg Example size after shrinking: "+negExamples.size()); + logger.debug("Finished examples for concept: "+concept); + } + +/* public void init(String concept, String namespace, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { + makePositiveExamplesFromConcept( concept); + SortedSet<String> keepForClean = new TreeSet<String>(); + keepForClean.addAll(this.posExamples); + this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); + logger.trace("shrinking: pos Example size: "+posExamples.size()); + + if(useRelated) { + makeNegativeExamplesFromRelatedInstances(this.posExamples,namespace); + } + if(useSuperclasses) { + makeNegativeExamplesFromSuperClasses(concept); + } + if(useParallelClasses) { + makeNegativeExamplesFromClassesOfInstances(); + } + //clean + negExamples.removeAll(keepForClean); + logger.debug("neg Example size after cleaning: "+negExamples.size()); + this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); + logger.debug("pos Example size after shrinking: "+posExamples.size()); + logger.debug("neg Example size after shrinking: "+negExamples.size()); + logger.debug("Finished examples for concept: "+concept); + }*/ + + + + public SortedSet<String> dbpediaGetPosOnly(String concept, int limit){ + dbpediaMakePositiveExamplesFromConcept( concept); + return SetManipulation.fuzzyShrink(this.posExamples, limit); + } + + public SortedSet<String> getPosOnly(String concept, int limit){ + makePositiveExamplesFromConcept( concept); + return SetManipulation.fuzzyShrink(this.posExamples, limit); + } + + private void dbpediaMakePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + if(concept.contains("http://dbpedia.org/resource/Category:")) { + this.posExamples = new JenaResultSetConvenience(dbpediaQuerySKOSConcept(concept,limit)) + .getStringListForVariable("subject"); + }else { + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,limit)) + .getStringListForVariable("subject"); + } + logger.debug("pos Example size: "+posExamples.size()); + } + + private void makePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) + .getStringListForVariable("subject"); + logger.debug(" pos Example size: "+posExamples.size()); + } + + + + /*private void makePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + if(concept.contains("http://dbpedia.org/resource/Category:")) { + this.posExamples = new JenaResultSetConvenience(querySKOSConcept(concept,0)) + .getStringListForVariable("subject"); + }else { + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) + .getStringListForVariable("subject"); + } + logger.debug(" pos Example size: "+posExamples.size()); + }*/ + + + + + + private void dbpediaMakeNegativeExamplesFromRelatedInstances(SortedSet<String> subject) { + logger.debug("making examples from related instances"); + for (String string : subject) { + dbpediaMakeNegativeExamplesFromRelatedInstances(string); + } + logger.debug(" negExample size: "+negExamples.size()); + } + + private void makeNegativeExamplesFromRelatedInstances(SortedSet<String> subject, String namespace) { + logger.debug("making examples from related instances"); + for (String string : subject) { + makeNegativeExamplesFromRelatedInstances(string,namespace); + } + logger.debug(" negExample size: "+negExamples.size()); + } + + + /** + * + * @param subject + * @return + */ + private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { + // SortedSet<String> result = new TreeSet<String>(); + + String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" + + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" + + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + + "}"; + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + this.negExamples.addAll(rsc.getStringListForVariable("o")); + + + } + + private void makeNegativeExamplesFromRelatedInstances(String subject, String namespace) { + // SortedSet<String> result = new TreeSet<String>(); + + String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" + + "FILTER (REGEX(str(?o), '"+namespace+"')).\n" + + "}"; + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + this.negExamples.addAll(rsc.getStringListForVariable("o")); + + + } + + + private void makeNegativeExamplesFromSuperClasses(String concept) { + + SortedSet<String> superClasses = new TreeSet<String>(); + superClasses.add(concept.replace("\"", "")); + //logger.debug("before"+superClasses); + superClasses = getSuperClasses( superClasses, 4); + logger.debug("making neg Examples from "+superClasses.size()+" superclasses"); + JenaResultSetConvenience rsc; + for (String oneSuperClass : superClasses) { + logger.debug(oneSuperClass); + rsc = new JenaResultSetConvenience(queryConcept("\""+oneSuperClass+"\"", limit)); + this.negExamples.addAll(rsc.getStringListForVariable("subject")); + } + logger.debug(" neg Example size: "+negExamples.size()); + } + + + + private void makeNegativeExamplesFromClassesOfInstances(int limit) { + logger.debug("making neg Examples from parallel classes"); + SortedSet<String> classes = new TreeSet<String>(); + //superClasses.add(concept.replace("\"", "")); + //logger.debug("before"+superClasses); + //superClasses = dbpediaGetSuperClasses( superClasses, 4); + //logger.debug("getting negExamples from "+superClasses.size()+" superclasses"); + JenaResultSetConvenience rsc; + ResultSet rs=null; + for (String instance : posExamples) { + //System.out.println(instance); + rs = getClassesForInstance(instance); + //System.out.println(ResultSetFormatter.asXMLString(rs)); + rsc = new JenaResultSetConvenience(rs); + classes.addAll(rsc.getStringListForVariable("subject")); + //System.out.println(classes); + } + logger.debug("getting negExamples from "+classes.size()+" parallel classes"); + for (String oneClass : classes) { + logger.debug(oneClass); + rsc = new JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); + this.negExamples.addAll(rsc.getStringListForVariable("subject")); + } + logger.debug("neg Example size: "+negExamples.size()); + + } + + private SortedSet<String> getSuperClasses(SortedSet<String> superClasses, int depth) { + SortedSet<String> ret = new TreeSet<String>(); + SortedSet<String> tmpset = new TreeSet<String>(); + ret.addAll(superClasses); + //logger.debug(superClasses); + JenaResultSetConvenience rsc; + + String query = ""; + for (; depth != 0 ; depth--) { + for (String oneSuperClass : superClasses) { + + //tmp = oneSuperClass.replace("\"", ""); + query = "SELECT * WHERE { \n" + "<" + oneSuperClass + "> " + + "<http://www.w3.org/2000/01/rdf-schema#subClassOf> ?superclass. \n" + + "}"; + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + rsc = new JenaResultSetConvenience(rs); + tmpset.addAll(rsc.getStringListForVariable("superclass")); + + } + ret.addAll(tmpset); + //logger.debug(ret); + superClasses.clear(); + superClasses.addAll(tmpset); + tmpset.clear(); + } + //logger.debug(concept); + //logger.debug(query); + return ret; + } + + + + public ResultSet queryConcept(String concept,int limit) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlQuery(concept,limit); + + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println("JSON:\n"+JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public SortedSet<String> queryConceptAsStringSet(String concept,int limit) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlQuery(concept,limit); + + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println("JSON:\n"+JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + return new JenaResultSetConvenience(rs).getStringListForVariable("subject"); + + } + + public ResultSet dbpediaQuerySKOSConcept(String SKOSconcept,int limit) { + if(limit==0)limit=99999; + // + ResultSet rs = null; + try { + + String query = "SELECT * WHERE { \n " + + "?subject " + + "<http://www.w3.org/2004/02/skos/core#subject> " + + "<" + SKOSconcept + "> \n" + + "} LIMIT "+limit; + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println(JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public ResultSet getClassesForInstance(String instance) { + ResultSet rs = null; + try { + + String query = "SELECT ?subject WHERE { \n " + + "<" + instance + ">"+ + " a " + + "?subject " + + "\n" + + "}"; + SparqlQuery sq = new SparqlQuery(query, se); + //System.out.println(query); + String JSON = c.executeSparqlQuery(sq); + //System.out.println(JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public SortedSet<String> getPosExamples() { + return posExamples; + } + + public SortedSet<String> getNegExamples() { + return negExamples; + } + + + /** + * NOT WORKING + * @param description + */ + public void getSubClasses(String description) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlSubclassQuery(description.replace("\"", "")); + + rs = new SparqlQuery(query, se).send(); + System.out.println(query); + //System.out.println(SparqlQuery.getAsXMLString(rs)); + System.out.println(rs.getResultVars()); + SortedSet<String> remainingClasses = new JenaResultSetConvenience(rs).getStringListForVariable("subject"); + SortedSet<String> alreadyQueried = new TreeSet<String>(); + alreadyQueried.add(description); + while (remainingClasses.size()!=0){ + String tmp = remainingClasses.first(); + remainingClasses.remove(tmp); + query = SparqlQueryDescriptionConvertVisitor + .getSparqlSubclassQuery(tmp); + alreadyQueried.add(tmp); + rs = new SparqlQuery(query, se).send(); + remainingClasses.addAll(new JenaResultSetConvenience(rs).getStringListForVariable("subject")); + } + //System.out.println(JSON); + System.out.println(alreadyQueried); + + } catch (Exception e) { + e.printStackTrace(); + } + + } + + + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java 2008-05-16 07:05:09 UTC (rev 872) @@ -21,6 +21,7 @@ private SparqlEndpoint se; private SortedSet<String> posExamples; private SortedSet<String> negExamples; + public SortedSet<String> totalSKOSset; private int limit=2000; Modified: trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java 2008-05-16 01:23:08 UTC (rev 871) +++ trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java 2008-05-16 07:05:09 UTC (rev 872) @@ -110,6 +110,95 @@ //System.out.println( la.getBestSolution());; } + public SortedSet<String> learnDBpediaSKOS(SortedSet<String> posExamples,SortedSet<String> negExamples, + String uri, SortedSet<String> ignoredConcepts, int recursiondepth, + boolean closeAfterRecursion, boolean randomizeCache){ + + ComponentManager cm = ComponentManager.getInstance(); + LearningAlgorithm la = null; + ReasoningService rs = null; + LearningProblem lp = null; + SparqlKnowledgeSource ks =null; + try { + Set<KnowledgeSource> sources = new HashSet<KnowledgeSource>(); + + ks = cm.knowledgeSource(SparqlKnowledgeSource.class); + + SortedSet<String> instances = new TreeSet<String>(); + instances.addAll(posExamples); + instances.addAll(negExamples); + cm.applyConfigEntry(ks, "instances",instances); + cm.applyConfigEntry(ks, "url",uri); + cm.applyConfigEntry(ks, "recursionDepth",recursiondepth); + cm.applyConfigEntry(ks, "closeAfterRecursion",closeAfterRecursion); + cm.applyConfigEntry(ks, "predefinedFilter","YAGO"); + cm.applyConfigEntry(ks, "predefinedEndpoint","LOCALDBPEDIA"); + if(randomizeCache) + cm.applyConfigEntry(ks, "cacheDir","cache/"+System.currentTimeMillis()+""); + else {cm.applyConfigEntry(ks, "cacheDir","cacheSKOS");} + //cm.applyConfigEntry(ks, "format","KB"); + + sc.setTime(); + ks.init(); + Statistics.addTimeCollecting(sc.getTime()); + sources.add(ks); + //if (true)return; + //System.out.println(ks.getNTripleURL()); + // + + ReasonerComponent r = new FastInstanceChecker(sources); + //cm.applyConfigEntry(r,"useAllConstructor",false); + //cm.applyConfigEntry(r,"useExistsConstructor",true); + r.init(); + rs = new ReasoningService(r); + + lp = new PosNegDefinitionLP(rs); + //cm.applyConfigEntry(lp, "positiveExamples",toInd(posExamples)); + ((PosNegLP) lp).setPositiveExamples(SetManipulation.stringToInd(posExamples)); + ((PosNegLP) lp).setNegativeExamples(SetManipulation.stringToInd(negExamples)); + //cm.applyConfigEntry(lp, "negativeExamples",toInd(negExamples)); + lp.init(); + + la = cm.learningAlgorithm(ExampleBasedROLComponent.class, lp, rs); + + cm.applyConfigEntry(la,"useAllConstructor",false); + cm.applyConfigEntry(la,"useExistsConstructor",true); + cm.applyConfigEntry(la,"useCardinalityRestrictions",false); + cm.applyConfigEntry(la,"useNegation",false); + cm.applyConfigEntry(la,"minExecutionTimeInSeconds",0); + cm.applyConfigEntry(la,"maxExecutionTimeInSeconds",50); + cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",15); + cm.applyConfigEntry(la,"writeSearchTree",true); + cm.applyConfigEntry(la,"searchTreeFile","log/SKOS.txt"); + cm.applyConfigEntry(la,"replaceSearchTree",true); + //cm.applyConfigEntry(la,"noisePercentage",0.15); + + + //cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",999999); + cm.applyConfigEntry(la,"logLevel","TRACE"); + + //cm.applyConfigEntry(la,"quiet",false); + //System.out.println(ignoredConcepts.first());; + if(ignoredConcepts.size()>0) + cm.applyConfigEntry(la,"ignoredConcepts",ignoredConcepts); + la.init(); + + System.out.println("start learning"); + sc.setTime(); + la.start(); + Statistics.addTimeLearning(sc.getTime()); + return la.getBestSolutionsAsKBSyntax(15); + //if(sc.getTime()/1000 >= 20)System.out.println("XXXMAX time reached"); + + //System.out.println("best"+la(20)); + //((ExampleBasedROLComponent)la).printBestSolutions(10000); + + }catch (Exception e) {e.printStackTrace();} + return null; + //System.out.println( la.getBestSolution());; + } + + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |