From: <ku...@us...> - 2008-05-06 18:16:31
|
Revision: 830 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=830&view=rev Author: kurzum Date: 2008-05-06 11:16:26 -0700 (Tue, 06 May 2008) Log Message: ----------- script for learning roles automatically, conf files are created in a folder examples/stest, which must exist Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refinement/ROLearner.java trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/SparqlEndpoint.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java trunk/src/dl-learner/org/dllearner/utilities/ConfWriter.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderRolesSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refinement/ROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refinement/ROLearner.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/algorithms/refinement/ROLearner.java 2008-05-06 18:16:26 UTC (rev 830) @@ -567,9 +567,11 @@ if(show>=5){break;} show++; } + } logger.info(" horizontal expansion: " + minimumHorizontalExpansion + " to " + maximumHorizontalExpansion); logger.info(" size of candidate set: " + candidates.size()); + //logger.trace("test"); //logger.trace(solutions.size()); printBestSolutions(0); Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/SparqlEndpoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/SparqlEndpoint.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/SparqlEndpoint.java 2008-05-06 18:16:26 UTC (rev 830) @@ -291,7 +291,9 @@ } catch (Exception e) { e.printStackTrace(); } - return new SparqlEndpoint(u); + LinkedList<String> defaultGraphURIs=new LinkedList<String>(); + defaultGraphURIs.add("http://www.rdfabout.com/rdf/schema/census/"); + return new SparqlEndpoint(u, defaultGraphURIs, new LinkedList<String>()); } Modified: trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java 2008-05-06 18:16:26 UTC (rev 830) @@ -11,6 +11,7 @@ import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlKnowledgeSource; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; +import org.dllearner.utilities.AutomaticExampleFinderRolesSPARQL; import org.dllearner.utilities.AutomaticExampleFinderSPARQL; import org.dllearner.utilities.ConfWriter; import org.dllearner.utilities.SimpleClock; @@ -20,82 +21,168 @@ static Cache c; static SparqlEndpoint se; private static Logger logger = Logger.getRootLogger(); + + static String standardSettings=""; + static String algorithm="refexamples"; + static String standardSettingsRefexamples = + "refexamples.minExecutionTimeInSeconds = 30;\n" + + "refexamples.maxExecutionTimeInSeconds = 30;\n" + + "//refexamples.guaranteeXgoodDescriptions = 10;\n" + + "refexamples.logLevel=\"TRACE\";\n" + + "refexamples.noisePercentage = 0.10;\n" + + "refexamples.writeSearchTree = false;\n" + + "refexamples.searchTreeFile = \"searchTree.txt\";\n" + + "refexamples.replaceSearchTree = true;\n\n" ; + + static String standardSettingsRefinement = + "refinement.minExecutionTimeInSeconds = 30;\n" + + "refinement.maxExecutionTimeInSeconds = 30;\n" + + "//refinement.guaranteeXgoodDescriptions = 10;\n" + + "refinement.logLevel=\"TRACE\";\n" + + "refinement.writeSearchTree = false;\n" + + "refinement.searchTreeFile = \"searchTree.txt\";\n" + + "refinement.replaceSearchTree = true;\n\n" ; + + + + static String standardDBpedia="" + + "sparql.recursionDepth = 1;\n" + + "sparql.predefinedFilter = \"YAGO\";\n" + + "sparql.predefinedEndpoint = \"DBPEDIA\";\n"; + //"sparql.logLevel = \"INFO\";\n"; + + + //vars + static boolean useRelated = false; + static boolean useSuperClasses = false; + static boolean useParallelClasses = true; + static int poslimit = 10; + static int neglimit = 20; + /** * @param args */ public static void main(String[] args) { init(); + //logger.setLevel(Level.TRACE); + Logger.getLogger(SparqlKnowledgeSource.class).setLevel(Level.INFO); + //System.out.println(Logger.getLogger(SparqlQuery.class).getLevel()); + SimpleClock sc=new SimpleClock(); - //vars - boolean useRelated = false; - boolean useSuperClasses = false; - boolean useParallelClasses = true; - int poslimit = 10; - int neglimit = 20; + standardSettings=standardSettingsRefexamples+standardDBpedia; + standardSettings=standardSettingsRefinement+standardDBpedia; + //DBpedia(); + //algorithm="refinement"; + roles(); - - try { - //logger.setLevel(Level.TRACE); - Logger.getLogger(SparqlKnowledgeSource.class).setLevel(Level.INFO); - //System.out.println(Logger.getLogger(SparqlQuery.class).getLevel()); - - /*System.out.println(Level.DEBUG.getClass()); + /*System.out.println(Level.DEBUG.getClass()); System.out.println(Level.toLevel("INFO")); System.out.println(Level.INFO);*/ //System.exit(0); - SimpleClock sc=new SimpleClock(); - //concepts.add("(EXISTS \"monarch\".TOP AND EXISTS \"predecessor\".(\"Knight\" OR \"Secretary\"))"); - - SortedSet<String> concepts = new TreeSet<String>(); - concepts.add("(\"http://dbpedia.org/class/yago/HeadOfState110164747\" AND (\"http://dbpedia.org/class/yago/Negotiator110351874\" AND \"http://dbpedia.org/class/yago/Representative110522035\"))"); - //concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); - //concepts.add("\"http://dbpedia.org/class/yago/FieldMarshal110086821\""); - //concepts.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"); - //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); - //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); - SortedSet<String> posExamples = new TreeSet<String>(); - SortedSet<String> negExamples = new TreeSet<String>(); - String url = "http://dbpedia.openlinksw.com:8890/sparql"; - //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); - //HashMap<String, String> result2 = new HashMap<String, String>(); - //System.out.println(concepts.first()); - //logger.setLevel(Level.TRACE); - AutomaticExampleFinderSPARQL ae= new AutomaticExampleFinderSPARQL( se); - //System.out.println(new JenaResultSetConvenience(ae.queryConcept(concepts.first(), 0)).getStringListForVariable("?subject") );; - //System.out.println(new JenaResultSetConvenience(ae.queryConcept(concepts.first(), 0)).getStringListForVariable("?subject").size() );; - //ae.getSubClasses(concepts.first()); - //System.exit(0); - - ae.init(concepts.first(), useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); + - posExamples = ae.getPosExamples(); - negExamples = ae.getNegExamples(); - - System.out.println(posExamples); - System.out.println(negExamples); - //System.exit(0); - String tmp = concepts.first().replace("http://dbpedia.org/resource/Category:", "").replace("\"",""); - tmp = tmp.replace("http://dbpedia.org/class/yago/", ""); - tmp = tmp.replace("http://dbpedia.org/property/", ""); - String confname = URLEncoder.encode(tmp, "UTF-8")+".conf"; - // - ConfWriter cf=new ConfWriter(); - cf.addToStats("relearned concept: "+concepts.first()); - - //System.exit(0); - //"relearned concept: "; - cf.writeSPARQL(confname, posExamples, negExamples, url, new TreeSet<String>()); - //new LearnSparql().learn(posExamples, negExamples, "http://dbpedia.openlinksw.com:8890/sparql", new TreeSet<String>()); - sc.printAndSet("Finished"); - } catch (Exception e) { - e.printStackTrace(); - } - } + + static void DBpedia(){ + se = SparqlEndpoint.EndpointDBpedia(); + //concepts.add("(EXISTS \"monarch\".TOP AND EXISTS \"predecessor\".(\"Knight\" OR \"Secretary\"))"); + + SortedSet<String> concepts = new TreeSet<String>(); + concepts.add("(\"http://dbpedia.org/class/yago/HeadOfState110164747\" AND (\"http://dbpedia.org/class/yago/Negotiator110351874\" AND \"http://dbpedia.org/class/yago/Representative110522035\"))"); + //concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); + //concepts.add("\"http://dbpedia.org/class/yago/FieldMarshal110086821\""); + //concepts.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"); + //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); + //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); + + SortedSet<String> posExamples = new TreeSet<String>(); + SortedSet<String> negExamples = new TreeSet<String>(); + String url = "http://dbpedia.openlinksw.com:8890/sparql"; + //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); + //HashMap<String, String> result2 = new HashMap<String, String>(); + //System.out.println(concepts.first()); + //logger.setLevel(Level.TRACE); + AutomaticExampleFinderSPARQL ae= new AutomaticExampleFinderSPARQL( se); + //System.out.println(new JenaResultSetConvenience(ae.queryConcept(concepts.first(), 0)).getStringListForVariable("?subject") );; + //System.out.println(new JenaResultSetConvenience(ae.queryConcept(concepts.first(), 0)).getStringListForVariable("?subject").size() );; + //ae.getSubClasses(concepts.first()); + //System.exit(0); + + ae.initDBpedia(concepts.first(), useRelated, useSuperClasses,useParallelClasses, poslimit, neglimit); + + posExamples = ae.getPosExamples(); + negExamples = ae.getNegExamples(); + + System.out.println(posExamples); + System.out.println(negExamples); + //System.exit(0); + String tmp = concepts.first().replace("http://dbpedia.org/resource/Category:", "").replace("\"",""); + tmp = tmp.replace("http://dbpedia.org/class/yago/", ""); + tmp = tmp.replace("http://dbpedia.org/property/", ""); + String confname = ""; + try{ + confname = URLEncoder.encode(tmp, "UTF-8")+".conf"; + }catch (Exception e) {e.printStackTrace();} + // + ConfWriter cf=new ConfWriter(); + cf.addToStats("relearned concept: "+concepts.first()); + + //System.exit(0); + //"relearned concept: "; + cf.writeSPARQL(confname, posExamples, negExamples, url, new TreeSet<String>(),standardSettings,algorithm); + //new LearnSparql().learn(posExamples, negExamples, "http://dbpedia.openlinksw.com:8890/sparql", new TreeSet<String>()); + + + } + + + + static void roles(){ + + se = SparqlEndpoint.EndpointDBpedia(); + //se = SparqlEndpoint.EndpointUSCensus(); + SortedSet<String> roles = new TreeSet<String>(); + roles.add("http://dbpedia.org/property/birthPlace"); + //roles.add("http://www.rdfabout.com/rdf/schema/census/landArea"); + standardSettings+=algorithm+".ignoredRoles = {\""+roles.first()+"\"};\n"; + + SortedSet<String> posExamples = new TreeSet<String>(); + SortedSet<String> negExamples = new TreeSet<String>(); + String url = "http://dbpedia.openlinksw.com:8890/sparql"; + //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); + //HashMap<String, String> result2 = new HashMap<String, String>(); + //System.out.println(concepts.first()); + //logger.setLevel(Level.TRACE); + AutomaticExampleFinderRolesSPARQL ae= new AutomaticExampleFinderRolesSPARQL( se); + + ae.initDomainRange(roles.first(), poslimit, neglimit); + + posExamples = ae.getPosExamples(); + negExamples = ae.getNegExamples(); + + System.out.println(posExamples); + System.out.println(negExamples); + //System.exit(0); + String tmp = roles.first().replace("http://dbpedia.org/property/", "").replace("\"",""); + String confname = ""; + try{ + confname = URLEncoder.encode(tmp, "UTF-8")+".conf"; + }catch (Exception e) {e.printStackTrace();} + // + ConfWriter cf=new ConfWriter(); + cf.addToStats("relearned role: "+roles.first()); + + //System.exit(0); + //"relearned concept: "; + cf.writeSPARQL(confname, posExamples, negExamples, url, new TreeSet<String>(),standardSettings,algorithm); + //new LearnSparql().learn(posExamples, negExamples, "http://dbpedia.openlinksw.com:8890/sparql", new TreeSet<String>()); + + + } /*************************************************************************** * *********************OLDCODE String @@ -131,7 +218,7 @@ public static void init() { - se = SparqlEndpoint.EndpointDBpedia(); + // create logger (a simple logger which outputs // its messages to the console) SimpleLayout layout = new SimpleLayout(); Modified: trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java 2008-05-06 18:16:26 UTC (rev 830) @@ -5,6 +5,7 @@ import java.util.Set; import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; @@ -24,7 +25,12 @@ public static void main(String[] args) { SimpleLayout layout = new SimpleLayout(); ConsoleAppender consoleAppender = new ConsoleAppender(layout); + FileAppender fileAppender =null; ; + try{ + fileAppender = new FileAppender(layout,"endpoints.txt",false); + }catch (Exception e) {e.printStackTrace();} logger.removeAllAppenders(); + logger.addAppender(fileAppender); logger.addAppender(consoleAppender); logger.setLevel(Level.DEBUG); Logger.getLogger(SparqlKnowledgeSource.class).setLevel(Level.INFO); Added: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderRolesSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderRolesSPARQL.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderRolesSPARQL.java 2008-05-06 18:16:26 UTC (rev 830) @@ -0,0 +1,123 @@ +package org.dllearner.utilities; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.ComponentManager; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; + +import com.hp.hpl.jena.query.ResultSet; + +public class AutomaticExampleFinderRolesSPARQL { + + private static Logger logger = Logger + .getLogger(ComponentManager.class); + + private Cache c; + private SparqlEndpoint se; + private SortedSet<String> posExamples; + private SortedSet<String> negExamples; + private int roleLimit=1000; + + + public AutomaticExampleFinderRolesSPARQL(SparqlEndpoint se){ + this.c=new Cache(); + this.se=se; + posExamples = new TreeSet<String>(); + negExamples = new TreeSet<String>(); + } + + public void initDomainRange(String role, int poslimit, int neglimit) { + makePositiveExamplesAsDomain( role); + SortedSet<String> keepForClean = new TreeSet<String>(); + keepForClean.addAll(this.posExamples); + this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); + logger.trace("shrinking: pos Example size: "+posExamples.size()); + + makeNegativeExamplesAsRange( role); + + + //clean + negExamples.removeAll(keepForClean); + logger.trace("neg Example size after cleaning: "+negExamples.size()); + this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); + logger.debug("pos Example size after shrinking: "+posExamples.size()); + logger.debug("neg Example size after shrinking: "+negExamples.size()); + logger.debug("Finished examples for role: "+role); + } + + + + + private void makePositiveExamplesAsDomain(String role){ + logger.debug("making Positive Examples from Role as Domain: "+role); + this.posExamples.addAll(getDomain( role, roleLimit)); + logger.debug(" pos Example size: "+posExamples.size()); + } + + private void makeNegativeExamplesAsRange(String role){ + logger.debug("making Negative Examples from Role as Range: "+role); + this.negExamples.addAll(getRange( role, roleLimit)); + logger.debug(" neg Example size: "+negExamples.size()); + } + + private SortedSet<String> getDomain(String role,int limit){ + + String query = "" + + "SELECT DISTINCT ?domain " + + "WHERE { \n" + + "?domain <" + role + "> " + " ?o. \n" + + "?domain a []\n." + + "FILTER (!isLiteral(?domain))." + + "}\n" + + "LIMIT "+limit; + + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + return rsc.getStringListForVariable("domain"); + + } + + private SortedSet<String> getRange(String role,int limit){ + + String query = "" + + "SELECT DISTINCT ?range " + + "WHERE { \n" + + "?s <" + role + "> " + " ?range. \n" + + "?range a [].\n" + + "FILTER (!isLiteral(?range))." + + "}\n" + + "LIMIT "+limit; + + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + return rsc.getStringListForVariable("range"); + + } + + + + + + + public SortedSet<String> getPosExamples() { + return posExamples; + } + + public SortedSet<String> getNegExamples() { + return negExamples; + } + + + + + + +} Deleted: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java 2008-05-06 18:16:26 UTC (rev 830) @@ -1,294 +0,0 @@ -package org.dllearner.utilities; - -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.log4j.Logger; -import org.dllearner.core.ComponentManager; -import org.dllearner.kb.sparql.Cache; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; -import org.dllearner.kb.sparql.configuration.SparqlEndpoint; - -import com.hp.hpl.jena.query.ResultSet; - -public class AutomaticExampleFinderSPARQL { - - private static Logger logger = Logger - .getLogger(ComponentManager.class); - - private Cache c; - private SparqlEndpoint se; - private SortedSet<String> posExamples; - private SortedSet<String> negExamples; - - - public AutomaticExampleFinderSPARQL(SparqlEndpoint se){ - this.c=new Cache(); - this.se=se; - posExamples = new TreeSet<String>(); - negExamples = new TreeSet<String>(); - } - - public void init(String concept, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { - makePositiveExamplesFromConcept( concept); - SortedSet<String> keepForClean = new TreeSet<String>(); - keepForClean.addAll(this.posExamples); - this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); - logger.trace("shrinking: pos Example size: "+posExamples.size()); - - if(useRelated) { - dbpediaMakeNegativeExamplesFromRelatedInstances(this.posExamples); - } - if(useSuperclasses) { - dbpediaMakeNegativeExamplesFromSuperClasses(concept); - } - if(useParallelClasses) { - dbpediaMakeNegativeExamplesFromClassesOfInstances(); - } - //clean - negExamples.removeAll(keepForClean); - logger.trace("neg Example size after cleaning: "+negExamples.size()); - this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); - logger.debug("pos Example size after shrinking: "+posExamples.size()); - logger.debug("neg Example size after shrinking: "+negExamples.size()); - logger.debug("Finished examples for concept: "+concept); - } - - public SortedSet<String> getPosOnly(String concept, int limit){ - makePositiveExamplesFromConcept( concept); - return SetManipulation.fuzzyShrink(this.posExamples, limit); - } - - private void makePositiveExamplesFromConcept(String concept){ - logger.debug("making Positive Examples from Concept: "+concept); - if(concept.contains("http://dbpedia.org/resource/Category:")) { - this.posExamples = new JenaResultSetConvenience(querySKOSConcept(concept,0)) - .getStringListForVariable("subject"); - }else { - this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) - .getStringListForVariable("subject"); - } - logger.debug(" pos Example size: "+posExamples.size()); - } - - - - - - private void dbpediaMakeNegativeExamplesFromRelatedInstances(SortedSet<String> subject) { - logger.debug("making examples from related instances"); - for (String string : subject) { - dbpediaMakeNegativeExamplesFromRelatedInstances(string); - } - logger.debug(" negExample size: "+negExamples.size()); - } - - - /** - * - * @param subject - * @return - */ - private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { - // SortedSet<String> result = new TreeSet<String>(); - - String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" - + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" - + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" - + "}"; - - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - this.negExamples.addAll(rsc.getStringListForVariable("o")); - - - } - - - private void dbpediaMakeNegativeExamplesFromSuperClasses(String concept) { - - SortedSet<String> superClasses = new TreeSet<String>(); - superClasses.add(concept.replace("\"", "")); - //logger.debug("before"+superClasses); - superClasses = dbpediaGetSuperClasses( superClasses, 4); - logger.debug("making neg Examples from "+superClasses.size()+" superclasses"); - JenaResultSetConvenience rsc; - for (String oneSuperClass : superClasses) { - - rsc = new JenaResultSetConvenience(queryConcept("\""+oneSuperClass+"\"", 0)); - this.negExamples.addAll(rsc.getStringListForVariable("subject")); - } - logger.debug(" neg Example size: "+negExamples.size()); - } - - private void dbpediaMakeNegativeExamplesFromClassesOfInstances() { - logger.debug("making neg Examples from parallel classes"); - SortedSet<String> classes = new TreeSet<String>(); - //superClasses.add(concept.replace("\"", "")); - //logger.debug("before"+superClasses); - //superClasses = dbpediaGetSuperClasses( superClasses, 4); - //logger.debug("getting negExamples from "+superClasses.size()+" superclasses"); - JenaResultSetConvenience rsc; - ResultSet rs=null; - for (String instance : posExamples) { - //System.out.println(instance); - rs = getClassesForInstance(instance); - //System.out.println(ResultSetFormatter.asXMLString(rs)); - rsc = new JenaResultSetConvenience(rs); - classes.addAll(rsc.getStringListForVariable("subject")); - //System.out.println(classes); - } - logger.debug("getting negExamples from "+classes.size()+" parallel classes"); - for (String oneClass : classes) { - - rsc = new JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",0)); - this.negExamples.addAll(rsc.getStringListForVariable("subject")); - } - logger.debug("neg Example size: "+negExamples.size()); - - } - - private SortedSet<String> dbpediaGetSuperClasses(SortedSet<String> superClasses, int depth) { - SortedSet<String> ret = new TreeSet<String>(); - SortedSet<String> tmpset = new TreeSet<String>(); - ret.addAll(superClasses); - //logger.debug(superClasses); - JenaResultSetConvenience rsc; - - String query = ""; - for (; depth != 0 ; depth--) { - for (String oneSuperClass : superClasses) { - //logger.debug("one"+oneSuperClass); - //tmp = oneSuperClass.replace("\"", ""); - query = "SELECT * WHERE { \n" + "<" + oneSuperClass + "> " - + "<http://www.w3.org/2000/01/rdf-schema#subClassOf> ?superclass. \n" - + "}"; - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - rsc = new JenaResultSetConvenience(rs); - tmpset.addAll(rsc.getStringListForVariable("superclass")); - } - ret.addAll(tmpset); - //logger.debug(ret); - superClasses.clear(); - superClasses.addAll(tmpset); - tmpset.clear(); - } - //logger.debug(concept); - //logger.debug(query); - return ret; - } - - public ResultSet queryConcept(String concept,int limit) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlQuery(concept,limit); - - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println("JSON:\n"+JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - - public ResultSet querySKOSConcept(String SKOSconcept,int limit) { - if(limit==0)limit=99999; - // - ResultSet rs = null; - try { - - String query = "SELECT * WHERE { \n " + - "?subject " + - "<http://www.w3.org/2004/02/skos/core#subject> " + - "<" + SKOSconcept + "> \n" + - "} LIMIT "+limit; - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - - public ResultSet getClassesForInstance(String instance) { - ResultSet rs = null; - try { - - String query = "SELECT ?subject WHERE { \n " + - "<" + instance + ">"+ - " a " + - "?subject " + - "\n" + - "}"; - SparqlQuery sq = new SparqlQuery(query, se); - //System.out.println(query); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - - public SortedSet<String> getPosExamples() { - return posExamples; - } - - public SortedSet<String> getNegExamples() { - return negExamples; - } - - - /** - * NOT WORKING - * @param description - */ - public void getSubClasses(String description) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlSubclassQuery(description.replace("\"", "")); - - rs = new SparqlQuery(query, se).send(); - System.out.println(query); - //System.out.println(SparqlQuery.getAsXMLString(rs)); - System.out.println(rs.getResultVars()); - SortedSet<String> remainingClasses = new JenaResultSetConvenience(rs).getStringListForVariable("subject"); - SortedSet<String> alreadyQueried = new TreeSet<String>(); - alreadyQueried.add(description); - while (remainingClasses.size()!=0){ - String tmp = remainingClasses.first(); - remainingClasses.remove(tmp); - query = SparqlQueryDescriptionConvertVisitor - .getSparqlSubclassQuery(tmp); - alreadyQueried.add(tmp); - rs = new SparqlQuery(query, se).send(); - remainingClasses.addAll(new JenaResultSetConvenience(rs).getStringListForVariable("subject")); - } - //System.out.println(JSON); - System.out.println(alreadyQueried); - - } catch (Exception e) { - e.printStackTrace(); - } - - } - - - -} Added: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java 2008-05-06 18:16:26 UTC (rev 830) @@ -0,0 +1,374 @@ +package org.dllearner.utilities; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.ComponentManager; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; + +import com.hp.hpl.jena.query.ResultSet; + +public class AutomaticExampleFinderSPARQL { + + private static Logger logger = Logger + .getLogger(ComponentManager.class); + + private Cache c; + private SparqlEndpoint se; + private SortedSet<String> posExamples; + private SortedSet<String> negExamples; + + + public AutomaticExampleFinderSPARQL(SparqlEndpoint se){ + this.c=new Cache(); + this.se=se; + posExamples = new TreeSet<String>(); + negExamples = new TreeSet<String>(); + } + + public void initDBpedia(String concept, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { + dbpediaMakePositiveExamplesFromConcept( concept); + SortedSet<String> keepForClean = new TreeSet<String>(); + keepForClean.addAll(this.posExamples); + this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); + logger.trace("shrinking: pos Example size: "+posExamples.size()); + + if(useRelated) { + dbpediaMakeNegativeExamplesFromRelatedInstances(this.posExamples); + } + if(useSuperclasses) { + makeNegativeExamplesFromSuperClasses(concept); + } + if(useParallelClasses) { + makeNegativeExamplesFromClassesOfInstances(); + } + //clean + negExamples.removeAll(keepForClean); + logger.trace("neg Example size after cleaning: "+negExamples.size()); + this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); + logger.debug("pos Example size after shrinking: "+posExamples.size()); + logger.debug("neg Example size after shrinking: "+negExamples.size()); + logger.debug("Finished examples for concept: "+concept); + } + + public void init(String concept, String namespace, boolean useRelated, boolean useSuperclasses,boolean useParallelClasses, int poslimit, int neglimit) { + makePositiveExamplesFromConcept( concept); + SortedSet<String> keepForClean = new TreeSet<String>(); + keepForClean.addAll(this.posExamples); + this.posExamples = SetManipulation.fuzzyShrink(this.posExamples, poslimit); + logger.trace("shrinking: pos Example size: "+posExamples.size()); + + if(useRelated) { + makeNegativeExamplesFromRelatedInstances(this.posExamples,namespace); + } + if(useSuperclasses) { + makeNegativeExamplesFromSuperClasses(concept); + } + if(useParallelClasses) { + makeNegativeExamplesFromClassesOfInstances(); + } + //clean + negExamples.removeAll(keepForClean); + logger.trace("neg Example size after cleaning: "+negExamples.size()); + this.negExamples = SetManipulation.fuzzyShrink(negExamples, neglimit); + logger.debug("pos Example size after shrinking: "+posExamples.size()); + logger.debug("neg Example size after shrinking: "+negExamples.size()); + logger.debug("Finished examples for concept: "+concept); + } + + + + public SortedSet<String> dbpediaGetPosOnly(String concept, int limit){ + dbpediaMakePositiveExamplesFromConcept( concept); + return SetManipulation.fuzzyShrink(this.posExamples, limit); + } + + public SortedSet<String> getPosOnly(String concept, int limit){ + makePositiveExamplesFromConcept( concept); + return SetManipulation.fuzzyShrink(this.posExamples, limit); + } + + private void dbpediaMakePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + if(concept.contains("http://dbpedia.org/resource/Category:")) { + this.posExamples = new JenaResultSetConvenience(dbpediaQuerySKOSConcept(concept,0)) + .getStringListForVariable("subject"); + }else { + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) + .getStringListForVariable("subject"); + } + logger.debug(" pos Example size: "+posExamples.size()); + } + + private void makePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) + .getStringListForVariable("subject"); + logger.debug(" pos Example size: "+posExamples.size()); + } + + + + /*private void makePositiveExamplesFromConcept(String concept){ + logger.debug("making Positive Examples from Concept: "+concept); + if(concept.contains("http://dbpedia.org/resource/Category:")) { + this.posExamples = new JenaResultSetConvenience(querySKOSConcept(concept,0)) + .getStringListForVariable("subject"); + }else { + this.posExamples = new JenaResultSetConvenience(queryConcept(concept,0)) + .getStringListForVariable("subject"); + } + logger.debug(" pos Example size: "+posExamples.size()); + }*/ + + + + + + private void dbpediaMakeNegativeExamplesFromRelatedInstances(SortedSet<String> subject) { + logger.debug("making examples from related instances"); + for (String string : subject) { + dbpediaMakeNegativeExamplesFromRelatedInstances(string); + } + logger.debug(" negExample size: "+negExamples.size()); + } + + private void makeNegativeExamplesFromRelatedInstances(SortedSet<String> subject, String namespace) { + logger.debug("making examples from related instances"); + for (String string : subject) { + makeNegativeExamplesFromRelatedInstances(string,namespace); + } + logger.debug(" negExample size: "+negExamples.size()); + } + + + /** + * + * @param subject + * @return + */ + private void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { + // SortedSet<String> result = new TreeSet<String>(); + + String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" + + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" + + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + + "}"; + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + this.negExamples.addAll(rsc.getStringListForVariable("o")); + + + } + + private void makeNegativeExamplesFromRelatedInstances(String subject, String namespace) { + // SortedSet<String> result = new TreeSet<String>(); + + String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" + + "FILTER (REGEX(str(?o), '"+namespace+"')).\n" + + "}"; + + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + this.negExamples.addAll(rsc.getStringListForVariable("o")); + + + } + + + private void makeNegativeExamplesFromSuperClasses(String concept) { + + SortedSet<String> superClasses = new TreeSet<String>(); + superClasses.add(concept.replace("\"", "")); + //logger.debug("before"+superClasses); + superClasses = getSuperClasses( superClasses, 4); + logger.debug("making neg Examples from "+superClasses.size()+" superclasses"); + JenaResultSetConvenience rsc; + for (String oneSuperClass : superClasses) { + + rsc = new JenaResultSetConvenience(queryConcept("\""+oneSuperClass+"\"", 0)); + this.negExamples.addAll(rsc.getStringListForVariable("subject")); + } + logger.debug(" neg Example size: "+negExamples.size()); + } + + + + private void makeNegativeExamplesFromClassesOfInstances() { + logger.debug("making neg Examples from parallel classes"); + SortedSet<String> classes = new TreeSet<String>(); + //superClasses.add(concept.replace("\"", "")); + //logger.debug("before"+superClasses); + //superClasses = dbpediaGetSuperClasses( superClasses, 4); + //logger.debug("getting negExamples from "+superClasses.size()+" superclasses"); + JenaResultSetConvenience rsc; + ResultSet rs=null; + for (String instance : posExamples) { + //System.out.println(instance); + rs = getClassesForInstance(instance); + //System.out.println(ResultSetFormatter.asXMLString(rs)); + rsc = new JenaResultSetConvenience(rs); + classes.addAll(rsc.getStringListForVariable("subject")); + //System.out.println(classes); + } + logger.debug("getting negExamples from "+classes.size()+" parallel classes"); + for (String oneClass : classes) { + + rsc = new JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",0)); + this.negExamples.addAll(rsc.getStringListForVariable("subject")); + } + logger.debug("neg Example size: "+negExamples.size()); + + } + + private SortedSet<String> getSuperClasses(SortedSet<String> superClasses, int depth) { + SortedSet<String> ret = new TreeSet<String>(); + SortedSet<String> tmpset = new TreeSet<String>(); + ret.addAll(superClasses); + //logger.debug(superClasses); + JenaResultSetConvenience rsc; + + String query = ""; + for (; depth != 0 ; depth--) { + for (String oneSuperClass : superClasses) { + //logger.debug("one"+oneSuperClass); + //tmp = oneSuperClass.replace("\"", ""); + query = "SELECT * WHERE { \n" + "<" + oneSuperClass + "> " + + "<http://www.w3.org/2000/01/rdf-schema#subClassOf> ?superclass. \n" + + "}"; + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + rsc = new JenaResultSetConvenience(rs); + tmpset.addAll(rsc.getStringListForVariable("superclass")); + } + ret.addAll(tmpset); + //logger.debug(ret); + superClasses.clear(); + superClasses.addAll(tmpset); + tmpset.clear(); + } + //logger.debug(concept); + //logger.debug(query); + return ret; + } + + + + public ResultSet queryConcept(String concept,int limit) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlQuery(concept,limit); + + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println("JSON:\n"+JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public ResultSet dbpediaQuerySKOSConcept(String SKOSconcept,int limit) { + if(limit==0)limit=99999; + // + ResultSet rs = null; + try { + + String query = "SELECT * WHERE { \n " + + "?subject " + + "<http://www.w3.org/2004/02/skos/core#subject> " + + "<" + SKOSconcept + "> \n" + + "} LIMIT "+limit; + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println(JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public ResultSet getClassesForInstance(String instance) { + ResultSet rs = null; + try { + + String query = "SELECT ?subject WHERE { \n " + + "<" + instance + ">"+ + " a " + + "?subject " + + "\n" + + "}"; + SparqlQuery sq = new SparqlQuery(query, se); + //System.out.println(query); + String JSON = c.executeSparqlQuery(sq); + //System.out.println(JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public SortedSet<String> getPosExamples() { + return posExamples; + } + + public SortedSet<String> getNegExamples() { + return negExamples; + } + + + /** + * NOT WORKING + * @param description + */ + public void getSubClasses(String description) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlSubclassQuery(description.replace("\"", "")); + + rs = new SparqlQuery(query, se).send(); + System.out.println(query); + //System.out.println(SparqlQuery.getAsXMLString(rs)); + System.out.println(rs.getResultVars()); + SortedSet<String> remainingClasses = new JenaResultSetConvenience(rs).getStringListForVariable("subject"); + SortedSet<String> alreadyQueried = new TreeSet<String>(); + alreadyQueried.add(description); + while (remainingClasses.size()!=0){ + String tmp = remainingClasses.first(); + remainingClasses.remove(tmp); + query = SparqlQueryDescriptionConvertVisitor + .getSparqlSubclassQuery(tmp); + alreadyQueried.add(tmp); + rs = new SparqlQuery(query, se).send(); + remainingClasses.addAll(new JenaResultSetConvenience(rs).getStringListForVariable("subject")); + } + //System.out.println(JSON); + System.out.println(alreadyQueried); + + } catch (Exception e) { + e.printStackTrace(); + } + + } + + + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/ConfWriter.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/ConfWriter.java 2008-05-06 15:02:50 UTC (rev 829) +++ trunk/src/dl-learner/org/dllearner/utilities/ConfWriter.java 2008-05-06 18:16:26 UTC (rev 830) @@ -43,23 +43,12 @@ } public void writeSPARQL(String filename, SortedSet<String> pos, SortedSet<String> neg, - String uri, SortedSet<String> ignoredConcepts) { + String uri, SortedSet<String> ignoredConcepts, String standardSettings, String prefixAlgortihm) { - String prefixAlgortihm = "refexamples"; + String prefixSparql = "sparql"; - String standardSettings = - "sparql.recursionDepth = 1;\n" + - "sparql.predefinedFilter = \"YAGO\";\n" + - "sparql.predefinedEndpoint = \"DBPEDIA\";\n" + - "refexamples.minExecutionTimeInSeconds = 30;\n" + - "refexamples.maxExecutionTimeInSeconds = 30;\n" + - "//refexamples.guaranteeXgoodDescriptions = 10;\n" + - "refexamples.logLevel=\"TRACE\";\n" + - "refexamples.noisePercentage = 0.10;\n" + - "refexamples.writeSearchTree = true;\n" + - "refexamples.searchTreeFile = \"searchTree.txt\";\n"+ - "refexamples.replaceSearchTree = true;\n\n" ; + // "sparql.format = \"KB\";\n\n"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |