From: <ku...@us...> - 2011-04-04 14:17:25
|
Revision: 2747 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2747&view=rev Author: kurzum Date: 2011-04-04 14:17:14 +0000 (Mon, 04 Apr 2011) Log Message: ----------- script for relearning dbpedia ontology Modified Paths: -------------- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/InstanceFinderSPARQL.java Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2011-04-04 07:21:31 UTC (rev 2746) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2011-04-04 14:17:14 UTC (rev 2747) @@ -2,7 +2,7 @@ * Copyright (C) 2007-2011, Jens Lehmann * * This file is part of DL-Learner. - * + * * DL-Learner is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or @@ -19,96 +19,147 @@ */ package org.dllearner.scripts.improveWikipedia; -import java.io.File; -import java.io.IOException; -import java.util.Set; -import java.util.SortedSet; - +import org.aksw.commons.sparql.core.ResultSetRenderer; +import org.aksw.commons.sparql.core.SparqlEndpoint; +import org.aksw.commons.sparql.core.SparqlTemplate; +import org.aksw.commons.sparql.core.decorator.CachingSparqlEndpoint; +import org.aksw.commons.sparql.core.impl.HttpSparqlEndpoint; +import org.apache.velocity.VelocityContext; +import org.apache.velocity.app.Velocity; import org.dllearner.algorithms.celoe.CELOE; import org.dllearner.core.ComponentManager; import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.core.OntologyFormat; import org.dllearner.core.ReasonerComponent; import 
org.dllearner.core.configurators.CELOEConfigurator; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.EquivalentClassesAxiom; -import org.dllearner.core.owl.Individual; -import org.dllearner.core.owl.KB; -import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.*; import org.dllearner.gui.Config; import org.dllearner.gui.ConfigSave; import org.dllearner.kb.sparql.SparqlKnowledgeSource; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.utilities.Helper; import org.dllearner.utilities.datastructures.Datastructures; import org.dllearner.utilities.datastructures.SortedSetTuple; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.SortedSet; + /** * A script, which learns definitions / super classes of classes in the DBpedia ontology. - * + * * @author Jens Lehmann - * */ public class DBpediaClassLearnerCELOE { - //SparqlEndpoint se ; + SparqlEndpoint sparqlEndpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint("http://dbpedia.org/sparql", "http://dbpedia.org"), "cache/"); - public DBpediaClassLearnerCELOE() { - // OPTIONAL: if you want to do some case distinctions in the learnClass method, you could add - // parameters to the constructure e.g. 
YAGO_ - } - - public KB learnAllClasses(Set<String> classesToLearn) throws LearningProblemUnsupportedException, IOException { - KB kb = new KB(); - for(String classToLearn : classesToLearn) { - kb.addAxiom(new EquivalentClassesAxiom(new NamedClass(classToLearn), learnClass(classToLearn))); - } - return kb; - } - - public Description learnClass(String classToLearn) throws LearningProblemUnsupportedException, IOException { - - // TODO: code for getting postive and negative examples for class to learn - SortedSet<Individual> posExamples = null; - SortedSet<Individual> negExamples = null; - SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(posExamples, negExamples); - - ComponentManager cm = ComponentManager.getInstance(); - - SparqlKnowledgeSource ks = cm.knowledgeSource(SparqlKnowledgeSource.class); - ks.getConfigurator().setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); - ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: probably the official endpoint is too slow? 
- - ReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, ks); - - PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); - lp.getConfigurator().setAccuracyMethod("fMeasure"); - lp.getConfigurator().setUseApproximations(false); - - CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); - CELOEConfigurator cc = la.getConfigurator(); - cc.setMaxExecutionTimeInSeconds(100); - cc.setNoisePercentage(20); - // TODO: set more options as needed - - // to write the above configuration in a conf file (optional) - Config cf = new Config(cm, ks, rc, lp, la); - new ConfigSave(cf).saveFile(new File("/dev/null")); - - la.start(); - - return la.getCurrentlyBestDescription(); - } - - public static void main(String args[]) throws LearningProblemUnsupportedException, IOException { - DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); - Set<String> classesToLearn = null; - KB kb = dcl.learnAllClasses(classesToLearn); - kb.export(new File("/dev/null"), OntologyFormat.RDF_XML); // TODO: pick appropriate place to save ontology - } + public DBpediaClassLearnerCELOE() { + // OPTIONAL: if you want to do some case distinctions in the learnClass method, you could add + // parameters to the constructure e.g. 
YAGO_ + } + public KB learnAllClasses(Set<String> classesToLearn) throws LearningProblemUnsupportedException, IOException { + KB kb = new KB(); + for (String classToLearn : classesToLearn) { + Description d = learnClass(classToLearn); + if (d == null) { + continue; + } + kb.addAxiom(new EquivalentClassesAxiom(new NamedClass(classToLearn), d)); + } + return kb; + } - public Set<String> getInstances(){ - return null; + public Description learnClass(String classToLearn) throws LearningProblemUnsupportedException, IOException { + Set<String> posEx = getPosEx(classToLearn); + if (posEx.isEmpty()) { + return null; + } + Set<String> classes = new HashSet<String>(); + + for (String pos : posEx) { + SparqlTemplate st = new SparqlTemplate(0); + st.addFilter(sparqlEndpoint.like("classes", new HashSet<String>(Arrays.asList(new String[]{"http://dbpedia.org/ontology/"})))); + VelocityContext vc = new VelocityContext(); + vc.put("instance", pos ); + String query = st.getQuery("sparqltemplates/directClassesOfInstance.vm", vc); + classes.addAll(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + classes.remove(classToLearn); + } + System.out.println(classes.size()); + System.exit(0); + + Set<String> negEx = new HashSet<String>(); + for (String oneClass : classes) { + /* st = new SparqlTemplate(0); + st.addFilter(sparqlEndpoint.like("classes", new HashSet<String>(Arrays.asList(new String[]{"http://dbpedia.org/ontology/"})))); + query = st.getQuery("sparqltemplates/classesOfInstance.vm", new VelocityContext()); + classes.addAll(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + classes.remove(classToLearn);*/ + } + //System.out.println(posEx); + System.exit(0); + //Set<String> negEx = InstanceFinderSPARQL.findInstancesWithSimilarClasses(posEx, -1, sparqlEndpoint); + SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); + SortedSetTuple<Individual> examples = new 
SortedSetTuple<Individual>(posExamples, negExamples); + System.out.println(posEx.size()); + System.out.println(negEx.size()); + System.exit(0); + ComponentManager cm = ComponentManager.getInstance(); + + SparqlKnowledgeSource ks = cm.knowledgeSource(SparqlKnowledgeSource.class); + ks.getConfigurator().setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: probably the official endpoint is too slow? + + ReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, ks); + + PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); + lp.getConfigurator().setAccuracyMethod("fMeasure"); + lp.getConfigurator().setUseApproximations(false); + + CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); + CELOEConfigurator cc = la.getConfigurator(); + cc.setMaxExecutionTimeInSeconds(100); + cc.setNoisePercentage(20); + // TODO: set more options as needed + + // to write the above configuration in a conf file (optional) + Config cf = new Config(cm, ks, rc, lp, la); + new ConfigSave(cf).saveFile(new File("/dev/null")); + + la.start(); + + return la.getCurrentlyBestDescription(); } - + + public static void main(String args[]) throws LearningProblemUnsupportedException, IOException { + + DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); + SparqlTemplate st = new SparqlTemplate(0); + st.addFilter(dcl.sparqlEndpoint.like("classes", new HashSet<String>(Arrays.asList(new String[]{"http://dbpedia.org/ontology/"})))); + + String query = st.getQuery("sparqltemplates/allClasses.vm", new VelocityContext()); + //System.out.println(query); + Set<String> classesToLearn = ResultSetRenderer.asStringSet(dcl.sparqlEndpoint.executeSelect(query)); + //System.out.println(classesToLearn); + + KB kb = dcl.learnAllClasses(classesToLearn); + System.exit(0); + kb.export(new File("/dev/null"), OntologyFormat.RDF_XML); // TODO: pick appropriate place to save ontology + } + + + public 
Set<String> getPosEx(String clazz) { + VelocityContext vc = new VelocityContext(); + vc.put("class", clazz); + vc.put("limit", 0); + String query = SparqlTemplate.instancesOfClass(vc); + return ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query)); + } + } Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/InstanceFinderSPARQL.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/InstanceFinderSPARQL.java 2011-04-04 07:21:31 UTC (rev 2746) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/InstanceFinderSPARQL.java 2011-04-04 14:17:14 UTC (rev 2747) @@ -22,78 +22,148 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; +import org.aksw.commons.sparql.core.ResultSetRenderer; import org.aksw.commons.sparql.core.SparqlEndpoint; +import org.aksw.commons.sparql.core.SparqlTemplate; import org.apache.log4j.Logger; +import org.apache.velocity.VelocityContext; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; import org.dllearner.kb.sparql.SPARQLTasks; import org.dllearner.utilities.datastructures.SetManipulation; -import org.dllearner.utilities.owl.OWLVocabulary; -import java.util.Iterator; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.*; public class InstanceFinderSPARQL { // LOGGER: ComponentManager private static Logger logger = Logger.getLogger(InstanceFinderSPARQL.class); + /** + * query is SELECT ?subject { ?subject rdf:type owl:Thing } LIMIT 100 + * + * @param blacklist instances removed from the returned set + * @param sparqlEndpoint + * @param limit if <= 0, 100 will be used + * @return a set of uris + */ + public static Set<String> arbitraryInstances(Set<String> blacklist, SparqlEndpoint sparqlEndpoint, int limit) { + String query = "SELECT ?subject { ?subject <" + RDF.type + 
"> <" + OWL.Thing + "> } " + ((limit > 0) ? "LIMIT " + limit : " LIMIT 100"); + ResultSet r = sparqlEndpoint.executeSelect(query); + Set<String> s = ResultSetRenderer.asStringSet(r); + s.removeAll(blacklist); + logger.debug("retrieving " + s.size() + " random instances "); + return s; + } - public static void randomInstances(Set<String> blacklist, SparqlEndpoint se) { - logger.debug("retrieving random instances "); - String query = "SELECT ?subject { ?subject <" + RDF.type + "> <" + OWL.Thing + "> } "; - //ResultSet r = se.executeSelect(query); + /** + * TODO refactor + * makes neg ex from related instances, that take part in a role R(pos,neg) + * filters all objects, that don't use the given namespace + * + * @param instances + */ + public static void relatedInstances(Set<String> instances, Set<String> blacklist, Set<String> allowedProperties, SparqlEndpoint sparqlEndpoint) { + /*public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances, + String objectNamespace) { + logger.debug("making examples from related instances"); + for (String oneInstance : instances) { + //makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace); + } + logger.debug("|-negExample size from related: " + fromRelated.size()); + } */ + /*String query = "SELECT * { " + "<" + oneInstance + "> " + "?p ?object.}" + + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; + // SortedSet<String> result = new TreeSet<String>(); + String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. 
\n" + + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; - //fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable); - //fromRandom.removeAll(fullPositiveSet); - //logger.debug("|-negExample size from random: " + fromRandom.size()); - + fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); + fromRelated.removeAll(fullPositiveSet); + */ } - private SPARQLTasks sparqltasks; - private SortedSet<String> filterClasses; - private SortedSet<String> fullPositiveSet; + /** + * TODO document + * @param instances this will serve as a blacklist also + */ + public static Set<String> findInstancesWithSimilarClasses(Set<String> instances, + int limit, SparqlEndpoint sparqlEndpoint) { + Set<String> classes = new HashSet<String>(); + Set<String> ret = new HashSet<String>(); + for (String instance : instances) { + try { + SparqlTemplate st = new SparqlTemplate(0); + //st.addFilter(sparqlEndpoint.like()); + VelocityContext vc = new VelocityContext(); + vc.put("instance", instance); + vc.put("limit", limit); + String query = SparqlTemplate.classesOfInstance(vc); + classes.addAll(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + } catch (Exception e) { + logger.warn("ignoring SPARQLQuery failure geClasses for " + instance); + } - private SortedSet<String> fromRelated = new TreeSet<String>(); - private SortedSet<String> fromNearbyClasses = new TreeSet<String>(); - private SortedSet<String> fromSuperclasses = new TreeSet<String>(); - private SortedSet<String> fromParallelClasses = new TreeSet<String>(); - private SortedSet<String> fromRandom = new TreeSet<String>(); - private SortedSet<String> fromDomain = new TreeSet<String>(); - private SortedSet<String> fromRange = new TreeSet<String>(); - static int poslimit = 10; - static int neglimit = 20; + } + System.out.println(classes); + System.exit(0); + logger.debug("retrieved " + classes.size() + " classes"); - /** - * takes as input a full positive set to make sure no negatives 
are added as positives - * - * @param fullPositiveSet - * @param st - */ - public InstanceFinderSPARQL( - SortedSet<String> fullPositiveSet, - SPARQLTasks st, SortedSet<String> filterClasses) { - super(); - this.fullPositiveSet = new TreeSet<String>(); - this.fullPositiveSet.addAll(fullPositiveSet); - this.sparqltasks = st; - this.filterClasses = filterClasses; - } + for (String oneClass : classes) { + logger.debug(oneClass); + try { + VelocityContext vc = new VelocityContext(); + vc.put("class", oneClass); + vc.put("limit", limit); + String query = SparqlTemplate.classesOfInstance(vc); + ret.addAll(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + } catch (Exception e) { + logger.warn("ignoring SPARQLQuery failure classesOfInstance for " + oneClass); + } + } - /** - * see <code> getNegativeExamples(int neglimit, boolean stable )</code> - * - * @param neglimit - */ - public SortedSet<String> getNegativeExamples(int neglimit) { - return getNegativeExamples(neglimit, false); + ret.removeAll(instances); + return ret; + + // superClasses.add(concept.replace("\"", "")); + // logger.debug("before"+superClasses); + // superClasses = dbpediaGetSuperClasses( superClasses, 4); + // logger.debug("getting negExamples from "+superClasses.size()+" + // superclasses"); + + /*for (String oneClass : classes) { + logger.debug(oneClass); + // rsc = new + // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); + try { + this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass + + "\"", sparqlResultLimit)); + } catch (Exception e) { + logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt"); + } + } + + for (String instance : positiveSet) { + try { + classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit)); + + } + logger.debug("getting negExamples from " + classes.size() + " parallel classes"); + + + fromParallelClasses.removeAll(fullPositiveSet); + logger.debug("|-neg Example size 
from parallelclass: " + fromParallelClasses.size()); + */ } + + + + /** * aggregates all collected neg examples * CAVE: it is necessary to call one of the make functions before calling this @@ -102,7 +172,10 @@ * @param neglimit size of negative Example set, 0 means all, which can be quite large several thousands * @param stable decides whether neg Examples are randomly picked, default false, faster for developing, since the cache can be used */ - public SortedSet<String> getNegativeExamples(int neglimit, boolean stable) { + /*public SortedSet<String> getNegativeExamples + ( + int neglimit, + boolean stable) { SortedSet<String> negatives = new TreeSet<String>(); negatives.addAll(fromNearbyClasses); negatives.addAll(fromParallelClasses); @@ -124,60 +197,22 @@ } logger.debug("neg Example size after shrinking: " + negatives.size()); return negatives; - } + } */ - public void makeNegativeExamplesFromRandomInstances() { - logger.debug("making random examples "); - String variable = "subject"; - String sparqlQueryString = "SELECT ?" + variable + " WHERE {" + - "?" 
+ variable + " <" + OWLVocabulary.RDF_TYPE + ">" + " ?o" + - "}"; - - fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable); - fromRandom.removeAll(fullPositiveSet); - logger.debug("|-negExample size from random: " + fromRandom.size()); - } - - /** - * makes neg ex from related instances, that take part in a role R(pos,neg) - * filters all objects, that don't use the given namespace - * - * @param instances - * @param objectNamespace - */ - public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances, - String objectNamespace) { - logger.debug("making examples from related instances"); - for (String oneInstance : instances) { - makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace); - } - logger.debug("|-negExample size from related: " + fromRelated.size()); - } - - private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) { - // SortedSet<String> result = new TreeSet<String>(); - - String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n" - + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}"; - - fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object")); - fromRelated.removeAll(fullPositiveSet); - - } - // keep a while may still be needed /*public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) { - // SortedSet<String> result = new TreeSet<String>(); + // SortedSet<String> result = new TreeSet<String>(); - String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" - + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" - + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}"; + String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. 
\n" + + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" + + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}"; - this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o")); + this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o")); - }*/ + }*/ + /* public void makeNegativeExamplesFromNearbyClasses(SortedSet<String> positiveSet, int sparqlResultLimit) { SortedSet<String> classes = new TreeSet<String>(); Iterator<String> instanceIter = positiveSet.iterator(); @@ -220,54 +255,8 @@ this.fromNearbyClasses.removeAll(fullPositiveSet); } - /** - * makes negEx from classes, the posEx belong to. - * Gets all Classes from PosEx, gets Instances from these Classes, returns all - * - * @param positiveSet - * @param sparqlResultLimit */ - public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int sparqlResultLimit) { - makeNegativeExamplesFromClassesOfInstances(positiveSet, sparqlResultLimit); - } - private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet, - int sparqlResultLimit) { - logger.debug("making neg Examples from parallel classes"); - SortedSet<String> classes = new TreeSet<String>(); - // superClasses.add(concept.replace("\"", "")); - // logger.debug("before"+superClasses); - // superClasses = dbpediaGetSuperClasses( superClasses, 4); - // logger.debug("getting negExamples from "+superClasses.size()+" - // superclasses"); - - for (String instance : positiveSet) { - try { - classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit)); - } catch (Exception e) { - e.printStackTrace(); - logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt"); - } - } - logger.debug("getting negExamples from " + classes.size() + " parallel classes"); - for (String oneClass : classes) { - logger.debug(oneClass); - // rsc = new - // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit)); - try { - 
this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass - + "\"", sparqlResultLimit)); - } catch (Exception e) { - logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt"); - } - } - - fromParallelClasses.removeAll(fullPositiveSet); - logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size()); - - } - - /** * it gets the first class of an arbitrary instance and queries the superclasses of it, * could be more elaborate. @@ -276,7 +265,7 @@ * @param positiveSet * @param sparqlResultSetLimit */ - public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet, + /* public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet, int sparqlResultSetLimit) { SortedSet<String> classes = new TreeSet<String>(); Iterator<String> instanceIter = positiveSet.iterator(); @@ -291,13 +280,14 @@ public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit) { makeNegativeExamplesFromSuperClasses(concept, sparqlResultSetLimit, 2); } - + */ /** * if pos ex derive from one class, then neg ex are taken from a superclass * * @param concept * @param sparqlResultSetLimit */ + /* public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit, int depth) { concept = concept.replaceAll("\"", ""); @@ -331,4 +321,5 @@ fromRange.removeAll(fullPositiveSet); logger.debug("|-neg Example size from Range: " + fromRange.size()); } + */ } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dc...@us...> - 2012-07-07 11:36:18
|
Revision: 3776 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3776&view=rev Author: dcherix Date: 2012-07-07 11:36:11 +0000 (Sat, 07 Jul 2012) Log Message: ----------- Created new dbpedia learn script for the simple sparql component Modified Paths: -------------- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java Added Paths: ----------- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2012-07-07 11:34:48 UTC (rev 3775) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2012-07-07 11:36:11 UTC (rev 3776) @@ -53,6 +53,7 @@ import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.refinementoperators.RhoDRDown; import org.dllearner.utilities.Helper; +import org.dllearner.utilities.JamonMonitorLogger; import org.dllearner.utilities.datastructures.Datastructures; import org.dllearner.utilities.datastructures.SetManipulation; import org.dllearner.utilities.datastructures.SortedSetTuple; @@ -66,6 +67,8 @@ import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; /** * A script, which learns definitions / super classes of classes in the DBpedia @@ -101,19 +104,26 @@ public static void main(String args[]) throws LearningProblemUnsupportedException, IOException, Exception { - - DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); - Set<String> classesToLearn = dcl.getClasses(); - KB kb = dcl.learnAllClasses(classesToLearn); - kb.export(new File("result.owl"), 
OntologyFormat.RDF_XML); - // Set<String> pos = dcl.getPosEx("http://dbpedia.org/ontology/Person"); - // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); + for (int i = 0; i < 4; i++) { + DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); + Set<String> classesToLearn = dcl.getClasses(); + Monitor mon = MonitorFactory.start("Learn DBpedia"); + KB kb = dcl.learnAllClasses(classesToLearn); + mon.stop(); + kb.export(new File("/home/dcherix/dllearner/old/result.owl"), + OntologyFormat.RDF_XML); + // Set<String> pos = + // dcl.getPosEx("http://dbpedia.org/ontology/Person"); + // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); + logger.info("Test " + i + ":\n" + + JamonMonitorLogger.getStringForAllSortedByLabel()); + } } public KB learnAllClasses(Set<String> classesToLearn) { KB kb = new KB(); for (String classToLearn : classesToLearn) { - System.out.println(classToLearn); + logger.info("Leraning class: " + classToLearn); try { Description d = learnClass(classToLearn); if (d == null @@ -124,9 +134,10 @@ } kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( classToLearn), d)); - kb.export(new File("result_partial.owl"), + kb.export(new File( + "/home/dcherix/dllearner/old/result_partial.owl"), OntologyFormat.RDF_XML); - System.out.println("DESCRIPTION: " + d); + } catch (Exception e) { logger.warn("", e); } @@ -175,7 +186,7 @@ ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { "http://dbpedia.org/class/yago/", - "" + "http://dbpedia.org/resource/Category:" }))); + "http://dbpedia.org/resource/Category:" }))); ks.init(); @@ -193,7 +204,7 @@ CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); // CELOEConfigurator cc = la.getConfigurator(); la.setMaxExecutionTimeInSeconds(100); - + la.init(); RhoDRDown op = (RhoDRDown) la.getOperator(); op.setUseNegation(false); @@ -323,7 +334,6 @@ */ String targetClass = getParallelClass(clazz); - System.out.println(targetClass); logger.info("using class for negatives: " + targetClass); if 
(targetClass != null) { SparqlTemplate st = SparqlTemplate Added: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java (rev 0) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2012-07-07 11:36:11 UTC (rev 3776) @@ -0,0 +1,439 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package org.dllearner.scripts.improveWikipedia; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.aksw.commons.sparql.core.SparqlTemplate; +import org.apache.velocity.VelocityContext; +import org.dllearner.algorithms.celoe.CELOE; +import org.dllearner.algorithms.fuzzydll.FuzzyCELOE; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentManager; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.core.OntologyFormat; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.EquivalentClassesAxiom; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.KB; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.Thing; +import org.dllearner.gui.Config; +import org.dllearner.gui.ConfigSave; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.simple.SparqlSimpleExtractor; +import org.dllearner.learningproblems.PosNegLPStandard; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.refinementoperators.RhoDRDown; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.JamonMonitorLogger; +import org.dllearner.utilities.datastructures.Datastructures; +import org.dllearner.utilities.datastructures.SetManipulation; +import org.dllearner.utilities.datastructures.SortedSetTuple; + +import com.hp.hpl.jena.ontology.OntClass; +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import 
com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +/** + * A script, which learns definitions / super classes of classes in the DBpedia + * ontology. + * + * TODO: This script made heavy use of aksw-commons-sparql-scala and needs to be + * rewritten to use aksw-commons-sparql (the new SPARQL API). + * + * @author Jens Lehmann + * @author Sebastian Hellmann + * @author Didier Cherix + */ +public class NewSparqlCompDBpediaClassLearnerCELOE { + + public static String endpointurl = "http://live.dbpedia.org/sparql"; + public static int examplesize = 30; + + private static org.apache.log4j.Logger logger = org.apache.log4j.Logger + .getLogger(NewSparqlCompDBpediaClassLearnerCELOE.class); + + SparqlEndpoint sparqlEndpoint = null; + + public NewSparqlCompDBpediaClassLearnerCELOE() { + // OPTIONAL: if you want to do some case distinctions in the learnClass + // method, you could add + // parameters to the constructure e.g. 
YAGO_ + try { + sparqlEndpoint = new SparqlEndpoint(new URL(endpointurl)); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + public static void main(String args[]) + throws LearningProblemUnsupportedException, IOException, Exception { + for (int i = 0; i < 10; i++) { + NewSparqlCompDBpediaClassLearnerCELOE dcl = new NewSparqlCompDBpediaClassLearnerCELOE(); + Set<String> classesToLearn = dcl.getClasses(); + + Monitor mon = MonitorFactory.start("Learn DBpedia"); + KB kb = dcl.learnAllClasses(classesToLearn); + mon.stop(); + kb.export(new File("/home/dcherix/dllearner/simple/result" + i + + ".owl"), OntologyFormat.RDF_XML); + // Set<String> pos = + // dcl.getPosEx("http://dbpedia.org/ontology/Person"); + // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); + logger.info("Test" + i + ":\n" + + JamonMonitorLogger.getStringForAllSortedByLabel()); + System.out.println(JamonMonitorLogger + .getStringForAllSortedByLabel()); + } + } + + public KB learnAllClasses(Set<String> classesToLearn) { + KB kb = new KB(); + for (String classToLearn : classesToLearn) { + logger.info(classToLearn); + System.out.println(classToLearn); + try { + Description d = learnClass(classToLearn); + if (d == null + || d.toKBSyntaxString().equals( + new Thing().toKBSyntaxString())) { + logger.error("Description was " + d + ", continueing"); + continue; + } + kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( + classToLearn), d)); + kb.export(new File( + "/home/dcherix/dllearner/simple/result_partial.owl"), + OntologyFormat.RDF_XML); + + } catch (Exception e) { + logger.warn("", e); + } + } + + return kb; + } + + public Description learnClass(String classToLearn) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + SortedSet<String> posEx = new TreeSet<String>(getPosEx(classToLearn)); + logger.info("Found " + posEx.size() + " positive examples"); + if (posEx.isEmpty()) { + return null; + } + 
SortedSet<String> negEx = new TreeSet<String>(getNegEx(classToLearn, + posEx)); + + posEx = SetManipulation.fuzzyShrink(posEx, examplesize); + negEx = SetManipulation.fuzzyShrink(negEx, examplesize); + + SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>( + posExamples, negExamples); + + ComponentManager cm = ComponentManager.getInstance(); + + SparqlSimpleExtractor ks = cm + .knowledgeSource(SparqlSimpleExtractor.class); + ks.setInstances(new ArrayList<String>(Datastructures + .individualSetToStringSet(examples.getCompleteSet()))); + // ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: + // probably the official endpoint is too slow? + ks.setEndpointURL(endpointurl); + // ks.setUseLits(false); + // ks.setUseCacheDatabase(true); + ks.setRecursionDepth(1); + ArrayList<String> ontologyUrls = new ArrayList<String>(); + ontologyUrls.add("http://downloads.dbpedia.org/3.6/dbpedia_3.6.owl"); + ks.setOntologySchemaUrls(ontologyUrls); + ks.setAboxfilter("FILTER (!regex(str(?p), '^http://dbpedia.org/property/wikiPageUsesTemplate') && " + + "!regex(str(?p), '^http://dbpedia.org/ontology/wikiPageExternalLink') && " + + "!regex(str(?p), '^http://dbpedia.org/property/wordnet_type') && " + + "!regex(str(?p), '^http://www.w3.org/2002/07/owl#sameAs')) ."); + ks.setTboxfilter("FILTER ( !regex(str(?class), '^http://dbpedia.org/class/yago/') && " + + "!regex(str(?class), '^http://dbpedia.org/resource/Category:')) "); + // ks.setCloseAfterRecursion(true); + // ks.setSaveExtractedFragment(true); + // ks.setPredList(new HashSet<String>(Arrays.asList(new String[] { + // "http://dbpedia.org/property/wikiPageUsesTemplate",(!regex(str(?p), + // '^http://dbpedia.org/resource/') && ! 
regex(str(?o), + // '^http://dbpedia.org/resource/Category') ) + // "http://dbpedia.org/ontology/wikiPageExternalLink", + // "http://dbpedia.org/property/wordnet_type", + // "http://www.w3.org/2002/07/owl#sameAs" }))); + + // ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { + // "http://dbpedia.org/class/yago/", + // "" + "http://dbpedia.org/resource/Category:" }))); + + ks.init(); + + AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, + ks); + rc.init(); + + PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); + lp.setPositiveExamples(posExamples); + lp.setNegativeExamples(negExamples); + lp.setAccuracyMethod("fmeasure"); + lp.setUseApproximations(false); + lp.init(); + CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); + // CELOEConfigurator cc = la.getConfigurator(); + la.setMaxExecutionTimeInSeconds(100); + la.init(); + RhoDRDown op = (RhoDRDown) la.getOperator(); + + op.setUseNegation(false); + op.setUseAllConstructor(false); + op.setUseCardinalityRestrictions(false); + op.setUseHasValueConstructor(true); + + la.setNoisePercentage(20); + la.setIgnoredConcepts(new HashSet<NamedClass>(Arrays + .asList(new NamedClass[] { new NamedClass(classToLearn) }))); + la.init(); + + // to write the above configuration in a conf file (optional) + Config cf = new Config(cm, ks, rc, lp, la); + // new ConfigSave(cf).saveFile(new File("/dev/null")); + + la.start(); + + cm.freeAllComponents(); + return la.getCurrentlyBestDescription(); + } + + public Set<String> getClasses() throws Exception { + OntModel model = ModelFactory.createOntologyModel(); + model.read(new FileInputStream( + "/home/dcherix/Downloads/dbpedia_3.6.owl"), null); + Set<OntClass> classes = model.listClasses().toSet(); + Set<String> results = new HashSet<String>(); + int i = 0; + for (OntClass ontClass : classes) { + results.add(ontClass.getURI()); + i++; + if (i == 100) + break; + } + System.out.println(results.size()); + return results; + } + + // gets all 
DBpedia Classes + // public Set<String> getClasses() throws Exception { + // SparqlTemplate st = SparqlTemplate.getInstance("allClasses.vm"); + // st.setLimit(0); + // st.addFilter(sparqlEndpoint.like("classes", new + // HashSet<String>(Arrays.asList(new + // String[]{"http://dbpedia.org/ontology/"})))); + // VelocityContext vc = st.putSgetVelocityContext(); + // String query = st.getQuery(); + // return new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // } + // + public Set<String> getPosEx(String clazz) throws Exception { + SparqlTemplate st = SparqlTemplate.getInstance("instancesOfClass.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", clazz); + String queryString = st.getQuery(); + return this.executeResourceQuery(queryString); + } + + /** + * gets all direct classes of all instances and has a look, what the most + * common is + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + public String selectClass(String clazz, Set<String> posEx) throws Exception { + Map<String, Integer> m = new HashMap<String, Integer>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * for (String pos : posEx) { SparqlTemplate st = + * SparqlTemplate.getInstance("directClassesOfInstance.vm"); + * st.setLimit(0); st.addFilter(sparqlEndpoint.like("direct", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); VelocityContext vc = + * st.getVelocityContext(); vc.put("instance", pos); String query = + * st.getQuery(); Set<String> classes = new + * HashSet<String>(ResultSetRenderer + * .asStringSet(sparqlEndpoint.executeSelect(query))); + * classes.remove(clazz); for (String s : classes) { if (m.get(s) == + * null) { m.put(s, 0); } m.put(s, m.get(s).intValue() + 1); } } + */ + + int max = 0; + String maxClass = ""; + for (String key : m.keySet()) { + if (m.get(key).intValue() > max) { + maxClass = key; + } + } + + return 
maxClass; + } + + /** + * gets instances of a class or random instances + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + + public Set<String> getNegEx(String clazz, Set<String> posEx) + throws Exception { + Set<String> negEx = new HashSet<String>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * String targetClass = getParallelClass(clazz); + * logger.info("using class for negatives: " + targetClass); if + * (targetClass != null) { + * + * SparqlTemplate st = + * SparqlTemplate.getInstance("instancesOfClass.vm"); st.setLimit(0); + * VelocityContext vc = st.getVelocityContext(); vc.put("class", + * targetClass); // st.addFilter(sparqlEndpoint.like("class", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); String query = + * st.getQuery(); // negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet + * (sparqlEndpoint.executeSelect(query)))); } else { + * + * SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + * st.setLimit(posEx.size() + 100); VelocityContext vc = + * st.getVelocityContext(); String query = st.getQuery(); // + * negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint + * .executeSelect(query)))); } negEx.removeAll(posEx); + */ + + String targetClass = getParallelClass(clazz); + logger.info("using class for negatives: " + targetClass); + if (targetClass != null) { + SparqlTemplate st = SparqlTemplate + .getInstance("instancesOfClass.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", targetClass); + st.addFilter("FILTER ( ?class LIKE (<http://dbpedia.org/ontology/%>"); + + String query = st.getQuery(); + negEx.addAll(this.executeResourceQuery(query)); + } else { + SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + st.setLimit(posEx.size() + 100); + VelocityContext vc = st.getVelocityContext(); + String query = st.getQuery(); + 
negEx.addAll(this.executeResourceQuery(query)); + } + negEx.removeAll(posEx); + return negEx; + + } + + public String getParallelClass(String clazz) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + // SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + // st.setLimit(0); + // VelocityContext vc = st.getVelocityContext(); + // vc.put("class", clazz); + // String query = st.getQuery(); + // Set<String> parClasses = new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // for (String s : parClasses) { + // return s; + // } + SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", clazz); + String query = st.getQuery(); + Set<String> parClasses = this.executeClassQuery(query); + for (String s : parClasses) { + if (s.startsWith("http://dbpedia.org/ontology")) { + if (!s.endsWith("Unknown")) { + return s; + } + } + } + return null; + } + + public Set<String> executeResourceQuery(String queryString) { + Query query = QueryFactory.create(queryString); + QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, + query); + ResultSet resultSet = qexec.execSelect(); + QuerySolution solution; + Set<String> results = new HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("instances").getURI()); + } + return results; + } + + public Set<String> executeClassQuery(String queryString) { + Query query = QueryFactory.create(queryString); + QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, + query); + ResultSet resultSet = qexec.execSelect(); + QuerySolution solution; + Set<String> results = new HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("sub").getURI()); + } + return results; + } + +} Property changes on: 
trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dc...@us...> - 2013-03-02 09:41:38
|
Revision: 3908
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3908&view=rev
Author:   dcherix
Date:     2013-03-02 09:41:31 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
Removed magic strings from scripts

Modified Paths:
--------------
    trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java
    trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java

Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-02-27 15:25:49 UTC (rev 3907)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-03-02 09:41:31 UTC (rev 3908)
@@ -453,4 +453,4 @@
         }
     }
 
-}
+}
\ No newline at end of file

Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java
===================================================================
--- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-02-27 15:25:49 UTC (rev 3907)
+++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-03-02 09:41:31 UTC (rev 3908)
@@ -433,4 +433,4 @@
         return results;
     }
 
-}
+}
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <dc...@us...> - 2013-03-12 09:29:07
|
Revision: 3911 http://sourceforge.net/p/dl-learner/code/3911 Author: dcherix Date: 2013-03-12 09:29:04 +0000 (Tue, 12 Mar 2013) Log Message: ----------- statics paths removed Modified Paths: -------------- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-03-04 15:00:36 UTC (rev 3910) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/DBpediaClassLearnerCELOE.java 2013-03-12 09:29:04 UTC (rev 3911) @@ -88,369 +88,387 @@ * @author Sebastian Hellmann */ public class DBpediaClassLearnerCELOE { - - public static String endpointurl = "http://live.dbpedia.org/sparql"; - public static int examplesize = 30; - - private static org.apache.log4j.Logger logger = org.apache.log4j.Logger - .getLogger(DBpediaClassLearnerCELOE.class); - - SparqlEndpoint sparqlEndpoint = null; - private Cache cache; - - public DBpediaClassLearnerCELOE() { - // OPTIONAL: if you want to do some case distinctions in the learnClass - // method, you could add - // parameters to the constructure e.g. 
YAGO_ - try { - sparqlEndpoint = new SparqlEndpoint(new URL(endpointurl)); - } catch (MalformedURLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - cache = new Cache("basCache"); - } - - public static void main(String args[]) - throws LearningProblemUnsupportedException, IOException, Exception { - for (int i = 0; i < 4; i++) { - DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); - Set<String> classesToLearn = dcl.getClasses(); - Monitor mon = MonitorFactory.start("Learn DBpedia"); - KB kb = dcl.learnAllClasses(classesToLearn); - mon.stop(); - kb.export(new File("/home/dcherix/dllearner/old/result"+i+".owl"), - OntologyFormat.RDF_XML); - // Set<String> pos = - // dcl.getPosEx("http://dbpedia.org/ontology/Person"); - // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); - logger.info("Test" + i + ":\n" - + JamonMonitorLogger.getStringForAllSortedByLabel()); - System.gc(); - } - } - - public KB learnAllClasses(Set<String> classesToLearn) { - KB kb = new KB(); - for (String classToLearn : classesToLearn) { - logger.info("Leanring class: " + classToLearn); - try { - Description d = learnClass(classToLearn); - if (d == null - || d.toKBSyntaxString().equals( - new Thing().toKBSyntaxString())) { - logger.error("Description was " + d + ", continueing"); - continue; - } - kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( - classToLearn), d)); - kb.export(new File( - "/home/dcherix/dllearner/old/result_partial.owl"), - OntologyFormat.RDF_XML); - - } catch (Exception e) { - logger.warn("", e); - } - this.dropCache(); - } - - return kb; - } - - public Description learnClass(String classToLearn) throws Exception { - // TODO: use aksw-commons-sparql instead of sparql-scala - SortedSet<String> posEx = new TreeSet<String>(getPosEx(classToLearn)); - logger.info("Found " + posEx.size() + " positive examples"); - if (posEx.isEmpty()) { - return null; - } - SortedSet<String> negEx = new TreeSet<String>(getNegEx(classToLearn, - 
posEx)); - - posEx = SetManipulation.fuzzyShrink(posEx, examplesize); - negEx = SetManipulation.fuzzyShrink(negEx, examplesize); - - SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); - SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); - SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>( - posExamples, negExamples); - - ComponentManager cm = ComponentManager.getInstance(); - - SparqlKnowledgeSource ks = cm - .knowledgeSource(SparqlKnowledgeSource.class); - ks.setInstances(Datastructures.individualSetToStringSet(examples - .getCompleteSet())); - // ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: - // probably the official endpoint is too slow? - ks.setUrl(new URL(endpointurl)); - ks.setUseLits(false); - ks.setUseCacheDatabase(true); - ks.setUseCache(true); - ks.setRecursionDepth(1); - ks.setCloseAfterRecursion(true); - ks.setSaveExtractedFragment(true); - ks.setPredList(new HashSet<String>(Arrays.asList(new String[] { - "http://dbpedia.org/property/wikiPageUsesTemplate", - "http://dbpedia.org/ontology/wikiPageExternalLink", - "http://dbpedia.org/property/wordnet_type", - "http://www.w3.org/2002/07/owl#sameAs" }))); - - ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { - "http://dbpedia.org/class/yago/", - "http://dbpedia.org/resource/Category:" }))); - - ks.init(); - - AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, - ks); - rc.init(); - - PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); - lp.setPositiveExamples(posExamples); - lp.setNegativeExamples(negExamples); - lp.setAccuracyMethod("fmeasure"); - lp.setUseApproximations(false); - lp.init(); - - CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); - // CELOEConfigurator cc = la.getConfigurator(); - la.setMaxExecutionTimeInSeconds(100); - la.init(); - RhoDRDown op = (RhoDRDown) la.getOperator(); - - op.setUseNegation(false); - op.setUseAllConstructor(false); - 
op.setUseCardinalityRestrictions(false); - op.setUseHasValueConstructor(true); - la.setNoisePercentage(20); - la.setIgnoredConcepts(new HashSet<NamedClass>(Arrays - .asList(new NamedClass[] { new NamedClass(classToLearn) }))); - la.init(); - - // to write the above configuration in a conf file (optional) - Config cf = new Config(cm, ks, rc, lp, la); - new ConfigSave(cf).saveFile(new File("/dev/null")); - - la.start(); - - cm.freeAllComponents(); - return la.getCurrentlyBestDescription(); - } - - public Set<String> getClasses() throws Exception { - OntModel model = ModelFactory.createOntologyModel(); - model.read(new FileInputStream( - "/home/dcherix/Downloads/dbpedia_3.6.owl"), null); - Set<OntClass> classes = model.listClasses().toSet(); - Set<String> results = new HashSet<String>(); - for (OntClass ontClass : classes) { - results.add(ontClass.getURI()); - } - return results; - } - - // gets all DBpedia Classes - // public Set<String> getClasses() throws Exception { - // SparqlTemplate st = SparqlTemplate.getInstance("allClasses.vm"); - // st.setLimit(0); - // st.addFilter(sparqlEndpoint.like("classes", new - // HashSet<String>(Arrays.asList(new - // String[]{"http://dbpedia.org/ontology/"})))); - // VelocityContext vc = st.putSgetVelocityContext(); - // String query = st.getQuery(); - // return new - // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); - // } - // - public Set<String> getPosEx(String clazz) throws Exception { -// SparqlTemplate st = SparqlTemplate.getInstance("instancesOfClass.vm"); -// st.setLimit(0); -// VelocityContext vc = st.getVelocityContext(); -// vc.put("class", clazz); -// String queryString = st.getQuery(); - StringBuilder queryString = new StringBuilder(); - queryString.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"); - queryString.append(" SELECT ?instances WHERE { ?instances rdf:type <"); - queryString.append(clazz); - queryString.append("> }"); - 
System.out.println(queryString); - return this.executeResourceQuery(queryString.toString()); - } - - /** - * gets all direct classes of all instances and has a look, what the most - * common is - * - * @param clazz - * @param posEx - * @return - * @throws Exception - */ - public String selectClass(String clazz, Set<String> posEx) throws Exception { - Map<String, Integer> m = new HashMap<String, Integer>(); - // TODO: use aksw-commons-sparql instead of sparql-scala - /* - * for (String pos : posEx) { SparqlTemplate st = - * SparqlTemplate.getInstance("directClassesOfInstance.vm"); - * st.setLimit(0); st.addFilter(sparqlEndpoint.like("direct", new - * HashSet<String>(Arrays.asList(new - * String[]{"http://dbpedia.org/ontology/"})))); VelocityContext vc = - * st.getVelocityContext(); vc.put("instance", pos); String query = - * st.getQuery(); Set<String> classes = new - * HashSet<String>(ResultSetRenderer - * .asStringSet(sparqlEndpoint.executeSelect(query))); - * classes.remove(clazz); for (String s : classes) { if (m.get(s) == - * null) { m.put(s, 0); } m.put(s, m.get(s).intValue() + 1); } } - */ - - int max = 0; - String maxClass = ""; - for (String key : m.keySet()) { - if (m.get(key).intValue() > max) { - maxClass = key; - } - } - - return maxClass; - } - - /** - * gets instances of a class or random instances - * - * @param clazz - * @param posEx - * @return - * @throws Exception - */ - - public Set<String> getNegEx(String clazz, Set<String> posEx) - throws Exception { - Set<String> negEx = new HashSet<String>(); - // TODO: use aksw-commons-sparql instead of sparql-scala - /* - * String targetClass = getParallelClass(clazz); - * logger.info("using class for negatives: " + targetClass); if - * (targetClass != null) { - * - * SparqlTemplate st = - * SparqlTemplate.getInstance("instancesOfClass.vm"); st.setLimit(0); - * VelocityContext vc = st.getVelocityContext(); vc.put("class", - * targetClass); // st.addFilter(sparqlEndpoint.like("class", new - * 
HashSet<String>(Arrays.asList(new - * String[]{"http://dbpedia.org/ontology/"})))); String query = - * st.getQuery(); // negEx.addAll(new - * HashSet<String>(ResultSetRenderer.asStringSet - * (sparqlEndpoint.executeSelect(query)))); } else { - * - * SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); - * st.setLimit(posEx.size() + 100); VelocityContext vc = - * st.getVelocityContext(); String query = st.getQuery(); // - * negEx.addAll(new - * HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint - * .executeSelect(query)))); } negEx.removeAll(posEx); - */ - - String targetClass = getParallelClass(clazz); - logger.info("using class for negatives: " + targetClass); - if (targetClass != null) { - SparqlTemplate st = SparqlTemplate - .getInstance("instancesOfClass2.vm"); - st.setLimit(0); - VelocityContext vc = st.getVelocityContext(); - vc.put("class", targetClass); - st.addFilter("FILTER ( ?class LIKE (<http://dbpedia.org/ontology/%>"); - - String query = st.getQuery(); - negEx.addAll(this.executeResourceQuery(query)); - } else { - SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); - st.setLimit(posEx.size() + 100); - VelocityContext vc = st.getVelocityContext(); - String query = st.getQuery(); - negEx.addAll(this.executeResourceQuery(query)); - } - negEx.removeAll(posEx); - return negEx; - - } - - public String getParallelClass(String clazz) throws Exception { - // TODO: use aksw-commons-sparql instead of sparql-scala - // SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); - // st.setLimit(0); - // VelocityContext vc = st.getVelocityContext(); - // vc.put("class", clazz); - // String query = st.getQuery(); - // Set<String> parClasses = new - // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); - // for (String s : parClasses) { - // return s; - // } - SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); - st.setLimit(0); - VelocityContext vc = 
st.getVelocityContext(); - vc.put("class", clazz); - String query = st.getQuery(); - Set<String> parClasses = this.executeClassQuery(query); - for (String s : parClasses) { - if (s.startsWith("http://dbpedia.org/ontology")) { - if (!s.endsWith("Unknown")) { - return s; - } - } - } - return null; - } - - public Set<String> executeResourceQuery(String queryString) { -// Query query = QueryFactory.create(queryString); -// QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, -// query); -// ResultSet resultSet = qexec.execSelect(); - ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache.executeSparqlQuery(new SparqlQuery(queryString,sparqlEndpoint))); - QuerySolution solution; - Set<String> results = new HashSet<String>(); - while (resultSet.hasNext()) { - solution = resultSet.next(); - results.add(solution.getResource("instances").getURI()); - } - return results; - } - - public Set<String> executeClassQuery(String queryString) { -// Query query = QueryFactory.create(queryString); -// QueryExecution qexec = QueryExecutionFactory.sparqlService(endpointurl, -// query); -// ResultSet resultSet = qexec.execSelect(); - ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache.executeSparqlQuery(new SparqlQuery(queryString,sparqlEndpoint))); - QuerySolution solution; - Set<String> results = new HashSet<String>(); - while (resultSet.hasNext()) { - solution = resultSet.next(); - results.add(solution.getResource("sub").getURI()); - } - return results; - } - - private void dropCache(){ - try { - Class.forName("org.h2.Driver"); - String databaseName="extraction"; - String databaseDirectory="cache"; - Connection conn = DriverManager.getConnection("jdbc:h2:"+databaseDirectory+"/"+databaseName, "sa", ""); - Statement st = conn.createStatement(); - st.execute("DELETE FROM QUERY_CACHE"); - st.close(); - conn.close(); - System.gc(); - } catch (ClassNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } 
catch (SQLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - + + public static String endpointurl = "http://live.dbpedia.org/sparql"; + public static int examplesize = 30; + + private static org.apache.log4j.Logger logger = org.apache.log4j.Logger + .getLogger(DBpediaClassLearnerCELOE.class); + private static String output; + private static String input; + + SparqlEndpoint sparqlEndpoint = null; + private Cache cache; + + public DBpediaClassLearnerCELOE() { + // OPTIONAL: if you want to do some case distinctions in the learnClass + // method, you could add + // parameters to the constructure e.g. YAGO_ + try { + sparqlEndpoint = new SparqlEndpoint(new URL(endpointurl)); + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + cache = new Cache("basCache"); + } + + public static void main(String args[]) throws LearningProblemUnsupportedException, IOException, + Exception { + if (args.length < 3) { + usage(); + return; + } + int iter; + try { + output = args[1]; + input = args[0]; + iter = Integer.parseInt(args[2]); + } catch (Exception e) { + usage(); + return; + } + for (int i = 0; i < iter; i++) { + DBpediaClassLearnerCELOE dcl = new DBpediaClassLearnerCELOE(); + Set<String> classesToLearn = dcl.getClasses(); + Monitor mon = MonitorFactory.start("Learn DBpedia"); + KB kb = dcl.learnAllClasses(classesToLearn); + mon.stop(); + kb.export(new File(output + "/result" + i + ".owl"), OntologyFormat.RDF_XML); + // Set<String> pos = + // dcl.getPosEx("http://dbpedia.org/ontology/Person"); + // dcl.getNegEx("http://dbpedia.org/ontology/Person", pos); + logger.info("Test" + i + ":\n" + JamonMonitorLogger.getStringForAllSortedByLabel()); + System.gc(); + } + } + + /** + * Show the required parameters for usage + */ + private static void usage() { + System.out.println("***************************************************************"); + System.out.println("* Usage: java DBpediaClassLearnerCELOE 
input output iteration *"); + System.out.println("* As input is the dbpedia schema as owl necessary *"); + System.out.println("* As output is a directory for the owl results file expected *"); + System.out.println("***************************************************************"); + } + + public KB learnAllClasses(Set<String> classesToLearn) { + KB kb = new KB(); + for (String classToLearn : classesToLearn) { + logger.info("Learning class: " + classToLearn); + try { + Description d = learnClass(classToLearn); + if (d == null || d.toKBSyntaxString().equals(new Thing().toKBSyntaxString())) { + logger.error("Description was " + d + ", continueing"); + continue; + } + kb.addAxiom(new EquivalentClassesAxiom(new NamedClass(classToLearn), d)); + kb.export(new File(output+"/result_partial.owl"), + OntologyFormat.RDF_XML); + + } catch (Exception e) { + logger.warn("", e); + } + this.dropCache(); + } + + return kb; + } + + public Description learnClass(String classToLearn) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + SortedSet<String> posEx = new TreeSet<String>(getPosEx(classToLearn)); + logger.info("Found " + posEx.size() + " positive examples"); + if (posEx.isEmpty()) { + return null; + } + SortedSet<String> negEx = new TreeSet<String>(getNegEx(classToLearn, posEx)); + + posEx = SetManipulation.fuzzyShrink(posEx, examplesize); + negEx = SetManipulation.fuzzyShrink(negEx, examplesize); + + SortedSet<Individual> posExamples = Helper.getIndividualSet(posEx); + SortedSet<Individual> negExamples = Helper.getIndividualSet(negEx); + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(posExamples, + negExamples); + + ComponentManager cm = ComponentManager.getInstance(); + + SparqlKnowledgeSource ks = cm.knowledgeSource(SparqlKnowledgeSource.class); + ks.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + // ks.getConfigurator().setPredefinedEndpoint("DBPEDIA"); // TODO: + // probably the official 
endpoint is too slow? + ks.setUrl(new URL(endpointurl)); + ks.setUseLits(false); + ks.setUseCacheDatabase(true); + ks.setUseCache(true); + ks.setRecursionDepth(1); + ks.setCloseAfterRecursion(true); + ks.setSaveExtractedFragment(true); + ks.setPredList(new HashSet<String>(Arrays + .asList(new String[] { "http://dbpedia.org/property/wikiPageUsesTemplate", + "http://dbpedia.org/ontology/wikiPageExternalLink", + "http://dbpedia.org/property/wordnet_type", + "http://www.w3.org/2002/07/owl#sameAs" }))); + + ks.setObjList(new HashSet<String>(Arrays.asList(new String[] { + "http://dbpedia.org/class/yago/", "http://dbpedia.org/resource/Category:" }))); + + ks.init(); + + AbstractReasonerComponent rc = cm.reasoner(FastInstanceChecker.class, ks); + rc.init(); + + PosNegLPStandard lp = cm.learningProblem(PosNegLPStandard.class, rc); + lp.setPositiveExamples(posExamples); + lp.setNegativeExamples(negExamples); + lp.setAccuracyMethod("fmeasure"); + lp.setUseApproximations(false); + lp.init(); + + CELOE la = cm.learningAlgorithm(CELOE.class, lp, rc); + // CELOEConfigurator cc = la.getConfigurator(); + la.setMaxExecutionTimeInSeconds(100); + la.init(); + RhoDRDown op = (RhoDRDown) la.getOperator(); + + op.setUseNegation(false); + op.setUseAllConstructor(false); + op.setUseCardinalityRestrictions(false); + op.setUseHasValueConstructor(true); + la.setNoisePercentage(20); + la.setIgnoredConcepts(new HashSet<NamedClass>(Arrays + .asList(new NamedClass[] { new NamedClass(classToLearn) }))); + la.init(); + + // to write the above configuration in a conf file (optional) + Config cf = new Config(cm, ks, rc, lp, la); + new ConfigSave(cf).saveFile(new File("/dev/null")); + + la.start(); + + cm.freeAllComponents(); + return la.getCurrentlyBestDescription(); + } + + public Set<String> getClasses() throws Exception { + OntModel model = ModelFactory.createOntologyModel(); + model.read(new FileInputStream(input), null); + Set<OntClass> classes = model.listClasses().toSet(); + Set<String> 
results = new HashSet<String>(); + for (OntClass ontClass : classes) { + results.add(ontClass.getURI()); + } + return results; + } + + // gets all DBpedia Classes + // public Set<String> getClasses() throws Exception { + // SparqlTemplate st = SparqlTemplate.getInstance("allClasses.vm"); + // st.setLimit(0); + // st.addFilter(sparqlEndpoint.like("classes", new + // HashSet<String>(Arrays.asList(new + // String[]{"http://dbpedia.org/ontology/"})))); + // VelocityContext vc = st.putSgetVelocityContext(); + // String query = st.getQuery(); + // return new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // } + // + public Set<String> getPosEx(String clazz) throws Exception { + // SparqlTemplate st = + // SparqlTemplate.getInstance("instancesOfClass.vm"); + // st.setLimit(0); + // VelocityContext vc = st.getVelocityContext(); + // vc.put("class", clazz); + // String queryString = st.getQuery(); + StringBuilder queryString = new StringBuilder(); + queryString.append("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"); + queryString.append(" SELECT ?instances WHERE { ?instances rdf:type <"); + queryString.append(clazz); + queryString.append("> }"); + System.out.println(queryString); + return this.executeResourceQuery(queryString.toString()); + } + + /** + * gets all direct classes of all instances and has a look, what the most + * common is + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + public String selectClass(String clazz, Set<String> posEx) throws Exception { + Map<String, Integer> m = new HashMap<String, Integer>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * for (String pos : posEx) { SparqlTemplate st = + * SparqlTemplate.getInstance("directClassesOfInstance.vm"); + * st.setLimit(0); st.addFilter(sparqlEndpoint.like("direct", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); VelocityContext vc = + * 
st.getVelocityContext(); vc.put("instance", pos); String query = + * st.getQuery(); Set<String> classes = new + * HashSet<String>(ResultSetRenderer + * .asStringSet(sparqlEndpoint.executeSelect(query))); + * classes.remove(clazz); for (String s : classes) { if (m.get(s) == + * null) { m.put(s, 0); } m.put(s, m.get(s).intValue() + 1); } } + */ + + int max = 0; + String maxClass = ""; + for (String key : m.keySet()) { + if (m.get(key).intValue() > max) { + maxClass = key; + } + } + + return maxClass; + } + + /** + * gets instances of a class or random instances + * + * @param clazz + * @param posEx + * @return + * @throws Exception + */ + + public Set<String> getNegEx(String clazz, Set<String> posEx) throws Exception { + Set<String> negEx = new HashSet<String>(); + // TODO: use aksw-commons-sparql instead of sparql-scala + /* + * String targetClass = getParallelClass(clazz); + * logger.info("using class for negatives: " + targetClass); if + * (targetClass != null) { + * + * SparqlTemplate st = + * SparqlTemplate.getInstance("instancesOfClass.vm"); st.setLimit(0); + * VelocityContext vc = st.getVelocityContext(); vc.put("class", + * targetClass); // st.addFilter(sparqlEndpoint.like("class", new + * HashSet<String>(Arrays.asList(new + * String[]{"http://dbpedia.org/ontology/"})))); String query = + * st.getQuery(); // negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet + * (sparqlEndpoint.executeSelect(query)))); } else { + * + * SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + * st.setLimit(posEx.size() + 100); VelocityContext vc = + * st.getVelocityContext(); String query = st.getQuery(); // + * negEx.addAll(new + * HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint + * .executeSelect(query)))); } negEx.removeAll(posEx); + */ + + String targetClass = getParallelClass(clazz); + logger.info("using class for negatives: " + targetClass); + if (targetClass != null) { + SparqlTemplate st = 
SparqlTemplate.getInstance("instancesOfClass2.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", targetClass); + st.addFilter("FILTER ( ?class LIKE (<http://dbpedia.org/ontology/%>"); + + String query = st.getQuery(); + negEx.addAll(this.executeResourceQuery(query)); + } else { + SparqlTemplate st = SparqlTemplate.getInstance("someInstances.vm"); + st.setLimit(posEx.size() + 100); + VelocityContext vc = st.getVelocityContext(); + String query = st.getQuery(); + negEx.addAll(this.executeResourceQuery(query)); + } + negEx.removeAll(posEx); + return negEx; + + } + + public String getParallelClass(String clazz) throws Exception { + // TODO: use aksw-commons-sparql instead of sparql-scala + // SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + // st.setLimit(0); + // VelocityContext vc = st.getVelocityContext(); + // vc.put("class", clazz); + // String query = st.getQuery(); + // Set<String> parClasses = new + // HashSet<String>(ResultSetRenderer.asStringSet(sparqlEndpoint.executeSelect(query))); + // for (String s : parClasses) { + // return s; + // } + SparqlTemplate st = SparqlTemplate.getInstance("parallelClass.vm"); + st.setLimit(0); + VelocityContext vc = st.getVelocityContext(); + vc.put("class", clazz); + String query = st.getQuery(); + Set<String> parClasses = this.executeClassQuery(query); + for (String s : parClasses) { + if (s.startsWith("http://dbpedia.org/ontology")) { + if (!s.endsWith("Unknown")) { + return s; + } + } + } + return null; + } + + public Set<String> executeResourceQuery(String queryString) { + // Query query = QueryFactory.create(queryString); + // QueryExecution qexec = + // QueryExecutionFactory.sparqlService(endpointurl, + // query); + // ResultSet resultSet = qexec.execSelect(); + ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache + .executeSparqlQuery(new SparqlQuery(queryString, sparqlEndpoint))); + QuerySolution solution; + Set<String> results = new 
HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("instances").getURI()); + } + return results; + } + + public Set<String> executeClassQuery(String queryString) { + // Query query = QueryFactory.create(queryString); + // QueryExecution qexec = + // QueryExecutionFactory.sparqlService(endpointurl, + // query); + // ResultSet resultSet = qexec.execSelect(); + ResultSetRewindable resultSet = SparqlQuery.convertJSONtoResultSet(cache + .executeSparqlQuery(new SparqlQuery(queryString, sparqlEndpoint))); + QuerySolution solution; + Set<String> results = new HashSet<String>(); + while (resultSet.hasNext()) { + solution = resultSet.next(); + results.add(solution.getResource("sub").getURI()); + } + return results; + } + + private void dropCache() { + try { + Class.forName("org.h2.Driver"); + String databaseName = "extraction"; + String databaseDirectory = "cache"; + Connection conn = DriverManager.getConnection("jdbc:h2:" + databaseDirectory + "/" + + databaseName, "sa", ""); + Statement st = conn.createStatement(); + st.execute("DELETE FROM QUERY_CACHE"); + st.close(); + conn.close(); + System.gc(); + } catch (ClassNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } \ No newline at end of file Modified: trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-03-04 15:00:36 UTC (rev 3910) +++ trunk/scripts/src/main/java/org/dllearner/scripts/improveWikipedia/NewSparqlCompDBpediaClassLearnerCELOE.java 2013-03-12 09:29:04 UTC (rev 3911) @@ -90,6 +90,8 @@ private static org.apache.log4j.Logger logger = org.apache.log4j.Logger 
.getLogger(NewSparqlCompDBpediaClassLearnerCELOE.class); + private static String output; + private static String input; SparqlEndpoint sparqlEndpoint = null; @@ -107,6 +109,19 @@ public static void main(String args[]) throws LearningProblemUnsupportedException, IOException, Exception { + if (args.length < 3) { + usage(); + return; + } + int iter; + try { + output = args[1]; + input = args[0]; + iter = Integer.parseInt(args[2]); + } catch (Exception e) { + usage(); + return; + } for (int i = 0; i < 4; i++) { NewSparqlCompDBpediaClassLearnerCELOE dcl = new NewSparqlCompDBpediaClassLearnerCELOE(); Set<String> classesToLearn = dcl.getClasses(); @@ -114,7 +129,7 @@ Monitor mon = MonitorFactory.start("Learn DBpedia"); KB kb = dcl.learnAllClasses(classesToLearn); mon.stop(); - kb.export(new File("/home/dcherix/dllearner/simple/result" + i + kb.export(new File(output+"/result" + i + ".owl"), OntologyFormat.RDF_XML); // Set<String> pos = // dcl.getPosEx("http://dbpedia.org/ontology/Person"); @@ -125,6 +140,17 @@ .getStringForAllSortedByLabel()); } } + + /** + * Show the required parameters for usage + */ + private static void usage() { + System.out.println("***************************************************************"); + System.out.println("* Usage: java DBpediaClassLearnerCELOE input output iteration *"); + System.out.println("* As input is the dbpedia schema as owl necessary *"); + System.out.println("* As output is a directory for the owl results file expected *"); + System.out.println("***************************************************************"); + } public KB learnAllClasses(Set<String> classesToLearn) { KB kb = new KB(); @@ -141,7 +167,7 @@ kb.addAxiom(new EquivalentClassesAxiom(new NamedClass( classToLearn), d)); kb.export(new File( - "/home/dcherix/dllearner/simple/result_partial.owl"), + output+"/result_partial.owl"), OntologyFormat.RDF_XML); } catch (Exception e) { @@ -184,7 +210,7 @@ // ks.setUseCacheDatabase(true); ks.setRecursionDepth(1); 
ArrayList<String> ontologyUrls = new ArrayList<String>(); - ontologyUrls.add("http://downloads.dbpedia.org/3.6/dbpedia_3.6.owl"); + ontologyUrls.add(new File(input).toURI().toURL().toString()); ks.setOntologySchemaUrls(ontologyUrls); ks.setAboxfilter("FILTER (!regex(str(?p), '^http://dbpedia.org/property/wikiPageUsesTemplate') && " + "!regex(str(?p), '^http://dbpedia.org/ontology/wikiPageExternalLink') && " @@ -247,7 +273,7 @@ public Set<String> getClasses() throws Exception { OntModel model = ModelFactory.createOntologyModel(); model.read(new FileInputStream( - "/home/dcherix/Downloads/dbpedia_3.6.owl"), null); + input), null); Set<OntClass> classes = model.listClasses().toSet(); Set<String> results = new HashSet<String>(); int i = 0; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |