From: <ku...@us...> - 2008-05-19 10:20:26
|
Revision: 897 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=897&view=rev Author: kurzum Date: 2008-05-19 03:20:14 -0700 (Mon, 19 May 2008) Log Message: ----------- new extraction structure (will be modified a little bit more later on) Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/cli/Start.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlKnowledgeSource.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryMaker.java trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLExtractionEvaluation.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLMassLearning.java trunk/src/dl-learner/org/dllearner/test/JenaQueryToResultSpeedTest.java trunk/src/dl-learner/org/dllearner/test/SparqlEndpointTest.java trunk/src/dl-learner/org/dllearner/test/SparqlExtractionTest.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderRolesSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSPARQL.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/kb/extraction/ trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java trunk/src/dl-learner/org/dllearner/kb/extraction/Manipulator.java trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQuery.java trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryClasses.java trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryInterface.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlEndpoint.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryType.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java trunk/src/dl-learner/org/dllearner/kb/sparql/Manipulator.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryInterface.java trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/ trunk/src/dl-learner/org/dllearner/kb/sparql/datastructure/ Modified: trunk/src/dl-learner/org/dllearner/cli/Start.java =================================================================== --- trunk/src/dl-learner/org/dllearner/cli/Start.java 2008-05-19 10:14:48 UTC (rev 896) +++ trunk/src/dl-learner/org/dllearner/cli/Start.java 2008-05-19 10:20:14 UTC (rev 897) @@ -70,8 +70,8 @@ import org.dllearner.core.owl.ObjectProperty; import org.dllearner.kb.KBFile; import org.dllearner.kb.OWLFile; +import org.dllearner.kb.extraction.TypedSparqlQuery; import org.dllearner.kb.sparql.SparqlKnowledgeSource; -import org.dllearner.kb.sparql.TypedSparqlQuery; import org.dllearner.learningproblems.PosNegDefinitionLP; import org.dllearner.learningproblems.PosNegInclusionLP; import org.dllearner.learningproblems.PosOnlyDefinitionLP; Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/datastructure/ClassNode.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/ClassNode.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,115 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.Vector; + +import org.dllearner.utilities.StringTuple; + +/** + * Is a node in the graph, that is a class. + * + * @author Sebastian Hellmann + */ +public class ClassNode extends Node { + Set<PropertyNode> properties = new HashSet<PropertyNode>(); + + public ClassNode(URI u) { + super(u); + } + + // expands all directly connected nodes + @Override + public Vector<Node> expand(TypedSparqlQueryInterface tsq, Manipulator m) { + + Set<StringTuple> s = tsq.getTupelForResource(this.uri); + // see manipulator + s = m.check(s, this); + Vector<Node> Nodes = new Vector<Node>(); + Iterator<StringTuple> it = s.iterator(); + while (it.hasNext()) { + StringTuple t = (StringTuple) it.next(); + try { + // substitute rdf:type with owl:subclassof + if (t.a.equals(m.type) || t.a.equals(m.subclass)) { + ClassNode tmp = new ClassNode(new URI(t.b)); + properties.add(new PropertyNode(new URI(m.subclass), this, + tmp)); + Nodes.add(tmp); + } else { + // further expansion stops here + // Nodes.add(tmp); is missing on purpose + ClassNode tmp = new ClassNode(new URI(t.b)); + properties.add(new PropertyNode(new URI(t.a), this, tmp)); + // System.out.println(m.blankNodeIdentifier); + // System.out.println("XXXXX"+t.b); + + // if o is a blank node expand further + // TODO this needs a lot more work + if (t.b.startsWith(m.blankNodeIdentifier)) { + tmp.expand(tsq, m); + System.out.println(m.blankNodeIdentifier); + System.out.println("XXXXX" + t.b); + } + // Nodes.add(tmp); + } + } catch (Exception e) { + System.out.println(t); + e.printStackTrace(); + } + + } + return Nodes; + } + + // gets the types for properties recursively + @Override + public void expandProperties(TypedSparqlQueryInterface tsq, Manipulator m) { + } + + /* + * (non-Javadoc) + * + * @see org.dllearner.kb.sparql.datastructure.Node#toNTriple() + */ + @Override + public Set<String> toNTriple() { + Set<String> s = new HashSet<String>(); + s.add("<" + this.uri + "><" + rdftype + "><" + classns + ">."); + + for (PropertyNode one : properties) { + s.add("<" + this.uri + "><" + one.getURI() + "><" + + one.getB().getURI() + ">."); + s.addAll(one.getB().toNTriple()); + } + + return s; + } + + @Override + public int compareTo(Node n) { + return super.compareTo(n); + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/configuration/Configuration.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Configuration.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,99 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQueryType; + +/** + * Stores all configuration settings. this class collects all configuration + * information see the other classes, which are used as attributes here + * + * @author Sebastian Hellmann + */ +public class Configuration { + + private SparqlEndpoint endpoint; + private SparqlQueryType sparqlQueryType; + private Manipulator manipulator; + // the following needs to be moved to + // class extraction algorithm or manipulator + private int recursiondepth; + private boolean getAllSuperClasses = true; + private boolean closeAfterRecursion = true; + public int numberOfUncachedSparqlQueries = 0; + public int numberOfCachedSparqlQueries = 0; + public String cacheDir="cache"; + + public Configuration(SparqlEndpoint specificSparqlEndpoint, + SparqlQueryType sparqlQueryType, Manipulator manipulator, + int recursiondepth, boolean getAllSuperClasses, + boolean closeAfterRecursion, String cacheDir) { + this.endpoint = specificSparqlEndpoint; + this.sparqlQueryType = sparqlQueryType; + this.manipulator = manipulator; + this.recursiondepth = recursiondepth; + this.getAllSuperClasses = getAllSuperClasses; + this.closeAfterRecursion = closeAfterRecursion; + this.cacheDir=cacheDir; + + } + + public Configuration changeQueryType(SparqlQueryType sqt) { + // TODO must clone here + return new Configuration(this.endpoint, sqt, this.manipulator, + this.recursiondepth, this.getAllSuperClasses, + this.closeAfterRecursion, this.cacheDir); + + } + + public Manipulator getManipulator() { + return this.manipulator; + } + + public SparqlEndpoint getSparqlEndpoint() { + return endpoint; + } + + public SparqlQueryType getSparqlQueryType() { + return sparqlQueryType; + } + + public boolean isGetAllSuperClasses() { + return getAllSuperClasses; + } + + public boolean isCloseAfterRecursion() { + return closeAfterRecursion; + } + + public int getRecursiondepth() { + return recursiondepth; + } + + public void increaseNumberOfuncachedSparqlQueries() { + numberOfUncachedSparqlQueries++; + } + + public void increaseNumberOfCachedSparqlQueries() { + numberOfCachedSparqlQueries++; + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/ExtractionAlgorithm.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,177 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; + +/** + * This class is used to extract the information . + * + * @author Sebastian Hellmann + */ +public class ExtractionAlgorithm { + + private Configuration configuration; + private Manipulator manipulator; + private int recursionDepth = 1; + // private boolean getAllSuperClasses = true; + // private boolean closeAfterRecursion = true; + private static Logger logger = Logger + .getLogger(KnowledgeSource.class); + + public ExtractionAlgorithm(Configuration Configuration) { + this.configuration = Configuration; + this.manipulator = Configuration.getManipulator(); + this.recursionDepth = Configuration.getRecursiondepth(); + // this.getAllSuperClasses = Configuration.isGetAllSuperClasses(); + // this.closeAfterRecursion=Configuration.isCloseAfterRecursion(); + } + + public Node getFirstNode(URI u) { + return new InstanceNode(u); + } + + public Vector<Node> expandAll(URI[] u, TypedSparqlQuery tsp) { + Vector<Node> v = new Vector<Node>(); + for (URI one : u) { + v.add(expandNode(one, tsp)); + } + return v; + } + + /** + * most important function expands one example cave: the recursion is not a + * recursion anymore, it was transformed to an iteration + * + * @param uri + * @param typedSparqlQuery + * @return + */ + public Node expandNode(URI uri, TypedSparqlQuery typedSparqlQuery) { + //System.out.println(uri.toString()); + //System.out.println(manipulator); + //System.out.println(this.configuration); + long time = System.currentTimeMillis(); + Node n = getFirstNode(uri); + System.out.println(n); + Vector<Node> v = new Vector<Node>(); + v.add(n); + logger.info("StartVector: " + v); + // n.expand(tsp, this.Manipulator); + // Vector<Node> second= + for (int x = 1; x <= recursionDepth; x++) { + + Vector<Node> tmp = new Vector<Node>(); + while (v.size() > 0) { + Node tmpNode = v.remove(0); + logger.info("Expanding " + tmpNode); + // System.out.println(this.Manipulator); + // these are the new not expanded nodes + // the others are saved in connection with the original node + Vector<Node> tmpVec = tmpNode.expand(typedSparqlQuery, + manipulator); + //System.out.println(tmpVec); + tmp.addAll(tmpVec); + } + v = tmp; + logger.info("Recursion counter: " + x + " with " + v.size() + + " Nodes remaining, needed: " + + (System.currentTimeMillis() - time) + "ms"); + time = System.currentTimeMillis(); + } + + HashSet<String> hadAlready = new HashSet<String>(); + + //p(configuration.toString()); + // gets All Class Nodes and expands them further + if (this.configuration.isGetAllSuperClasses()) { + logger.info("Get all superclasses"); + // Set<Node> classes = new TreeSet<Node>(); + Vector<Node> classes = new Vector<Node>(); + + Vector<Node> instances = new Vector<Node>(); + for (Node one : v) { + if (one instanceof ClassNode) { + classes.add(one); + } + if (one instanceof InstanceNode) { + instances.add(one); + } + + } + // System.out.println(instances.size()); + TypedSparqlQueryClasses tsqc = new TypedSparqlQueryClasses( + configuration); + if (this.configuration.isCloseAfterRecursion()) { + while (instances.size() > 0) { + logger.trace("Getting classes for remaining instances: " + + instances.size()); + Node next = instances.remove(0); + logger.trace("Getting classes for: " + next); + classes.addAll(next.expand(tsqc, manipulator)); + if (classes.size() >= manipulator.breakSuperClassRetrievalAfter) { + break; + } + } + } + Vector<Node> tmp = new Vector<Node>(); + int i = 0; + while (classes.size() > 0) { + logger.trace("Remaining classes: " + classes.size()); + // Iterator<Node> it=classes.iterator(); + // Node next =(Node) it.next(); + // classes.remove(next); + Node next = classes.remove(0); + + if (!hadAlready.contains(next.getURI().toString())) { + logger.trace("Getting SuperClass for: " + next); + // System.out.println(hadAlready.size()); + hadAlready.add(next.getURI().toString()); + tmp = next.expand(typedSparqlQuery, manipulator); + classes.addAll(tmp); + tmp = new Vector<Node>(); + // if(i % 50==0)System.out.println("got "+i+" extra classes, + // max: "+manipulator.breakSuperClassRetrievalAfter); + i++; + if (i >= manipulator.breakSuperClassRetrievalAfter) { + break; + } + } + // System.out.println("Skipping"); + + // if + // (classes.size()>=manipulator.breakSuperClassRetrievalAfter){break;} + + } + // System.out.println((System.currentTimeMillis()-time)+""); + + } + return n; + + } + + + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/datastructure/InstanceNode.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/InstanceNode.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,120 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.Vector; + +import org.dllearner.utilities.StringTuple; + +/** + * A node in the graph that is an instance. + * + * @author Sebastian Hellmann + * + */ +public class InstanceNode extends Node { + + Set<ClassNode> classes = new HashSet<ClassNode>(); + Set<StringTuple> datatypes = new HashSet<StringTuple>(); + Set<PropertyNode> properties = new HashSet<PropertyNode>(); + + public InstanceNode(URI u) { + super(u); + // this.type = "instance"; + + } + + // expands all directly connected nodes + @Override + public Vector<Node> expand(TypedSparqlQueryInterface tsq, Manipulator m) { + + Set<StringTuple> s = tsq.getTupelForResource(uri); + // see Manipulator + m.check(s, this); + // System.out.println("fffffff"+m); + Vector<Node> Nodes = new Vector<Node>(); + + Iterator<StringTuple> it = s.iterator(); + while (it.hasNext()) { + StringTuple t = (StringTuple) it.next(); + //RBC + if(!t.b.startsWith("http:"))continue; + + // basically : if p is rdf:type then o is a class + // else it is an instance + try { + if (t.a.equals(m.type)) { + ClassNode tmp = new ClassNode(new URI(t.b)); + classes.add(tmp); + Nodes.add(tmp); + } else { + InstanceNode tmp = new InstanceNode(new URI(t.b)); + properties.add(new PropertyNode(new URI(t.a), this, tmp)); + Nodes.add(tmp); + + } + } catch (Exception e) { + System.out.println("Problem with: " + t); + e.printStackTrace(); + } + + } + expanded = true; + return Nodes; + } + + // gets the types for properties recursively + @Override + public void expandProperties(TypedSparqlQueryInterface tsq, Manipulator m) { + for (PropertyNode one : properties) { + one.expandProperties(tsq, m); + } + + } + + @Override + public Set<String> toNTriple() { + Set<String> s = new HashSet<String>(); + s.add("<" + uri + "><" + rdftype + "><" + thing + ">."); + for (ClassNode one : classes) { + s.add("<" + uri + "><" + rdftype + "><" + one.getURI() + ">."); + s.addAll(one.toNTriple()); + } + for (PropertyNode one : properties) { + s.add("<" + uri + "><" + one.getURI() + "><" + one.getB().getURI() + + ">."); + s.addAll(one.toNTriple()); + s.addAll(one.getB().toNTriple()); + } + + return s; + } + + @Override + public int compareTo(Node n) { + return super.compareTo(n); + // + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Manager.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,155 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQueryType; +import org.dllearner.utilities.Statistics; + +/** + * An object of this class encapsulates everything. + * + * @author Sebastian Hellmann + * + */ +public class Manager { + + private Configuration configuration; + private TypedSparqlQuery typedSparqlQuery; + private ExtractionAlgorithm extractionAlgorithm; + + private static Logger logger = Logger + .getLogger(KnowledgeSource.class); + + + public void useConfiguration(SparqlQueryType SparqlQueryType, + SparqlEndpoint SparqlEndpoint, Manipulator manipulator, + int recursiondepth, boolean getAllSuperClasses, + boolean closeAfterRecursion, String cacheDir) { + + this.configuration = new Configuration(SparqlEndpoint, SparqlQueryType, + manipulator, recursiondepth, getAllSuperClasses, + closeAfterRecursion, cacheDir); + //System.out.println(this.configuration); + this.typedSparqlQuery = new TypedSparqlQuery(configuration); + this.extractionAlgorithm = new ExtractionAlgorithm(configuration); + + } + + public String extract(URI uri) { + // this.TypedSparqlQuery.query(uri); + // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); + System.out.println("Start extracting"); + + Node n = extractionAlgorithm.expandNode(uri, typedSparqlQuery); + Set<String> s = n.toNTriple(); + String nt = ""; + for (String str : s) { + nt += str + "\n"; + } + return nt; + } + + public String extract(Set<String> instances) { + // this.TypedSparqlQuery.query(uri); + // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); + System.out.println("Start extracting"); + SortedSet<String> ret = new TreeSet<String>(); + int progress=0; + for (String one : instances) { + progress++; + logger.info("Progress: "+progress+" of "+instances.size()+" finished"); + try { + Node n = extractionAlgorithm.expandNode(new URI(one), + typedSparqlQuery); + ret.addAll(n.toNTriple()); + } catch (Exception e) { + e.printStackTrace(); + } + } + System.out.println("Finished extracting, start conversion"); + StringBuffer nt = new StringBuffer(); + Object[] arr = ret.toArray(); + for (int i = 0; i < arr.length; i++) { + nt.append((String) arr[i] + "\n"); + if (i % 1000 == 0) + System.out.println(i + " of " + arr.length + " triples done"); + } + System.out.println(arr.length + " of " + arr.length + " triples done"); + /* + * String tmp=""; while ( ret.size() > 0) { tmp=ret.first(); nt+=tmp; + * ret.remove(tmp); System.out.println(ret.size()); } /*for (String str : + * ret) { nt += str + "\n"; } + */ + Statistics.addTriples(ret.size()); + return nt.toString(); + } + + public void addPredicateFilter(String str) { + this.configuration.getSparqlQueryType().addPredicateFilter(str); + + } + + public Configuration getConfiguration() { + return configuration; + } + + /* + * public void calculateSubjects(String label, int limit) { + * System.out.println("SparqlModul: Collecting Subjects"); + * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { + * subjects = oc.getSubjectsFromLabel(label, limit); } catch (IOException e) { + * subjects = new String[1]; subjects[0] = "[Error]Sparql Endpoint could not + * be reached."; } System.out.println("SparqlModul: ****Finished"); } + * + * /** TODO SparqlOntologyCollector needs to be removed @param subject + */ + /* + * public void calculateTriples(String subject) { + * System.out.println("SparqlModul: Collecting Triples"); + * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { + * triples = oc.collectTriples(subject); } catch (IOException e) { triples = + * new String[1]; triples[0] = "[Error]Sparql Endpoint could not be + * reached."; } System.out.println("SparqlModul: ****Finished"); } + */ + /** + * TODO SparqlOntologyCollector needs to be removed + * + * @param concept + */ + + /* + * public void calculateConceptSubjects(String concept) { + * System.out.println("SparqlModul: Collecting Subjects"); + * oldSparqlOntologyCollector oc = new oldSparqlOntologyCollector(url); try { + * conceptSubjects = oc.getSubjectsFromConcept(concept); } catch + * (IOException e) { conceptSubjects = new String[1]; conceptSubjects[0] = + * "[Error]Sparql Endpoint could not be reached."; } + * System.out.println("SparqlModul: ****Finished"); } + */ + +} \ No newline at end of file Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/Manipulator.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/Manipulator.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Manipulator.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Manipulator.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,163 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Set; + +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.utilities.JenaResultSetConvenience; +import org.dllearner.utilities.StringTuple; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; + +/** + * Used to manipulate retrieved tupels, identify blanknodes, etc. + * + * @author Sebastian Hellmann + * + */ +public class Manipulator { + public final String subclass = "http://www.w3.org/2000/01/rdf-schema#subClassOf"; + public final String type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; + final String objectProperty = "http://www.w3.org/2002/07/owl#ObjectProperty"; + final String classns = "http://www.w3.org/2002/07/owl#Class"; + final String thing = "http://www.w3.org/2002/07/owl#Thing"; + + public String blankNodeIdentifier = "bnode"; + public int breakSuperClassRetrievalAfter = 200; + public LinkedList<StringTuple> replacePredicate; + public LinkedList<StringTuple> replaceObject; + + // Set<String> classproperties; + + public Manipulator(String blankNodeIdentifier, + int breakSuperClassRetrievalAfter, + LinkedList<StringTuple> replacePredicate, + LinkedList<StringTuple> replaceObject) { + this.blankNodeIdentifier = blankNodeIdentifier; + this.replaceObject = replaceObject; + this.replacePredicate = replacePredicate; + this.breakSuperClassRetrievalAfter = breakSuperClassRetrievalAfter; + // Set<String> classproperties = new HashSet<String>(); + // classproperties.add(subclass); + + } + + /** + * this checks for consistency and manipulates the tuples, before they get + * triple + * + * @param tuples + * tuples for the node + * @param node + * @return + */ + public Set<StringTuple> check(Set<StringTuple> tuples, Node node) { + Set<StringTuple> toRemove = new HashSet<StringTuple>(); + Iterator<StringTuple> it = tuples.iterator(); + while (it.hasNext()) { + StringTuple t = (StringTuple) it.next(); + + //HACK +// if(t.a.equals("http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag")) { +// //hackGetLabel(t.b); +// +// } + + // GovTrack hack + // => we convert a string literal to a URI + // => TODO: introduce an option for converting literals for certain + // properties into URIs +// String sp = "http://purl.org/dc/elements/1.1/subject"; +// if(t.a.equals(sp)) { +// System.out.println(t); +// System.exit(0); +// } + + replacePredicate(t); + replaceObject(t); + + + // remove <rdf:type, owl:class> + // this is done to avoid transformation to owl:subclassof + if (t.a.equals(type) && t.b.equals(classns) + && node instanceof ClassNode) { + toRemove.add(t); + } + + // all with type class + if (t.b.equals(classns) && node instanceof ClassNode) { + toRemove.add(t); + } + + // remove all instances with owl:type thing + if (t.a.equals(type) && t.b.equals(thing) + && node instanceof InstanceNode) { + toRemove.add(t); + } + + } + tuples.removeAll(toRemove); + + return tuples; + } + + private void replacePredicate(StringTuple t) { + for (StringTuple rep : replacePredicate) { + if (rep.a.equals(t.a)) { + t.a = rep.b; + } + } + } + + private void replaceObject(StringTuple t) { + for (StringTuple rep : replaceObject) { + if (rep.a.equals(t.a)) { + t.a = rep.b; + } + } + } + + //HACK + private String hackGetLabel(String resname){ + String query="" + + "SELECT ?o \n" + + "WHERE { \n" + + "<"+resname+"> "+ " <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o " + + "}"; + + System.out.println(query); + //http://dbtune.org/musicbrainz/sparql?query= + //SELECT ?o WHERE { <http://dbtune.org/musicbrainz/resource/tag/1391> <http://www.holygoat.co.uk/owl/redwood/0.1/tags/tagName> ?o } + SparqlQuery s=new SparqlQuery(query,SparqlEndpoint.EndpointMusicbrainz()); + ResultSet rs=s.send(); + while (rs.hasNext()){ + rs.nextBinding(); + } + //System.out.println("AAA"+s.getAsXMLString(s.send()) ); + return ""; + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/datastructure/Node.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/Node.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,98 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.Set; +import java.util.Vector; + + +/** + * Abstract class. defines functions to expand the nodes + * + * @author Sebastian Hellmann + * + */ +public abstract class Node implements Comparable<Node> { + + final String subclass = "http://www.w3.org/2000/01/rdf-schema#subClassOf"; + final String rdftype = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; + final String objectProperty = "http://www.w3.org/2002/07/owl#ObjectProperty"; + final String classns = "http://www.w3.org/2002/07/owl#Class"; + final String thing = "http://www.w3.org/2002/07/owl#Thing"; + + URI uri; + // protected String type; + protected boolean expanded = false; + + public Node(URI u) { + this.uri = u; + } + + /** + * Nodes are expanded with a certain context, given by the typedSparqlQuery + * and the manipulator + * + * @param typedSparqlQuery + * @param manipulator + * @return Vector<Node> all Nodes that are new because of expansion + */ + public abstract Vector<Node> expand( + TypedSparqlQueryInterface typedSparqlQuery, Manipulator manipulator); + + /** + * gets type defs for properties like rdf:type SymmetricProperties + * + * @param typedSparqlQuery + * @param manipulator + * @return Vector<Node> + */ + public abstract void expandProperties( + TypedSparqlQueryInterface typedSparqlQuery, Manipulator manipulator); + + /** + * output + * + * @return a set of n-triple + */ + public abstract Set<String> toNTriple(); + + @Override + public String toString() { + return "Node: " + uri + ":" + this.getClass().getSimpleName(); + + } + + public URI getURI() { + return uri; + } + + public boolean equals(Node n) { + if (this.uri.equals(n.uri)) + return true; + else + return false; + } + + public int compareTo(Node n) { + return this.uri.toString().compareTo(n.uri.toString()); + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/datastructure/PropertyNode.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/PropertyNode.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,119 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.Vector; + +import org.dllearner.utilities.StringTuple; + +/** + * Property node, has connection to a and b part + * + * @author Sebastian Hellmann + * + */ + +public class PropertyNode extends Node { + + // the a and b part of a property + private Node a; + private Node b; + // specialtypes like owl:symmetricproperty + private Set<String> specialTypes; + + public PropertyNode(URI u, Node a, Node b) { + super(u); + // this.type = "property"; + this.a = a; + this.b = b; + this.specialTypes = new HashSet<String>(); + } + + // Property Nodes are normally not expanded, + // this function is never called + @Override + public Vector<Node> expand(TypedSparqlQueryInterface tsq, Manipulator m) { + return null; + } + + // gets the types for properties recursively + @Override + public void expandProperties(TypedSparqlQueryInterface tsq, Manipulator m) { + b.expandProperties(tsq, m); + Set<StringTuple> s = tsq.getTupelForResource(uri); + + Iterator<StringTuple> it = s.iterator(); + while (it.hasNext()) { + StringTuple t = (StringTuple) it.next(); + try { + if (t.a.equals(m.type)) { + specialTypes.add(t.b); + } + } catch (Exception e) { + System.out.println(t); + e.printStackTrace(); + } + + } + + } + + public Node getA() { + return a; + } + + public Node getB() { + return b; + } + + @Override + public Set<String> toNTriple() { + Set<String> s = new HashSet<String>(); + s.add("<" + uri + "><" + + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + "><" + + "http://www.w3.org/2002/07/owl#ObjectProperty" + ">."); + for (String one : specialTypes) { + s.add("<" + uri + "><" + + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + "><" + + one + ">."); + + } + + return s; + } + + @Override + public boolean equals(Node n) { + if (this.uri.equals(n.uri)) + return true; + else + return false; + } + + @Override + public int compareTo(Node n) { + return super.compareTo(n); + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQuery.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQuery.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQuery.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQuery.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,249 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryMaker; +import org.dllearner.utilities.SimpleClock; +import org.dllearner.utilities.StringTuple; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.sparql.core.ResultBinding; + +/** + * Can execute different queries. + * + * @author Sebastian Hellmann + * + */ +public class TypedSparqlQuery implements TypedSparqlQueryInterface { + + private static Logger logger = Logger.getLogger(KnowledgeSource.class); + + + boolean print_flag = false; + protected Configuration configuration; + private SparqlQueryMaker sparqlQueryMaker; + Cache cache; + + // boolean debug_no_cache = false;// true means no cache is used + // private SparqlHTTPRequest SparqlHTTPRequest; + // private SparqlQuery sparqlQuery; + // private CachedSparqlQuery cachedSparqlQuery; + + public TypedSparqlQuery(Configuration Configuration) { + this.configuration = Configuration; + this.sparqlQueryMaker = new SparqlQueryMaker(Configuration + .getSparqlQueryType()); + + this.cache = new Cache(configuration.cacheDir); + // this.sparqlQuery=new SparqlQuery(configuration.getSparqlEndpoint()); + // this.cachedSparqlQuery=new + // CachedSparqlQuery(this.sparqlQuery,this.cache); + } + + // standard query get a tupels (p,o) for subject s + /** + * uses a cache and gets the result tuples for a resource u + * + * @param uri + * the resource + * @param sparqlQueryString + * @param a + * the name of the first bound variable for xml parsing, normally + * predicate + * @param b + * the name of the second bound variable for xml parsing, + * normally object + * @return + */ + @SuppressWarnings({"unchecked"}) + public Set<StringTuple> getTupelForResource(URI uri) { + Set<StringTuple> s = new HashSet<StringTuple>(); + + String a = "predicate"; + String b = "object"; + // getQuery + String sparqlQueryString = sparqlQueryMaker + .makeSubjectQueryUsingFilters(uri.toString()); + +// CachedSparqlQuery csq = new CachedSparqlQuery(configuration +// .getSparqlEndpoint(), cache, uri.toString(), sparqlQueryString); + + SparqlQuery query = new SparqlQuery(sparqlQueryString, configuration.getSparqlEndpoint()); + query.extraDebugInfo=uri.toString(); + String JSON = cache.executeSparqlQuery(query); + + ResultSet rs = SparqlQuery.JSONtoResultSet(JSON); + + List<ResultBinding> l = ResultSetFormatter.toList(rs); + + p(l.toString()); + for (ResultBinding resultBinding : l) { + + s.add(new StringTuple(resultBinding.get(a).toString(), + resultBinding.get(b).toString())); + } + return s; + } + + /*@Deprecated + private Set<StringTuple> cachedSparql(URI uri, String sparqlQueryString, + String a, String b) { + return null; + /* + * OLD CODE FOLLOWING keep until Jena is working String FromCache = + * cache.get(u.toString(), sparqlQueryString); if (debug_no_cache) { + * //FromCache = null; } String xml = null; // if not in cache get it + * from EndPoint if (FromCache == null) { + * configuration.increaseNumberOfuncachedSparqlQueries(); // try { xml = + * sendAndReceiveSPARQL(sparqlQueryString); + * + * //} catch (IOException e) {e.printStackTrace();} + * + * p(sparqlQueryString); // System.out.println(xml); if + * (!debug_no_cache) { cache.put(uri.toString(), sparqlQueryString, + * xml); } // System.out.print("\n"); } else { + * configuration.increaseNumberOfCachedSparqlQueries(); xml = FromCache; // + * System.out.println("FROM CACHE"); } + */ + // System.out.println(sparql); + // System.out.println(xml); + // process XML + //} + + /** + * TODO old XML processing, can be removed, once Jena is done + * + * @param xml + * @param a + * @param b + * @return a Set of Tuples <a|b> + */ + @Deprecated + public Set<StringTuple> processResult(String xml, String a, String b) { + + Set<StringTuple> ret = new HashSet<StringTuple>(); + // TODO if result is empty, catch exceptions + String resEnd = "</result>"; + String one = "binding name=\"" + a + "\""; + String two = "binding name=\"" + b + "\""; + String endbinding = "binding"; + String uri = "uri"; + // String uridel = "<uri>"; + String bnode = "<bnode>"; + // String uriend = "</uri>"; + String predtmp = ""; + String objtmp = ""; + // System.out.println(getNextResult(xml)); + String nextResult = ""; + while ((nextResult = getNextResult(xml)) != null) { + // System.out.println(xml.indexOf(resEnd)); + // System.out.println(xml); + if (nextResult.indexOf(bnode) != -1) { + xml = xml.substring(xml.indexOf(resEnd) + resEnd.length()); + continue; + } + // get pred + // predtmp = nextResult.substring(nextResult.indexOf(one) + + // one.length()); + predtmp = getinTag(nextResult, one, endbinding); + predtmp = getinTag(predtmp, uri, uri); + // System.out.println(predtmp); + + // getobj + objtmp = getinTag(nextResult, two, endbinding); + objtmp = getinTag(objtmp, uri, uri); + // System.out.println(objtmp); + + StringTuple st = new StringTuple(predtmp, objtmp); + // System.out.println(st); + ret.add(st); + xml = xml.substring(xml.indexOf(resEnd) + resEnd.length()); + + } + /* + * while (xml.indexOf(one) != -1) { + * + * + * // System.out.println(new Tupel(predtmp,objtmp)); } + */ + + return ret; + + } + + /** + * TODO used by old XML processing, can be removed once Jena is done + * + * @param xml + * @return + */ + @Deprecated + private String getNextResult(String xml) { + String res1 = "<result>"; + String res2 = "</result>"; + if (xml.indexOf(res1) == -1) + return null; + xml = xml.substring(xml.indexOf(res1) + res1.length()); + xml = xml.substring(0, xml.indexOf(res2)); + // System.out.println(xml); + return xml; + } + + /** + * TODO used by old XML processing, can be removed once Jena is done + * + * @param xml + * @param starttag + * @param endtag + * @return + */ + @Deprecated + private String getinTag(String xml, String starttag, String endtag) { + String res1 = "<" + starttag + ">"; + // System.out.println(res1); + String res2 = "</" + endtag + ">"; + if (xml.indexOf(res1) == -1) + return null; + xml = xml.substring(xml.indexOf(res1) + res1.length()); + // System.out.println(xml); + xml = xml.substring(0, xml.indexOf(res2)); + // System.out.println(xml); + + return xml; + } + + public void p(String str) { + if (print_flag) { + System.out.println(str); + } + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryClasses.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryClasses.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryClasses.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryClasses.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,79 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.utilities.StringTuple; + +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.sparql.core.ResultBinding; + +/** + * Can execute different queries. + * + * @author Sebastian Hellmann + * + */ +public class TypedSparqlQueryClasses extends TypedSparqlQuery implements + TypedSparqlQueryInterface { + + public TypedSparqlQueryClasses(Configuration configuration) { + super(configuration); + } + + /* + * Special TypedSparqlQuery which returns superclasses of classes + * (non-Javadoc) + * + * @see org.dllearner.kb.sparql.TypedSparqlQuery#getTupelForResource(java.net.URI) + */ + @Override + @SuppressWarnings({"unchecked"}) + public Set<StringTuple> getTupelForResource(URI uri) { + Set<StringTuple> s = new HashSet<StringTuple>(); + String a = "predicate"; + String b = "object"; + // getQuery for all super classes of classes only + String sparqlQueryString = "SELECT ?predicate ?object " + "WHERE {" + + "<" + uri.toString() + "> ?predicate ?object;" + + "a ?object . " + + " FILTER (!regex(str(?object),'http://xmlns.com/foaf/0.1/'))" + + "}"; + + SparqlQuery query = new SparqlQuery(sparqlQueryString, configuration.getSparqlEndpoint()); + query.extraDebugInfo=uri.toString(); + ResultSet rs = SparqlQuery.JSONtoResultSet(cache.executeSparqlQuery(query)); + + List<ResultBinding> l = ResultSetFormatter.toList(rs); + for (ResultBinding resultBinding : l) { + + s.add(new StringTuple(resultBinding.get(a).toString(), + resultBinding.get(b).toString())); + } + return s; + } + +} Copied: trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryInterface.java (from rev 895, trunk/src/dl-learner/org/dllearner/kb/sparql/TypedSparqlQueryInterface.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryInterface.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/kb/extraction/TypedSparqlQueryInterface.java 2008-05-19 10:20:14 UTC (rev 897) @@ -0,0 +1,39 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.kb.extraction; + +import java.net.URI; +import java.util.Set; + +import org.dllearner.utilities.StringTuple; + +/** + * + * Typed SPARQL query interface. The typing means that they all have the same + * input and the same output: They are fn: resource -> ( a | b ) where a + * normally is a predicate and b an object + * + * @author Sebastian Hellmann + * + */ +public interface TypedSparqlQueryInterface { + + public Set<StringTuple> getTupelForResource(URI u); +} Deleted: trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java 2008-05-19 10:14:48 UTC (rev 896) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/ExtractionAlgorithm.java 2008-05-19 10:20:14 UTC (rev 897) @@ -1,181 +0,0 @@ -/** - * Copyright (C) 2007, Sebastian Hellmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.kb.sparql; - -import java.net.URI; -import java.util.HashSet; -import java.util.Vector; - -import org.apache.log4j.Logger; -import org.dllearner.core.KnowledgeSource; -import org.dllearner.kb.sparql.configuration.Configuration; -import org.dllearner.kb.sparql.datastructure.ClassNode; -import org.dllearner.kb.sparql.datastructure.InstanceNode; -import org.dllearner.kb.sparql.datastructure.Node; - -/** - * This class is used to extract the information . - * - * @author Sebastian Hellmann - */ -public class ExtractionAlgorithm { - - private Configuration configuration; - private Manipulator manipulator; - private int recursionDepth = 1; - // private boolean getAllSuperClasses = true; - // private boolean closeAfterRecursion = true; - private static Logger logger = Logger - .getLogger(KnowledgeSource.class); - - public ExtractionAlgorithm(Configuration Configuration) { - this.configuration = Configuration; - this.manipulator = Configuration.getManipulator(); - this.recursionDepth = Configuration.getRecursiondepth(); - // this.getAllSuperClasses = Configuration.isGetAllSuperClasses(); - // this.closeAfterRecursion=Configuration.isCloseAfterRecursion(); - } - - public Node getFirstNode(URI u) { - return new InstanceNode(u); - } - - public Vector<Node> expandAll(URI[] u, TypedSparqlQuery tsp) { - Vector<Node> v = new Vector<Node>(); - for (URI one : u) { - v.add(expandNode(one, tsp)); - } - return v; - } - - /** - * most important function expands one example cave: the recursion is not a - * recursion anymore, it was transformed to an iteration - * - * @param uri - * @param typedSparqlQuery - * @return - */ - public Node expandNode(URI uri, TypedSparqlQuery typedSparqlQuery) { - //System.out.println(uri.toString()); - //System.out.println(manipulator); - //System.out.println(this.configuration); - long time = System.currentTimeMillis(); - Node n = getFirstNode(uri); - System.out.println(n); - Vector<Node> v = new Vector<Node>(); - v.add(n); - logger.info("StartVector: " + v); - // n.expand(tsp, this.Manipulator); - // Vector<Node> second= - for (int x = 1; x <= recursionDepth; x++) { - - Vector<Node> tmp = new Vector<Node>(); - while (v.size() > 0) { - Node tmpNode = v.remove(0); - logger.info("Expanding " + tmpNode); - // System.out.println(this.Manipulator); - // these are the new not expanded nodes - // the others are saved in connection with the original node - Vector<Node> tmpVec = tmpNode.expand(typedSparqlQuery, - manipulator); - //System.out.println(tmpVec); - tmp.addAll(tmpVec); - } - v = tmp; - logger.info("Recursion counter: " + x + " with " + v.size() - + " Nodes remaining, needed: " - + (System.currentTimeMillis() - time) + "ms"); - time = System.currentTimeMillis(); - } - - HashSet<String> hadAlready = new HashSet<String>(); - - //p(configuration.toString()); - // gets All Class Nodes and expands them further - if (this.configuration.isGetAllSuperClasses()) { - logger.info("Get all superclasses"); - // Set<Node> classes = new TreeSet<Node>(); - Vector<Node> classes = new Vector<Node>(); - - Vector<Node> instances = new Vector<Node>(); - for (Node one : v) { - if (one instanceof ClassNode) { - classes.add(one); - } - if (one instanceof InstanceNode) { - instances.add(one); - } - - } - // System.out.println(instances.size()); - TypedSparqlQueryClasses tsqc = new TypedSparqlQueryClasses( - configuration); - if (this.configuration.isCloseAfterRecursion()) { - while (instances.size() > 0) { - logger.trace("Getting classes for remaining instances: " - + instances.size()); - Node next = instances.remove(0); - logger.trace("Getting classes for: " + next); - classes.addAll(next.expand(tsqc, manipulator)); - if (classes.size() >= manipulator.breakSuperClassRetrievalAfter) { - break; - } - } - } - Vector<Node> tmp = new Vector<Node>(); - int i = 0; - while (classes.size() > 0) { - logger.trace("Remaining classes: " + classes.size()); - // Iterator<Node> it=classes.iterator(); - // Node next =(Node) it.next(); - // classes.remove(next); - Node next = classes.remove(0); - - if (!hadAlready.contains(next.getURI().toString())) { - logger.trace("Getting SuperClass for: " + next); - // System.out.println(hadAlready.size()); - hadAlready.add(next.getURI().toString()); - tmp = next.expand(typedSparqlQuery, manipulator); - classes.addAll(tmp); - tmp = new Vector<Node>(); - // if(i % 50==0)System.out.println("got "+i+" extra classes, - // max: "+manipulator.breakSuperClassRetrievalAfter); - i++; - if (i >= manipulator.breakSuperClassRetrievalAfter) { - break; - } - } - // System.out.println("Skipping"); - - // if - // (classes.size()>=manipulator.breakSuperClassRetrievalAfter){break;} - - } - // System.out.println((System.currentTimeMillis()-time)+""); - - } - return n; - - } - - - -} Deleted: trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2008-05-19 10:14:48 UTC (rev 896) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2008-05-19 10:20:14 UTC (rev 897) @@ -1,157 +0,0 @@ -/** - * Copyright (C) 2007, Sebastian Hellmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.kb.sparql; - -import java.net.URI; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.log4j.Logger; -import org.dllearner.core.KnowledgeSource; -import org.dllearner.kb.sparql.configuration.Configuration; -import org.dllearner.kb.sparql.configuration.SparqlEndpoint; -import org.dllearner.kb.sparql.configuration.SparqlQueryType; -import org.dllearner.kb.sparql.datastructure.Node; -import org.dllearner.utilities.Statistics; - -/** - * An object of this class encapsulates everything. - * - * @author Sebastian Hellmann - * - */ -public class Manager { - - private Configuration configuration; - private TypedSparqlQuery typedSparqlQuery; - private ExtractionAlgorithm extractionAlgorithm; - - private static Logger logger = Logger - .getLogger(KnowledgeSource.class); - - - public void useConfiguration(SparqlQueryType SparqlQueryType, - SparqlEndpoint SparqlEndpoint, Manipulator manipulator, - int recursiondepth, boolean getAllSuperClasses, - boolean closeAfterRecursion, String cacheDir) { - - this.configuration = new Configuration(SparqlEndpoint, SparqlQueryType, - manipulator, recursiondepth, getAllSuperClasses, - closeAfterRecursion, cacheDir); - //System.out.println(this.configuration); - this.typedSparqlQuery = new TypedSparqlQuery(configuration); - this.extractionAlgorithm = new ExtractionAlgorithm(configuration); - - } - - public String extract(URI uri) { - // this.TypedSparqlQuery.query(uri); - // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); - System.out.println("Start extracting"); - - Node n = extractionAlgorithm.expandNode(uri, typedSparqlQuery); - Set<String> s = n.toNTriple(); - String nt = ""; - for (String str : s) { - nt += str + "\n"; - } - return nt; - } - - public String extract(Set<String> instances) { - // this.TypedSparqlQuery.query(uri); - // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); - System.out.println("Start extracting"); - SortedSet<String> ret = new TreeSet<String>(); - int progress=0; - for (String one : instances) { - progress++; - logger.info("Progress: "+progress+" of "+instances.size()+" finished"); - try { - Node n = extractionAlgorithm.expandNode(new URI(one), - typedSparqlQuery); - ret.addAll(n.toNTriple()); - } catch (Exc... [truncated message content] |