From: <ku...@us...> - 2007-12-05 16:16:06
|
Revision: 329 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=329&view=rev Author: kurzum Date: 2007-12-05 08:15:59 -0800 (Wed, 05 Dec 2007) Log Message: ----------- some minor changes, role domain learning doesn't really work, because the instances have to be chosen in a smarter way. Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/SparqlEndpointRestructured.java trunk/src/dl-learner/org/dllearner/kb/sparql/ClassNode.java trunk/src/dl-learner/org/dllearner/kb/sparql/InstanceNode.java trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java trunk/src/dl-learner/org/dllearner/kb/sparql/PredefinedEndpoint.java trunk/src/dl-learner/org/dllearner/kb/sparql/PropertyNode.java Added Paths: ----------- trunk/examples/sparql/role_domain_placeOfBirth_yago.conf trunk/examples/sparql/role_range_placeOfBirth_yago.conf Added: trunk/examples/sparql/role_domain_placeOfBirth_yago.conf =================================================================== --- trunk/examples/sparql/role_domain_placeOfBirth_yago.conf (rev 0) +++ trunk/examples/sparql/role_domain_placeOfBirth_yago.conf 2007-12-05 16:15:59 UTC (rev 329) @@ -0,0 +1,58 @@ +/** + * + * solutions: + * + */ + + +// recursion depth of extraction algorithm +sparql2.recursionDepth = 1; + +// list of ignored roles +sparql2.predList = { + "http://www.w3.org/2004/02/skos/core", + "http://www.w3.org/2002/07/owl#sameAs", + "http://xmlns.com/foaf/0.1/", + "http://dbpedia.org/property/reference", + "http://dbpedia.org/property/website", + "http://dbpedia.org/property/wikipage" +}; + +// list of ignored objects +sparql2.objList = { + "http://dbpedia.org/resource/Category:Wikipedia_", + "http://dbpedia.org/resource/Category:Articles_", + "http://xmlns.com/foaf/0.1/", + "http://upload.wikimedia.org/wikipedia/commons", + "http://upload.wikimedia.org/wikipedia", + "http://www.geonames.org", + "http://www.w3.org/2006/03/wn/wn20/instances/synset", + "http://www4.wiwiss.fu-berlin.de/flickrwrappr", + "http://www.w3.org/2004/02/skos/core" +}; + +// use DBpedia endpoint +import("http://dbpedia.openlinksw.com:8890/sparql","SPARQL2"); + +// the set of objects as starting point for fragment selection +// (should be identical to the set of examples) +sparql2.instances = { + "http://dbpedia.org/resource/Angela_Merkel", + "http://dbpedia.org/resource/Gerhard_Schr%C3%B6der" +}; +sparql2.role = "http://dbpedia.org/property/placeOfBirth"; +sparql2.learnDomain=true; +sparql2.learnRange=false; + + +/** examples **/ ++"http://dbpedia.org/resource/Richard_Burton" ++"http://dbpedia.org/resource/Gregory_Bateson" ++"http://dbpedia.org/resource/Gennaro_Gattuso" ++"http://dbpedia.org/resource/Thomas_Bayes" ++"http://dbpedia.org/resource/W%C5%82adys%C5%82aw_Anders" +-"http://dbpedia.org/resource/Roman_Dacia" +-"http://dbpedia.org/resource/Petrovichi" +-"http://dbpedia.org/resource/Scotland" +-"http://dbpedia.org/resource/Lanuvium" +-"http://dbpedia.org/resource/Riga" \ No newline at end of file Added: trunk/examples/sparql/role_range_placeOfBirth_yago.conf =================================================================== --- trunk/examples/sparql/role_range_placeOfBirth_yago.conf (rev 0) +++ trunk/examples/sparql/role_range_placeOfBirth_yago.conf 2007-12-05 16:15:59 UTC (rev 329) @@ -0,0 +1,59 @@ +/** + * + * solutions: + *(http://dbpedia.org/class/yago/Isle109319456 OR http://dbpedia.org/class/yago/Location100027167) + */ + + +// recursion depth of extraction algorithm +sparql2.recursionDepth = 1; + +// list of ignored roles +sparql2.predList = { + "http://www.w3.org/2004/02/skos/core", + "http://www.w3.org/2002/07/owl#sameAs", + "http://xmlns.com/foaf/0.1/", + "http://dbpedia.org/property/reference", + "http://dbpedia.org/property/website", + "http://dbpedia.org/property/wikipage" +}; + +// list of ignored objects +sparql2.objList = { + "http://dbpedia.org/resource/Category:Wikipedia_", + "http://dbpedia.org/resource/Category:Articles_", + "http://xmlns.com/foaf/0.1/", + "http://upload.wikimedia.org/wikipedia/commons", + "http://upload.wikimedia.org/wikipedia", + "http://www.geonames.org", + "http://www.w3.org/2006/03/wn/wn20/instances/synset", + "http://www4.wiwiss.fu-berlin.de/flickrwrappr", + "http://www.w3.org/2004/02/skos/core" +}; + +// use DBpedia endpoint +import("http://dbpedia.openlinksw.com:8890/sparql","SPARQL2"); + +// the set of objects as starting point for fragment selection +// (should be identical to the set of examples) +sparql2.instances = { + "http://dbpedia.org/resource/Angela_Merkel", + "http://dbpedia.org/resource/Gerhard_Schr%C3%B6der" +}; + +sparql2.role = "http://dbpedia.org/property/placeOfBirth"; +sparql2.learnDomain=false; +sparql2.learnRange=true; + + +/** examples **/ ++"http://dbpedia.org/resource/Roman_Dacia" +//+"http://dbpedia.org/resource/Petrovichi" ++"http://dbpedia.org/resource/Scotland" ++"http://dbpedia.org/resource/Lanuvium" ++"http://dbpedia.org/resource/Riga" +-"http://dbpedia.org/resource/Richard_Burton" +-"http://dbpedia.org/resource/Gregory_Bateson" +-"http://dbpedia.org/resource/Gennaro_Gattuso" +-"http://dbpedia.org/resource/Thomas_Bayes" +-"http://dbpedia.org/resource/W%C5%82adys%C5%82aw_Anders" \ No newline at end of file Modified: trunk/src/dl-learner/org/dllearner/kb/SparqlEndpointRestructured.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/SparqlEndpointRestructured.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/SparqlEndpointRestructured.java 2007-12-05 16:15:59 UTC (rev 329) @@ -26,11 +26,11 @@ import java.net.MalformedURLException; import java.net.URI; import java.net.URL; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; +import java.util.Random; import java.util.Set; import org.dllearner.core.KnowledgeSource; @@ -83,6 +83,7 @@ private int breakSuperClassRetrievalAfter = 500; private boolean learnDomain = false; + private boolean learnRange = false; private String role; private String blankNodeIdentifier = "bnode"; @@ -160,7 +161,8 @@ options.add(new BooleanConfigOption("getAllSuperClasses", "If true then all superclasses are retrieved until the most general class (owl:Thing) is reached.", true)); options.add(new BooleanConfigOption("learnDomain", "learns the Domain for a Role")); - options.add(new StringConfigOption("role", "role to learn Domain from")); + options.add(new BooleanConfigOption("learnRange", "learns the Range for a Role")); + options.add(new StringConfigOption("role", "role to learn Domain/Range from")); options.add(new StringConfigOption("blankNodeIdentifier", "used to identify blanknodes in Tripels")); @@ -214,6 +216,8 @@ getAllSuperClasses = (Boolean) entry.getValue(); } else if (option.equals("learnDomain")) { learnDomain = (Boolean) entry.getValue(); + }else if (option.equals("learnRange")) { + learnRange = (Boolean) entry.getValue(); } else if (option.equals("role")) { role = (String) entry.getValue(); } else if (option.equals("blankNodeIdentifier")) { @@ -260,32 +264,76 @@ } // get Options for Filters - System.out.println("aaa"+predefinedFilter); + if (predefinedFilter >= 1) { sqt = PredefinedFilter.getFilter(predefinedFilter); } else { sqt = new SparqlQueryType("forbid", objList, predList, useLits + ""); - System.out.println(sqt); + } // give everything to the manager m.useConfiguration(sqt, sse, man, recursionDepth, getAllSuperClasses); try { String ont = ""; // used to learn a domain of a role - if (learnDomain) { - instances = m.getDomainInstancesForRole(role); + if (learnDomain || learnRange) { + Set<String> pos=new HashSet<String>(); + Set<String> neg=new HashSet<String>(); + if(learnDomain){ + pos = m.getDomainInstancesForRole(role); + neg = m.getRangeInstancesForRole(role); + }else if(learnRange){ + neg = m.getDomainInstancesForRole(role); + pos = m.getRangeInstancesForRole(role); + } + //choose 30 + + + Set<String> tmp=new HashSet<String>(); + for(String one:pos){ + tmp.add(one); + if(tmp.size()>=5)break; + } + pos=tmp; + System.out.println(pos.size()); + + tmp=new HashSet<String>(); + for(String one:neg){ + tmp.add(one); + if(tmp.size()>=5)break; + } + neg=tmp; + + instances=new HashSet<String>(); + instances.addAll( pos); + + instances.addAll(neg); + + for(String one:pos){ + System.out.println("+\""+one+"\""); + } + for(String one:neg){ + System.out.println("-\""+one+"\""); + } + + /*Random r= new Random(); + + + Object[] arr=instances.toArray(); + while(instances.size()>=30){ + + }*/ // add the role to the filter(a solution is always EXISTS // role.TOP) m.addPredicateFilter(role); //System.out.println(instances); // THIS is a workaround - for(String one:instances){ - System.out.println("+\""+one+"\""); - } + } // the actual extraction is started here ont = m.extract(instances); + System.out.println("Finished collecting Fragment"); if (dumpToFile) { String filename = System.currentTimeMillis() + ".nt"; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/ClassNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/ClassNode.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/ClassNode.java 2007-12-05 16:15:59 UTC (rev 329) @@ -23,6 +23,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.Vector; import org.dllearner.utilities.StringTuple; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/InstanceNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/InstanceNode.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/InstanceNode.java 2007-12-05 16:15:59 UTC (rev 329) @@ -23,6 +23,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.Vector; import org.dllearner.utilities.StringTuple; Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/Manager.java 2007-12-05 16:15:59 UTC (rev 329) @@ -22,6 +22,8 @@ import java.net.URI; import java.util.HashSet; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import org.dllearner.utilities.StringTuple; @@ -53,6 +55,7 @@ Set<StringTuple> t = this.typedSparqlQuery.getTupelsForRole(u); Set<String> ret = new HashSet<String>(); for (StringTuple one : t) { + ret.add(one.a); } return ret; @@ -68,6 +71,7 @@ Set<StringTuple> t = this.typedSparqlQuery.getTupelsForRole(u); Set<String> ret = new HashSet<String>(); for (StringTuple one : t) { + ret.add(one.b); } return ret; @@ -91,7 +95,7 @@ // this.TypedSparqlQuery.query(uri); // System.out.println(ExtractionAlgorithm.getFirstNode(uri)); System.out.println("Start extracting"); - Set<String> ret = new HashSet<String>(); + SortedSet<String> ret = new TreeSet<String>(); for (String one : instances) { try { @@ -101,12 +105,26 @@ e.printStackTrace(); } } - - String nt = ""; - for (String str : ret) { + System.out.println("Finished extracting, start conversion"); + StringBuffer nt = new StringBuffer(); + Object[] arr=ret.toArray(); + for (int i = 0; i < arr.length; i++) { + nt.append((String) arr[i]+"\n"); + if(i%1000==0)System.out.println(i+" of "+arr.length+" triples done"); + } + /* + String tmp=""; + while ( ret.size() > 0) { + tmp=ret.first(); + nt+=tmp; + ret.remove(tmp); + System.out.println(ret.size()); + + } + /*for (String str : ret) { nt += str + "\n"; - } - return nt; + }*/ + return nt.toString(); } public void addPredicateFilter(String str) { Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/PredefinedEndpoint.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/PredefinedEndpoint.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/PredefinedEndpoint.java 2007-12-05 16:15:59 UTC (rev 329) @@ -76,6 +76,22 @@ } return new SpecificSparqlEndpoint(u, "www4.wiwiss.fu-berlin.de", m); } + + /* + * it only has 4 classes + public static SpecificSparqlEndpoint dblp() { + URL u = null; + HashMap<String, String> m = new HashMap<String, String>(); + // m.put("default-graph-uri", "http://dbpedia.org"); + // m.put("format", "application/sparql-results.xml"); + try { + u = new URL("http://www4.wiwiss.fu-berlin.de/dblp/sparql"); + } catch (Exception e) { + e.printStackTrace(); + } + return new SpecificSparqlEndpoint(u, "www4.wiwiss.fu-berlin.de", m); + } + */ public static SpecificSparqlEndpoint govTrack() { URL u = null; HashMap<String, String> m = new HashMap<String, String>(); @@ -88,5 +104,55 @@ } return new SpecificSparqlEndpoint(u, "www.rdfabout.com", m); } + public static SpecificSparqlEndpoint revyu() { + URL u = null; + HashMap<String, String> m = new HashMap<String, String>(); + // m.put("default-graph-uri", "http://dbpedia.org"); + // m.put("format", "application/sparql-results.xml"); + //http://revyu.com/sparql?query=SELECT DISTINCT * WHERE {[] a ?c} + try { + u = new URL("http://revyu.com/sparql"); + } catch (Exception e) { + e.printStackTrace(); + } + return new SpecificSparqlEndpoint(u, "revyu.com", m); + } + // returns strange xml + /*public static SpecificSparqlEndpoint dbtune() { + URL u = null; + HashMap<String, String> m = new HashMap<String, String>(); + // m.put("default-graph-uri", "http://dbpedia.org"); + // m.put("format", "application/sparql-results.xml"); + //http://dbtune.org:2020/sparql/?query=SELECT DISTINCT * WHERE {[] a ?c}Limit 10 + http://dbtune.org:2020/evaluateQuery?repository=default&serialization=rdfxml&queryLanguage=SPARQL&query=SELECT+DISTINCT+*+WHERE+%7B%5B%5D+a+%3Fc%7D + &resultFormat=xml + &resourceFormat=ns&entailment=none + http://dbtune.org:2020/evaluateQuery + ?repository=default&serialization=rdfxml&queryLanguage=SPARQL + &query=SELECT+DISTINCT+*+WHERE+%7B%5B%5D+a+%3Fc%7D + &resultFormat=xml + &resourceFormat=ns&entailment=none + try { + u = new URL("http://dbtune.org:2020/sparql/"); + } catch (Exception e) { + e.printStackTrace(); + } + return new SpecificSparqlEndpoint(u, "dbtune.org", m); + }*/ + + public static SpecificSparqlEndpoint myopenlink() { + URL u = null; + HashMap<String, String> m = new HashMap<String, String>(); + m.put("default-graph-uri", "http://myopenlink.net/dataspace"); + m.put("format", "application/sparql-results.xml"); + //http://myopenlink.net:8890/sparql/?query=select+distinct+%3FConcept+where+%7B%5B%5D+a+%3FConcept%7D + try { + u = new URL("http://myopenlink.net:8890/sparql/"); + } catch (Exception e) { + e.printStackTrace(); + } + return new SpecificSparqlEndpoint(u, "myopenlink.net", m); + } + } Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/PropertyNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/PropertyNode.java 2007-12-05 13:09:58 UTC (rev 328) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/PropertyNode.java 2007-12-05 16:15:59 UTC (rev 329) @@ -23,6 +23,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; import java.util.Vector; import org.dllearner.utilities.StringTuple; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |