From: <ku...@us...> - 2007-08-29 08:32:48
|
/*
 * Revision: 99
 * http://dl-learner.svn.sourceforge.net/dl-learner/?rev=99&view=rev
 * Author: kurzum
 * Date: 2007-08-29 01:32:41 -0700 (Wed, 29 Aug 2007)
 *
 * Log Message:
 * -----------
 * sparqlmodule sourcecode
 *
 * Added Paths:
 * -----------
 * trunk/src/dl-learner/org/dllearner/modules/sparql/
 * trunk/src/dl-learner/org/dllearner/modules/sparql/Cache.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/OntologyCollector.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/PartialOntology.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/QueryMaker.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/SimpleHTTPRequest.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/SparqlFilter.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/SparqlModule.java
 * trunk/src/dl-learner/org/dllearner/modules/sparql/Util.java
 *
 * The sources below are the files added by this revision, one compilation
 * unit after the other. Each unit starts with a banner naming its path.
 */

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/Cache.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

/**
 * Simple file based cache for SPARQL results.
 *
 * <p>The class plays two roles: constructed with a directory path it is the
 * cache itself; constructed with content and query it is a single serialized
 * cache entry (one entry per file).
 */
public class Cache implements Serializable {

    final static long serialVersionUID = 104;

    // Only meaningful on the "cache itself" instance, hence not serialized.
    transient String basedir = "";
    transient String fileending = ".cache";

    // State of a single cache entry.
    long timestamp;
    String content = "";
    long daysoffreshness = 15;
    long multiplier = 24 * 60 * 60 * 1000; // h m s ms
    String sparqlquery = "";

    /**
     * Creates the cache itself, rooted at the given directory. The directory
     * (including missing parents) is created when it does not exist.
     *
     * @param path directory that holds the cache files
     */
    public Cache(String path) {
        this.basedir = path + File.separator;
        File dir = new File(path);
        if (!dir.exists() && !dir.mkdirs()) {
            System.err.println("Cache: could not create cache directory " + dir.getAbsolutePath());
        }
    }

    /**
     * Creates a single cache entry stamped with the current time.
     *
     * @param c      the cached content
     * @param sparql the query that produced the content
     */
    public Cache(String c, String sparql) {
        this.content = c;
        this.sparqlquery = sparql;
        this.timestamp = System.currentTimeMillis();
    }

    /**
     * Looks up a cached result.
     *
     * @param key    cache key (used to derive the file name)
     * @param sparql query the entry must have been stored for
     * @return the cached content, or {@code null} when there is no entry, the
     *         entry is older than {@code daysoffreshness} days, or it was
     *         stored for a different query
     */
    public String get(String key, String sparql) {
        if (key == null) {
            return null;
        }
        Cache entry = readFromFile(makeFilename(key));
        if (entry == null || !entry.checkFreshness() || !entry.validate(sparql)) {
            return null;
        }
        return entry.content;
    }

    /**
     * Stores a result under the given key, replacing any previous entry.
     *
     * @param key     cache key (used to derive the file name)
     * @param content content to cache
     * @param sparql  query that produced the content
     */
    public void put(String key, String content, String sparql) {
        if (key == null) {
            return;
        }
        putIntoFile(makeFilename(key), new Cache(content, sparql));
    }

    /** Maps a key to its cache file: base directory + URL-encoded key + file ending. */
    String makeFilename(String key) {
        try {
            return basedir + URLEncoder.encode(key, "UTF-8") + fileending;
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform must support.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /** @return {@code true} while the entry is at most {@code daysoffreshness} days old */
    boolean checkFreshness() {
        return (System.currentTimeMillis() - this.timestamp) <= (daysoffreshness * multiplier);
    }

    /** @return {@code true} if this entry was stored for exactly the given query */
    boolean validate(String sparql) {
        return this.sparqlquery != null && this.sparqlquery.equals(sparql);
    }

    /**
     * Creates the file if it does not exist yet.
     *
     * @param Filename path of the file to create
     */
    public void checkFile(String Filename) {
        File file = new File(Filename);
        if (!file.exists()) {
            try {
                file.createNewFile();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Serializes an entry into the given file, overwriting it. Failures are
     * reported but not propagated: caching is best effort.
     *
     * @param Filename target file
     * @param content  entry to store
     */
    public void putIntoFile(String Filename, Cache content) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(Filename, false);
            ObjectOutputStream out = new ObjectOutputStream(fos);
            out.writeObject(content);
            // ObjectOutputStream buffers internally; without this flush the
            // tail of the entry may never reach the file.
            out.flush();
        } catch (IOException e) {
            System.out.println("Cache: could not write " + Filename + ": " + e);
        } finally {
            closeQuietly(fos);
        }
    }

    /**
     * Reads an entry from the given file.
     *
     * @param Filename file to read
     * @return the entry, or {@code null} when the file does not exist or does
     *         not contain a readable entry
     */
    public Cache readFromFile(String Filename) {
        File file = new File(Filename);
        if (!file.isFile()) {
            return null; // plain cache miss
        }
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            // Only files written by putIntoFile are expected here; do not
            // point the cache at a directory holding untrusted files.
            Object stored = new ObjectInputStream(fis).readObject();
            return (stored instanceof Cache) ? (Cache) stored : null;
        } catch (IOException e) {
            System.out.println("Cache: unreadable entry " + Filename + ": " + e);
            return null;
        } catch (ClassNotFoundException e) {
            System.out.println("Cache: unreadable entry " + Filename + ": " + e);
            return null;
        } finally {
            closeQuietly(fis);
        }
    }

    /** Closes a stream, ignoring a failure of the close itself. */
    private static void closeQuietly(Closeable stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // nothing sensible left to do
            }
        }
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/OntologyCollector.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

/**
 * Collects a fragment of an ontology by recursively querying a SPARQL
 * endpoint for the outgoing triples of a list of start subjects. The result
 * is kept as a set of N-Triples lines.
 */
public class OntologyCollector {

    boolean print_flag = false;
    SimpleHTTPRequest s;
    QueryMaker q;
    Cache c;
    InetAddress ia;
    SparqlFilter sf;
    String[] subjectList;
    int numberOfRecursions;
    HashSet<String> properties;
    HashSet<String> classes;
    HashSet<String> instances;
    HashSet<String> triples;

    String subclass = "http://www.w3.org/2000/01/rdf-schema#subClassOf";
    String type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
    String objectProperty = "http://www.w3.org/2002/07/owl#ObjectProperty";
    String classns = "http://www.w3.org/2002/07/owl#Class";
    String thing = "http://www.w3.org/2002/07/owl#Thing";

    // URI fragments that mark a resource as a class.
    String[] defaultClasses = {
        "http://dbpedia.org/class/yago",
        "http://dbpedia.org/resource/Category:",
        "http://dbpedia.org/resource/Template:",
        "http://www.w3.org/2004/02/skos/core",
        "http://dbpedia.org/class/"}; // TODO error: yago is missing here

    /**
     * @param subjectList        start subjects (URIs)
     * @param numberOfRecursions recursion depth; 0 collects nothing
     * @param filterMode         filter mode passed to {@link SparqlFilter}
     * @param FilterPredList     custom predicate filters (may be null)
     * @param FilterObjList      custom object filters (may be null)
     * @param defClasses         replacement for the default class markers;
     *                           ignored when null or empty
     */
    public OntologyCollector(String[] subjectList, int numberOfRecursions,
            int filterMode, String[] FilterPredList, String[] FilterObjList, String[] defClasses) {
        this.subjectList = subjectList;
        this.numberOfRecursions = numberOfRecursions;

        this.s = new SimpleHTTPRequest();
        this.q = new QueryMaker();
        this.c = new Cache("cache");
        if (defClasses != null && defClasses.length > 0) {
            this.defaultClasses = defClasses;
        }

        // Initialise all state before the DNS lookup so that a failed lookup
        // cannot leave the collections null.
        this.properties = new HashSet<String>();
        this.classes = new HashSet<String>();
        this.instances = new HashSet<String>();
        this.triples = new HashSet<String>();
        this.sf = new SparqlFilter(filterMode, FilterPredList, FilterObjList);

        try {
            this.ia = InetAddress.getByName("dbpedia.openlinksw.com");
        } catch (UnknownHostException e) {
            // Without an address only cached results can be used.
            e.printStackTrace();
        }
    }

    /**
     * Runs the collection for all start subjects, adds the type triples and
     * returns everything as one N-Triples document.
     *
     * @return concatenation of all collected triples (order unspecified)
     */
    public String collectOntology() {
        getRecursiveList(subjectList, numberOfRecursions);
        finalize();
        StringBuilder document = new StringBuilder();
        for (String triple : triples) {
            document.append(triple);
        }
        return document.toString();
    }

    /** Calls {@link #getRecursive(String, int)} for every subject; null is treated as empty. */
    public void getRecursiveList(String[] subjects, int NumberofRecursions) {
        if (subjects == null) {
            return;
        }
        for (String subject : subjects) {
            getRecursive(subject, NumberofRecursions);
        }
    }

    /**
     * Fetches the triples of one subject (from the cache if possible) and
     * recurses into the objects found until the depth is used up. Failures
     * for one subject are reported and do not abort the collection.
     *
     * @param StartingSubject   subject URI
     * @param NumberofRecursions remaining depth; nothing happens for values <= 0
     */
    public void getRecursive(String StartingSubject, int NumberofRecursions) {
        System.out.print("SparqlModul: Depth: " + NumberofRecursions + " @ " + StartingSubject + " ");
        if (NumberofRecursions <= 0) {
            return;
        }
        int remaining = NumberofRecursions - 1;
        try {
            String sparql = q.makeQueryFilter(StartingSubject, this.sf);
            p(sparql);
            p("*******************");

            String xml = c.get(StartingSubject, sparql);
            if (xml == null || xml.length() == 0) {
                // A null address would make Socket connect to loopback.
                xml = (ia == null) ? "" : s.sendAndReceive(ia, 8890, sparql);
                // sendAndReceive yields "" on failure; caching that would
                // hide the subject for the whole freshness period.
                if (xml != null && xml.length() > 0) {
                    c.put(StartingSubject, xml, sparql);
                }
                System.out.print("\n");
            } else {
                System.out.println("FROM CACHE");
            }
            p(xml);
            p("***********************");

            String[] newSubjects = processResult(StartingSubject, xml);
            if (remaining > 0) {
                for (String next : newSubjects) {
                    getRecursive(next, remaining);
                }
            }
        } catch (Exception e) {
            // boundary: keep collecting the other subjects
            e.printStackTrace();
        }
    }

    /**
     * Extracts the predicate/object pairs from a SPARQL XML result and
     * records each as a triple of the given subject.
     *
     * @param subject subject the result belongs to
     * @param xml     SPARQL XML result; null or empty yields no triples
     * @return the objects that were accepted, i.e. the subjects of the next level
     */
    public String[] processResult(String subject, String xml) {
        ArrayList<String> accepted = new ArrayList<String>();
        for (String[] pair : Util.extractPredicateObjectPairs(xml)) {
            processTriples(subject, pair[0], pair[1], accepted);
        }
        return accepted.toArray(new String[accepted.size()]);
    }

    /**
     * Records one triple, classifying the object as class or instance.
     * In filter mode 2 skos:broader is mapped to rdfs:subClassOf and
     * skos:subject to rdf:type, and category objects reached through a
     * non-skos predicate are dropped.
     *
     * @param s  subject
     * @param p  predicate
     * @param o  object
     * @param al receives the object if the triple is accepted
     */
    public void processTriples(String s, String p, String o, ArrayList<String> al) {
        String t = "/Category";
        if (s.equals(t) || o.equals(t)) {
            return;
        }

        if (sf.mode == 2) {
            if (o.startsWith("http://dbpedia.org/resource/Category:")
                    && !p.startsWith("http://www.w3.org/2004/02/skos/core")) {
                return;
            }
            if (p.equals("http://www.w3.org/2004/02/skos/core#broader")) {
                p = subclass;
            } else if (p.equals("http://www.w3.org/2004/02/skos/core#subject")) {
                p = type;
            }
        }

        // save for further processing
        al.add(o);

        if (isClass(o)) {
            classes.add(o);
            // class -> class is a subclass relation, instance -> class a typing
            p = isClass(s) ? subclass : type;
        } else {
            instances.add(o);
            this.properties.add(p);
        }

        this.triples.add(makeTriples(s, p, o));
    }

    /** Formats one N-Triples line; all three terms are written as URIs. */
    public String makeTriples(String s, String p, String o) {
        return "<" + s + "> <" + p + "> <" + o + ">.\n";
    }

    /** @return {@code true} if the URI contains one of the class markers */
    public boolean isClass(String obj) {
        for (String defclass : defaultClasses) {
            if (obj.contains(defclass)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds the type triples for all collected properties, classes and
     * instances. Kept under this name for existing callers.
     *
     * <p>Note: this overrides {@link Object#finalize()}, so the garbage
     * collector may call it again; that is harmless because triples are
     * stored in a set.
     */
    @Override
    public void finalize() {
        typeProperties();
        typeClasses();
        typeInstances();
    }

    /** Types every collected property outside the RDF/OWL namespaces as owl:ObjectProperty. */
    public void typeProperties() {
        String rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns";
        String owlns = "http://www.w3.org/2002/07/owl";
        for (String current : properties) {
            if (current.equals(subclass) || current.contains(rdfns) || current.contains(owlns)) {
                continue;
            }
            this.triples.add(makeTriples(current, type, objectProperty));
        }
    }

    /** Types every collected class as owl:Class. */
    public void typeClasses() {
        for (String current : classes) {
            this.triples.add(makeTriples(current, type, classns));
        }
    }

    /** Types every collected instance as owl:Thing. */
    public void typeInstances() {
        for (String current : instances) {
            this.triples.add(makeTriples(current, type, thing));
        }
    }

    /** Debug output, printed only when {@code print_flag} is set. */
    public void p(String s) {
        if (print_flag) {
            System.out.println(s);
        }
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/PartialOntology.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;

import org.dllearner.ConfigurationOption;
import org.dllearner.dl.AtomicConcept;
import org.dllearner.dl.Individual;
import org.dllearner.dl.KB;
import org.dllearner.modules.PreprocessingModule;

/**
 * Earlier variant of the SPARQL preprocessing module that writes triples
 * straight to a file while collecting them.
 *
 * <p>NOTE(review): {@link #preprocess} currently only registers the import and
 * creates an empty file; the collection calls were already disabled in the
 * original source. {@link SparqlModule} appears to be the active module.
 */
public class PartialOntology implements PreprocessingModule {
    SimpleHTTPRequest s;
    QueryMaker q;
    Cache c;
    InetAddress ia;
    FileWriter fw;
    HashSet<String> properties;
    HashSet<String> classes;
    HashSet<String> instances;
    String[] FilterPredList = null;
    String[] FilterObjList = null;

    // URI fragments that mark a resource as a class.
    String[] defaultClasses = {
        "http://dbpedia.org/class/yago",
        "http://dbpedia.org/resource/Category:",
        "http://www.w3.org/2004/02/skos/core",
        "http://dbpedia.org/class/"}; // TODO error: yago is missing here

    public PartialOntology() {
        this.s = new SimpleHTTPRequest();
        this.q = new QueryMaker();
        this.c = new Cache("cache");
        // Initialise the sets before the DNS lookup so that a failed lookup
        // cannot leave them null.
        this.properties = new HashSet<String>();
        this.classes = new HashSet<String>();
        this.instances = new HashSet<String>();
        try {
            this.ia = InetAddress.getByName("dbpedia.openlinksw.com");
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
    }

    public String getModuleName() {
        return "SparqlModule";
    }

    /**
     * Registers an N-TRIPLES import for a fresh, timestamp-named file in
     * {@code baseDir} and creates that file.
     */
    public void preprocess(KB kb,
            Map<AtomicConcept, SortedSet<Individual>> positiveExamples,
            Map<AtomicConcept, SortedSet<Individual>> negativeExamples,
            List<ConfigurationOption> confOptions,
            List<List<String>> functionCalls, String baseDir,
            boolean useQueryMode) {

        String filename = System.currentTimeMillis() + ".nt";
        ArrayList<String> importCall = new ArrayList<String>();
        importCall.add("import");
        importCall.add(filename);
        importCall.add("N-TRIPLES");
        functionCalls.add(importCall);

        String prefix = "";
        for (ConfigurationOption option : confOptions) {
            if (option.getOption().equals("hidePrefix")) {
                prefix = option.getStrValue();
            }
            System.out.println(option.getOption());
        }

        // Subjects the collection would start from once it is re-enabled.
        ArrayList<String> individuals = new ArrayList<String>();
        addMapToArrayList(individuals, positiveExamples);
        addMapToArrayList(individuals, negativeExamples);
        String[] subjectList = new String[individuals.size()];
        for (int i = 0; i < subjectList.length; i++) {
            subjectList[i] = prefix + individuals.get(i);
        }

        try {
            this.fw = new FileWriter(new File(baseDir + File.separator + filename), true);
            // collection disabled in the original:
            // getRecursiveList(subjectList, numberOfRecursions); finalize();
            System.out.println("****Finished");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (this.fw != null) {
                try {
                    this.fw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Appends the string form of every individual in the map to the list. */
    void addMapToArrayList(ArrayList<String> al, Map<AtomicConcept, SortedSet<Individual>> m) {
        for (SortedSet<Individual> individuals : m.values()) {
            for (Individual individual : individuals) {
                al.add(individual.toString());
            }
        }
    }

    /** Calls {@link #getRecursive(String, int)} for every subject; null is treated as empty. */
    public void getRecursiveList(String[] subjects, int NumberofRecursions) {
        if (subjects == null) {
            return;
        }
        for (String subject : subjects) {
            getRecursive(subject, NumberofRecursions);
        }
    }

    /**
     * Fetches the triples of one subject (from the cache if possible), writes
     * them out and recurses into the objects until the depth is used up.
     *
     * @param StartingSubject   subject URI
     * @param NumberofRecursions remaining depth; nothing happens for values <= 0
     */
    public void getRecursive(String StartingSubject, int NumberofRecursions) {
        System.out.print("Tiefe: " + NumberofRecursions + " @ " + StartingSubject + " ");
        if (NumberofRecursions <= 0) {
            return;
        }
        int remaining = NumberofRecursions - 1;
        try {
            String sparql = q.makeQueryFilter(StartingSubject, new SparqlFilter(0, null, null));
            String xml = c.get(StartingSubject, sparql);
            if (xml == null || xml.length() == 0) {
                // A null address would make Socket connect to loopback.
                xml = (ia == null) ? "" : s.sendAndReceive(ia, 8890, sparql);
                // Do not cache the "" that signals a failed request.
                if (xml != null && xml.length() > 0) {
                    c.put(StartingSubject, xml, sparql);
                }
                System.out.print("\n");
            } else {
                System.out.println("FROM CACHE");
            }

            String[] newSubjects = processResult(StartingSubject, xml);
            if (remaining > 0) {
                for (String next : newSubjects) {
                    getRecursive(next, remaining);
                }
            }
        } catch (Exception e) {
            // boundary: keep collecting the other subjects
            e.printStackTrace();
        }
    }

    /**
     * Extracts the predicate/object pairs from a SPARQL XML result, records
     * them and writes each as a triple of the given subject.
     *
     * @param subject subject the result belongs to
     * @param xml     SPARQL XML result; null or empty yields no triples
     * @return all objects found, i.e. the subjects of the next level
     */
    public String[] processResult(String subject, String xml) {
        ArrayList<String> objects = new ArrayList<String>();
        for (String[] pair : Util.extractPredicateObjectPairs(xml)) {
            String predicate = pair[0];
            String object = pair[1];

            // save for further processing
            objects.add(object);
            this.properties.add(predicate);
            if (isClass(object)) {
                classes.add(object);
            } else {
                instances.add(object);
            }
            write(makeTriples(subject, predicate, object));
        }
        return objects.toArray(new String[objects.size()]);
    }

    /**
     * Formats one N-Triples line. If the subject is a class the predicate is
     * replaced by rdfs:subClassOf.
     */
    public String makeTriples(String s, String p, String o) {
        String subclass = "http://www.w3.org/2000/01/rdf-schema#subClassOf";
        String predicate = isClass(s) ? subclass : p;
        return "<" + s + "> <" + predicate + "> <" + o + ">.\n";
    }

    /** Formats one N-Triples line without the subclass substitution. */
    public String makeTriplesNoSub(String s, String p, String o) {
        return "<" + s + "> <" + p + "> <" + o + ">.\n";
    }

    /**
     * Writes the type triples for all collected properties, classes and
     * instances. Kept under this name for existing callers.
     *
     * <p>Note: this overrides {@link Object#finalize()}, so the garbage
     * collector may call it as well.
     */
    @Override
    public void finalize() {
        typeProperties();
        typeClasses();
        typeInstances();
    }

    /** Types every collected property outside the RDF/OWL namespaces as owl:ObjectProperty. */
    public void typeProperties() {
        String rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns";
        String owlns = "http://www.w3.org/2002/07/owl";
        String p = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
        String o = "http://www.w3.org/2002/07/owl#ObjectProperty";
        for (String current : properties) {
            if (current.contains(rdfns) || current.contains(owlns)) {
                continue;
            }
            write(makeTriples(current, p, o));
        }
    }

    /** Types every collected class as owl:Class. */
    public void typeClasses() {
        String p = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
        String o = "http://www.w3.org/2002/07/owl#Class";
        for (String current : classes) {
            write(makeTriplesNoSub(current, p, o));
        }
    }

    /** Types every collected instance as owl:Thing. */
    public void typeInstances() {
        String p = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
        String o = "http://www.w3.org/2002/07/owl#Thing";
        for (String current : instances) {
            write(makeTriples(current, p, o));
        }
    }

    /** @return {@code true} if the URI contains one of the class markers */
    public boolean isClass(String obj) {
        for (String defclass : defaultClasses) {
            if (obj.contains(defclass)) {
                return true;
            }
        }
        return false;
    }

    /** Prints every DBpedia resource of the set as a {@code test("...").} line. */
    public void printHashSet(HashSet<String> h) {
        for (String current : h) {
            if (current.contains("http://dbpedia.org/resource/")) {
                System.out.println("test(\"" + current + "\").");
            }
        }
    }

    /** Writes to the output file and reports, rather than hides, a failure. */
    private void write(String text) {
        if (fw == null) {
            System.err.println("PartialOntology: no output file open, dropping: " + text.trim());
            return;
        }
        try {
            fw.write(text);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/QueryMaker.java
// ===================================================================
package org.dllearner.modules.sparql;

/**
 * Builds the SPARQL query that retrieves all predicate/object pairs of a
 * subject, restricted by a {@link SparqlFilter}.
 */
public class QueryMaker {

    int tempyago = 0;

    /**
     * Builds {@code SELECT * WHERE { <subject> ?predicate ?object. FILTER(...) }}.
     * Unless the filter allows literals, literal objects are excluded; every
     * predicate and object filter entry adds a negated regex condition.
     *
     * <p>The text produced for filters that exclude literals is kept exactly
     * as before, because {@link Cache} validates entries by comparing the
     * query string.
     *
     * @param subject subject URI (inserted verbatim between angle brackets)
     * @param sf      filter to apply
     * @return the query text
     */
    public String makeQueryFilter(String subject, SparqlFilter sf) {
        StringBuilder conditions = new StringBuilder();
        if (!sf.useLiterals) {
            conditions.append("(!isLiteral(?object))");
        }
        String[] predFilter = sf.getPredFilter();
        if (predFilter != null) {
            for (String p : predFilter) {
                conditions.append("\n").append(filterPredicate(p));
            }
        }
        String[] objFilter = sf.getObjFilter();
        if (objFilter != null) {
            for (String o : objFilter) {
                conditions.append("\n").append(filterObject(o));
            }
        }

        String filter = conditions.toString();
        if (sf.useLiterals) {
            // There is no leading term in this case, so the first condition
            // must not start with "&&".
            filter = filter.replaceFirst("^\\s*&&", "");
        }

        StringBuilder query = new StringBuilder();
        query.append("SELECT * WHERE { \n");
        query.append("<").append(subject).append("> ?predicate ?object.\n");
        if (filter.length() > 0) {
            query.append("FILTER( \n").append(filter).append(").");
        }
        query.append("}");
        return query.toString();
    }

    /**
     * Condition excluding objects that match the namespace.
     * NOTE(review): unlike {@link #filterPredicate} this applies regex to
     * ?object without str(); left unchanged because it alters the query sent
     * to the endpoint - confirm the endpoint's behaviour before changing it.
     */
    public String filterObject(String ns) {
        return "&&( !regex((?object), '" + ns + "') )";
    }

    /** Condition excluding predicates that match the namespace. */
    public String filterPredicate(String ns) {
        return "&&( !regex(str(?predicate), '" + ns + "') )";
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/SimpleHTTPRequest.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.Socket;
import java.net.URLEncoder;

/**
 * Minimal HTTP GET client for the SPARQL endpoint, working on a raw socket.
 *
 * <p>NOTE(review): the HTTP status line is not checked and the body is taken
 * as-is; a chunked response would not be decoded - confirm what the endpoint
 * sends.
 */
public class SimpleHTTPRequest {
    static final char value[] = {13, 10};
    /** CR LF, the HTTP line terminator. */
    static final String cut = new String(value);

    /**
     * Sends the query and returns the response body.
     *
     * @param ia     address of the endpoint
     * @param port   port of the endpoint
     * @param sparql query text
     * @return the response without the HTTP header, or "" if the request failed
     */
    public String sendAndReceive(InetAddress ia, int port, String sparql) {
        String retval = "";
        Socket server = null;
        try {
            server = new Socket(ia, port);

            // send request
            OutputStream out = server.getOutputStream();
            out.write(makeHeader(sparql).getBytes("UTF-8"));
            out.flush();

            // get response; decode explicitly instead of with the platform charset
            byte[] resp = readBuffer(new BufferedInputStream(server.getInputStream()));
            retval = subtractResponseHeader(new String(resp, "UTF-8"));
        } catch (Exception e) {
            // boundary: a failed request is reported and yields ""
            e.printStackTrace();
        } finally {
            if (server != null) {
                try {
                    server.close();
                } catch (IOException ignored) {
                    // nothing sensible left to do
                }
            }
        }
        return retval;
    }

    /**
     * Reads the stream until end of input.
     *
     * @param IS stream to drain (not closed by this method)
     * @return all bytes read
     * @throws IOException if reading fails
     */
    public static byte[] readBuffer(InputStream IS) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[0xffff];
        int nbytes;
        while ((nbytes = IS.read(buffer)) != -1) {
            collected.write(buffer, 0, nbytes);
        }
        return collected.toByteArray();
    }

    /**
     * Strips the HTTP header, i.e. everything up to and including the first
     * empty line.
     *
     * @param in full response text
     * @return the body; the input unchanged if it contains no empty line;
     *         "" for null
     */
    public String subtractResponseHeader(String in) {
        if (in == null) {
            return "";
        }
        String headerEnd = cut + cut;
        int index = in.indexOf(headerEnd);
        return (index < 0) ? in : in.substring(index + headerEnd.length());
    }

    /**
     * Builds the complete GET request for a query against the DBpedia
     * default graph, asking for SPARQL XML results.
     *
     * @param query query text; it is URL-encoded here
     * @return request line and headers, terminated by an empty line
     */
    public String makeHeader(String query) {
        String encodedQuery;
        try {
            encodedQuery = URLEncoder.encode(query, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform must support.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
        return "GET /sparql?default-graph-uri=http%3A%2F%2Fdbpedia.org&query="
            + encodedQuery
            + "&format=application%2Fsparql-results%2Bxml HTTP/1.1" + cut
            + "Host: dbpedia.openlinksw.com" + cut
            + "Connection: close" + cut
            + "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5" + cut
            + "Accept-Language: de-de,de;q=0.8,en-us;q=0.5,en;q=0.3" + cut
            + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" + cut
            + "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4 Web-Sniffer/1.0.24" + cut
            + cut;
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/SparqlFilter.java
// ===================================================================
package org.dllearner.modules.sparql;

/**
 * Predicate and object filter lists for the SPARQL queries.
 * Modes: 0 yago, 1 only categories, 2 skos + categories; any other value
 * uses the lists passed to the constructor.
 */
public class SparqlFilter {
    public int mode = 0;
    String[] PredFilter = null;
    String[] ObjFilter = null;
    boolean useLiterals = false;

    String[] yagoPredFilterDefault = {
        "http://www.w3.org/2004/02/skos/core",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/property/wikipage-",
        "http://www.w3.org/2002/07/owl#sameAs",
        "http://dbpedia.org/property/reference" };
    String[] yagoObjFilterDefault = {
        "http://dbpedia.org/resource/Category:Articles_",
        "http://dbpedia.org/resource/Category:Wikipedia_",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/resource/Category",
        "http://dbpedia.org/resource/Template",
        "http://upload.wikimedia.org/wikipedia/commons"};

    String[] onlyCatPredFilterDefault = {
        "http://www.w3.org/2004/02/skos/core",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/property/wikipage-",
        "http://www.w3.org/2002/07/owl#sameAs",
        "http://dbpedia.org/property/reference" };
    String[] onlyCatObjFilterDefault = {
        "http://dbpedia.org/resource/Category:Articles_",
        "http://dbpedia.org/resource/Category:Wikipedia_",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/class/yago",
        "http://dbpedia.org/resource/Template",
        "http://upload.wikimedia.org/wikipedia/commons"};

    String[] skosPredFilterDefault = {
        "http://www.w3.org/2004/02/skos/core#narrower",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/property/wikipage-",
        "http://www.w3.org/2002/07/owl#sameAs",
        "http://dbpedia.org/property/reference" };
    String[] skosObjFilterDefault = {
        "http://dbpedia.org/resource/Category:Articles_",
        "http://dbpedia.org/resource/Category:Wikipedia_",
        "http://xmlns.com/foaf/0.1/",
        "http://dbpedia.org/class/yago",
        "http://dbpedia.org/resource/Template",
        "http://upload.wikimedia.org/wikipedia/commons"};

    /**
     * @param mode filter mode, see the class comment. -1 means "custom lists"
     *             but falls back to mode 0 unless both lists are non-empty.
     * @param pred custom predicate filters, used for modes other than 0, 1, 2
     * @param obj  custom object filters, used for modes other than 0, 1, 2
     */
    public SparqlFilter(int mode, String[] pred, String[] obj) {
        if (mode == -1 && (pred == null || pred.length == 0 || obj == null || obj.length == 0)) {
            mode = 0;
        }
        // The parameter shadows the field; without this assignment callers
        // reading sf.mode always see 0.
        this.mode = mode;

        switch (mode) {
            case 0: // yago
                ObjFilter = yagoObjFilterDefault;
                PredFilter = yagoPredFilterDefault;
                break;
            case 1: // only categories
                ObjFilter = onlyCatObjFilterDefault;
                PredFilter = onlyCatPredFilterDefault;
                break;
            case 2: // skos + categories
                ObjFilter = skosObjFilterDefault;
                PredFilter = skosPredFilterDefault;
                break;
            default: // custom lists; never expose null to the query builder
                ObjFilter = (obj != null) ? obj : new String[0];
                PredFilter = (pred != null) ? pred : new String[0];
                break;
        }
    }

    /**
     * Same as {@link #SparqlFilter(int, String[], String[])}, additionally
     * choosing whether literal objects are retrieved.
     */
    public SparqlFilter(int mode, String[] pred, String[] obj, boolean uselits) {
        this(mode, pred, obj);
        this.useLiterals = uselits;
    }

    public String[] getObjFilter() {
        return this.ObjFilter;
    }

    public String[] getPredFilter() {
        return this.PredFilter;
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/SparqlModule.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;

import org.dllearner.ConfigurationOption;
import org.dllearner.Main;
import org.dllearner.dl.AtomicConcept;
import org.dllearner.dl.Individual;
import org.dllearner.dl.KB;
import org.dllearner.modules.PreprocessingModule;

/**
 * Preprocessing module that collects an ontology fragment around the example
 * individuals via SPARQL and registers it for import as an N-Triples file.
 */
public class SparqlModule implements PreprocessingModule {

    FileWriter fw;

    String[] FilterPredList = null;
    String[] FilterObjList = null;

    public SparqlModule() {
    }

    public String getModuleName() {
        return "Sparql Module v0.3";
    }

    /**
     * Reads the {@code sparqlModule.*} options, collects the ontology for all
     * positive and negative examples and writes it to a timestamp-named file
     * in {@code baseDir}. An import call for that file is appended to
     * {@code functionCalls}.
     */
    public void preprocess(KB kb,
            Map<AtomicConcept, SortedSet<Individual>> positiveExamples,
            Map<AtomicConcept, SortedSet<Individual>> negativeExamples,
            List<ConfigurationOption> confOptions,
            List<List<String>> functionCalls, String baseDir,
            boolean useQueryMode) {

        // temporary file
        String filename = System.currentTimeMillis() + ".nt";

        // register the import of that file
        ArrayList<String> importCall = new ArrayList<String>();
        importCall.add("import");
        importCall.add(filename);
        importCall.add("N-TRIPLES");
        functionCalls.add(importCall);

        // option values and their defaults
        int numberOfRecursions = 0;
        String prefix = "";
        int filterMode = -1;
        Set<String> predList = null;
        Set<String> objList = null;
        Set<String> classList = null;

        System.out.println("SparqlModul: Processing Options");
        Main.getConfMgr().addStringOption("preprocessingModule", new String[] {});

        for (ConfigurationOption option : confOptions) {
            if (option.getOption().equals("hidePrefix")) {
                prefix = option.getStrValue();
            }
            if (!option.getOption().equals("sparqlModule")) {
                continue;
            }
            // constant-first comparisons: the sub option may be absent
            String sub = option.getSubOption();
            if ("numberOfRecursion".equals(sub)) {
                numberOfRecursions = option.getIntValue();
                Main.getConfMgr().addIntegerOption("sparqlModule.numberOfRecursion", new Integer[] { 1, 3 });
            }
            if ("filterMode".equals(sub)) {
                filterMode = option.getIntValue();
                Main.getConfMgr().addIntegerOption("sparqlModule.filterMode", new Integer[] { 0, 2 });
            }
            if ("sparqlPredicateFilterList".equals(sub)) {
                predList = option.getSetValues();
                Main.getConfMgr().addSetOption("sparqlModule.sparqlPredicateFilterList");
            }
            if ("sparqlObjectFilterList".equals(sub)) {
                objList = option.getSetValues();
                Main.getConfMgr().addSetOption("sparqlModule.sparqlObjectFilterList");
            }
            if ("classList".equals(sub)) {
                classList = option.getSetValues();
                Main.getConfMgr().addSetOption("sparqlModule.classList");
            }
            // "useLiterals" is recognised but not evaluated yet
        }
        System.out.println("SparqlModul: Processing finished");

        // subjects for which information is retrieved
        String[] subjectList = makeSubjectList(prefix, positiveExamples, negativeExamples);

        try {
            this.fw = new FileWriter(new File(baseDir + File.separator + filename), true);
            System.out.println("SparqlModul: Collecting Ontology");
            OntologyCollector oc = new OntologyCollector(subjectList, numberOfRecursions,
                filterMode, Util.setToArray(predList), Util.setToArray(objList),
                Util.setToArray(classList));

            fw.write(oc.collectOntology());
            fw.flush();
            System.out.println("SparqlModul: ****Finished");
        } catch (Exception e) {
            // boundary: a failed collection is reported, not propagated
            e.printStackTrace();
        } finally {
            if (this.fw != null) {
                try {
                    this.fw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Flattens the positive and negative examples into one array of subject
     * names (positives first).
     *
     * <p>NOTE(review): the supplied prefix is deliberately discarded here, as
     * in the original source, so "hidePrefix" has no effect on the subjects.
     * Presumably the individuals already carry full URIs - confirm before
     * removing the reset.
     */
    String[] makeSubjectList(String prefix,
            Map<AtomicConcept, SortedSet<Individual>> positive,
            Map<AtomicConcept, SortedSet<Individual>> negative) {

        prefix = "";

        List<String> names = new ArrayList<String>();
        addIndividuals(names, positive);
        addIndividuals(names, negative);

        String[] subjectList = new String[names.size()];
        for (int i = 0; i < subjectList.length; i++) {
            subjectList[i] = prefix + names.get(i);
        }
        return subjectList;
    }

    /** Appends the string form of every individual in the map to the list. */
    private static void addIndividuals(List<String> target,
            Map<AtomicConcept, SortedSet<Individual>> examples) {
        for (SortedSet<Individual> individuals : examples.values()) {
            for (Individual individual : individuals) {
                target.add(individual.toString());
            }
        }
    }
}

// ===================================================================
// Added: trunk/src/dl-learner/org/dllearner/modules/sparql/Util.java
// ===================================================================
package org.dllearner.modules.sparql;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/** Small helpers shared by the SPARQL module classes. */
public class Util {

    /**
     * @param s set to convert, may be null
     * @return the elements as an array, or {@code null} for a null set
     */
    public static String[] setToArray(Set<String> s) {
        if (s == null) {
            return null;
        }
        return s.toArray(new String[s.size()]);
    }

    /** Replaces well-known DBpedia/SKOS namespace URIs by short prefixes. */
    public static String replaceNamespace(String s) {
        // the more specific yago namespace must be replaced before its parent
        s = s.replace("http://dbpedia.org/class/yago/", "yago:");
        s = s.replace("http://dbpedia.org/class/", "yago2:");
        s = s.replace("http://dbpedia.org/resource/Category:", "cat:");
        s = s.replace("http://dbpedia.org/resource/Template:", "temp:");
        s = s.replace("http://www.w3.org/2004/02/skos/core#", "skos:");
        s = s.replace("http://dbpedia.org/property/", "prop:");
        return s;
    }

    /** Prints every DBpedia resource of the set as a {@code test("...").} line. */
    public static void printHashSet(HashSet<String> h) {
        for (String current : h) {
            if (current.contains("http://dbpedia.org/resource/")) {
                System.out.println("test(\"" + current + "\").");
            }
        }
    }

    /**
     * Extracts (predicate, object) URI pairs from a SPARQL XML result by
     * plain text search for {@code <binding name="predicate"><uri>} and
     * {@code <binding name="object"><uri>}; the predicate binding is expected
     * to precede the object binding of the same row.
     *
     * <p>A predicate whose row has no URI object (the next predicate binding
     * comes before the next URI object) is skipped, so it is never paired
     * with the object of a later row. Truncated input ends the scan.
     *
     * @param xml result text, may be null
     * @return list of two-element arrays {predicate, object}; never null
     */
    static List<String[]> extractPredicateObjectPairs(String xml) {
        List<String[]> pairs = new ArrayList<String[]>();
        if (xml == null) {
            return pairs;
        }
        final String predOpen = "<binding name=\"predicate\"><uri>";
        final String objOpen = "<binding name=\"object\"><uri>";
        final String close = "</uri></binding>";

        int pos = 0;
        while (true) {
            int predStart = xml.indexOf(predOpen, pos);
            if (predStart < 0) {
                break;
            }
            predStart += predOpen.length();
            int predEnd = xml.indexOf(close, predStart);
            if (predEnd < 0) {
                break;
            }
            pos = predEnd + close.length();

            int objStart = xml.indexOf(objOpen, pos);
            if (objStart < 0) {
                break; // no URI objects left at all
            }
            int nextPred = xml.indexOf(predOpen, pos);
            if (nextPred >= 0 && nextPred < objStart) {
                continue; // this row has no URI object
            }
            objStart += objOpen.length();
            int objEnd = xml.indexOf(close, objStart);
            if (objEnd < 0) {
                break;
            }
            pairs.add(new String[] {
                xml.substring(predStart, predEnd),
                xml.substring(objStart, objEnd) });
            pos = objEnd + close.length();
        }
        return pairs;
    }
}

// This was sent by the SourceForge.net collaborative development platform,
// the world's largest Open Source development site.