From: <seb...@us...> - 2011-12-07 16:20:49
|
Revision: 3486 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3486&view=rev Author: sebastianwtr Date: 2011-12-07 16:20:39 +0000 (Wed, 07 Dec 2011) Log Message: ----------- [tbsl exploration] Split Interation1 into two different cases and did the "simple" case . Modified the SQLite functions. And changed other functions to get it all running. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-12-07 16:15:38 UTC (rev 3485) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-12-07 16:20:39 UTC (rev 3486) @@ -9,34 +9,62 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; public class GetRessourcePropertys { - public ArrayList<String> getPropertys(String element) throws IOException{ - try{ - sendServerPropertyRequest(element); - return do_parsing("answer_property"); - } catch (Exception e){ - return null; - } + public HashMap<String,String> getPropertys(String element, String side) throws IOException{ + + return sendServerPropertyRequest(element,side); + } + /** * Get an uri and saves the properties of this resource * @param vergleich + * @return * @throws IOException */ - private void sendServerPropertyRequest(String vergleich) throws IOException{ + private HashMap<String,String> sendServerPropertyRequest(String vergleich, String side) throws IOException{ + /* + * + * For the second Iteration, I can just add the sparql property here. + */ - String bla123 = vergleich; + /* + * + * SELECT DISTINCT ?p WHERE {<http://dbpedia.org/resource/Berlin> ?y ?p.} für Berlin links der Property + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?s ?p WHERE {<http://dbpedia.org/resource/Berlin> ?p ?y. ?p rdfs:label ?s.} + * + * SELECT DISTINCT ?p WHERE {?y ?p <http://dbpedia.org/resource/Berlin>.} für Berlin rechts der Property + * PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?s ?p WHERE {?y ?p <http://dbpedia.org/resource/Berlin>. ?p rdfs:label ?s.} + * http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E+SELECT+DISTINCT+%3Fs+%3Fp+WHERE+{%3Fy+%3Fp+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2FBerlin%3E.+%3Fp+rdfs%3Alabel+%3Fs.}&format=text%2Fhtml&debug=on&timeout= + */ + + String vergleichorig = vergleich; + /*String bla123 = vergleich; //to get only the name bla123=bla123.replace("http://dbpedia.org/resource/Category:",""); bla123=bla123.replace("http://dbpedia.org/resource/",""); - vergleich=bla123; - String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3A"+vergleich+"+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; + vergleich=bla123;*/ + + String tmp_left="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query="+createServerRequest("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?s ?p WHERE {?y ?p <"+vergleichorig+">. ?p rdfs:label ?s.}")+"%0D%0A&format=text%2Fhtml&debug=on&timeout="; + //System.out.println("property right!!! : " +tmp_right); + String tmp_right="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query="+createServerRequest("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?s ?p WHERE {<"+vergleichorig+"> ?p ?y. ?p rdfs:label ?s.}")+"%0D%0A&format=text%2Fhtml&debug=on&timeout="; + + String verarbeitungsstring=null; + if(side.contains("RIGHT")) verarbeitungsstring=tmp_right; + if(side.contains("LEFT")) verarbeitungsstring=tmp_left; + + //just in case..... + if(!side.contains("LEFT") && !side.contains("RIGHT")) verarbeitungsstring=tmp_left; + + //String verarbeitungsstring="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3A"+vergleich+"+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; URL url; InputStream is; InputStreamReader isr; @@ -45,7 +73,7 @@ String result=""; try { - url = new URL(tmp); + url = new URL(verarbeitungsstring); is = url.openStream(); isr = new InputStreamReader(is); r = new BufferedReader(isr); @@ -60,9 +88,29 @@ System.out.println("Can not connect"); } - FileWriter w = new FileWriter("answer_property"); + /* FileWriter w = new FileWriter("answer_property"); w.write(result); w.close(); + */ + + HashMap<String,String> hm = new HashMap(); + result=result.replace("<th>s</th>",""); + result=result.replace("<th>p</th>",""); + result=result.replace("<table class=\"sparql\" border=\"1\">",""); + result=result.replace("<tr>",""); + result=result.replace("</tr>",""); + result=result.replace("\n", ""); + result=result.replace(" ", ""); + result=result.replaceFirst("<td>", ""); + + String[] tmp_array=result.split("</td><td>"); + + for(int i =1; i<=tmp_array.length-2;i=i+2) { + hm.put(tmp_array[i-1].toLowerCase(), tmp_array[i]); + //System.out.println(tmp_array[i-1].toLowerCase() + " " +tmp_array[i]); + } + + return hm; } @@ -85,4 +133,47 @@ return indexObject; } + + + + private String createServerRequest(String query){ + String anfrage=null; + anfrage=removeSpecialKeys(query); + anfrage=anfrage.replace("<","<"); + anfrage=anfrage.replace("%gt;",">"); + anfrage=anfrage.replace("&","&"); + //anfrage=anfrage.replaceAll("#>","%23%3E%0D%0A%"); + anfrage=anfrage.replace("#","%23"); + anfrage=anfrage.replace(" ","+"); + anfrage=anfrage.replace("/","%2F"); + anfrage=anfrage.replace(":","%3A"); + anfrage=anfrage.replace("?","%3F"); + anfrage=anfrage.replace("$","%24"); + //anfrage=anfrage.replaceAll("F>+","F%3E%0D%0A"); + anfrage=anfrage.replace(">","%3E"); + anfrage=anfrage.replace("<","%3C"); + anfrage=anfrage.replace("\"","%22"); + anfrage=anfrage.replace("\n","%0D%0A%09"); + anfrage=anfrage.replace("%%0D%0A%09","%09"); + anfrage=anfrage.replace("=","%3D"); + anfrage=anfrage.replace("@","%40"); + anfrage=anfrage.replace("&","%26"); + anfrage=anfrage.replace("(","%28"); + anfrage=anfrage.replace(")","%29"); + anfrage=anfrage.replace("%3E%0D%0A%25","%3E"); + //anfrage=anfrage.replaceAll("\n",".%0D%0A%09"); + return anfrage; + } + + private String removeSpecialKeys(String query){ + query=query.replace("\\",""); + //query=query.replaceAll("\a",""); + query=query.replace("\b",""); + query=query.replace("\f",""); + query=query.replace("\r",""); + query=query.replace("\t",""); + // query=query.replaceAll("\v",""); + return query; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-12-07 16:15:38 UTC (rev 3485) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-12-07 16:20:39 UTC (rev 3486) @@ -15,9 +15,13 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.Set; import net.didion.jwnl.JWNLException; +import net.didion.jwnl.data.POS; import org.dllearner.algorithm.tbsl.exploration.sax.ParseXmlHtml; import org.dllearner.algorithm.tbsl.nlp.WordNet; @@ -57,18 +61,15 @@ //Konstruktor public SparqlObject() throws MalformedURLException, ClassNotFoundException, SQLException{ - wordnet = new WordNet(); - //hm=hm_new; - /*hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity2",hm); - hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity1",hm);*/ + this.wordnet = new WordNet(); System.out.println("Loading SPARQL Templator"); - btemplator = new BasicTemplator(); - templator = new Templator(); + this.btemplator = new BasicTemplator(); + this.templator = new Templator(); System.out.println("Loading SPARQL Templator Done\n"); - System.out.println("Start Indexing Wikipedia URI's"); - myindex = new mySQLDictionary(); + System.out.println("Start Indexing"); + this.myindex = new mySQLDictionary(); - System.out.println("Done:Indexing Wikipedia URI's"); + System.out.println("Done:Indexing"); setExplorationdepthwordnet(1); //eigentlich immer mit 0 initialisieren setIterationdepth(1); @@ -128,17 +129,23 @@ System.out.println("The Questionparsing took "+ (endParsingTime-startParsingTime)+ " ms"); ArrayList<String> final_answer = new ArrayList<String>(); - //if(!lstquery.isEmpty()){ + if(lstquery.isEmpty()){ + saveNotParsedQuestions(question); + } //for each querry + //TODO: Add function that no qery is send to the server, if querylist==null for(ArrayList<String> querylist : lstquery){ - /* - * ################################################################################################# - */ - //only testfunction to save the generated queries in the tmp-folder + boolean startIterating=true; String query=""; - query=querylist.get(0).toString(); - if(getIterationdepth()==-1){ + if(querylist.get(0).contains("ERROR"))startIterating=false; + else query=querylist.get(0).toString(); + + //TODO: Somewhere is an error, because sometimes there is an double _ a __ and thats not allowed. + //fixing it now with an replace of "__" to "" + query=query.replace("__", ""); + + if(getIterationdepth()==-1&&startIterating==true){ String tmp = new String(); String s = null; BufferedReader in = null; @@ -181,7 +188,7 @@ * ################################################################################################# */ //Iteration 0 - if(getIterationdepth()==0){ + if(getIterationdepth()==0&&startIterating==true){ String tmp = new String(); String s = null; BufferedReader in = null; @@ -208,170 +215,26 @@ } String answer; answer=sendServerQuestionRequest(query); - final_answer.add(answer); - /*System.out.println(query); - if (query=="" || query==" "||query.length()==0) answer="Could not parse"; - System.out.println("Antwort: " + answer); - String out=tmp + "\n" + "Question: "+question + "\n"+"Query: " + query +"\n Anwer: "+answer+"\n\n##############################"; - - BufferedWriter outfile = new BufferedWriter( - new OutputStreamWriter( - new FileOutputStream( "/tmp/answer.txt" ) ) ); - - outfile.write(out); - outfile.close(); */ + final_answer.add("Begin:\n"+query +"\n"+answer+" \n End"); + } /* * ################################################################################################# */ //Iterration 1 - if(getIterationdepth()==1){ + if(getIterationdepth()==1&&startIterating==true){ - //asking server - String answer; - answer=sendServerQuestionRequest(query); - System.out.println(query); - //if Emty answer, get properties an look up the right property with levensthein - if(answer.contains("EmtyAnswer")){ - //TODO: get all information from the query - //TODO: maybe put the query + information in an array list of arraylist. each arraylist contains the query, the variables and the uris. Then iterate over the List and get the query for sending to server - String rescource=""; - - //get the resource of the query. always the last Item in the array! - //Funktioniert! - String resource_tmp=""; - int tmp_length=querylist.size(); - resource_tmp=querylist.get(tmp_length-1); - String[] array_tmp = resource_tmp.split(":"); - rescource=array_tmp[1]; - - - //the property we are looking for is always the second last in the array! - //Funktioniert! - String property_to_compare_with=""; - tmp_length=querylist.size(); - //second last - property_to_compare_with=querylist.get(tmp_length-2); - array_tmp = property_to_compare_with.split(":"); - property_to_compare_with=array_tmp[1]; - //System.out.println("property_to_compare_with: "+property_to_compare_with); - - - //contains uri AND string, every second is the string - //Funktioniert - ArrayList<String> properties = new ArrayList<String>(); - GetRessourcePropertys property = new GetRessourcePropertys(); - Boolean goOnAfterProperty = true; - try { - //using uri now, not the string - //properties=property.getPropertys(hm.get(rescource.toLowerCase())); - properties=property.getPropertys(getUriFromIndex(rescource.toLowerCase(),0)); - if (properties==null){ - final_answer.add("Error in getting Properties\n"); - goOnAfterProperty=false; - } - //System.out.println(properties); - } catch (IOException e) { - // TODO Auto-generated catch block - //e.printStackTrace(); - final_answer.add("Error in getting Properties\n"); - goOnAfterProperty=false; - - } - if(goOnAfterProperty==true){ - //property_to_compare_with mit der Liste der propertys vergleichen, und wenn der normalisierte Wert >= LvenstheinMin ist, einbauen und neue query erzeugen. - Levenshtein levensthein = new Levenshtein(); - ArrayList<String> new_queries= new ArrayList<String>(); - for(int i =1; i<=properties.size()-2;i=i+2){ - //double tmp=levensthein.nld(property_to_compare_with.toLowerCase(), properties.get(i).toLowerCase()); - double tmp=levensthein.computeLevenshteinDistance(property_to_compare_with.toLowerCase(), properties.get(i).toLowerCase()); - //create new query - //System.out.println(tmp); - //if(tmp>=LvenstheinMin){ - if(tmp<=3.0){ - //System.out.println(tmp); - //alte property uri mit neuer ersetzen: - String query_tmp=query; - //query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(i-1)); - query_tmp=query_tmp.replace(getUriFromIndex(property_to_compare_with.toLowerCase(),1),properties.get(i-1)); - //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); - new_queries.add(query_tmp); - } - - } - - System.out.println("Start Iterating Wordnet with "+property_to_compare_with+" and deept of "+explorationdepthwordnet); - ArrayList<String> semantics=new ArrayList<String>(); - ArrayList<String> tmp_semantics=new ArrayList<String>(); - ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); - semantics.add(property_to_compare_with); - tmp_semantics=semantics; - Boolean goOnAfterWordnet = true; - for(int i=0;i<=explorationdepthwordnet;i++){ - - try { - tmp_semantics=getSemantics(tmp_semantics); - if (tmp_semantics==null){ - goOnAfterWordnet=false; - final_answer.add("Error in searching Wordnet\n"); - } - else{ - //each word only one time - for(String k : tmp_semantics){ - if(!semantics.contains(k)) semantics.add(k); - } - } - - } catch (IOException e) { - // TODO Auto-generated catch block - //e.printStackTrace(); - goOnAfterWordnet=false; - final_answer.add("Error in searching Wordnet\n"); - - } - - } - - if(goOnAfterWordnet==true){ - // ArrayList<String> new_queries= new ArrayList<String>(); - - //TODO: Try, if it works, if you use only one loop: (b.lowerCase).contains(properties.get(h)) - for(int h=1;h<properties.size()-2;h=h+2){ - for(String b : semantics){ - //System.out.println(properties.get(h)); - //System.out.println(b); - if(properties.get(h).contains(b.toLowerCase())){ - if(!result_SemanticsMatchProperties.contains(properties.get(h))){ - //create new query - result_SemanticsMatchProperties.add(properties.get(h)); - String query_tmp=query; - - //query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(h-1)); - query_tmp=query_tmp.replace(getUriFromIndex(property_to_compare_with.toLowerCase(),1),properties.get(h-1)); - //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); - new_queries.add(query_tmp); - } - } - } - } - - for(String bla : new_queries){ - String answer_tmp; - answer_tmp=sendServerQuestionRequest(bla); - if(!answer_tmp.contains("EmtyAnswer")){ - final_answer.add(answer_tmp); - } - } - } - } - } + //4, because of query + three conditions for the simple case + if(querylist.size()==4)final_answer=simpleIteration1Case(querylist, query); + //if we have more conditions, we need to change the way of replacing the uris got from wordnet etc + } /* * ################################################################################################# */ //Iterration 2 - if(getIterationdepth()==2){ + if(getIterationdepth()==2&&startIterating==true){ } } @@ -403,9 +266,21 @@ String out=""; for(String answer : final_answer){ + //only answered question + // if(!answer.contains("Error in searching Wordnet with word") && !answer.contains("EmtyAnswer")&& !answer.contains("Error in getting Properties"))out=out+ "\n"+answer+"\n"; + + /* + //only questions with wordnet error + if(answer.contains("Error in searching Wordnet with word"))out=out+ "\n"+answer+"\n"; - out=out+ "\n"+answer+"\n"; - + //only questions with emty answers + if(answer.contains("EmtyAnswer"))out=out+ "\n"+answer+"\n"; +*/ + //only questions with Error in Properties + if(answer.contains("Error in getting Properties"))out=out+ "\n"+answer+"\n"; + + + } System.out.println(question); System.out.println(out); @@ -416,13 +291,227 @@ outfile.write(tmp+"\n"+question+" :\n"+out); outfile.close(); } - - // string=string.replaceAll("?", ""); - //create_Sparql_query_old(string); - - // } + + /** + * Is the function for the Case, you are in Iteration one and have only one triple of condition (s,p,o). + * @param querylist + * @param query + * @return a list with answers from the Server + * @throws SQLException + * @throws JWNLException + */ + private ArrayList<String> simpleIteration1Case(ArrayList<String> querylist, String query) throws SQLException, + JWNLException { + //asking server + String answer; + ArrayList<String> final_answer=new ArrayList<String>(); + + /* + * First try the original query on the server. If that doesnt work, try it with Iteration + */ + answer=sendServerQuestionRequest(query); + if(answer.contains("EmtyAnswer")){ + + String resource=""; + String property_to_compare_with=""; + String sideOfProperty="LEFT"; + + + int tmpcounter=0; + for(String s : querylist){ + //we dont need the first one, because thats the query itself + tmpcounter=tmpcounter+1; + if(tmpcounter>=1){ + if(s.contains("LEFT")){ + sideOfProperty="LEFT"; + resource=s.replace("LEFT",""); + } + if(s.contains("RIGHT")){ + sideOfProperty="RIGHT"; + resource=s.replace("RIGHT",""); + } + if(s.contains("PROPERTY")){ + property_to_compare_with=s.replace("PROPERTY",""); + } + + } + } + System.out.println("Property to compare:: "+ property_to_compare_with); + System.out.println("Resource: "+ resource); + //contains uri AND string, every second is the string + HashMap<String,String> properties = new HashMap<String, String>(); + GetRessourcePropertys property = new GetRessourcePropertys(); + Boolean goOnAfterProperty = true; + try { + /* + * TODO: Have to check now, if we need a right Property or a left one + */ + properties=property.getPropertys(getUriFromIndex(resource.toLowerCase(),0),sideOfProperty); + if (properties==null){ + //final_answer.add("Error in getting Properties\n"); + + final_answer.add("Begin:\n"+query +"\nError in getting Properties \n End"); + goOnAfterProperty=false; + } + //System.out.println(properties); + } catch (IOException e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + + final_answer.add("Begin:\n"+query +"\nError in getting Properties \n End"); + goOnAfterProperty=false; + + } + if(goOnAfterProperty==true){ + //property_to_compare_with mit der Liste der propertys vergleichen, und wenn der normalisierte Wert >= LvenstheinMin ist, einbauen und neue query erzeugen. + ArrayList<String> new_queries= new ArrayList<String>(); + for (Entry<String, String> entry : properties.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + double tmp=Levenshtein.computeLevenshteinDistance(property_to_compare_with.toLowerCase(), key); + + /* + * TODO: Implement Normalised levensthein + */ + if(tmp<=3.0){ + //alte property uri mit neuer ersetzen: + String query_tmp=query; + String test = getUriFromIndex(property_to_compare_with.toLowerCase(),1); + //query_tmp=query_tmp.replace(test,properties.get(i-1)); + query_tmp=query_tmp.replace(test,value); + new_queries.add(query_tmp); + } + + } + + System.out.println("Start Iterating Wordnet with "+property_to_compare_with+" and deept of "+explorationdepthwordnet); + ArrayList<String> semantics=new ArrayList<String>(); + ArrayList<String> tmp_semantics=new ArrayList<String>(); + ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); + semantics.add(property_to_compare_with); + + //first check, if there is a singular form in the wordnet dictionary.. eg children -> child + String _temp_=myindex.getWordnetHelp(property_to_compare_with); + if(_temp_==null){ + tmp_semantics=semantics; + } + else{ + semantics.clear(); + semantics.add(_temp_); + tmp_semantics=semantics; + } + Boolean goOnAfterWordnet = true; + for(int i=0;i<=explorationdepthwordnet;i++){ + + try { + tmp_semantics=getSemantics(tmp_semantics); + if (tmp_semantics==null){ + goOnAfterWordnet=false; + final_answer.add("Begin:\n"+query +"\n Error in searching Wordnet with word "+semantics+" \n End"); + + } + else{ + //each word only one time + for(String k : tmp_semantics){ + if(!semantics.contains(k)) semantics.add(k); + } + } + + } catch (IOException e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + goOnAfterWordnet=false; + final_answer.add("Begin:\n"+query +"\n Error in searching Wordnet with word "+semantics+" \n End"); + + } + + } + + if(goOnAfterWordnet==true){ + + for (Entry<String, String> entry : properties.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + + for(String b : semantics){ + if(key.contains(b.toLowerCase())){ + if(!result_SemanticsMatchProperties.contains(key)){ + //create new query + result_SemanticsMatchProperties.add(key); + String query_tmp=query; + String test = getUriFromIndex(property_to_compare_with.toLowerCase(),1); + query_tmp=query_tmp.replace(test,value); + System.out.println("New query after wordnet: "+ query_tmp); + new_queries.add(query_tmp); + } + } + } + } + + for(String bla : new_queries){ + String answer_tmp; + answer_tmp=sendServerQuestionRequest(bla); + System.out.println("Antwort vom Server: "+answer_tmp); + final_answer.add("Begin:\n"+bla +"\n"+answer_tmp+" \n End"); + } + } + } + } + + return final_answer; + } + + + /** + * Iterates thru the conditions and returns an array, where one can see, if the Property is left or right from the resource + * @param query + * @return returns an array, where one can see, if the Property is left or right from the resource + */ + private static ArrayList<String> createLeftAndRightPropertyArray(String query){ + query=query.replace(" ", " "); + Pattern p = Pattern.compile (".*\\{(.*\\<http.*)\\}.*"); + Matcher m = p.matcher (query); + ArrayList<String> lstquery = new ArrayList<String>(); + while (m.find()) { + String tmp= m.group(1); + tmp=tmp.replace("http://dbpedia.org/resource/","").replace("http://dbpedia.org/property/", "").replace("http://dbpedia.org/ontology/", ""); + + //split on . for sign for end of conditions + String[] firstArray=tmp.split("\\."); + for(String i : firstArray){ + + String[] secondArray=i.split(" "); + //always in three counts + int counter=0; + for(String j : secondArray){ + counter=counter+1; + //only one condition + if(secondArray.length%3==0){ + if(counter==1&&j.contains("<")){ + //position of Property is right + lstquery.add("RIGHT"+j.replace("<", "").replace(">","")); + } + else if(counter==3&&j.contains("<")){ + //position of Property is left + lstquery.add("RIGHT"+j.replace("<", "").replace(">","")); + } + else if(counter==2){ + lstquery.add("PROPERTY"+j.replace("<", "").replace(">","")); + } + + else if(j.contains("?")) lstquery.add("VARIABLE"); + } + if(counter==0)counter=0; + + + } + } + } + + return lstquery; + } /** * Method gets a String and takes the information from the templator to creat a Sparql query. @@ -435,11 +524,6 @@ Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(question); for (BasicQueryTemplate temp : querytemps) { - /*System.out.println("temp.getQt();" + temp.getQt()); - System.out.println("temp.getSelTerms();" + temp.getSelTerms()); - System.out.println("temp.getVariablesAsStringList();" + temp.getVariablesAsStringList()); - System.out.println("temp.getConditions();" + temp.getConditions()); - System.out.println("temp.getSlots();" + temp.getSlots());*/ ArrayList<String> lstquerynew = new ArrayList<String>(); ArrayList<String> lstquerupsidedown = new ArrayList<String>(); String query; @@ -454,7 +538,7 @@ for(SPARQL_Filter tmp : temp.getFilters()) filters=filters+tmp+" "; //System.out.println("\n"); System.out.println("\n"); - query="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+temp.getQt().toString()+" "+selTerms+" WHERE { "+ conditions.replace("--","") + "}"+filters; + query="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+temp.getQt().toString()+" "+selTerms+" WHERE {"+ conditions.replace("--","") + filters+"}"; String conditions_new = ""; for(Path condition: temp.getConditions()){ @@ -478,37 +562,46 @@ System.out.println("Conditions_new: " + conditions_new); - String query_upside_down = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+temp.getQt().toString()+" "+selTerms+" WHERE { "+ conditions_new.replace("--","") + "}"+filters; + String query_upside_down = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "+temp.getQt().toString()+" "+selTerms+" WHERE {"+ conditions_new.replace("--","") +filters+ "}"; String[] slots= null; + int slotcounter=1; for(Slot slot : temp.getSlots()){ - String tmp= slot.toString(); - tmp= tmp.replace("UNSPEC",""); - tmp= tmp.replace("RESOURCE",""); - tmp= tmp.replace("{",""); - tmp= tmp.replace("}",""); - tmp=tmp.replace(" ",""); + //see below + slotcounter=slotcounter+1; + + + String slotstring=slot.toString().replace("UNSPEC","").replace("RESOURCE","").replace("{","").replace("}",""); + slotstring=slotstring.replace(" ",""); //System.out.println(tmp); //damit auch wirklich nur ?y und nicht ?y0 ersetzt wird, einfach nach "?y " suchen. - String[] array = tmp.split(":"); + String[] array = slotstring.split(":"); String replace; if(array[0].length()<2)replace = "?"+array[0]+" "; else replace="?"+array[0]; - //System.out.println("replace: " + replace); - //hier dann den hm wert von array[1] eintragen + - - //String hm_result=hm.get(array[1].toLowerCase()); - String hm_result=getUriFromIndex(array[1].toLowerCase(),0); + //TODO: Hotfix: get rid of " PROPERTY " + String _ThingGettingURIfor_=array[1]; + _ThingGettingURIfor_=_ThingGettingURIfor_.replace(" PROPERTY ","").toLowerCase(); + String hm_result=getUriFromIndex(_ThingGettingURIfor_,0); try { if(hm_result.contains("Category:")) hm_result=hm_result.replace("Category:",""); } catch ( Exception e ) { - //System.out.println( "Das war keine Zahl!" ); + } + /*always the middle slot is the property + * so count and always take the second of third to become a property + */ + if(slotcounter%2==0){ + hm_result=getUriFromIndex(_ThingGettingURIfor_,1); + } + //set back to 0 to start new + if(slotcounter==3) slotcounter=0; query=query.replace(replace, "<"+hm_result+">"); query_upside_down=query_upside_down.replace(replace, "<"+hm_result+">"); @@ -516,23 +609,27 @@ lstquerupsidedown.add(query_upside_down); lstquerynew.add(query); - //slots hinzufügen - for(Slot slot : temp.getSlots()){ - String tmp= slot.toString(); - tmp= tmp.replace("UNSPEC",""); - tmp= tmp.replace("RESOURCE",""); - tmp= tmp.replace("{",""); - tmp= tmp.replace("}",""); - tmp=tmp.replace(" ",""); - lstquerupsidedown.add(tmp); - lstquerynew.add(tmp); + + + ArrayList<String> lsttmp=createLeftAndRightPropertyArray(query); + //if its lower than three, we dont have any conditions and dont need to check it. + //also if the size%3 isnt 0, than something else is wrong and we dont need to test the query + if(lsttmp.size()>=3&&lsttmp.size()%3==0)for(String i : lsttmp) lstquerynew.add(i); + else{ + lstquerynew.clear(); + lstquerynew.add("ERROR"); } - //System.out.println("Query: "+query); - /*lstquery.add(query); - lstquery.add(query_upside_down);*/ + + lsttmp.clear(); + lsttmp=createLeftAndRightPropertyArray(query_upside_down); + if(lsttmp.size()>=3&&lsttmp.size()%3==0)for(String i : lsttmp) lstquerupsidedown.add(i); + else{ + lstquerupsidedown.clear(); + lstquerupsidedown.add("ERROR"); + } + lstquery.add(lstquerynew); lstquery.add(lstquerupsidedown); - } return lstquery; @@ -540,6 +637,41 @@ + private void saveNotParsedQuestions(String question) throws IOException{ + BufferedReader in = null; + + String tmp=""; + // Lies Textzeilen aus der Datei in einen Vector: + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/tmp/notParsedQuestions" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + tmp=tmp+"\n"+s; + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + String out=""; + BufferedWriter outfile = new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream( "/tmp/notParsedQuestions" ) ) ); + + outfile.write(tmp+"\n"+question); + outfile.close(); + } + /** * * @param string @@ -549,27 +681,31 @@ */ private String getUriFromIndex(String string, int fall) throws SQLException{ String result=null; + //just to be sure its only 0 or 1 + if(fall!=0 && fall!=1) fall=0; if(fall==0){ - //result=hm.get(string.toLowerCase()); - //if(result==null)result=myindex.getURI(string); result=myindex.getResourceURI(string.toLowerCase()); if(result==null)result=myindex.getPropertyURI(string.toLowerCase()); } if(fall==1){ - /*result=hm.get(string.toLowerCase()); - if(result==null)result=myindex.getURI(string); - if(result==null)result="http://dbpedia.org/property/"+string.toLowerCase();*/ - //should be alway property an not resource - //result=result.replace("resource", "property"); result=myindex.getPropertyURI(string.toLowerCase()); if(result==null){ result=myindex.getResourceURI(string.toLowerCase()); - result=result.replace("resource", "property"); + if(result!=null) result=result.replace("resource", "property"); } } - - if(result==null) return "http://dbpedia.org/property/"+string.toLowerCase(); + String tmp=""; + tmp=string.toLowerCase(); + tmp=tmp.replace("property",""); + tmp=tmp.replace(" ", "_"); + if(result==null) { + if(fall==1)return "http://dbpedia.org/property/"+tmp; + if(fall==0)return "http://dbpedia.org/resource/"+tmp; + else{ + return result; + } + } else return result; } @@ -578,19 +714,98 @@ private static ArrayList<String> getSemantics (ArrayList<String> semantics) throws IOException, JWNLException { ArrayList<String> result = new ArrayList<String>(); + //result.clear(); + //try{ try{ for(String id :semantics){ - List<String> array =wordnet.getRelatedNouns(id); - for(String i:array){ - if(!result.contains(i))result.add(i); + List<String> array_relatedNouns=null; + List<String> array_bestsynonyms=null; + List<String> array_siterterms=null; + //array.clear(); + System.out.println("Wordnet Word: "+id); + array_relatedNouns =wordnet.getRelatedNouns(id); + + array_bestsynonyms=wordnet.getBestSynonyms(POS.NOUN, id); + + array_siterterms=wordnet.getSisterTerms(POS.NOUN, id); + + if(array_relatedNouns!=null){ + for(String i:array_relatedNouns){ + if(!result.contains(i))result.add(i); + } } + if(array_bestsynonyms!=null){ + for(String i:array_bestsynonyms){ + if(!result.contains(i))result.add(i); + } + } + if(array_siterterms!=null){ + for(String i:array_siterterms){ + if(!result.contains(i))result.add(i); + } + } + } - return result; - } catch (Exception e) { - return null; } + catch(Exception e){ + if(result.isEmpty()) return null; + } + + if(!result.isEmpty()) return result; + else{ + //System.out.println("Didnt find ") + /*this is the case, if the first time nothing was found. + * but sometimes wordnet doesnt find anything e.g. die place... bzt you have also die and place + * so we try to find the seperate words and test them as well + */ + try{ + for(String id :semantics){ + String[] tmp_array=id.split(" "); + if(tmp_array.length>=2){ + for(String tmp : tmp_array){ + List<String> array_relatedNouns=null; + List<String> array_bestsynonyms=null; + List<String> array_siterterms=null; + //array.clear(); + //System.out.println("Wordnet Word: "+tmp); + array_relatedNouns =wordnet.getRelatedNouns(tmp); + + array_bestsynonyms=wordnet.getBestSynonyms(POS.NOUN, tmp); + + array_siterterms=wordnet.getSisterTerms(POS.NOUN, tmp); + + if(array_relatedNouns!=null){ + for(String i:array_relatedNouns){ + if(!result.contains(i))result.add(i); + } + } + if(array_bestsynonyms!=null){ + for(String i:array_bestsynonyms){ + if(!result.contains(i))result.add(i); + } + } + if(array_siterterms!=null){ + for(String i:array_siterterms){ + if(!result.contains(i))result.add(i); + } + } + + } + } + + } + } + catch(Exception e){ + if(result.isEmpty()) return null; + } + + } + + if(!result.isEmpty()) return result; + else return null; + // else{ return result;} } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java 2011-12-07 16:15:38 UTC (rev 3485) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/mySQLDictionary.java 2011-12-07 16:20:39 UTC (rev 3486) @@ -21,56 +21,13 @@ conn = DriverManager.getConnection("jdbc:sqlite::memory:"); createIndexPropertys(); createIndexResource(); + createWordnetHelp(); //optional!! //createIndexWikipedia(); } -private String createSimpleHashKey(String string){ - string = string.replace("!",""); - string = string.replace(":",""); - string = string.replace("/",""); - string = string.replace("\\",""); - string = string.replace("?",""); - string = string.replace(":",""); - - string = string.replace("a","1"); - string = string.replace("b","2"); - string = string.replace("c","3"); - string = string.replace("d","4"); - string = string.replace("e","5"); - string = string.replace("f","6"); - string = string.replace("g","7"); - string = string.replace("h","8"); - string = string.replace("i","9"); - string = string.replace("j","10"); - string = string.replace("k","11"); - string = string.replace("l","12"); - string = string.replace("m","13"); - string = string.replace("n","14"); - string = string.replace("o","15"); - string = string.replace("p","16"); - string = string.replace("q","17"); - string = string.replace("r","18"); - string = string.replace("s","19"); - string = string.replace("t","20"); - string = string.replace("u","21"); - string = string.replace("v","22"); - string = string.replace("w","23"); - string = string.replace("x","24"); - string = string.replace("y","25"); - string = string.replace("z","26"); - string = string.replace("ä","0"); - string = string.replace("ö","0"); - string = string.replace("ü","0"); - string = string.replace("?","0"); - string = string.replace(" ","0"); - return string; - - -} - public String getResourceURI(String string) throws SQLException{ Statement stat = conn.createStatement(); ResultSet rs; @@ -115,7 +72,81 @@ } + public String getWordnetHelp(String string) throws SQLException{ + Statement stat = conn.createStatement(); + ResultSet rs; + try { + rs = stat.executeQuery("select singular from wordnet where plural='"+string.toLowerCase()+"';"); + return rs.getString("singular"); + } catch (Exception e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + return null; + } + + } + + private void createWordnetHelp() throws SQLException{ /*System.out.println("Start SQL test"); + Class.forName( "org.sqlite.JDBC" ); + conn = DriverManager.getConnection("jdbc:sqlite::memory:");*/ + System.out.println("start generating Wordnet Help-Function"); + Statement stat = conn.createStatement(); + stat.executeUpdate("drop table if exists wordnet;"); + stat.executeUpdate("create table wordnet (plural, singular);"); + PreparedStatement prep = conn.prepareStatement("insert into wordnet values (?, ?);"); + BufferedReader in=null; + // conn.setAutoCommit(false); + int zaehler=0; + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/home/swalter/workspace/noun.exc" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + String[] tmp_array =s.split(" "); + if(tmp_array.length>=2){ + prep.setString(1, tmp_array[0]); + prep.setString(2, tmp_array[1]); + String temp=""; + if(tmp_array.length>2){ + for(int i =1;i<tmp_array.length;i++){ + temp=temp+tmp_array[i]+" "; + } + prep.setString(2, temp); + } + prep.addBatch(); + zaehler=zaehler+1; + //if(zaehler%10000==0) System.out.println(zaehler); + if(zaehler%10000==0){ + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(false); + System.out.println("done"); + } + + } + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + conn.setAutoCommit(false); + prep.executeBatch(); + conn.setAutoCommit(true); + System.out.println("Done"); + + } + private void createIndexWikipedia() throws ClassNotFoundException, SQLException{ /*System.out.println("Start SQL test"); Class.forName( "org.sqlite.JDBC" ); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |