From: <seb...@us...> - 2011-11-10 13:01:32
|
Revision: 3392 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3392&view=rev Author: sebastianwtr Date: 2011-11-10 13:01:22 +0000 (Thu, 10 Nov 2011) Log Message: ----------- [tbsl] fixed some errors and programmed iteration depth 1 Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/Levenshtein.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-11-10 08:15:41 UTC (rev 3391) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -15,8 +15,12 @@ public class GetRessourcePropertys { public ArrayList<String> getPropertys(String element) throws IOException{ + try{ sendServerPropertyRequest(element); return do_parsing("answer_property"); + } catch (Exception e){ + return null; + } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/Levenshtein.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/Levenshtein.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/Levenshtein.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -0,0 +1,88 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + + +import java.lang.Math; +import java.math.BigDecimal; + +public class Levenshtein { + + + //http://de.wikipedia.org/wiki/Levenshtein-Distanz + public double nld(String orig, String eing){ + //int result = diff(orig,eing); + int result = computeLevenshteinDistance(orig,eing); + int length=Math.max(orig.length(),eing.length()); + + + //if distance between both is zero, then the NLD must be one + if(result==0 ){ + return 1; + } + else{ + BigDecimal m = new BigDecimal(result); + BigDecimal n = new BigDecimal(length); + + BigDecimal c = new BigDecimal(0); + c=m.divide(n, 5, BigDecimal.ROUND_FLOOR); + + return c.doubleValue(); + } + + } + + public int diff(String orig, String eing) { + + int matrix[][] = new int[orig.length() + 1][eing.length() + 1]; + for (int i = 0; i < orig.length() + 1; i++) { + matrix[i][0] = i; + } + for (int i = 0; i < eing.length() + 1; i++) { + matrix[0][i] = i; + } + for (int a = 1; a < orig.length() + 1; a++) { + for (int b = 1; b < eing.length() + 1; b++) { + int right = 0; + if (orig.charAt(a - 1) != eing.charAt(b - 1)) { + right = 1; + } + int mini = matrix[a - 1][b] + 1; + if (matrix[a][b - 1] + 1 < mini) { + mini = matrix[a][b - 1] + 1; + } + if (matrix[a - 1][b - 1] + right < mini) { + mini = matrix[a - 1][b - 1] + right; + } + matrix[a][b] = mini; + } + } + + return matrix[orig.length()][eing.length()]; + } + + + //http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Java + private static int minimum(int a, int b, int c) { + return Math.min(Math.min(a, b), c); + } + + public static int computeLevenshteinDistance(CharSequence str1, + CharSequence str2) { + int[][] distance = new int[str1.length() + 1][str2.length() + 1]; + + for (int i = 0; i <= str1.length(); i++) + distance[i][0] = i; + for (int j = 0; j <= str2.length(); j++) + distance[0][j] = j; + + for (int i = 1; i <= str1.length(); i++) + for (int j = 1; j <= str2.length(); j++) + distance[i][j] = minimum( + distance[i - 1][j] + 1, + distance[i][j - 1] + 1, + distance[i - 1][j - 1] + + ((str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0 + : 1)); + + return distance[str1.length()][str2.length()]; + } +} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-11-10 08:15:41 UTC (rev 3391) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -17,6 +17,7 @@ import net.didion.jwnl.JWNLException; +import org.dllearner.algorithm.tbsl.exploration.sax.ParseXmlHtml; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; import org.dllearner.algorithm.tbsl.sparql.Path; @@ -36,22 +37,26 @@ static int explorationdepthwordnet=1; static int iterationdepth =0; static int numberofanswers=1; + static double LvenstheinMin = 0.95; static WordNet wordnet; BasicTemplator btemplator; Templator templator; - HashMap<String, String> hm; + private static HashMap<String, String> hm = new HashMap<String, String>(); //Konstruktor - public SparqlObject(HashMap<String, String> hm_new) throws MalformedURLException{ + public SparqlObject() throws MalformedURLException{ wordnet = new WordNet(); - hm=hm_new; + //hm=hm_new; + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity2",hm); + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity1",hm); System.out.println("Loading SPARQL Templator"); btemplator = new BasicTemplator(); templator = new Templator(); System.out.println("Loading SPARQL Templator Done\n"); setExplorationdepthwordnet(1); - setIterationdepth(0); + //eigentlich immer mit 0 initialisieren + setIterationdepth(1); setNumberofanswers(1); } @@ -101,17 +106,23 @@ public void create_Sparql_query(String question) throws JWNLException, IOException{ //create_Sparql_query_new(string); - ArrayList<String> lstquery = new ArrayList<String>(); + ArrayList<ArrayList<String>> lstquery = new ArrayList<ArrayList<String>>(); + long startParsingTime = System.currentTimeMillis(); lstquery=getQuery(question); + long endParsingTime = System.currentTimeMillis(); + System.out.println("The Questionparsing took "+ (endParsingTime-startParsingTime)+ " ms"); + ArrayList<String> final_answer = new ArrayList<String>(); //if(!lstquery.isEmpty()){ //for each querry - for(String query : lstquery){ + for(ArrayList<String> querylist : lstquery){ /* * ################################################################################################# */ //only testfunction to save the generated queries in the tmp-folder + String query=""; + query=querylist.get(0).toString(); if(getIterationdepth()==-1){ String tmp = new String(); String s = null; @@ -139,7 +150,7 @@ } String out=null; - if (query=="" || query==" ") query="Could not parse"; + if (query=="" || query==" "||query.length()==0) query="Could not parse"; out=tmp + "\n" + question + ":\n"+query+"\n"; BufferedWriter outfile = new BufferedWriter( @@ -182,7 +193,9 @@ } String answer; answer=sendServerQuestionRequest(query); - System.out.println(query); + final_answer.add(answer); + /*System.out.println(query); + if (query=="" || query==" "||query.length()==0) answer="Could not parse"; System.out.println("Antwort: " + answer); String out=tmp + "\n" + "Question: "+question + "\n"+"Query: " + query +"\n Anwer: "+answer+"\n\n##############################"; @@ -191,7 +204,7 @@ new FileOutputStream( "/tmp/answer.txt" ) ) ); outfile.write(out); - outfile.close(); + outfile.close(); */ } /* * ################################################################################################# @@ -199,6 +212,134 @@ //Iterration 1 if(getIterationdepth()==1){ + //asking server + String answer; + answer=sendServerQuestionRequest(query); + + //if Emty answer, get properties an look up the right property with levensthein + if(answer.contains("EmtyAnswer")){ + //TODO: get all information from the query + //TODO: maybe put the query + information in an array list of arraylist. each arraylist contains the query, the variables and the uris. Then iterate over the List and get the query for sending to server + String rescource=""; + + //get the resource of the query. always the last Item in the array! + //Funktioniert! + String resource_tmp=""; + int tmp_length=querylist.size(); + resource_tmp=querylist.get(tmp_length-1); + String[] array_tmp = resource_tmp.split(":"); + rescource=array_tmp[1]; + + + //the property we are looking for is always the second last in the array! + //Funktioniert! + String property_to_compare_with=""; + tmp_length=querylist.size(); + //second last + property_to_compare_with=querylist.get(tmp_length-2); + array_tmp = property_to_compare_with.split(":"); + property_to_compare_with=array_tmp[1]; + //System.out.println("property_to_compare_with: "+property_to_compare_with); + + + //contains uri AND string, every second is the string + //Funktioniert + ArrayList<String> properties = new ArrayList<String>(); + GetRessourcePropertys property = new GetRessourcePropertys(); + Boolean goOnAfterProperty = true; + try { + //using uri now, not the string + properties=property.getPropertys(hm.get(rescource.toLowerCase())); + if (properties==null){ + final_answer.add("Error in getting Properties\n"); + goOnAfterProperty=false; + } + //System.out.println(properties); + } catch (IOException e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + final_answer.add("Error in getting Properties\n"); + goOnAfterProperty=false; + + } + if(goOnAfterProperty==true){ + //property_to_compare_with mit der Liste der propertys vergleichen, und wenn der normalisierte Wert >= LvenstheinMin ist, einbauen und neue query erzeugen. + Levenshtein levensthein = new Levenshtein(); + ArrayList<String> new_queries= new ArrayList<String>(); + for(int i =1; i<=properties.size()-2;i=i+2){ + //double tmp=levensthein.nld(property_to_compare_with.toLowerCase(), properties.get(i).toLowerCase()); + double tmp=levensthein.computeLevenshteinDistance(property_to_compare_with.toLowerCase(), properties.get(i).toLowerCase()); + //create new query + //System.out.println(tmp); + //if(tmp>=LvenstheinMin){ + if(tmp<=3.0){ + //System.out.println(tmp); + //alte property uri mit neuer ersetzen: + String query_tmp=query; + query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(i-1)); + //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); + new_queries.add(query_tmp); + } + + } + + System.out.println("Start Iterating Wordnet with "+property_to_compare_with+" and deept of "+explorationdepthwordnet); + ArrayList<String> semantics=new ArrayList<String>(); + ArrayList<String> tmp_semantics=new ArrayList<String>(); + ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); + semantics.add(property_to_compare_with); + tmp_semantics=semantics; + Boolean goOnAfterWordnet = true; + for(int i=0;i<=explorationdepthwordnet;i++){ + + try { + tmp_semantics=getSemantics(tmp_semantics); + } catch (IOException e) { + // TODO Auto-generated catch block + //e.printStackTrace(); + goOnAfterWordnet=false; + final_answer.add("Error in searching Wordnet\n"); + + } + //each word only one time + for(String k : tmp_semantics){ + if(!semantics.contains(k)) semantics.add(k); + } + + } + + if(goOnAfterWordnet==true){ + // ArrayList<String> new_queries= new ArrayList<String>(); + + //TODO: Try, if it works, if you use only one loop: (b.lowerCase).contains(properties.get(h)) + for(int h=1;h<properties.size()-2;h=h+2){ + for(String b : semantics){ + //System.out.println(properties.get(h)); + //System.out.println(b); + if(properties.get(h).contains(b.toLowerCase())){ + if(!result_SemanticsMatchProperties.contains(properties.get(h))){ + //create new query + result_SemanticsMatchProperties.add(properties.get(h)); + String query_tmp=query; + query_tmp=query_tmp.replace(hm.get(property_to_compare_with.toLowerCase()),properties.get(h-1)); + //System.out.println("hm.get(property_to_compare_with.toLowerCase(): " + hm.get(property_to_compare_with.toLowerCase())); + new_queries.add(query_tmp); + } + } + } + } + + for(String bla : new_queries){ + String answer_tmp; + answer_tmp=sendServerQuestionRequest(bla); + if(!answer_tmp.contains("EmtyAnswer")){ + final_answer.add(answer_tmp); + } + } + } + } + } + } /* * ################################################################################################# @@ -208,6 +349,46 @@ } } + + BufferedReader in = null; + + String tmp=""; + // Lies Textzeilen aus der Datei in einen Vector: + try { + in = new BufferedReader( + new InputStreamReader( + new FileInputStream( "/tmp/answer" ) ) ); + String s; + while( null != (s = in.readLine()) ) { + tmp=tmp+"\n"+s; + } + } catch( FileNotFoundException ex ) { + } catch( Exception ex ) { + System.out.println( ex ); + } finally { + if( in != null ) + try { + in.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + String out=""; + for(String answer : final_answer){ + + out=out+ "\n"+answer+"\n"; + + } + System.out.println(question); + System.out.println(out); + BufferedWriter outfile = new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream( "/tmp/answer" ) ) ); + + outfile.write(tmp+"\n"+question+" :\n"+out); + outfile.close(); } // string=string.replaceAll("?", ""); @@ -220,8 +401,8 @@ * @param question question in natural language * @return ArrayList of Sparql queries. */ - private ArrayList<String> getQuery(String question) { - ArrayList<String> lstquery = new ArrayList<String>(); + private ArrayList<ArrayList<String>> getQuery(String question) { + ArrayList<ArrayList<String>> lstquery = new ArrayList<ArrayList<String>>(); Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(question); for (BasicQueryTemplate temp : querytemps) { @@ -230,7 +411,8 @@ System.out.println("temp.getVariablesAsStringList();" + temp.getVariablesAsStringList()); System.out.println("temp.getConditions();" + temp.getConditions()); System.out.println("temp.getSlots();" + temp.getSlots());*/ - + ArrayList<String> lstquerynew = new ArrayList<String>(); + ArrayList<String> lstquerupsidedown = new ArrayList<String>(); String query; String selTerms =""; for(SPARQL_Term terms :temp.getSelTerms()) selTerms=selTerms+(terms.toString())+" "; @@ -270,7 +452,6 @@ String[] slots= null; for(Slot slot : temp.getSlots()){ - //hier muss dann noch die abfrage aus der hm raus, also das direkt die uri eingebettet wird. String tmp= slot.toString(); tmp= tmp.replace("UNSPEC",""); tmp= tmp.replace("RESOURCE",""); @@ -300,9 +481,25 @@ query_upside_down=query_upside_down.replace(replace, "<"+hm_result+">"); } + lstquerupsidedown.add(query_upside_down); + lstquerynew.add(query); + + //slots hinzufügen + for(Slot slot : temp.getSlots()){ + String tmp= slot.toString(); + tmp= tmp.replace("UNSPEC",""); + tmp= tmp.replace("RESOURCE",""); + tmp= tmp.replace("{",""); + tmp= tmp.replace("}",""); + tmp=tmp.replace(" ",""); + lstquerupsidedown.add(tmp); + lstquerynew.add(tmp); + } //System.out.println("Query: "+query); - lstquery.add(query); - lstquery.add(query_upside_down); + /*lstquery.add(query); + lstquery.add(query_upside_down);*/ + lstquery.add(lstquerynew); + lstquery.add(lstquerupsidedown); } @@ -455,6 +652,9 @@ string=string.replace("<td>",""); string=string.replace("<th>callret-0</th>", ""); string=string.replace("<th>y</th>",""); + while (string.contains(" ")) string=string.replace(" ",""); + if (string.length()==0) string="EmtyAnswer"; + //System.out.println("Stringlänge: "+string.length()); return string; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-11-10 08:15:41 UTC (rev 3391) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -31,7 +31,7 @@ */ public class exploration_main { - private static HashMap<String, String> hm = new HashMap<String, String>(); + //private static HashMap<String, String> hm = new HashMap<String, String>(); private static String qaldEntity2="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fc+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fc+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; private static String qaldEntity1="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fx+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fx+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; /** @@ -50,20 +50,20 @@ System.out.println("Start Indexing"); //For testing! - hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_zwei",hm); - hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_eins",hm); + //hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity2",hm); + //hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/qaldEntity1",hm); - + //30% Ram /* * For real use! */ - /* hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity2,"/tmp/qaldEntity2")),hm); + /*hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity2,"/tmp/qaldEntity2")),hm); System.out.println("Entity2 done"); hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity1,"/tmp/qaldEntity1")),hm); System.out.println("Entity1 done");*/ System.out.println("Done with indexing\n"); System.out.println("Start generating Wordnet Dictionary"); - SparqlObject sparql = new SparqlObject(hm); + SparqlObject sparql = new SparqlObject(); System.out.println("Generating Wordnet Dictionary Done"); long stopInitTime = System.currentTimeMillis(); System.out.println("Time for Initialising "+(stopInitTime-startInitTime)+" ms"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java 2011-11-10 08:15:41 UTC (rev 3391) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -9,6 +9,7 @@ import java.net.URL; import java.util.ArrayList; +import org.dllearner.algorithm.tbsl.exploration.Sparql.Levenshtein; import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java 2011-11-10 08:15:41 UTC (rev 3391) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java 2011-11-10 13:01:22 UTC (rev 3392) @@ -5,10 +5,15 @@ import java.util.HashMap; public class ParseXmlHtml { + + //TODO: zweite Hashmap in der als Key (barack,Obama) auf die Value barack Obama verweißt + //TODO: Rückgabewert in List<Map<String, String>> listOfMaps = new ArrayList<Map<String, String>>(); ändern und dann beide hashmaps übergeben und dann hm aus dem Funktionsheader nehmen + public static HashMap<String, String> parse_xml(String dateiname, HashMap<String, String> hm) { ArrayList<String> indexObject = null; File file = new File(dateiname); + HashMap<String, String> hm_new = new HashMap<String, String>(); try { MySaxParser parser = new MySaxParser(file); @@ -18,11 +23,18 @@ { hm.put((indexObject.get(i+1)).toLowerCase(), indexObject.get(i)); }*/ + int zaehler=0; for (int i = 1; i < indexObject.size(); i=i+2) { hm.put((indexObject.get(i)).toLowerCase(), indexObject.get(i-1)); + String[] tmp_array = indexObject.get(i).toLowerCase().split(" "); + if(tmp_array.length>=2) { + for(String tmp : tmp_array)hm_new.put(tmp.toLowerCase(), indexObject.get(i-1)); + } + zaehler=zaehler+1; } indexObject.clear(); + System.out.println("Anzahl: "+zaehler); } catch (Exception ex) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |