From: <seb...@us...> - 2011-09-23 08:33:51
|
Revision: 3284 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3284&view=rev Author: sebastianwtr Date: 2011-09-23 08:33:44 +0000 (Fri, 23 Sep 2011) Log Message: ----------- [tbsl] added some new packetches and function for the QALD Projekt at UNI Bi Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/GetRessourcePropertys.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,84 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + +import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; + +public class GetRessourcePropertys { + + public ArrayList<String> getPropertys(String element) throws IOException{ + sendServerPropertyRequest(element); + return do_parsing("answer_property"); + + + } + /** + * Get an uri and saves the properties of this resource + * @param vergleich + * @throws IOException + */ + private void sendServerPropertyRequest(String vergleich) throws IOException{ + + String bla123 = vergleich; + //to get only the name + bla123=bla123.replace("http://dbpedia.org/resource/Category:",""); + bla123=bla123.replace("http://dbpedia.org/resource/",""); + vergleich=bla123; + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3A"+vergleich+"+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + do { + str = r.readLine(); + if (str != null) + result=result+str; + } while (str != null); + } catch (MalformedURLException e) { + System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + FileWriter w = new FileWriter("answer_property"); + w.write(result); + w.close(); + } + + + private static ArrayList<String> do_parsing(String datei) + { + ArrayList<String> indexObject = null; + + File file = new File(datei); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + + return indexObject; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlFilter.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,54 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; + +import java.util.HashMap; + +public class SparqlFilter { + public void create_Sparql_who(String string,HashMap<String, String> hm){ + // string=string.replaceAll("?", ""); + String[] array= string.split(" "); + //schauen ob erstes Wort ein who ist! + if(array[0].contains("who")){ + int position=0; + for(int i=0;i<array.length;i++){ + if (array[i].contains("of")){ + position=i; + break; + } + } + String vor_of=array[position-1]; + String nach_of=""; + //wenn nur ein element hinter of kommt + if(array.length-position-1==1){ + nach_of=array[position+1]; + } + else{ + for(int i=position+1; i<array.length;i++){ + //nach_of=nach_of+array[i]+" "; + nach_of=(nach_of.concat(array[i])).concat(" "); + } + + //letztes leerzeichen loeschen + nach_of = nach_of.substring(0, nach_of.length()-1); + } + String uri_vor_of=" "; + String uri_nach_of=" "; + + uri_vor_of=hm.get(vor_of); + uri_nach_of=hm.get(nach_of); + if(uri_vor_of!=null && uri_nach_of!=null){ + uri_nach_of=uri_nach_of.replace("Category:", ""); + uri_nach_of=uri_nach_of.replace("category:", ""); + + + String anfrage=null; + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + + } + else{ + //System.out.println("Nothing to do"); + } + + } + + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/Sparql/SparqlObject.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,364 @@ +package org.dllearner.algorithm.tbsl.exploration.Sparql; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Set; + +import net.didion.jwnl.JWNLException; + +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.sparql.BasicQueryTemplate; +import org.dllearner.algorithm.tbsl.templator.BasicTemplator; +import org.dllearner.algorithm.tbsl.templator.Templator; + + + +public class SparqlObject { + //global Variable dict + + //start counting with 0 + static int iteration_deept=1; + static WordNet wordnet; + BasicTemplator btemplator; + Templator templator; + + /*Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(line); + for (BasicQueryTemplate temp : querytemps) { + System.out.println(temp.toString()); + } + + */ + //Konstruktor + public SparqlObject() throws MalformedURLException{ + wordnet = new WordNet(); + System.out.println("Loading SPARQL Templator"); + btemplator = new BasicTemplator(); + templator = new Templator(); + System.out.println("Loading SPARQL Templator Done\n"); + } + + + public void create_Sparql_query(String string,HashMap<String, String> hm) throws JWNLException{ + // string=string.replaceAll("?", ""); + String[] array= string.split(" "); + String teststring=""; + /*Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries("Who is the Mayor of Berlin?"); + for (BasicQueryTemplate temp : querytemps) { + //System.out.println(temp.toString()); + teststring=teststring.concat(temp.toString()); + } + + System.out.println("##################"); + System.out.println(teststring);*/ + + /** + * Cluster function + */ + /* + int length=array.length; + int [] result_array= new int[length]; + for(int p =0;p<length;p++){ + result_array[p]=0; + } + int zaehler=1; + + //looking for max 3 word as one index + for(int z=length-1;z>=0;z=z-1){ + if(z-2>=0){ + String tmp1 = array[z]; + String tmp2 = array[z-1]; + String tmp3 = array[z-2]; + + String tmpstring3=(((tmp3.concat(" ")).concat(tmp2)).concat(" ")).concat(tmp1); + String tmpstring2=(tmp2.concat(" ")).concat(tmp1); + String tmpstring1=tmp1; + + //always looking for the "biggest" match + if(hm.get(tmpstring3)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + result_array[z-2]=zaehler; + zaehler++; + } + else{ + if(hm.get(tmpstring2)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + zaehler++; + } + else{ + if(hm.get(tmpstring1)!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + + } + else{ + if(z-1>=0){ + String tmp1 = array[z]; + String tmp2 = array[z-1]; + + String tmpstring2=(tmp2.concat(" ")).concat(tmp1); + String tmpstring1=tmp1; + + //always looking for the "biggest" match + + if(hm.get(tmpstring2)!=null){ + result_array[z]=zaehler; + result_array[z-1]=zaehler; + zaehler++; + } + else{ + if(hm.get(tmpstring1)!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + if(z==0){ + if(hm.get(array[z])!=null){ + result_array[z]=zaehler; + zaehler++; + } + } + } + } + + System.out.println("###### Cluster ######"); + for(int p =0;p<length;p++){ + System.out.println(result_array[p]); + } + System.out.println("######"); + */ + + //look, if the first word is a who! + if(array[0].contains("who")){ + int position=0; + for(int i=0;i<array.length;i++){ + if (array[i].contains("of")){ + position=i; + break; + } + } + String vor_of=array[position-1]; + String nach_of=""; + //if there is only one element after of + if(array.length-position-1==1){ + nach_of=array[position+1]; + } + else{ + for(int i=position+1; i<array.length;i++){ + nach_of=(nach_of.concat(array[i])).concat(" "); + } + + //delete last emty space + nach_of = nach_of.substring(0, nach_of.length()-1); + } + String uri_vor_of=" "; + String uri_nach_of=" "; + + uri_vor_of=hm.get(vor_of); + uri_nach_of=hm.get(nach_of); + if(uri_vor_of!=null && uri_nach_of!=null){ + uri_nach_of=uri_nach_of.replace("Category:", ""); + uri_nach_of=uri_nach_of.replace("category:", ""); + + String anfrage; + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + + //if there is no answer from the server, start searching with wordnet + String result=""; + result=sendServerQuestionRequest(anfrage); + if(result!="noanswer"){ + System.out.println(result); + } + else{ + long startTime = System.currentTimeMillis(); + + System.out.println("Get Propertys of "+nach_of); + + //contains uri AND string, every second is the string + ArrayList<String> properties = new ArrayList<String>(); + GetRessourcePropertys property = new GetRessourcePropertys(); + try { + //using uri now, not the string + properties=property.getPropertys(hm.get(nach_of)); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + System.out.println("Start Iterating Wordnet with "+vor_of+" and deept of "+iteration_deept); + ArrayList<String> semantics=new ArrayList<String>(); + ArrayList<String> tmp_semantics=new ArrayList<String>(); + ArrayList<String> result_SemanticsMatchProperties=new ArrayList<String>(); + semantics.add(vor_of); + tmp_semantics=semantics; + for(int i=0;i<=iteration_deept;i++){ + + try { + tmp_semantics=getSemantics(tmp_semantics); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + + } + //each word only one time + for(String k : tmp_semantics){ + if(!semantics.contains(k)) semantics.add(k); + } + + } + long endTime = System.currentTimeMillis(); + System.out.println("Getting Properties and Semantics took "+(endTime-startTime) +" ms\n"); + + //TODO: Try, if it works, if you use only one loop: (b.lowerCase).contains(properties.get(h)) + for(int h=1;h<properties.size()-2;h=h+2){ + for(String b : semantics){ + //System.out.println(properties.get(h)); + //System.out.println(b); + if(properties.get(h).contains(b.toLowerCase())){ + if(!result_SemanticsMatchProperties.contains(properties.get(h))) + result_SemanticsMatchProperties.add(properties.get(h)); + } + } + } + for(String b : result_SemanticsMatchProperties){ + vor_of=b.toLowerCase(); + uri_vor_of=hm.get(vor_of); + if(uri_vor_of!=null){ + anfrage="PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>select ?x where { <"+uri_nach_of+"> <"+uri_vor_of+"> ?x.}"; + System.out.println("Answer with the property \" " + b + "\" :\n"+sendServerQuestionRequest(anfrage)); + } + } + long endTime2 = System.currentTimeMillis(); + System.out.println("Getting Properties, Semantics and Answer from server took "+(endTime2-startTime) +" ms"); + } + } + + } + + } + + private static ArrayList<String> getSemantics (ArrayList<String> semantics) throws IOException, JWNLException { + ArrayList<String> result = new ArrayList<String>(); + for(String id :semantics){ + List<String> array =wordnet.getRelatedNouns(id); + for(String i:array){ + if(!result.contains(i))result.add(i); + } + + + } + return result; + } + + + + + private String sendServerQuestionRequest(String query){ + //SPARQL-Endpoint of Semantic Computing Group + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query="+createServerRequest(query)+"&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str=""; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + int counter=0; + do { + str = r.readLine(); + if (str != null){ + result=result.concat(str); + counter=counter+1;} + } while (str != null); + + if(result.isEmpty()) System.out.println("HALOSHSS"); + //TODO:if counter = 5 or less, there is an empty answer from the Server! Still to Verify! + if(counter<=5){ + System.out.println("Empty Answer from Server"); + return "noanswer"; + } + } catch (MalformedURLException e) { + System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + + + return createAnswer(result); + } + + + private String createAnswer(String string){ + string=string.replace("table",""); + string=string.replace("<tr>", ""); + string=string.replace("</tr>", ""); + string=string.replace("</>",""); + string=string.replace("<th>l</th>",""); + string=string.replace("<th>x</th>",""); + string=string.replace("< class=\"sparql\" border=\"1\">",""); + string=string.replace("\n",""); + string=string.replace(" ",""); + string=string.replace("</td>",""); + string=string.replace("<td>",""); + return string; + + } + + + private String createServerRequest(String query){ + String anfrage=null; + anfrage=removeSpecialKeys(query); + anfrage=anfrage.replace("<","<"); + anfrage=anfrage.replace("%gt;",">"); + anfrage=anfrage.replace("&","&"); + //anfrage=anfrage.replaceAll("#>","%23%3E%0D%0A%"); + anfrage=anfrage.replace("#","%23"); + anfrage=anfrage.replace(" ","+"); + anfrage=anfrage.replace("/","%2F"); + anfrage=anfrage.replace(":","%3A"); + anfrage=anfrage.replace("?","%3F"); + anfrage=anfrage.replace("$","%24"); + //anfrage=anfrage.replaceAll("F>+","F%3E%0D%0A"); + anfrage=anfrage.replace(">","%3E"); + anfrage=anfrage.replace("<","%3C"); + anfrage=anfrage.replace("\"","%22"); + anfrage=anfrage.replace("\n","%0D%0A%09"); + anfrage=anfrage.replace("%%0D%0A%09","%09"); + anfrage=anfrage.replace("=","%3D"); + anfrage=anfrage.replace("@","%40"); + anfrage=anfrage.replace("&","%26"); + anfrage=anfrage.replace("(","%28"); + anfrage=anfrage.replace(")","%29"); + anfrage=anfrage.replace("%3E%0D%0A%25","%3E"); + //anfrage=anfrage.replaceAll("\n",".%0D%0A%09"); + return anfrage; + } + + private String removeSpecialKeys(String query){ + query=query.replace("\\",""); + //query=query.replaceAll("\a",""); + query=query.replace("\b",""); + query=query.replace("\f",""); + query=query.replace("\r",""); + query=query.replace("\t",""); + // query=query.replaceAll("\v",""); + return query; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Levenshtein.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,60 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; + + +import java.lang.Math; +import java.math.BigDecimal; + +public class Levenshtein { + + + //http://de.wikipedia.org/wiki/Levenshtein-Distanz + public double nld(String orig, String eing){ + int result = diff(orig,eing); + int length=Math.max(orig.length(),eing.length()); + + + //if distance between both is zero, then the NLD must be one + if(result==0 ){ + return 1; + } + else{ + BigDecimal m = new BigDecimal(result); + BigDecimal n = new BigDecimal(length); + + BigDecimal c = new BigDecimal(0); + c=m.divide(n, 5, BigDecimal.ROUND_FLOOR); + + return c.doubleValue(); + } + + } + + public int diff(String orig, String eing) { + + int matrix[][] = new int[orig.length() + 1][eing.length() + 1]; + for (int i = 0; i < orig.length() + 1; i++) { + matrix[i][0] = i; + } + for (int i = 0; i < eing.length() + 1; i++) { + matrix[0][i] = i; + } + for (int a = 1; a < orig.length() + 1; a++) { + for (int b = 1; b < eing.length() + 1; b++) { + int right = 0; + if (orig.charAt(a - 1) != eing.charAt(b - 1)) { + right = 1; + } + int mini = matrix[a - 1][b] + 1; + if (matrix[a][b - 1] + 1 < mini) { + mini = matrix[a][b - 1] + 1; + } + if (matrix[a - 1][b - 1] + right < mini) { + mini = matrix[a - 1][b - 1] + right; + } + matrix[a][b] = mini; + } + } + + return matrix[orig.length()][eing.length()]; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/Parsing.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,19 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.util.HashMap; + + +public class Parsing { + public static void do_parsing(HashMap<String, String> hm, String string){ + String [] array = string.split(" "); + + for(String name : hm.values()){ + //System.err.println(name); + for(String inhalt : array){ + if(name.equals(inhalt)){ + System.out.println("Super " + inhalt); + } + } + } + + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/exploration_main.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,144 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.io.BufferedReader; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.util.*; + +import net.didion.jwnl.JWNLException; + +import org.dllearner.algorithm.tbsl.exploration.Sparql.SparqlObject; +import org.dllearner.algorithm.tbsl.exploration.sax.ParseXmlHtml; + +/* + * + * As you need more than 512 MB Ram, increase usable RAM for Java + * in Eclipse Run -> RunConfigurations -> Arguments -> VM Arguments -> -Xmx1024m + */ + +// Sax example from http://www.bennyn.de/programmierung/java/java-xml-sax-parser.html + +/* + * + * eins:http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fx+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fx+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout= + * zwei:http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fc+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fc+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout= + * + */ +public class exploration_main { + + private static HashMap<String, String> hm = new HashMap<String, String>(); + private static String qaldEntity2="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fc+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fc+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; + private static String qaldEntity1="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=SELECT+DISTINCT+%3Fx+%3Fl++WHERE+{%0D%0A++%3Fx+rdf%3Atype+%3Fc+.%0D%0A++%3Fx+rdfs%3Alabel+%3Fl+.%0D%0A++FILTER+%28lang%28%3Fl%29+%3D+%27en%27%29%0D%0A}&format=text%2Fhtml&debug=on&timeout="; + /** + * @param args + * @throws IOException + * @throws JWNLException + * @throws InterruptedException + */ + public static void main(String[] args) throws IOException, JWNLException, InterruptedException { + + /** + * Do the starting initializing stuff + */ + long startInitTime = System.currentTimeMillis(); + + System.out.println("Start Indexing"); + + //For testing! + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_zwei",hm); + hm=ParseXmlHtml.parse_xml("/home/swalter/workspace/ressource/sparql_eins",hm); + + + /* + * For real use! + */ + /* hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity2,"/tmp/qaldEntity2")),hm); + System.out.println("Entity2 done"); + hm=ParseXmlHtml.parse_xml((getEntity(qaldEntity1,"/tmp/qaldEntity1")),hm); + System.out.println("Entity1 done");*/ + System.out.println("Done with indexing\n"); + System.out.println("Start generating Wordnet Dictionary"); + SparqlObject sparql = new SparqlObject(); + System.out.println("Generating Wordnet Dictionary Done"); + long stopInitTime = System.currentTimeMillis(); + System.out.println("Time for Initialising "+(stopInitTime-startInitTime)+" ms"); + + boolean schleife=true; + while(schleife==true){ + BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); + String line; + try { + System.out.println("\n\n"); + System.out.println("Please enter a Question:"); + line = in.readLine(); + if(line.contains("quit")){ + schleife=false; + System.out.println("Bye!"); + } + if(line.contains("text")&& schleife==true){ + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + + + System.out.println("Please enter Path of txt. File:"); + line=in.readLine(); + + //Start Time measuring + long startTime = System.currentTimeMillis(); + String s=""; + BufferedReader in_file = new BufferedReader(new InputStreamReader(new FileInputStream(line))); + int anzahl=0; + while( null != (s = in_file.readLine()) ) { + System.out.println(s); + anzahl++; + //get each line and send it to the parser + s=s.replace("?",""); + sparql.create_Sparql_query(s.toLowerCase(),hm); + } + long timeNow = System.currentTimeMillis(); + long diff = timeNow-startTime; + + System.out.println("Time for "+anzahl+" questions = "+diff+" ms."); + + } + else if(schleife==true){ + long startTime = System.currentTimeMillis(); + line=line.replace("?",""); + /* Set<BasicQueryTemplate> querytemps = btemplator.buildBasicQueries(line); + for (BasicQueryTemplate temp : querytemps) { + System.out.println(temp.toString()); + }*/ + sparql.create_Sparql_query(line.toLowerCase(),hm); + long endTime= System.currentTimeMillis(); + System.out.println("\n The complete answering of the Question took "+(endTime-startTime)+" ms"); + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + } + + + + private static String getEntity(String query, String name) throws IOException, InterruptedException{ + + // String query_complete="wget "+"\""+query+"\""+" -O "+"\""+name+"\""; + URL url = new URL(query); + ReadableByteChannel rbc = Channels.newChannel(url.openStream()); + //System.out.println(rbc.toString()); + FileOutputStream fos = new FileOutputStream(name); + //max 200MB = 209715200 Byte + fos.getChannel().transferFrom(rbc, 0, 209715200 ); + + + return name; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/test_vergleich.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,131 @@ +package org.dllearner.algorithm.tbsl.exploration.exploration_main; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + +import org.dllearner.algorithm.tbsl.exploration.sax.MySaxParser; + + +/* + * + * wget "http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3ABerlin+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}&format=text%2Fhtml&debug=on&timeout=" -O bla.txt + + */ +public class test_vergleich { + + public String DoVergleich(String suchbegriff, String vergleich) throws IOException{ + String ergebnis_string=""; + //sendServerRequest(vergleich); + sendServerRequest_new(vergleich); + ergebnis_string=do_parsing("answer",suchbegriff); + + return ergebnis_string; + + + } + + private void sendServerRequest(String vergleich) throws IOException{ + String tmp="wget -O answer \"http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout=\""; + //System.out.println(tmp); + Process p = Runtime.getRuntime().exec(tmp); + try { + p.waitFor(); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + private void sendServerRequest_new(String vergleich) throws IOException{ + String tmp="http://greententacle.techfak.uni-bielefeld.de:5171/sparql?default-graph-uri=&query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0D%0APREFIX+res%3A+%3Chttp%3A%2F%2Fdbpedia.org%2Fresource%2F%3E%0D%0A%0D%0ASELECT+DISTINCT+%3Fp+%3Fl+WHERE++{%0D%0A+{+res%3ABerlin+%3Fp+%3Fo+.+}%0D%0A+UNION%0D%0A+{+%3Fs+%3Fp+res%3A"+vergleich+"+.+}%0D%0A+{+%3Fp+rdfs%3Alabel+%3Fl+.+}%0D%0A}%0D%0A&format=text%2Fhtml&debug=on&timeout="; + URL url; + InputStream is; + InputStreamReader isr; + BufferedReader r; + String str; + String result=""; + + try { + url = new URL(tmp); + is = url.openStream(); + isr = new InputStreamReader(is); + r = new BufferedReader(isr); + do { + str = r.readLine(); + if (str != null) + result=result+str; + } while (str != null); + } catch (MalformedURLException e) { + System.out.println("Must enter a valid URL"); + } catch (IOException e) { + System.out.println("Can not connect"); + } + + FileWriter w = new FileWriter("answer"); + w.write(result); + w.close(); + } + + + private static String do_parsing(String datei, String suchbergriff) + { + ArrayList<String> indexObject = null; + String ergebnis_uri=""; + double zwischenwert=0; + double tmp=0; + Levenshtein levenshtein = new Levenshtein(); + + File file = new File(datei); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + for (int i = 1; i < indexObject.size(); i=i+2) + { + System.out.println((indexObject.get(i)).toLowerCase()); + tmp = levenshtein.nld(suchbergriff.toLowerCase(), (indexObject.get(i)).toLowerCase()); + System.out.println(tmp); + System.out.println("######"); + + String ergebnis_string; + if(tmp==1.0){ + zwischenwert=tmp; + System.out.println(tmp); + System.out.println("YEAH!!!!"); + ergebnis_string=indexObject.get(i); + + ergebnis_uri=indexObject.get(i-1); + System.out.println(ergebnis_uri); + i=indexObject.size(); + break; + } + if(tmp>zwischenwert){ + zwischenwert=tmp; + System.out.println(tmp); + ergebnis_string=indexObject.get(i); + + ergebnis_uri=indexObject.get(i-1); + System.out.println(ergebnis_uri); + } + } + indexObject.clear(); + + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + + return ergebnis_uri; + } + + + +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxHandler.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,77 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + +import java.util.ArrayList; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +public class MySaxHandler extends DefaultHandler +{ + + private StringBuffer buffer; + private boolean buffering; + private ArrayList<String> indexObject; + + public MySaxHandler() + { + this.buffer = null; + this.buffering = false; + indexObject = new ArrayList<String>(); + } + + @Override + public void startDocument() throws SAXException + { + this.buffer = new StringBuffer(""); + } + + @Override + public void startElement(String namespaceURI, String localName, String tagName, Attributes attributes) throws SAXException + { + String tag = tagName; + //name=td + if (tag.equals("td")) + { + this.buffering = true; + } + } + + @Override + public void endElement(String namespaceURI, String localName, String tagName) throws SAXException + { + String tag = tagName; + String tagValue = null; + + //name=td + if (tag.equals("td")) + { + tagValue = this.buffer.toString(); + this.buffering = false; + this.buffer = new StringBuffer(); + } + + parseValue(tagValue); + } + + @Override + public void characters(char chars[], int start, int length) + { + if (this.buffering) + { + this.buffer = this.buffer.append(chars, start, length); + } + } + + private void parseValue(String value) + { + if (value != null) + { + this.indexObject.add(value); + } + } + + public ArrayList<String> getIndexObject() + { + return this.indexObject; + } +} \ No newline at end of file Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/MySaxParser.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,51 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +public class MySaxParser +{ + private URL url; + private ArrayList<String> indexObject; + + public MySaxParser() + { + super(); + } + + public MySaxParser(File file) throws MalformedURLException + { + this.url = file.toURI().toURL(); + } + + public void parse() throws ParserConfigurationException, SAXException, IOException + { + // Initialize SAX Parser: + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser parser = factory.newSAXParser(); + XMLReader reader = parser.getXMLReader(); + // Create SAX Handler: + MySaxHandler handler = new MySaxHandler(); + reader.setContentHandler(handler); + // Parse XML file: + InputSource input = new InputSource(url.openStream()); + reader.parse(input); + // Get the result: + this.indexObject = handler.getIndexObject(); + } + + public ArrayList<String> getIndexObject() + { + return this.indexObject; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/sax/ParseXmlHtml.java 2011-09-23 08:33:44 UTC (rev 3284) @@ -0,0 +1,36 @@ +package org.dllearner.algorithm.tbsl.exploration.sax; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; + +public class ParseXmlHtml { + public static HashMap<String, String> parse_xml(String dateiname, HashMap<String, String> hm) + { + ArrayList<String> indexObject = null; + File file = new File(dateiname); + try + { + MySaxParser parser = new MySaxParser(file); + parser.parse(); + indexObject = parser.getIndexObject(); + /*for (int i = 0; i < indexObject.size(); i++) + { + hm.put((indexObject.get(i+1)).toLowerCase(), indexObject.get(i)); + }*/ + for (int i = 1; i < indexObject.size(); i=i+2) + { + hm.put((indexObject.get(i)).toLowerCase(), indexObject.get(i-1)); + } + indexObject.clear(); + + } + catch (Exception ex) + { + System.out.println("Another exciting error occured: " + ex.getLocalizedMessage()); + } + return hm; + } + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |