From: <ki...@us...> - 2012-07-26 14:21:26
|
Revision: 3807 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3807&view=rev Author: kirdie Date: 2012-07-26 14:21:14 +0000 (Thu, 26 Jul 2012) Log Message: ----------- test nearly finished. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -32,41 +32,41 @@ public class MainInterface { //private static int anzahlAbgeschickterQueries = 10; - + private static ArrayList<Template> global_template_list=new ArrayList<Template>(); private static BasicTemplator btemplator_global; private static SQLiteIndex myindex_global; private static WordNet wordnet_global; private static StanfordLemmatizer lemmatiser_global; private static String type_global=""; - + public static ArrayList<String> startQuestioning(String question,BasicTemplator btemplator,SQLiteIndex myindex, WordNet wordnet,StanfordLemmatizer lemmatiser) throws ClassNotFoundException, SQLException, IOException{ - + /* * true, if you have to push a button to get to the next module * false, goes through all */ boolean wait = false; //Setting.setThresholdSelect(0.5); - + if(Setting.isWaitModus())wait=true; - + TemplateBuilder templateObject = new TemplateBuilder(btemplator, myindex); ArrayList<Template> template_list = new ArrayList<Template>(); - + /* * Array List with the answers from the queries */ ArrayList<String> answers = new ArrayList<String>(); - - + + /* * generate Templates! */ template_list=templateObject.createTemplates(question); - + answers = singleSteps(myindex, wordnet, lemmatiser, wait, template_list); - + return answers; } @@ -85,17 +85,17 @@ private static ArrayList<String> singleSteps(SQLiteIndex myindex, WordNet wordnet, StanfordLemmatizer lemmatiser, boolean wait, ArrayList<Template> template_list) - throws IOException { - + throws IOException { + ArrayList<String> answers = new ArrayList<String>(); /* * generate Queries and test the first Time */ ArrayList<QueryPair> qp = new ArrayList<QueryPair>(); - + //generate QueryPair String Question=""; - + //TODO: parallel here? for(Template t : template_list){ Question=t.getQuestion(); @@ -113,7 +113,7 @@ if(!contain)qp.add(p); } } - + //sort QueryPairs qp=LinearSort.doSort(qp); qp=HeuristicSort.doHeuristicSort(qp, Question); @@ -124,7 +124,7 @@ for(QueryPair q : qp){ System.out.println(q.getQuery()+" rank:"+q.getRank()); } - + int anzahl=1; boolean go_on = true; for(QueryPair q : qp){ @@ -139,7 +139,7 @@ //if(qp.size()<3)go_on=true; System.out.println("Got Answer from Server with this Query: "+ q.getQuery()); if(Setting.isTagging()) write_ResourcePropertyInformation(q.getResource(),q.getPropertyName(),q.getProperty()); - + //printSingleQuery(q.getQuery(),Question); //go_on=true; boolean contains_uri=false; @@ -165,7 +165,7 @@ else answers.add(s); } } - + } } //if(checkAnswer(answer_tmp))answers.addAll(answer_tmp); @@ -173,26 +173,26 @@ } anzahl+=1; } - + System.out.println("\n Answer from Server: \n"); for(String answer:answers){ System.out.println(answer); } if(wait)DebugMode.waitForButton(); - - + + if(answers.isEmpty()&&Setting.getModuleStep()>=2){ - + answers.clear(); //Setting.setLevenstheinMin(0.65); //Setting.setAnzahlAbgeschickterQueries(10); answers.addAll(doStart(myindex, wordnet, lemmatiser, template_list,"LEVENSTHEIN","neu")); if(wait)DebugMode.waitForButton(); } - + if(answers.isEmpty()&&Setting.getModuleStep()>=3){ - + answers.clear(); //Setting.setAnzahlAbgeschickterQueries(10); answers.addAll(doStart(myindex, wordnet, lemmatiser, template_list,"WORDNET","neu")); @@ -201,31 +201,31 @@ if(answers.isEmpty()&&Setting.getModuleStep()>=4){ - + answers.clear(); //Setting.setAnzahlAbgeschickterQueries(10); //Setting.setThresholdSelect(0.2); answers.addAll(doStart(myindex, wordnet, lemmatiser, template_list,"RELATE","neu")); if(wait)DebugMode.waitForButton(); } - - + + if(answers.isEmpty()&&Setting.getModuleStep()>=5){ System.out.println("NO Answer from Server =>Start Query Manipulation"); answers.clear(); answers.addAll(stufe5(myindex,wordnet,lemmatiser,wait,template_list)); if(wait)DebugMode.waitForButton(); } - - - - - - + + + + + + /* * return answers! */ - + return answers; } @@ -234,13 +234,13 @@ - - - - + + + + private static ArrayList<String> doStart(SQLiteIndex myindex, WordNet wordnet, StanfordLemmatizer lemmatiser, ArrayList<Template> template_list, String type, String test) { ArrayList<String> answers = new ArrayList<String>(); @@ -248,9 +248,9 @@ boolean special=false; int anzahl; boolean go_on; - + System.out.println("No answer from direkt match, start "+type+"Modul"); - + /*ArrayList<Thread> thread_list = new ArrayList<Thread>(); ThreadGroup group = new ThreadGroup("QA-Threads"); int anzahl_thread=0; @@ -260,7 +260,7 @@ wordnet_global=wordnet; lemmatiser_global=lemmatiser; type_global=type; - + for(Template t : template_list){ final int anzahl_thread_new=anzahl_thread; @@ -270,11 +270,11 @@ { String blub=do_something(anzahl_thread_new); }; - + thread_list.add(t1); t1.start(); - - + + } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -286,14 +286,14 @@ e.printStackTrace(); } anzahl_thread+=1; - + } - */ - + */ + /* * NOw wait until all are finished */ - + /*for(int i =0; i<thread_list.size();i++){ try { thread_list.get(i).join(); @@ -302,42 +302,42 @@ e.printStackTrace(); } }*/ - - + + for(Template t : template_list){ try{ - - ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); - - - for(ArrayList<Hypothesis> l_h : t.getHypothesen()){ - ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); - generated_hypothesis= IterationModule.new_iteration(t.getElm(),l_h,t.getCondition(),type,myindex,wordnet,lemmatiser); - for(ArrayList<Hypothesis> h_t : generated_hypothesis){ - ArrayList<Hypothesis> new_hypothesen_set = new ArrayList<Hypothesis>(); - for(Hypothesis bla : h_t){ - new_hypothesen_set.add(bla); - } - hypothesenSetList.add(new_hypothesen_set); + + ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); + + + for(ArrayList<Hypothesis> l_h : t.getHypothesen()){ + ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); + generated_hypothesis= IterationModule.new_iteration(t.getElm(),l_h,t.getCondition(),type,myindex,wordnet,lemmatiser); + for(ArrayList<Hypothesis> h_t : generated_hypothesis){ + ArrayList<Hypothesis> new_hypothesen_set = new ArrayList<Hypothesis>(); + for(Hypothesis bla : h_t){ + new_hypothesen_set.add(bla); } - - //hypothesenSetList.addAll(blub); + hypothesenSetList.add(new_hypothesen_set); } - if(type.contains("WORDNET"))t.setHypothesenWordnet(hypothesenSetList); - if(type.contains("LEVENSTHEIN"))t.setHypothesenLevensthein(hypothesenSetList); - if(type.contains("RELATE"))t.setHypothesenRelate(hypothesenSetList); - + + //hypothesenSetList.addAll(blub); } - + if(type.contains("WORDNET"))t.setHypothesenWordnet(hypothesenSetList); + if(type.contains("LEVENSTHEIN"))t.setHypothesenLevensthein(hypothesenSetList); + if(type.contains("RELATE"))t.setHypothesenRelate(hypothesenSetList); + + } + //} catch (Exception e){ - + } - + } - - + + /* * Generate Queries and test queries */ @@ -359,12 +359,12 @@ if(!contain&&checkQuery(p.getQuery()))qp.add(p); } } - + //sort QueryPairs qp=LinearSort.doSort(qp); qp=HeuristicSort.doHeuristicSort(qp, Question); //printQueries(qp, type, Question); - + System.out.println("Following Querries were created:"); for(QueryPair q : qp){ System.out.println(q.getQuery()+" rank:"+q.getRank()); @@ -385,7 +385,7 @@ //else go_on=false; //go_on=true; go_on=false; - + System.out.println("Got Answer from Server with this Query: "+ q.getQuery()); if(Setting.isTagging()) write_ResourcePropertyInformation(q.getResource(),q.getPropertyName(),q.getProperty()); //printSingleQuery(q.getQuery(),Question); @@ -400,8 +400,8 @@ */ if(Question.toLowerCase().contains("which")) go_on=false; if(Question.toLowerCase().contains("who")) go_on=false; - - + + boolean contains_uri=false; for(String s : answer_tmp){ if(s.contains("http")){ @@ -413,8 +413,8 @@ for(String answer:answer_tmp){ System.out.println(answer); }*/ - - + + for(String s : answer_tmp){ if(checkAnswer(s)){ boolean double_result = false; @@ -437,7 +437,7 @@ //if(checkAnswer(answer_tmp))answers.addAll(answer_tmp); } } - + else if(q.getRank()>Setting.getThresholdAsk()&go_on &q.getQuery().contains("ASK")){ ArrayList<String> answer_tmp = new ArrayList<String>(); answer_tmp=ServerUtil.requestAnswerFromServer(q.getQuery()); @@ -455,8 +455,8 @@ go_on=true; } } - - + + boolean contains_uri=false; for(String s : answer_tmp){ if(s.contains("http")){ @@ -464,8 +464,8 @@ break; } } - - + + for(String s : answer_tmp){ if(checkAnswer(s)){ boolean double_result = false; @@ -501,12 +501,12 @@ for(String answer:answers){ System.out.println(answer); } - + return answers; } - - - + + + private static ArrayList<String> filterAnswer(ArrayList<String> answers, String Question){ if(Question.toLowerCase().contains("who")){ boolean contains_only_uri=true; @@ -521,29 +521,29 @@ new_answer.add(s); } } - + return new_answer; } else{ return answers; } } - - + + return answers; } private static boolean checkAnswer(String answer){ if(answer.contains("File:")||answer.contains(".png")||answer.contains("upload.wikimedia.org")||answer.contains("dbpedia.org/datatype/")||answer.contains("http://www.w3.org/2001/XMLSchema")||answer.contains("flickerwrappr/photos/")) return false; else return true; - + } - + private static boolean checkQuery(String query){ if(query.contains("wikiPageWiki")||query.contains("wikiPageExternal")||query.contains("wikiPageRedirects")|| query.contains("thumbnail")||query.contains("wikiPage")) return false; else return true; - + } - + private static void printQueries(ArrayList<QueryPair> qp, String type, String Question){ /*String dateiname="/home/swalter/Dokumente/Auswertung/CreatedQuery"+Setting.getLevenstheinMin()+".txt"; String result_string =""; @@ -558,7 +558,7 @@ catch (IOException e) { System.err.println("Error: " + e); } - + File file = new File(dateiname); BufferedWriter bw = null; try { @@ -571,18 +571,18 @@ querylist="\n Modul: "+type+"\nfor Question: "+ Question+"\n"; int anzahl= 0; /* - * write only the first 10 queries: - */ - /* for(QueryPair q : qp){ + * write only the first 10 queries: + */ + /* for(QueryPair q : qp){ if(anzahl<10){ querylist+=q.getQuery()+" "+q.getRank()+"\n"; anzahl+=1; } - + } - - + + try { bw.write(result_string+querylist); } catch (IOException e) { @@ -602,9 +602,9 @@ e.printStackTrace(); }*/ } - - - + + + private static void printSingleQuery(String query,String Question){ /*String dateiname="/home/swalter/Dokumente/Auswertung/WorkingQuery"+Setting.getLevenstheinMin()+".txt"; String result_string =""; @@ -619,7 +619,7 @@ catch (IOException e) { System.err.println("Error: " + e); } - + File file = new File(dateiname); BufferedWriter bw = null; try { @@ -628,8 +628,8 @@ // TODO Auto-generated catch block e2.printStackTrace(); } - + try { bw.write(result_string+Question+" "+query+"\n"); } catch (IOException e) { @@ -649,11 +649,11 @@ e.printStackTrace(); }*/ } - - - + + + private static ArrayList<String> stufe5(SQLiteIndex myindex, WordNet wordnet,StanfordLemmatizer lemmatiser, boolean wait,ArrayList<Template> template_list){ - + ArrayList<Template> new_template_list=new ArrayList<Template>(); ArrayList<String> answers=new ArrayList<String>(); /* @@ -668,7 +668,7 @@ if(condition.get(1).toLowerCase().equals("isa")) go_on=true; System.out.println("go_on:"+go_on); if(go_on){ - + String resource_variable=condition.get(0); String class_variable=condition.get(2); Hypothesis resource_h = null; @@ -686,11 +686,11 @@ class_h=h; } } - + } System.out.println("go_on_resource:"+go_on_resource); if(go_on_resource){ - + /* * manipulate Class variable to make a property from it */ @@ -703,82 +703,82 @@ small_h_list.add(resource_h); small_h_list.add(class_h); new_hypothesen_list.add(small_h_list); - + ArrayList<String> condition_new = new ArrayList<String>(); condition_new.add("?x"); condition_new.add("?y"); condition_new.add("?z"); - + ArrayList<ArrayList<String>> new_c_list = new ArrayList<ArrayList<String>>(); new_c_list.add(condition_new); - + Template new_Template = new Template(new_c_list, t.getQueryType(), "","" , "?z", "", "", t.getQuestion()); - + new_Template.setHypothesen(new_hypothesen_list); Elements elm = new Elements(new_Template.getCondition(),new_Template.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - new_Template.setElm(elm); - new_template_list.add(new_Template); - } - - Template template_reverse_conditions = new Template(new_Template.getCondition(),new_Template.getQueryType(), new_Template.getHaving(), new_Template.getFilter(), new_Template.getSelectTerm(), new_Template.getOrderBy(), new_Template.getLimit(), new_Template.getQuestion()); - template_reverse_conditions.setHypothesen(new_hypothesen_list); - - ArrayList<ArrayList<String>> condition_template_reverse_conditions = template_reverse_conditions.getCondition(); - ArrayList<ArrayList<String>> condition_reverse_new= new ArrayList<ArrayList<String>>(); + if(elm.isElementEmty()==false){ + //elm.printAll(); + new_Template.setElm(elm); + new_template_list.add(new_Template); + } + Template template_reverse_conditions = new Template(new_Template.getCondition(),new_Template.getQueryType(), new_Template.getHaving(), new_Template.getFilter(), new_Template.getSelectTerm(), new_Template.getOrderBy(), new_Template.getLimit(), new_Template.getQuestion()); + template_reverse_conditions.setHypothesen(new_hypothesen_list); - for (ArrayList<String> x : condition_template_reverse_conditions){ - ArrayList<String> new_list = new ArrayList<String>(); - new_list.add(x.get(2)); - new_list.add(x.get(1)); - new_list.add(x.get(0)); - condition_reverse_new.add(new_list); - } - - - template_reverse_conditions.setCondition(condition_reverse_new); - - Elements elm_reverse = new Elements(template_reverse_conditions.getCondition(),template_reverse_conditions.getHypothesen()); - if(elm_reverse.isElementEmty()==false){ - //elm.printAll(); - template_reverse_conditions.setElm(elm_reverse); - new_template_list.add(template_reverse_conditions); - } - - - + ArrayList<ArrayList<String>> condition_template_reverse_conditions = template_reverse_conditions.getCondition(); + ArrayList<ArrayList<String>> condition_reverse_new= new ArrayList<ArrayList<String>>(); + + + for (ArrayList<String> x : condition_template_reverse_conditions){ + ArrayList<String> new_list = new ArrayList<String>(); + new_list.add(x.get(2)); + new_list.add(x.get(1)); + new_list.add(x.get(0)); + condition_reverse_new.add(new_list); + } + + + template_reverse_conditions.setCondition(condition_reverse_new); + + Elements elm_reverse = new Elements(template_reverse_conditions.getCondition(),template_reverse_conditions.getHypothesen()); + if(elm_reverse.isElementEmty()==false){ + //elm.printAll(); + template_reverse_conditions.setElm(elm_reverse); + new_template_list.add(template_reverse_conditions); + } + + + } - - - - + + + + } } - - - - + + + + /* * only if condition.size==2 */ if(t.getCondition().size()==2){ System.out.println("Yeah, found two Conditions!"); - + /* * now look if one have the [isa][resource] or [resource][isa] case */ ArrayList<String> condition1=new ArrayList<String>(); ArrayList<String> condition2=new ArrayList<String>(); - + condition1=t.getCondition().get(0); condition2=t.getCondition().get(1); System.out.println("condition1:"+condition1); System.out.println("condition2:"+condition2); - + boolean go_on=false; - + if(condition1.get(1).toLowerCase().contains("isa")&&!condition2.get(1).toLowerCase().contains("isa")){ String resource1_variable=condition2.get(0); String resource2_variable=condition2.get(2); @@ -788,17 +788,17 @@ if(h.getType().toLowerCase().contains("resource")) go_on=true; } } - + } - + /*if(condition2.get(0).contains("resource/")||condition2.get(2).contains("resource/")){ go_on=true; } else go_on=false;*/ } - + else if(condition2.get(1).toLowerCase().contains("isa")){ - + String resource1_variable=condition1.get(0); String resource2_variable=condition1.get(2); for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ @@ -807,10 +807,10 @@ if(h.getType().toLowerCase().contains("resource")) go_on=true; } } - + } - - + + /* * in the conditions there is for sure no resource!!! */ @@ -820,28 +820,28 @@ else go_on=false;*/ } else go_on=false; - - + + System.out.println("Go_on:"+go_on); if(go_on==true){ - + /* * use now only the conditions WITHOUT the class */ ArrayList<ArrayList<Hypothesis>> new_hypothesen_list = new ArrayList<ArrayList<Hypothesis>>(); - + String resource_variable=null; for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ ArrayList<Hypothesis> t_h_l = new ArrayList<Hypothesis>(); - + for(Hypothesis h : h_l){ if(!h.getType().toLowerCase().contains("isa"))t_h_l.add(h); if(h.getType().toLowerCase().contains("resource"))resource_variable=h.getVariable(); } - + if(t_h_l.size()>0) new_hypothesen_list.add(t_h_l); } - + /* * New HypothesenList */ @@ -855,12 +855,12 @@ ArrayList<String> new_condition= new ArrayList<String>(); if(!condition1.get(1).toLowerCase().contains("isa")) new_condition=condition1; else new_condition=condition2; - + String new_SelectTerm=null; - + if(new_condition.get(0).contains(resource_variable)) new_SelectTerm=new_condition.get(2); else new_SelectTerm=new_condition.get(0); - + ArrayList<ArrayList<String>> new_c_list = new ArrayList<ArrayList<String>>(); new_c_list.add(new_condition); /* @@ -874,19 +874,19 @@ new_Template.setElm(t.getElm()); new_template_list.add(new_Template); //new_Template.printAll(); - + } - - + + } - + if(t.getCondition().size()>=30){ ArrayList<ArrayList<Hypothesis>> new_hypothesen_list = new ArrayList<ArrayList<Hypothesis>>(); for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ /* * if greater 2, than it means, there are at least 3 propertys/resources or whatever */ - + /* * Resource ?x * Property ?y @@ -940,56 +940,56 @@ list_two.add(h_r); list_two.add(h_p2); new_hypothesen_list.add(list_two); - + } } } - + ArrayList<ArrayList<String>> condition_new=new ArrayList<ArrayList<String>>(); ArrayList<String> con = new ArrayList<String>(); con.add("?x"); con.add("?y"); con.add("?z"); condition_new.add(con); - + ArrayList<ArrayList<String>> condition_new_r=new ArrayList<ArrayList<String>>(); ArrayList<String> con_r = new ArrayList<String>(); con_r.add("?z"); con_r.add("?y"); con_r.add("?x"); condition_new_r.add(con_r); - - - + + + Template template_new = new Template(condition_new,"SELECT", t.getHaving(), t.getFilter(), "?z", t.getOrderBy(), t.getLimit(), t.getQuestion()); template_new.setHypothesen(new_hypothesen_list); template_new.setElm(t.getElm()); - + Template template_new_r = new Template(condition_new_r,"SELECT", t.getHaving(), t.getFilter(), "?z", t.getOrderBy(), t.getLimit(), t.getQuestion()); template_new_r.setHypothesen(new_hypothesen_list); template_new_r.setElm(t.getElm()); - + Elements elm = new Elements(template_new.getCondition(),template_new.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - template_new.setElm(elm); - new_template_list.add(template_new); - } - - Elements elm_r = new Elements(template_new.getCondition(),template_new.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - template_new_r.setElm(elm_r); - new_template_list.add(template_new_r); - } - - - + if(elm.isElementEmty()==false){ + //elm.printAll(); + template_new.setElm(elm); + new_template_list.add(template_new); + } + + Elements elm_r = new Elements(template_new.getCondition(),template_new.getHypothesen()); + if(elm.isElementEmty()==false){ + //elm.printAll(); + template_new_r.setElm(elm_r); + new_template_list.add(template_new_r); + } + + + //new_template_list.add(template_new); //new_template_list.add(template_new_r); } } - + /* * if there are new templates, start rescursive call; */ @@ -1003,18 +1003,18 @@ return answers; } } - + return answers; } - - + + private static String do_something(int number) throws SQLException, JWNLException, IOException{ //String str_number=Thread.currentThread().getName(); //System.out.println("ThreadName: "+str_number); //int number= Integer.parseInt(str_number); ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); - - + + for(ArrayList<Hypothesis> l_h : global_template_list.get(number).getHypothesen()){ ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); generated_hypothesis= IterationModule.new_iteration(global_template_list.get(number).getElm(),l_h,global_template_list.get(number).getCondition(),type_global,myindex_global,wordnet_global,lemmatiser_global); @@ -1025,61 +1025,40 @@ } hypothesenSetList.add(new_hypothesen_set); } - + //hypothesenSetList.addAll(blub); } if(type_global.contains("WORDNET"))global_template_list.get(number).setHypothesenWordnet(hypothesenSetList); if(type_global.contains("LEVENSTHEIN"))global_template_list.get(number).setHypothesenLevensthein(hypothesenSetList); if(type_global.contains("RELATE"))global_template_list.get(number).setHypothesenRelate(hypothesenSetList); return "DONE"; - + } - + private static void write_ResourcePropertyInformation(String Resource, String PropertyName, String Property){ String dateiname="/home/swalter/Dokumente/Auswertung/ResourcePropertyRelation.txt"; String result_string =""; //Open the file for reading - try { - BufferedReader br = new BufferedReader(new FileReader(dateiname)); - String thisLine; - while ((thisLine = br.readLine()) != null) { // while loop begins here - result_string+=thisLine+"\n"; - } // end while - } // end try - catch (IOException e) { - System.err.println("Error: " + e); - } - - - - File file = new File(dateiname); - BufferedWriter bw = null; try { - bw = new BufferedWriter(new FileWriter(file)); - } catch (IOException e2) { - // TODO Auto-generated catch block - e2.printStackTrace(); + BufferedReader br = new BufferedReader(new FileReader(dateiname)); + String thisLine; + while ((thisLine = br.readLine()) != null) { // while loop begins here + result_string+=thisLine+"\n"; + } // end while + } // end try + catch (IOException e) { + System.err.println("Error: " + e); } - - - try { - bw.write(result_string+Resource+"::"+PropertyName+"::"+Property+"\n"); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - bw.flush(); - } catch (IOException e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); - } - try { - bw.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + File file = new File(dateiname); + BufferedWriter bw = null; + try + { + bw = new BufferedWriter(new FileWriter(file)); + bw.write(result_string+Resource+"::"+PropertyName+"::"+Property+"\n"); + bw.flush(); + bw.close(); + } + catch (IOException e) {e.printStackTrace();} } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -58,7 +58,7 @@ private static boolean unify(LexicalSelection a, LexicalSelection b) { if (a == null && b == null) { return true; - } else if (a.equals(b)) { + } else if (a!=null&&a.equals(b)) { return true; } return false; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -1,12 +1,14 @@ package org.dllearner.algorithm.tbsl.learning; -import static org.junit.Assert.assertTrue; -import org.ini4j.Options; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; import java.net.MalformedURLException; -import java.net.URL; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; @@ -15,19 +17,15 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import opennlp.tools.postag.POSTagger; +import net.sf.oval.constraint.AssertTrue; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -41,17 +39,14 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.VirtuosoClassesIndex; -import org.dllearner.common.index.VirtuosoPropertiesIndex; -import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.ini4j.Options; import org.junit.Before; import org.junit.Test; -import org.openjena.atlas.logging.Log; +import static org.junit.Assert.*; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -68,6 +63,7 @@ * only contains the questions where the reference query does not return a nonempty list of resources. * This could be questions which return literals, ask queries, queries which have no results in the DBpedia endpoint * and queries that cause errors. This updated test file contains the reference answers as well and is only created once. + * The answers in the updated query could be out of date as well, so if the answers don't match they are newly queried from the reference query. * Because there are multiple queries that are not all valid at first, further test runs are compared against the first run. * The updated test data and the test runs are saved in the cache folder in the same format as the original test data * (an xml with the tags question, query and answer). @@ -78,20 +74,42 @@ * **/ public class SPARQLTemplateBasedLearner3Test { - @Test public void testDBpedia() throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + @Test public void testDBpedia() throws Exception {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} //@Test public void testOxford() {test(new File(""),"");} - public void test(File file, String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); - File updatedFile=new File(dir+"updated_"+file.getName()); - if(!updatedFile.exists()) {generateUpdatedFile(file,updatedFile,endpoint);} + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) {generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint);} - QueryTestData savedTestData = readQueries(updatedFile); - QueryTestData newTestData = generateTestData(savedTestData.id2Question); - Diff QueryTestDataDiff = diffTestData(savedTestData,newTestData); + logger.debug("Reading updated reference test data"); + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question); + + logger.info("Comparing updated reference test data a with learned test data b:"); + Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); + logger.info(queryTestDataDiff); + + logger.info("Comparing learned test data with old learned test data"); + + try{ + QueryTestData oldLearnedTestData = QueryTestData.read(); + Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); + logger.info(queryTestDataDiff); +// assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, +// queryTestDataDiff2.aMinusB.isEmpty()); + assertFalse("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, + queryTestDataDiff2.differentAnswers.isEmpty()); + + } + catch(IOException e) + { + logger.info("Old test data not loadable, creating it and exiting."); + learnedTestData.write(); + } } /** @@ -99,32 +117,47 @@ * @param newTestData * @return */ - private Diff diffTestData(QueryTestData d, QueryTestData e) + private static Diff diffTestData(QueryTestData a, QueryTestData b) { -// if(d.id2Question.size()!=e.id2Question.size()) - {logger.info("comparing test data D against E. number of questions: "+d.id2Question.size()+" vs "+e.id2Question.size());} - - Set<Integer> dMinusE = new HashSet<Integer>(d.id2Question.keySet()); - dMinusE.removeAll(e.id2Question.keySet()); - if(!dMinusE.isEmpty()) logger.info("questions D/E: "+dMinusE+" ("+dMinusE.size()+" elements)"); - - Set<Integer> eMinusD = new HashSet<Integer>(e.id2Question.keySet()); - eMinusD.removeAll(d.id2Question.keySet()); - if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); - - Set<Integer> intersection = new HashSet<Integer>(d.id2Question.keySet()); - intersection.retainAll(e.id2Question.keySet()); - - if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); - - + // if(d.id2Question.size()!=e.id2Question.size()) + {logger.info("comparing test data a against b. number of questions: "+a.id2Question.size()+" vs "+b.id2Question.size());} + Diff diff = new Diff(); + diff.aMinusB.addAll(a.id2Question.keySet()); + diff.aMinusB.removeAll(b.id2Question.keySet()); + + diff.bMinusA.addAll(b.id2Question.keySet()); + diff.bMinusA.removeAll(a.id2Question.keySet()); + + diff.intersection.addAll(a.id2Question.keySet()); + diff.intersection.retainAll(b.id2Question.keySet()); + + for(int i: diff.intersection) + { + if(a.id2Answers.containsKey(i)&&!a.id2Answers.get(i).equals(b.id2Answers.get(i))) {diff.differentAnswers.add(i);} + } + // if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); + + // TODO Auto-generated method stub - return null; + return diff; } - private class Diff + public static class Diff { + final Set<Integer> aMinusB = new HashSet<Integer>(); + final Set<Integer> bMinusA = new HashSet<Integer>(); + final Set<Integer> intersection = new HashSet<Integer>(); + final Set<Integer> differentAnswers = new HashSet<Integer>(); + @Override public String toString() + { + StringBuilder sb = new StringBuilder(); + if(!aMinusB.isEmpty()) sb.append("questions a/b: "+aMinusB+" ("+aMinusB.size()+" elements)\n"); + if(!bMinusA.isEmpty()) sb.append("questions b/a: "+bMinusA+" ("+bMinusA.size()+" elements)\n"); + if(!intersection.isEmpty()) sb.append("questions intersection: "+intersection+" ("+intersection.size()+" elements)\n"); + if(!differentAnswers.isEmpty()) sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n"); + return sb.substring(0, sb.length()-2); // remove last \n + } } /** @@ -148,7 +181,7 @@ dbpediaLiveLearner.init(); dbpediaLiveLearner.setQuestion(question); - + try{dbpediaLiveLearner.learnSPARQLQueries();} catch(NoTemplateFoundException e) {continue;} catch(Exception e) {logger.error("Error processing question "+question,e);continue;} @@ -156,7 +189,9 @@ testData.id2Question.put(i, question); String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); testData.id2Query.put(i, learnedQuery); - + // generate answers + // getUris(endpoint, learnedQuery); + long end = System.currentTimeMillis(); logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); @@ -174,7 +209,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedFile(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + private void generateUpdatedXML(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -262,55 +297,55 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; - private static Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; - private ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); - private ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + private final ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); - private Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); - - static SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); + + static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} -// private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} -// @Test public void benchmarkCreateOxfordKnowledgeBase() -// { -// long start = System.currentTimeMillis(); -// for(int i=0;i<1000;i++) -// { -// createOxfordKnowledgebase(oxfordCache); -// } -// long end = System.currentTimeMillis(); -// long diff = end-start; -// System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); -// } + // @Test public void benchmarkCreateOxfordKnowledgeBase() + // { + // long start = System.currentTimeMillis(); + // for(int i=0;i<1000;i++) + // { + // createOxfordKnowledgebase(oxfordCache); + // } + // long end = System.currentTimeMillis(); + // long diff = end-start; + // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); + // } -// private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) -// { -// URL url; -// try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} -// SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); -// -// SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); -// SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); -// SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); -// MappingBasedIndex mappingIndex= new MappingBasedIndex( -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() -// ); -// -// Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); -// return kb; -// } + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // + // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); + // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + // return kb; + // } private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { @@ -344,11 +379,35 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - private class QueryTestData + private static class QueryTestData implements Serializable { public SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + + private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); + + public void write() + { + try + { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(persistancePath))); + oos.writeObject(this); + oos.close(); + } catch(IOException e) {throw new RuntimeException(e);} + } + + public static QueryTestData read() throws FileNotFoundException, IOException + { + try + { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(persistancePath))); + QueryTestData testData = (QueryTestData) ois.readObject(); + ois.close(); + return testData; + } + catch (ClassNotFoundException e){throw new RuntimeException(e);} + } } private QueryTestData readQueries(final File file) @@ -467,69 +526,69 @@ return uris; } - private class TestQueryThread implements Runnable - { - private String question; - private String referenceQuery; + // private class TestQueryThread implements Runnable + // { + // private String question; + // private String referenceQuery; + // + // public TestQueryThread(String question, String referenceQuery) + // { + // this.question=question; + // this.referenceQuery=referenceQuery; + // } + // // String referenceQuery = id2Query.get(i); + // // String question = id2Question.get(i); + // @Override public void run() + // { + // + // logger.trace("question: "+question); + // + // // TODO: check for query isomorphism and leave out result comparison if possible + // // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) + // // get the answers for the gold standard query + // logger.trace("reference query: "+referenceQuery); + // + // try + // { + // Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); + // + // // learn query + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + // dbpediaLiveLearner.init(); + // dbpediaLiveLearner.setQuestion(question); + // dbpediaLiveLearner.learnSPARQLQueries(); + // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + // + // logger.trace(learnedQuery); + // + // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); + // + // logger.trace("referenced uris: "+referenceURIs); + // logger.trace("learned uris: "+learnedURIs); + // + // boolean correctMatch = referenceURIs.equals(learnedURIs); + // logger.trace(correctMatch?"matches":"doesn't match"); + //// if(correctMatch) {synchronized(this) {correctMatches++;}} + // } + // catch(NoTemplateFoundException e) + // { + // synchronized(this) {numberOfNoTemplateFoundExceptions++;} + // logger.warn(String.format("no template found for question \"%s\"",question)); + // } + // catch(Exception e) + // { + // synchronized(this) {numberOfOtherExceptions++;} + // logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + // e.printStackTrace(); + // // maybe the exception has corrupted the learner? better create a new one + // // + // } + // // get the answers for the learned query + // // compare gold standard query and learned query answers + // } + // + // } - public TestQueryThread(String question, String referenceQuery) - { - this.question=question; - this.referenceQuery=referenceQuery; - } - // String referenceQuery = id2Query.get(i); - // String question = id2Question.get(i); - @Override public void run() - { - - logger.trace("question: "+question); - - // TODO: check for query isomorphism and leave out result comparison if possible - // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) - // get the answers for the gold standard query - logger.trace("reference query: "+referenceQuery); - - try - { - Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); - - // learn query - SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - dbpediaLiveLearner.init(); - dbpediaLiveLearner.setQuestion(question); - dbpediaLiveLearner.learnSPARQLQueries(); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - - logger.trace(learnedQuery); - - Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); - - logger.trace("referenced uris: "+referenceURIs); - logger.trace("learned uris: "+learnedURIs); - - boolean correctMatch = referenceURIs.equals(learnedURIs); - logger.trace(correctMatch?"matches":"doesn't match"); - if(correctMatch) {synchronized(this) {correctMatches++;}} - } - catch(NoTemplateFoundException e) - { - synchronized(this) {numberOfNoTemplateFoundExceptions++;} - logger.warn(String.format("no template found for question \"%s\"",question)); - } - catch(Exception e) - { - synchronized(this) {numberOfOtherExceptions++;} - logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - e.printStackTrace(); - // maybe the exception has corrupted the learner? better create a new one - // - } - // get the answers for the learned query - // compare gold standard query and learned query answers - } - - } - private void updateFile(File originalFile, File updatedFile, String endpoint) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |