From: <ku...@us...> - 2008-05-17 01:46:53
|
Revision: 888 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=888&view=rev Author: kurzum Date: 2008-05-16 18:46:45 -0700 (Fri, 16 May 2008) Log Message: ----------- Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-05-16 21:16:09 UTC (rev 887) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-05-17 01:46:45 UTC (rev 888) @@ -1072,9 +1072,12 @@ //HACK public SortedSet<String> getBestSolutionsAsKBSyntax(int nrOfSolutions){ + if(nrOfSolutions==0)nrOfSolutions=99999; SortedSet<String> result = new TreeSet<String>(); for (Description d : solutions) { + result.add(d.toKBSyntaxString()); + if(result.size()==nrOfSolutions)return result; //if(result.size()==nrOfSolutions)break; } /*if(result.size()<nrOfSolutions){ Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-16 21:16:09 UTC (rev 887) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQuery.java 2008-05-17 01:46:45 UTC (rev 888) @@ -95,10 +95,10 @@ logger.trace(rs.getResultVars().toString()); } catch (Exception e){ sendException=new SparqlQueryException(e.getMessage()); - logger.error(e.getMessage()); - e.printStackTrace(); - logger.error("Exception when querying Sparql Endpoint in " + this.getClass()); - logger.error(queryString); + logger.debug(e.getMessage()); + //e.printStackTrace(); + logger.debug("Exception when querying Sparql Endpoint in " + this.getClass()); + logger.debug(queryString); } isRunning = false; return rs; Added: trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java 2008-05-17 01:46:45 UTC (rev 888) @@ -0,0 +1,67 @@ +package org.dllearner.scripts; + +import java.util.SortedSet; + +public class ResultCompare implements Comparable { + String concept; + SortedSet<String> instances; + double accuracy; + double accuracy2; + int nrOfInstances; + SortedSet<String> coveredInRest; + SortedSet<String> possibleNewCandidates; + SortedSet<String> notCoveredInTotal; + + + public ResultCompare(String concept, SortedSet<String> instances, double accuracy, + double accuracy2, int nrOfInstances, SortedSet<String> coveredInRest, + SortedSet<String> possibleNewCandidates, SortedSet<String> notCoveredInTotal) { + super(); + this.concept = concept; + this.instances = instances; + this.accuracy = accuracy; + this.accuracy2 = accuracy2; + this.nrOfInstances = nrOfInstances; + this.coveredInRest = coveredInRest; + this.possibleNewCandidates = possibleNewCandidates; + this.notCoveredInTotal = notCoveredInTotal; + } + + + + + public int compareTo(Object in) { + ResultCompare obj =(ResultCompare) in; + if(obj.accuracy > this.accuracy) return 1; + else if(obj.accuracy == this.accuracy){ + + if(obj.nrOfInstances<this.nrOfInstances)return 1; + else if(obj.nrOfInstances>this.nrOfInstances)return -1; + else return 1; + //if(obj.nrOfInstances==this.nrOfInstances)return 0; + } + else {//if(obj.accuracy < this.accuracy){ + return -1; + } + + } + + + + + public String toString(){ + String ret=""; + ret+="concept\t"+concept+"\n"; + ret+="instances\t"+instances+"\n"; + ret+="accuracy\t"+accuracy+"\n"; + ret+="nrOfInstances\t"+nrOfInstances+"\n"; + ret+="accuracy2\t"+accuracy2+"\n"; + ret+="coveredInRest("+coveredInRest.size()+")\t"+coveredInRest+"\n"; + ret+="possibleNewCandidates("+possibleNewCandidates.size()+")\t"+possibleNewCandidates+"\n"; + ret+="notCoveredInTotal("+notCoveredInTotal.size()+")\t"+notCoveredInTotal+"\n"; + + return ret; + + } + +} Modified: trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-16 21:16:09 UTC (rev 887) +++ trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-17 01:46:45 UTC (rev 888) @@ -1,5 +1,6 @@ package org.dllearner.scripts; +import java.util.LinkedList; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; @@ -10,9 +11,11 @@ import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.Union; import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; import org.dllearner.utilities.AutomaticExampleFinderSKOSSPARQL; import org.dllearner.utilities.JenaResultSetConvenience; @@ -67,25 +70,57 @@ static int poslimit = 10; static int neglimit = 20; + static int recursiondepth=1; + static boolean closeAfterRecursion=true; + static boolean randomizeCache=false; + + static int resultsize=50; + static double noise=10; + static int limit=200; + static double percentage=0.7; + /** * @param args */ public static void main(String[] args) { init(); //logger.setLevel(Level.TRACE); - Logger.getLogger(KnowledgeSource.class).setLevel(Level.INFO); + Logger.getLogger(KnowledgeSource.class).setLevel(Level.WARN); //System.out.println(Logger.getLogger(SparqlQuery.class).getLevel()); SimpleClock sc=new SimpleClock(); - standardSettings=standardSettingsRefexamples+standardDBpedia; - //standardSettings=standardSettingsRefinement+standardDBpedia; se = SparqlEndpoint.EndpointLOCALDBpedia(); - String t="\"http://dbpedia.org/class/yago/Fiction106367107\""; - t="\"http://www.w3.org/2004/02/skos/core#subject\""; - getSubClasses(t); +// String t="\"http://dbpedia.org/class/yago/Fiction106367107\""; +// t="(\"http://dbpedia.org/class/yago/HeadOfState110164747\" AND (\"http://dbpedia.org/class/yago/Negotiator110351874\" AND \"http://dbpedia.org/class/yago/Representative110522035\"))"; +// //System.out.println(t); +// //t="\"http://www.w3.org/2004/02/skos/core#subject\""; +// //conceptRewrite(t); +// //getSubClasses(t); +// +// AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); +// try{ +// System.out.println("oneconcept: "+t); +// SortedSet<String> instances = ae.queryConceptAsStringSet(conceptRewrite(t), 200); +// if(instances.size()>=0)System.out.println("size of instances "+instances.size()); +// if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); +// }catch (Exception e) { +// e.printStackTrace(); +// } + SortedSet<String> concepts = new TreeSet<String>(); - //DBpediaSKOS(); + concepts.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"); + concepts.add("http://dbpedia.org/resource/Category:German_women_in_politics"); + concepts.add("http://dbpedia.org/resource/Category:Best_Actor_Academy_Award_winners"); + + DBpediaSKOS(concepts.first()); + DBpediaSKOS(concepts.first()); + concepts.remove(concepts.first()); + DBpediaSKOS(concepts.first()); + DBpediaSKOS(concepts.first()); + concepts.remove(concepts.first()); + DBpediaSKOS(concepts.first()); + DBpediaSKOS(concepts.first()); //algorithm="refinement"; //roles(); @@ -102,15 +137,12 @@ - static void DBpediaSKOS(){ + static void DBpediaSKOS(String concept){ se = SparqlEndpoint.EndpointLOCALDBpedia(); //se = SparqlEndpoint.EndpointDBpedia(); String url = "http://dbpedia.openlinksw.com:8890/sparql"; url = "http://139.18.2.37:8890/sparql"; - SortedSet<String> concepts = new TreeSet<String>(); - - concepts.add("http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"); //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); @@ -121,15 +153,11 @@ //HashMap<String, String> result2 = new HashMap<String, String>(); //System.out.println(concepts.first()); //logger.setLevel(Level.TRACE); - String concept=concepts.first(); + AutomaticExampleFinderSKOSSPARQL ae= new AutomaticExampleFinderSKOSSPARQL( se); - useRelated = false; - useParallelClasses = true; - int recursiondepth=1; - boolean closeAfterRecursion=true; - boolean randomizeCache=false; - ae.initDBpediaSKOS(concept, 0.1, useRelated, useParallelClasses); + + ae.initDBpediaSKOS(concept,percentage , useRelated, useParallelClasses); posExamples = ae.getPosExamples(); negExamples = ae.getNegExamples(); @@ -142,25 +170,93 @@ } SortedSet<String> totalSKOSset= ae.totalSKOSset; SortedSet<String> rest= ae.rest; - + logger.debug(totalSKOSset); + logger.debug(rest); LearnSparql ls = new LearnSparql(); //igno.add(oneConcept.replaceAll("\"", "")); - SortedSet<String> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache); - System.out.println(conceptresults); - System.out.println(conceptresults.size()); - for (String string : conceptresults) { - System.out.println(string); - SortedSet<String> instances = ae.queryConceptAsStringSet(string, 0); - if(instances.size()>=0)System.out.println("size "+instances.size()); - if(instances.size()>=0 && instances.size()>0) System.out.println(instances); + SortedSet<String> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache,resultsize,noise); + //System.out.println("concepts"+conceptresults); + logger.debug("found nr of concepts:"+conceptresults.size()); + SortedSet<ResultCompare> res=new TreeSet<ResultCompare>(); + for (String oneConcept : conceptresults) { + try{ + System.out.println("oneconcept: "+oneConcept); + SortedSet<String> instances = ae.queryConceptAsStringSet(conceptRewrite(oneConcept), 200); + SortedSet<String> coveredInRest = new TreeSet<String>(); + SortedSet<String> possibleNewCandidates = new TreeSet<String>(); + SortedSet<String> notCoveredInTotal = new TreeSet<String>(); + + int i=0; + int a=0; + for (String oneinst : instances) { + boolean inRest=false; + boolean inTotal=false; + for (String onerest : rest) { + if(onerest.equalsIgnoreCase(oneinst)) + { i++; inRest=true; break;} + + } + if (inRest){coveredInRest.add(oneinst);}; + + for (String onetotal : totalSKOSset) { + if(onetotal.equalsIgnoreCase(oneinst)) + { a++; inTotal=true; break;} + } + if(!inRest && !inTotal){ + possibleNewCandidates.add(oneinst); + } + } + + for (String onetotal : totalSKOSset) { + boolean mm=false; + for (String oneinst : instances) { + if(onetotal.equalsIgnoreCase(oneinst)){ + mm=true;break; + } + + } + if(!mm)notCoveredInTotal.add(onetotal); + + } + + + + double accuracy= (double)i/rest.size(); + double accuracy2= (double)a/totalSKOSset.size(); + + res.add(new ResultCompare(oneConcept,instances,accuracy,accuracy2,instances.size(), + coveredInRest,possibleNewCandidates,notCoveredInTotal)); + + //if(instances.size()>=0)System.out.println("size of instances "+instances.size()); + //if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); + }catch (Exception e) {} } +// System.out.println(res.last()); +// res.remove(res.last()); +// System.out.println(res.last()); +// res.remove(res.last()); +// System.out.println(res.last()); +// res.remove(res.last()); +// + //double percent=0.80*(double)res.size();; + + while (res.size()>0){ + logger.debug(res.first()); + res.remove(res.first()); + //if(res.size()<=percent)break; + + } + + + + //System.out.println("AAAAAAAA"); //System.exit(0); //"relearned concept: "; @@ -172,34 +268,6 @@ } - /*************************************************************************** - * *********************OLDCODE String - * conj="(\"http://dbpedia.org/class/yago/Person100007846\" AND - * \"http://dbpedia.org/class/yago/Head110162991\")"; - * - * - * concepts.add("EXISTS \"http://dbpedia.org/property/disambiguates\".TOP"); - * concepts.add("EXISTS - * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add("EXISTS \"http://dbpedia.org/property/successor\"."+conj); - * //concepts.add("ALL \"http://dbpedia.org/property/disambiguates\".TOP"); - * //concepts.add("ALL - * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add(conj); - * concepts.add("(\"http://dbpedia.org/class/yago/Person100007846\" OR - * \"http://dbpedia.org/class/yago/Head110162991\")"); - * - * //concepts.add("NOT \"http://dbpedia.org/class/yago/Person100007846\""); - * - * for (String kbsyntax : concepts) { - * result.put(kbsyntax,queryConcept(kbsyntax)); } - * System.out.println("************************"); for (String string : - * result.keySet()) { System.out.println("KBSyntayString: "+string); - * System.out.println("Query:\n"+result.get(string).hasNext()); - * System.out.println("************************"); } - **************************************************************************/ - @@ -238,10 +306,11 @@ /** - * NOT WORKING + * * @param description */ - public static SortedSet<String> getSubClasses(String description) { + public static SortedSet<String> getSubClasses(String description, int limit) { + if(limit==0)limit=10; ResultSet rs = null; //System.out.println(description); SortedSet<String> alreadyQueried = new TreeSet<String>(); @@ -249,38 +318,43 @@ String query = getSparqlSubclassQuery(description.replaceAll("\"", "")); String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); rs =SparqlQuery.JSONtoResultSet(JSON); - SortedSet<String> remainingClasses = new TreeSet<String>(); + LinkedList<String> remainingClasses = new LinkedList<String>(); - remainingClasses.addAll(getSubclassesFromResultSet(rs)); + //make back + //remainingClasses.addAll(getSubclassesFromResultSet(rs)); alreadyQueried = new TreeSet<String>(); alreadyQueried.add(description.replaceAll("\"", "")); - + alreadyQueried.addAll(getSubclassesFromResultSet(rs)); + //remainingClasses.addAll(alreadyQueried); + return alreadyQueried; //SortedSet<String> remainingClasses = new JenaResultSetConvenience(rs).getStringListForVariable("subject"); - while (remainingClasses.size()!=0){ - SortedSet<String> tmpSet = new TreeSet<String>(); - String tmp = remainingClasses.first(); - remainingClasses.remove(tmp); - query = SparqlQueryDescriptionConvertVisitor - .getSparqlSubclassQuery(tmp); - alreadyQueried.add(tmp); - JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - rs =SparqlQuery.JSONtoResultSet(JSON); - tmpSet=getSubclassesFromResultSet(rs); - for (String string : tmpSet) { - if(!alreadyQueried.contains(string)) - remainingClasses.add(string); - } - } +// while (remainingClasses.size()!=0){ +// SortedSet<String> tmpSet = new TreeSet<String>(); +// String tmp = remainingClasses.removeFirst(); +// //remainingClasses.remove(tmp); +// query = SparqlQueryDescriptionConvertVisitor +// .getSparqlSubclassQuery(tmp); +// alreadyQueried.add(tmp); +// if(alreadyQueried.size()==limit)break; +// JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); +// rs =SparqlQuery.JSONtoResultSet(JSON); +// tmpSet=getSubclassesFromResultSet(rs); +// for (String string : tmpSet) { +// if(!alreadyQueried.contains(string)) +// remainingClasses.add(string); +// } +// } //System.out.println(JSON); } catch (Exception e) { - e.printStackTrace(); + } - //System.out.println(alreadyQueried); + System.out.println("subclasses "+alreadyQueried); + System.out.println("nr of subclasses :"+alreadyQueried.size()); return alreadyQueried; } @@ -309,6 +383,44 @@ return ret; } + public static String conceptRewrite(String description) + { String quote = "\""; + String ret=""; + String currentconcept=""; + int lastPos=0; + SortedSet<String> subclasses=new TreeSet<String>(); + + while ((lastPos=description.lastIndexOf(quote))!=-1){ + ret=description.substring(lastPos+1,description.length())+ret; + description=description.substring(0,lastPos); + //System.out.println(description); + lastPos=description.lastIndexOf(quote); + currentconcept=description.substring(lastPos+1,description.length()); + description=description.substring(0,lastPos); + //replace + //currentconcept="\"blabla\""; + //System.out.println(currentconcept); + + + subclasses = getSubClasses( currentconcept, 0); + + if (subclasses.size()==1)currentconcept="\""+currentconcept+"\""; + else { + LinkedList<Description> nc = new LinkedList<Description>(); + for (String one : subclasses) { + nc.add(new NamedClass(one)); + } + currentconcept=new Union(nc).toKBSyntaxString(); + } + + ret=currentconcept+ret; + //ret+=description; + } + ret=description+ret; + //System.out.println(ret); + return ret; + } + } Modified: trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java 2008-05-16 21:16:09 UTC (rev 887) +++ trunk/src/dl-learner/org/dllearner/utilities/AutomaticExampleFinderSKOSSPARQL.java 2008-05-17 01:46:45 UTC (rev 888) @@ -1,5 +1,7 @@ package org.dllearner.utilities; +import java.net.URL; +import java.net.URLEncoder; import java.util.SortedSet; import java.util.TreeSet; @@ -8,6 +10,7 @@ import org.dllearner.kb.sparql.Cache; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.kb.sparql.SparqlQueryThreaded; import org.dllearner.kb.sparql.configuration.SparqlEndpoint; import com.hp.hpl.jena.query.ResultSet; @@ -27,7 +30,7 @@ public AutomaticExampleFinderSKOSSPARQL(SparqlEndpoint se){ - this.c=new Cache("cachetemp"); + this.c=new Cache("cacheExamplesValidation"); this.se=se; posExamples = new TreeSet<String>(); negExamples = new TreeSet<String>(); @@ -40,7 +43,7 @@ totalSKOSset.addAll(this.posExamples); rest.addAll(totalSKOSset); int poslimit=(int)Math.round(percent*totalSKOSset.size()); - int neglimit=2*poslimit; + int neglimit=(int)Math.round(1.4*poslimit); /*while (this.posExamples.size()>poslimit) { this.posExamples.remove(posExamples.last()); }*/ @@ -302,6 +305,7 @@ .getSparqlQuery(concept,limit); SparqlQuery sq = new SparqlQuery(query, se); + //System.out.println(query); String JSON = c.executeSparqlQuery(sq); //System.out.println("JSON:\n"+JSON); rs = SparqlQuery.JSONtoResultSet(JSON); @@ -345,7 +349,7 @@ " a " + "?subject " + "\n" + - "}"; + "} LIMIT 200"; SparqlQuery sq = new SparqlQuery(query, se); //System.out.println(query); String JSON = c.executeSparqlQuery(sq); Modified: trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java 2008-05-16 21:16:09 UTC (rev 887) +++ trunk/src/dl-learner/org/dllearner/utilities/LearnSparql.java 2008-05-17 01:46:45 UTC (rev 888) @@ -112,7 +112,7 @@ public SortedSet<String> learnDBpediaSKOS(SortedSet<String> posExamples,SortedSet<String> negExamples, String uri, SortedSet<String> ignoredConcepts, int recursiondepth, - boolean closeAfterRecursion, boolean randomizeCache){ + boolean closeAfterRecursion, boolean randomizeCache, int resultsize, double noise){ ComponentManager cm = ComponentManager.getInstance(); @@ -168,12 +168,12 @@ cm.applyConfigEntry(la,"useCardinalityRestrictions",false); cm.applyConfigEntry(la,"useNegation",false); cm.applyConfigEntry(la,"minExecutionTimeInSeconds",0); - cm.applyConfigEntry(la,"maxExecutionTimeInSeconds",50); - cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",15); + cm.applyConfigEntry(la,"maxExecutionTimeInSeconds",150); + cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",40); cm.applyConfigEntry(la,"writeSearchTree",true); cm.applyConfigEntry(la,"searchTreeFile","log/SKOS.txt"); cm.applyConfigEntry(la,"replaceSearchTree",true); - //cm.applyConfigEntry(la,"noisePercentage",0.15); + cm.applyConfigEntry(la,"noisePercentage",noise); //cm.applyConfigEntry(la,"guaranteeXgoodDescriptions",999999); @@ -189,7 +189,7 @@ sc.setTime(); la.start(); Statistics.addTimeLearning(sc.getTime()); - return la.getBestSolutionsAsKBSyntax(0); + return la.getBestSolutionsAsKBSyntax(resultsize); //if(sc.getTime()/1000 >= 20)System.out.println("XXXMAX time reached"); //System.out.println("best"+la(20)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |