From: <ku...@us...> - 2008-05-26 14:48:07
|
Revision: 929 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=929&view=rev Author: kurzum Date: 2008-05-26 07:48:00 -0700 (Mon, 26 May 2008) Log Message: ----------- intermediate Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertRDFS.java trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/utilities/datastructures/ResultConceptSorter.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SPARQLTasks.java 2008-05-26 14:48:00 UTC (rev 929) @@ -1,5 +1,6 @@ package org.dllearner.kb.sparql; +import java.util.LinkedList; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; @@ -12,10 +13,10 @@ public class SPARQLTasks { - //CHECK - @SuppressWarnings("unused") + //@SuppressWarnings("unused") + //LOGGER: SPARQLTasks private static Logger logger = Logger - .getLogger(SPARQLTasks.class); + .getLogger(SPARQLTasks.class); private Cache c; private SparqlEndpoint se; @@ -76,9 +77,95 @@ } - + /** + * gets a SortedSet of all subclasses QUALITY: maybe it is better to have a + * parameter int depth, to choose a depth of subclass interference + * + * @see conceptRewrite(String descriptionKBSyntax, SparqlEndpoint se, Cache + * c, boolean simple ) + * @param description + * @param se + * @param c + * @param simple + * @return + */ + public SortedSet<String> getSubClasses(String description, boolean simple) { + // ResultSet rs = null; + // System.out.println(description); + SortedSet<String> alreadyQueried = new TreeSet<String>(); + try { + + // initialisation get direct Subclasses + LinkedList<String> remainingClasses = new LinkedList<String>(); + + // collect remaining classes + remainingClasses.addAll(getDirectSubClasses(description.replaceAll("\"", ""))); + + // remainingClasses.addAll(alreadyQueried); + + // alreadyQueried = new TreeSet<String>(); + alreadyQueried.add(description.replaceAll("\"", "")); + + if (simple) { + alreadyQueried.addAll(remainingClasses); + return alreadyQueried; + } else { + + logger.warn("Retrieval auf all subclasses via SPARQL is cost intensive and might take a while"); + while (remainingClasses.size() != 0) { + SortedSet<String> tmpSet = new TreeSet<String>(); + String tmp = remainingClasses.removeFirst(); + alreadyQueried.add(tmp); + + tmpSet = getDirectSubClasses(tmp); + for (String string : tmpSet) { + if (!(alreadyQueried.contains(string))) { + remainingClasses.add(string); + }// if + }// for + }// while + }// else + + } catch (Exception e) { + + } + + return alreadyQueried; + } + /** + * QUALITY: workaround for a sparql glitch {?a owl:subclassOf ?b} returns an + * empty set on some entpoints. returns all direct subclasses of String + * concept + * + * @param concept + * @return SortedSet of direct subclasses as String + */ + private SortedSet<String> getDirectSubClasses(String concept) { + String SPARQLquery = "SELECT * \n"; + SPARQLquery += "WHERE {\n"; + SPARQLquery += " ?subject ?predicate <" + concept + "> \n"; + SPARQLquery += "}\n"; + + ResultSet rs = queryAsResultSet(SPARQLquery); + + SortedSet<String> subClasses = new TreeSet<String>(); + @SuppressWarnings("unchecked") + List<ResultBinding> l = ResultSetFormatter.toList(rs); + String p = "", s = ""; + for (ResultBinding resultBinding : l) { + + s = ((resultBinding.get("subject").toString())); + p = ((resultBinding.get("predicate").toString())); + if (p.equalsIgnoreCase("http://www.w3.org/2000/01/rdf-schema#subClassOf")) { + subClasses.add(s); + } + } + return subClasses; + } + + /** * QUALITY: buggy because role doesn't work sometimes * get subject with fixed role and object * @param role @@ -137,9 +224,26 @@ return queryAsSet(SPARQLquery, "subject"); } + /** + * get all instances for a concept including RDFS Reasoning + * @param conceptKBSyntax + * @param sparqlResultLimit + * @return + */ + public SortedSet<String> retrieveInstancesForConceptIncludingSubclasses (String conceptKBSyntax,int sparqlResultLimit) { + + String SPARQLquery = ""; + try{ + SPARQLquery = SparqlQueryDescriptionConvertVisitor + .getSparqlQueryIncludingSubclasses(conceptKBSyntax,sparqlResultLimit,this,true); + }catch (Exception e) {e.printStackTrace();} + return queryAsSet(SPARQLquery, "subject"); + } + + /** * get all direct Classes of an instance * @param instance @@ -232,8 +336,18 @@ /** - * lowlevel, executes query returns JSON + * low level, executes query returns ResultSet * @param SPARQLquery + * @return jena ResultSet + */ + public ResultSet queryAsResultSet(String SPARQLquery){ + return SparqlQuery.JSONtoResultSet(query(SPARQLquery)); + + } + + /** + * low level, executes query returns JSON + * @param SPARQLquery * @return */ public String query(String SPARQLquery){ Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertRDFS.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertRDFS.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertRDFS.java 2008-05-26 14:48:00 UTC (rev 929) @@ -1,7 +1,25 @@ +/** + * Copyright (C) 2007, Sebastian Hellmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ package org.dllearner.kb.sparql; import java.util.LinkedList; -import java.util.List; import java.util.SortedSet; import java.util.TreeSet; @@ -10,14 +28,17 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Union; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.ResultSetFormatter; -import com.hp.hpl.jena.sparql.core.ResultBinding; -//COMMENT: header +/** + * @author Sebastian Hellmann + * Enables RDFS reasoning for the DL2SPARQL class + * by concept rewriting + * //QUALITY use SPARQLtasks + */ public class SparqlQueryDescriptionConvertRDFS { - static Logger logger = Logger.getLogger(SparqlQueryDescriptionConvertRDFS.class); + //LOGGER: SparqlQueryDescriptionConvertVisitor + static Logger logger = Logger.getLogger(SparqlQueryDescriptionConvertVisitor.class); /** * @@ -35,7 +56,7 @@ * RECOMMENDED for large hierarchies) * @return the altered String */ - public static String conceptRewrite(String descriptionKBSyntax, SparqlEndpoint se, Cache c, + public static String conceptRewrite(String descriptionKBSyntax, SPARQLTasks st, boolean simple) { String quote = "\""; String returnValue = ""; @@ -58,7 +79,7 @@ // System.out.println(currentconcept); // subclasses are retrieved - subclasses = getSubClasses(currentconcept, se, c, simple); + subclasses = st.getSubClasses(currentconcept, simple); // if only one then keep if (subclasses.size() == 1) @@ -80,100 +101,6 @@ return returnValue; } - /** - * gets a SortedSet of all subclasses QUALITY: maybe it is better to have a - * parameter int depth, to choose a depth of subclass interference - * - * @see conceptRewrite(String descriptionKBSyntax, SparqlEndpoint se, Cache - * c, boolean simple ) - * @param description - * @param se - * @param c - * @param simple - * @return - */ - private static SortedSet<String> getSubClasses(String description, SparqlEndpoint se, Cache c, - boolean simple) { + - // ResultSet rs = null; - // System.out.println(description); - SortedSet<String> alreadyQueried = new TreeSet<String>(); - try { - - // initialisation get direct Subclasses - LinkedList<String> remainingClasses = new LinkedList<String>(); - - // collect remaining classes - remainingClasses.addAll(getDirectSubClasses(description.replaceAll("\"", ""), se, c)); - - // remainingClasses.addAll(alreadyQueried); - - // alreadyQueried = new TreeSet<String>(); - alreadyQueried.add(description.replaceAll("\"", "")); - - if (simple) { - alreadyQueried.addAll(remainingClasses); - return alreadyQueried; - } else { - - logger - .warn("Retrieval auf all subclasses via SPARQL is cost intensive and might take a while"); - while (remainingClasses.size() != 0) { - SortedSet<String> tmpSet = new TreeSet<String>(); - String tmp = remainingClasses.removeFirst(); - alreadyQueried.add(tmp); - - tmpSet = getDirectSubClasses(tmp, se, c); - for (String string : tmpSet) { - if (!(alreadyQueried.contains(string))) { - remainingClasses.add(string); - }// if - }// for - }// while - }// else - - } catch (Exception e) { - - } - - return alreadyQueried; - } - - /** - * QUALITY: workaround for a sparql glitch {?a owl:subclassOf ?b} returns an - * empty set on some entpoints. returns all direct subclasses of String - * concept - * - * @param concept - * @return SortedSet of direct subclasses as String - */ - private static SortedSet<String> getDirectSubClasses(String concept, SparqlEndpoint se, Cache c) { - String query = "SELECT * \n"; - query += "WHERE {\n"; - query += " ?subject ?predicate <" + concept + "> \n"; - query += "}\n"; - - ResultSet rs = null; - if (c == null) { - rs = new SparqlQuery(query, se).send(); - } else { - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - rs = SparqlQuery.JSONtoResultSet(JSON); - } - - SortedSet<String> subClasses = new TreeSet<String>(); - @SuppressWarnings("unchecked") - List<ResultBinding> l = ResultSetFormatter.toList(rs); - String p = "", s = ""; - for (ResultBinding resultBinding : l) { - - s = ((resultBinding.get("subject").toString())); - p = ((resultBinding.get("predicate").toString())); - if (p.equalsIgnoreCase("http://www.w3.org/2000/01/rdf-schema#subClassOf")) { - subClasses.add(s); - } - } - return subClasses; - } - } Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java =================================================================== --- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java 2008-05-26 14:48:00 UTC (rev 929) @@ -70,14 +70,13 @@ return getSparqlQuery(defaultLimit); }*/ - private String getSparqlQuery(int limit) + private String getSparqlQuery(int resultLimit) { // for old function see below // it was using the object attribute in a strange way // QUALITY: what if this function is called several times?? should be private maybe? String tmpQuery= "SELECT ?subject \nWHERE {"+query+ - " }\n "; - if(limit>0) tmpQuery+="LIMIT "+limit; + " }\n "+ limit(resultLimit); query = tmpQuery; return query; @@ -112,11 +111,11 @@ return getSparqlQuery(description, defaultLimit); } - public static String getSparqlQuery(Description description, int limit) + public static String getSparqlQuery(Description description, int resultLimit) { SparqlQueryDescriptionConvertVisitor visitor=new SparqlQueryDescriptionConvertVisitor(); description.accept(visitor); - String ret = visitor.getSparqlQuery(limit); + String ret = visitor.getSparqlQuery(resultLimit); //HACK see replace might be a good solution, needs testing while (ret.contains("..")) { ret = ret.replace("..", "."); @@ -142,11 +141,11 @@ * @return * @throws ParseException */ - public static String getSparqlQueryIncludingSubclasses(String descriptionKBSyntax, int limit, SparqlEndpoint se,Cache c, boolean simple) throws ParseException + public static String getSparqlQueryIncludingSubclasses(String descriptionKBSyntax, int resultLimit, SPARQLTasks st, boolean simple) throws ParseException { - String rewritten = SparqlQueryDescriptionConvertRDFS.conceptRewrite(descriptionKBSyntax, se, c, simple); + String rewritten = SparqlQueryDescriptionConvertRDFS.conceptRewrite(descriptionKBSyntax, st, simple); - return getSparqlQuery(rewritten, limit); + return getSparqlQuery(rewritten, resultLimit); } @@ -341,6 +340,10 @@ logger.trace("DatatypeSomeRestriction"); } + private String limit(int resultLimit){ + if(resultLimit>0)return " LIMIT "+resultLimit; + return ""; + } } Deleted: trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/scripts/ResultCompare.java 2008-05-26 14:48:00 UTC (rev 929) @@ -1,83 +0,0 @@ -package org.dllearner.scripts; - -import java.util.SortedSet; - -public class ResultCompare implements Comparable<ResultCompare> { - String concept; - SortedSet<String> instances; - double accuracy; - double accuracy2; - int nrOfInstances; - SortedSet<String> coveredInRest; - SortedSet<String> possibleNewCandidates; - SortedSet<String> notCoveredInTotal; - - - public ResultCompare(String concept, SortedSet<String> instances, double accuracy, - double accuracy2, int nrOfInstances, SortedSet<String> coveredInRest, - SortedSet<String> possibleNewCandidates, SortedSet<String> notCoveredInTotal) { - super(); - this.concept = concept; - this.instances = instances; - this.accuracy = accuracy; - this.accuracy2 = accuracy2; - this.nrOfInstances = nrOfInstances; - this.coveredInRest = coveredInRest; - this.possibleNewCandidates = possibleNewCandidates; - this.notCoveredInTotal = notCoveredInTotal; - } - - - - - public int compareTo(ResultCompare in) { - ResultCompare obj = in; - if(obj.accuracy > this.accuracy) return 1; - else if(obj.accuracy == this.accuracy){ - - if(obj.nrOfInstances<this.nrOfInstances)return 1; - else if(obj.nrOfInstances>this.nrOfInstances)return -1; - else return 1; - //if(obj.nrOfInstances==this.nrOfInstances)return 0; - } - else {//if(obj.accuracy < this.accuracy){ - return -1; - } - - } - - - - - public String toStringFull(){ - String ret=""; - ret+="concept\t"+concept+"\n"; - ret+="instances\t"+instances+"\n"; - ret+="accuracy\t"+accuracy+"\n"; - ret+="nrOfInstances\t"+nrOfInstances+"\n"; - ret+="accuracy2\t"+accuracy2+"\n"; - ret+="coveredInRest("+coveredInRest.size()+")\t"+coveredInRest+"\n"; - ret+="possibleNewCandidates("+possibleNewCandidates.size()+")\t"+possibleNewCandidates+"\n"; - ret+="notCoveredInTotal("+notCoveredInTotal.size()+")\t"+notCoveredInTotal+"\n"; - - return ret; - - } - - @Override - public String toString(){ - String ret=""; - ret+="concept\t"+concept+"\n"; - //ret+="instances\t"+instances+"\n"; - ret+="accuracy\t"+accuracy+"\n"; - ret+="nrOfInstances\t"+nrOfInstances+"\n"; - ret+="accuracy2\t"+accuracy2+"\n"; - //ret+="coveredInRest("+coveredInRest.size()+")\t"+coveredInRest+"\n"; - //ret+="possibleNewCandidates("+possibleNewCandidates.size()+")\t"+possibleNewCandidates+"\n"; - //ret+="notCoveredInTotal("+notCoveredInTotal.size()+")\t"+notCoveredInTotal+"\n"; - - return ret; - - } - -} Modified: trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/scripts/SKOS7030.java 2008-05-26 14:48:00 UTC (rev 929) @@ -47,19 +47,14 @@ static int maxExecutionTimeInSeconds = 30; static int guaranteeXgoodDescriptions = 40; - //static int limit=200; - - - - //examples static int sparqlResultSize=2000; static double percentOfSKOSSet=0.2; static double negfactor=1.0; SortedSet<String> posExamples = new TreeSet<String>(); SortedSet<String> fullPositiveSet = new TreeSet<String>(); - SortedSet<String> fullminusposRest = new TreeSet<String>(); + SortedSet<String> fullPosSetWithoutPosExamples = new TreeSet<String>(); SortedSet<String> negExamples = new TreeSet<String>(); @@ -78,13 +73,14 @@ if(local){ url = "http://139.18.2.37:8890/sparql"; - //RBC sparqlTasks = new SPARQLTasks(Cache.getPersistentCache(),SparqlEndpoint.EndpointLOCALDBpedia()); }else{ url = "http://dbpedia.openlinksw.com:8890/sparql"; sparqlTasks = new SPARQLTasks(Cache.getPersistentCache(),SparqlEndpoint.EndpointDBpedia()); } + System.out.println(sparqlTasks.getDomain("http://dbpedia.org/property/predecessor", 1000)); + String prim="http://dbpedia.org/resource/Category:Prime_Ministers_of_the_United_Kingdom"; String award=("http://dbpedia.org/resource/Category:Best_Actor_Academy_Award_winners"); @@ -92,164 +88,93 @@ SKOS7030 s= new SKOS7030(); s.makeExamples(prim, percentOfSKOSSet, negfactor, sparqlResultSize); - //QUALITY s.posExamples - List<Description> conceptresults = s.learn(s.posExamples, s.negExamples); + + List<Description> conceptresults = s.learn(); logger.debug("found nr of concepts: "+conceptresults.size()); + System.out.println(conceptresults); - for (Description oneConcept : conceptresults) { + int x=0; + + SortedSet<ResultMostCoveredInRest> res = new TreeSet<ResultMostCoveredInRest>(); + for (Description concept : conceptresults) { + if(x++==100)break; + res.add(s.evaluate(concept, 1000)); - //s.evaluate(oneConcept, 1000); + } + + x=0; + for (ResultMostCoveredInRest resultMostCoveredInRest : res) { + if(x++==10)break; + System.out.println(resultMostCoveredInRest.concept); + System.out.println(resultMostCoveredInRest.accuracy); + System.out.println(resultMostCoveredInRest.retrievedInstancesSize); + } + s.print(res.first().concept, 1000); System.out.println("Finished"); JamonMonitorLogger.printAllSortedByLabel(); } - void evaluate(Description oneConcept, int sparqlResultLimit){ - logger.debug("oneconcept: "+oneConcept); - SortedSet<String> instances = sparqlTasks.retrieveInstancesForConcept(oneConcept.toKBSyntaxString(), sparqlResultLimit); + void print(Description concept, int sparqlResultLimit){ + logger.debug("evaluating concept: "+concept); +// SortedSet<String> instances = sparqlTasks.retrieveInstancesForConcept(oneConcept.toKBSyntaxString(), sparqlResultLimit); + SortedSet<String> instances = + sparqlTasks.retrieveInstancesForConceptIncludingSubclasses( + concept.toKBSyntaxString(),sparqlResultLimit); - System.out.println(fullminusposRest.size()); - System.out.println(instances.size()); - - SortedSet<String> coveredInRest = new TreeSet<String>(fullminusposRest); + SortedSet<String> coveredInRest = new TreeSet<String>(fullPosSetWithoutPosExamples); coveredInRest.retainAll(instances); + - System.out.println(fullminusposRest.size()); - System.out.println(instances.size()); - System.out.println(coveredInRest.size()); + SortedSet<String> coveredTotal = new TreeSet<String>(fullPositiveSet); + coveredTotal.retainAll(instances); + SortedSet<String> notCoveredInRest = new TreeSet<String>(fullPosSetWithoutPosExamples); + notCoveredInRest.retainAll(coveredInRest); + System.out.println(notCoveredInRest); - //SortedSet<String> possibleNewCandidates = new TreeSet<String>(); - //SortedSet<String> notCoveredInTotal = new TreeSet<String>(); + SortedSet<String> notCoveredTotal = new TreeSet<String>(fullPositiveSet); + notCoveredTotal.retainAll(coveredTotal); + System.out.println(notCoveredTotal); - } - - static void DBpediaSKOS(String SKOSConcept){ + ResultMostCoveredInRest evaluate(Description concept, int sparqlResultLimit){ + logger.debug("evaluating concept: "+concept); +// SortedSet<String> instances = sparqlTasks.retrieveInstancesForConcept(oneConcept.toKBSyntaxString(), sparqlResultLimit); + SortedSet<String> instances = + sparqlTasks.retrieveInstancesForConceptIncludingSubclasses( + concept.toKBSyntaxString(),sparqlResultLimit); + SortedSet<String> coveredInRest = new TreeSet<String>(fullPosSetWithoutPosExamples); + coveredInRest.retainAll(instances); - //concepts.add("http://dbpedia.org/resource/Category:Grammy_Award_winners"); - //concepts.add("EXISTS \"http://dbpedia.org/property/grammyawards\".TOP"); + SortedSet<String> coveredTotal = new TreeSet<String>(fullPositiveSet); + coveredTotal.retainAll(instances); + SortedSet<String> notCoveredInRest = new TreeSet<String>(fullPosSetWithoutPosExamples); + notCoveredInRest.retainAll(coveredInRest); - //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); - //HashMap<String, String> result2 = new HashMap<String, String>(); - //System.out.println(concepts.first()); - //logger.setLevel(Level.TRACE); + SortedSet<String> notCoveredTotal = new TreeSet<String>(fullPositiveSet); + notCoveredTotal.retainAll(coveredTotal); + double acc = (double) (coveredInRest.size() / fullPosSetWithoutPosExamples.size()); + System.out.println("Accuracy: "+acc); + return new ResultMostCoveredInRest(concept,acc,instances.size()); - - -// LearnSparql ls = new LearnSparql(); -// -// //igno.add(oneConcept.replaceAll("\"", "")); -// -// List<Description> conceptresults= ls.learnDBpediaSKOS(posExamples, negExamples, url,new TreeSet<String>(),recursiondepth, closeAfterRecursion,randomizeCache,resultsize,noise); -// -// System.out.println("concepts"+conceptresults); -// //System.exit(0); -// -// SortedSet<ResultCompare> res=new TreeSet<ResultCompare>(); -// for (Description oneConcept : conceptresults) { -// try{ -// -// -// int i=0; -// int a=0; -// for (String oneinst : instances) { -// boolean inRest=false; -// boolean inTotal=false; -// for (String onerest : rest) { -// if(onerest.equalsIgnoreCase(oneinst)) -// { i++; inRest=true; break;} -// -// } -// if (inRest){coveredInRest.add(oneinst);}; -// -// for (String onetotal : totalSKOSset) { -// if(onetotal.equalsIgnoreCase(oneinst)) -// { a++; inTotal=true; break;} -// } -// if(!inRest && !inTotal){ -// possibleNewCandidates.add(oneinst); -// } -// } -// -// for (String onetotal : totalSKOSset) { -// boolean mm=false; -// for (String oneinst : instances) { -// if(onetotal.equalsIgnoreCase(oneinst)){ -// mm=true;break; -// } -// -// } -// if(!mm)notCoveredInTotal.add(onetotal); -// -// } -// -// -// -// double accuracy= (double)i/rest.size(); -// double accuracy2= (double)a/totalSKOSset.size(); -// -// logger.debug((new ResultCompare(oneConcept.toKBSyntaxString(),instances,accuracy,accuracy2,instances.size(), -// coveredInRest,possibleNewCandidates,notCoveredInTotal)).toStringFull()); -// -// //if(instances.size()>=0)System.out.println("size of instances "+instances.size()); -// //if(instances.size()>=0 && instances.size()<100) System.out.println("instances"+instances); -// }catch (Exception e) {e.printStackTrace();} -// } - -// System.out.println(res.last()); -// res.remove(res.last()); -// System.out.println(res.last()); -// res.remove(res.last()); -// System.out.println(res.last()); -// res.remove(res.last()); -// - - //double percent=0.80*(double)res.size();; -// double acc=res.first().accuracy; -// logger.debug(res.first().toStringFull()); -// res.remove(res.first()); -// logger.debug(res.first().toStringFull()); -// res.remove(res.first()); -// int i=0; -// while (res.size()>0){ -// logger.debug(res.first()); -// res.remove(res.first()); -// //if(res.size()<=percent)break; -// if(i>50)break; -// i++; -// -// } -// -// return 0.0; - - - //System.out.println("AAAAAAAA"); - //System.exit(0); - //"relearned concept: "; - //cf.writeSPARQL(confname, posExamples, negExamples, url, new TreeSet<String>(),standardSettings,algorithm); - // - - //Statistics.print(); + } - - - public static void initLogger() { SimpleLayout layout = new SimpleLayout(); @@ -302,7 +227,7 @@ AutomaticNegativeExampleFinderSPARQL aneg = new AutomaticNegativeExampleFinderSPARQL(fullPositiveSet,sparqlTasks); aneg.makeNegativeExamplesFromParallelClasses(posExamples, sparqlResultSize); - SortedSet<String> negativeSet = aneg.getNegativeExamples(neglimit); + this.negExamples = aneg.getNegativeExamples(neglimit); logger.debug("POSITIVE EXAMPLES"); for (String pos : posExamples) { @@ -310,27 +235,35 @@ } logger.debug("NEGATIVE EXAMPLES"); - for (String negs : negativeSet) { + for (String negs : this.negExamples) { logger.debug("-"+negs); } - fullminusposRest = fullPositiveSet; - fullminusposRest.removeAll(posExamples); + fullPosSetWithoutPosExamples = fullPositiveSet; + fullPosSetWithoutPosExamples.removeAll(posExamples); logger.debug(fullPositiveSet); - logger.debug(fullminusposRest); + logger.debug(fullPosSetWithoutPosExamples); } - public List<Description> learn(SortedSet<String> posExamples, SortedSet<String> negExamples){ + public List<Description> learn(){ SortedSet<String> instances = new TreeSet<String>(); - instances.addAll(posExamples); - instances.addAll(negExamples); + instances.addAll(this.posExamples); + instances.addAll(this.negExamples); + logger.info("Start Learning with"); + logger.info("positive examples: \t"+posExamples.size()); + logger.info("negative examples: \t"+negExamples.size()); + logger.info("instances \t"+instances.size()); + + + + ComponentManager cm = ComponentManager.getInstance(); LearningAlgorithm la = null; @@ -344,8 +277,8 @@ rs = new ReasoningService(r); //System.out.println("satisfy: "+rs.isSatisfiable()); lp = new PosNegDefinitionLP(rs); - ((PosNegLP) lp).setPositiveExamples(SetManipulation.stringToInd(posExamples)); - ((PosNegLP) lp).setNegativeExamples(SetManipulation.stringToInd(negExamples)); + ((PosNegLP) lp).setPositiveExamples(SetManipulation.stringToInd(this.posExamples)); + ((PosNegLP) lp).setNegativeExamples(SetManipulation.stringToInd(this.negExamples)); la = cm.learningAlgorithm(ExampleBasedROLComponent.class, lp, rs); @@ -454,8 +387,57 @@ System.out.println(Level.INFO);*/ //System.exit(0); + private class ResultCompare implements Comparable<ResultCompare>{ + Description concept ; + double accuracy = 0.0; + int retrievedInstancesSize=0; + + public int compareTo(ResultCompare o2) { + return 0; + } + public boolean equals(ResultCompare o2){ + return this.concept.equals(o2.concept); + } + + + public ResultCompare(Description conceptKBSyntax, double accuracy, int retrievedInstancesSize) { + super(); + this.concept = conceptKBSyntax; + this.accuracy = accuracy; + this.retrievedInstancesSize = retrievedInstancesSize; + } + + + } + + private class ResultMostCoveredInRest extends ResultCompare{ + + public ResultMostCoveredInRest(Description concept, double accuracy, + int retrievedInstancesSize) { + super(concept, accuracy, retrievedInstancesSize); + + } + public int compareTo(ResultMostCoveredInRest o2) { + if(this.equals(o2))return 0; + + if(this.accuracy > o2.accuracy){ + return 1; + } + else if(this.accuracy == o2.accuracy) { + if(this.retrievedInstancesSize < o2.retrievedInstancesSize ) + return 1; + else if(this.retrievedInstancesSize > o2.retrievedInstancesSize){ + return -1; + } + else return this.concept.toKBSyntaxString().compareTo(o2.concept.toKBSyntaxString()); + }else { + return -1; + } + + } + + } - } Added: trunk/src/dl-learner/org/dllearner/utilities/datastructures/ResultConceptSorter.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/datastructures/ResultConceptSorter.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/utilities/datastructures/ResultConceptSorter.java 2008-05-26 14:48:00 UTC (rev 929) @@ -0,0 +1,89 @@ +package org.dllearner.utilities.datastructures; + +import java.util.SortedSet; + +import org.dllearner.core.owl.Description; + +public class ResultConceptSorter implements Comparable<ResultConceptSorter> { + String concept; + SortedSet<String> instances; + double accuracy; + double accuracy2; + int nrOfInstances; + SortedSet<String> coveredInRest; + SortedSet<String> possibleNewCandidates; + SortedSet<String> notCoveredInTotal; + + + public ResultConceptSorter(String concept, SortedSet<String> instances, double accuracy, + double accuracy2, int nrOfInstances, SortedSet<String> coveredInRest, + SortedSet<String> possibleNewCandidates, SortedSet<String> notCoveredInTotal) { + super(); + this.concept = concept; + this.instances = instances; + this.accuracy = accuracy; + this.accuracy2 = accuracy2; + this.nrOfInstances = nrOfInstances; + this.coveredInRest = coveredInRest; + this.possibleNewCandidates = possibleNewCandidates; + this.notCoveredInTotal = notCoveredInTotal; + } + + + + + public int compareTo(ResultConceptSorter in) { + ResultConceptSorter obj = in; + if(obj.accuracy > this.accuracy) return 1; + else if(obj.accuracy == this.accuracy){ + + if(obj.nrOfInstances<this.nrOfInstances)return 1; + else if(obj.nrOfInstances>this.nrOfInstances)return -1; + else return 1; + //if(obj.nrOfInstances==this.nrOfInstances)return 0; + } + else {//if(obj.accuracy < this.accuracy){ + return -1; + } + + } + + + + + public String toStringFull(){ + String ret=""; + ret+="concept\t"+concept+"\n"; + ret+="instances\t"+instances+"\n"; + ret+="accuracy\t"+accuracy+"\n"; + ret+="nrOfInstances\t"+nrOfInstances+"\n"; + ret+="accuracy2\t"+accuracy2+"\n"; + ret+="coveredInRest("+coveredInRest.size()+")\t"+coveredInRest+"\n"; + ret+="possibleNewCandidates("+possibleNewCandidates.size()+")\t"+possibleNewCandidates+"\n"; + ret+="notCoveredInTotal("+notCoveredInTotal.size()+")\t"+notCoveredInTotal+"\n"; + + return ret; + + } + + @Override + public String toString(){ + String ret=""; + ret+="concept\t"+concept+"\n"; + //ret+="instances\t"+instances+"\n"; + ret+="accuracy\t"+accuracy+"\n"; + ret+="nrOfInstances\t"+nrOfInstances+"\n"; + ret+="accuracy2\t"+accuracy2+"\n"; + //ret+="coveredInRest("+coveredInRest.size()+")\t"+coveredInRest+"\n"; + //ret+="possibleNewCandidates("+possibleNewCandidates.size()+")\t"+possibleNewCandidates+"\n"; + //ret+="notCoveredInTotal("+notCoveredInTotal.size()+")\t"+notCoveredInTotal+"\n"; + + return ret; + + } + + + + + +} Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL.java 2008-05-26 14:48:00 UTC (rev 929) @@ -10,7 +10,7 @@ public class AutomaticNegativeExampleFinderSPARQL { - // CHECK + // LOGGER: ComponentManager private static Logger logger = Logger.getLogger(ComponentManager.class); private SPARQLTasks sparqltasks; @@ -26,7 +26,13 @@ static int poslimit = 10; static int neglimit = 20; - // CHECK separate posexamples and fullposset + + /** + * takes as input a full positive set to make sure no negatives are added as positives + * + * @param fullPositiveSet + * @param SPARQLTasks st + */ public AutomaticNegativeExampleFinderSPARQL( SortedSet<String> fullPositiveSet, SPARQLTasks st) { @@ -38,6 +44,11 @@ + /** + * aggregates all collected neg examples + * @param neglimit + * @return + */ public SortedSet<String> getNegativeExamples(int neglimit ) { SortedSet<String> negatives = new TreeSet<String>(); @@ -50,12 +61,18 @@ return negatives; } - // CHECK namespace + + /** + * makes neg ex from related instances, that take part in a role R(pos,neg) + * filters all objects, that don't use the given namespace + * @param instances + * @param objectNamespace + */ public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances, - String namespace) { + String objectNamespace) { logger.debug("making examples from related instances"); for (String oneInstance : instances) { - makeNegativeExamplesFromRelatedInstances(oneInstance, namespace); + makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace); } logger.debug("|-negExample size from related: " + fromRelated.size()); } @@ -83,11 +100,16 @@ }*/ + /** + * makes neg ex from classes, the pos ex belong to + * @param positiveSet + * @param resultLimit + */ public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int resultLimit){ makeNegativeExamplesFromClassesOfInstances(positiveSet, resultLimit); } - public void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet, + private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet, int resultLimit) { logger.debug("making neg Examples from parallel classes"); SortedSet<String> classes = new TreeSet<String>(); @@ -115,6 +137,11 @@ } + /** + * if pos ex derive from one class, then neg ex are taken from a superclass + * @param concept + * @param resultLimit + */ public void makeNegativeExamplesFromSuperClasses(String concept, int resultLimit) { concept = concept.replaceAll("\"", ""); Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java 2008-05-26 12:02:10 UTC (rev 928) +++ trunk/src/dl-learner/org/dllearner/utilities/examples/AutomaticPositiveExampleFinderSPARQL.java 2008-05-26 14:48:00 UTC (rev 929) @@ -9,7 +9,7 @@ public class AutomaticPositiveExampleFinderSPARQL { - //CHECK + // LOGGER: ComponentManager private static Logger logger = Logger .getLogger(ComponentManager.class); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |