From: <lor...@us...> - 2014-02-16 15:59:46
|
Revision: 4224 http://sourceforge.net/p/dl-learner/code/4224 Author: lorenz_b Date: 2014-02-16 15:59:43 +0000 (Sun, 16 Feb 2014) Log Message: ----------- Added ISLE experiments. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/PCELOE.java trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithm.java trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithmDisjunctive.java trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/StableHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/core/EvaluatedDescription.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/ConciseBoundedDescriptionGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java trunk/components-core/src/main/java/org/dllearner/refinementoperators/ELDown3.java trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/KnowledgebaseSampleGenerator.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -19,7 +19,10 @@ package org.dllearner.algorithms.celoe; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; @@ -29,6 +32,7 @@ import java.util.TreeSet; import org.apache.log4j.Logger; +import org.dllearner.algorithms.elcopy.ELLearningAlgorithm; import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractHeuristic; import org.dllearner.core.AbstractKnowledgeSource; @@ -45,6 +49,7 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Restriction; import org.dllearner.core.owl.Thing; +import org.dllearner.kb.OWLAPIOntology; import org.dllearner.kb.OWLFile; import org.dllearner.learningproblems.ClassLearningProblem; import org.dllearner.learningproblems.PosNegLP; @@ -62,9 +67,19 @@ import org.dllearner.utilities.owl.ConceptTransformation; import org.dllearner.utilities.owl.DescriptionMinimizer; import org.dllearner.utilities.owl.EvaluatedDescriptionSet; +import org.dllearner.utilities.owl.OWLEntityTypeAdder; import org.dllearner.utilities.owl.PropertyContext; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLOntology; import org.springframework.beans.factory.annotation.Autowired; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -578,6 +593,7 @@ // System.out.println("refining: " + node); int horizExp = node.getHorizontalExpansion(); TreeSet<Description> refinements = (TreeSet<Description>) operator.refine(node.getDescription(), horizExp+1); + System.out.println(refinements); node.incHorizontalExpansion(); node.setRefinementCount(refinements.size()); nodes.add(node); @@ -1120,16 +1136,44 @@ } public static void main(String[] args) throws Exception{ - AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); + String cls = "http://purl.org/procurement/public-contracts#Tender"; + String file = "/home/me/work/datasets/e-procurement/dl-learner-sample-with-classes-pco.rdf"; + Model model = ModelFactory.createDefaultModel(); + model.read(new FileInputStream(file), null); + OWLEntityTypeAdder.addEntityTypes(model); + Query query = QueryFactory.create("SELECT (COUNT(distinct ?s) as ?cnt) WHERE {" + + "?s a <" + cls + ">.}", Syntax.syntaxARQ); + System.out.println(ResultSetFormatter.asText(QueryExecutionFactory.create(query, model).execSelect())); + query = QueryFactory.create("SELECT ?p (COUNT(distinct ?s) AS ?cnt) WHERE {" + + "?s ?p ?o. ?p a <http://www.w3.org/2002/07/owl#ObjectProperty>." + + "?s a <" + cls + ">. " + +// "OPTIONAL{?x ?p ?o1. " + +// "FILTER NOT EXISTS{?x a <" + cls + ">.}}" + + + "}GROUP BY ?p ORDER BY DESC(?cnt)", Syntax.syntaxARQ); + + System.out.println(ResultSetFormatter.asText(QueryExecutionFactory.create(query, model).execSelect())); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + model.write(baos, "TURTLE"); + OWLOntology ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray())); + + AbstractKnowledgeSource ks = new OWLAPIOntology(ontology); ks.init(); AbstractReasonerComponent rc = new FastInstanceChecker(ks); rc.init(); ClassLearningProblem lp = new ClassLearningProblem(rc); - lp.setClassToDescribe(new NamedClass("http://example.com/father#father")); + lp.setClassToDescribe(new NamedClass("http://purl.org/procurement/public-contracts#Tender")); lp.init(); +// ELLearningAlgorithm alg = new ELLearningAlgorithm(lp, rc); +// alg.setNoisePercentage(30); +// alg.setClassToDescribe(new NamedClass("http://purl.org/procurement/public-contracts#Tender")); +// alg.init(); + + CELOE alg = new CELOE(lp, rc); alg.setMaxExecutionTimeInSeconds(10); alg.init(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/PCELOE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/PCELOE.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/PCELOE.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -117,7 +117,6 @@ // private TreeSet<Description> descriptions; private SortedSet<Description> descriptions; - private EvaluatedDescriptionSet bestEvaluatedDescriptions; // if true, then each solution is evaluated exactly instead of approximately // private boolean exactBestDescriptionEvaluation = false; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithm.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithm.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -47,6 +47,7 @@ import org.dllearner.learningproblems.ScorePosNeg; import org.dllearner.refinementoperators.ELDown3; import org.dllearner.utilities.Helper; +import org.dllearner.utilities.owl.EvaluatedDescriptionSet; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -88,6 +89,10 @@ @ConfigOption(name = "startClass", defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.") private Description startClass; + private int maxClassExpressionDepth = 2; + + private int maxNrOfResults = 10; + private Set<NamedClass> ignoredConcepts = null; private NamedClass classToDescribe; @@ -155,8 +160,11 @@ } operator = new ELDown3(reasoner, instanceBasedDisjoints); + operator.setMaxClassExpressionDepth(maxClassExpressionDepth); noise = noisePercentage/100d; + + bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults); } @Override @@ -229,7 +237,8 @@ } else { node.setCoveredNegatives(negCovers); } - node.setScore(accuracy); +// node.setScore(accuracy); +// System.out.println(description + ":" + accuracy); // link to parent (unless start node) if(parentNode == null) { startNode = node; @@ -481,5 +490,19 @@ public void setTreeSearchTimeSeconds(double treeSearchTimeSeconds) { this.treeSearchTimeSeconds = treeSearchTimeSeconds; } + + /** + * @param maxNrOfResults the maxNrOfResults to set + */ + public void setMaxNrOfResults(int maxNrOfResults) { + this.maxNrOfResults = maxNrOfResults; + } + + /** + * @param maxClassExpressionDepth the maxClassExpressionDepth to set + */ + public void setMaxClassExpressionDepth(int maxClassExpressionDepth) { + this.maxClassExpressionDepth = maxClassExpressionDepth; + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithmDisjunctive.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithmDisjunctive.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/ELLearningAlgorithmDisjunctive.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -242,9 +242,13 @@ // form union of trees found so far with if(treeCount==0) { bestEvaluatedDescription = learningProblem.evaluate(bestDescription); + bestEvaluatedDescriptions.add(bestEvaluatedDescription); } else { - bestCombinedDescription = new Union(bestEvaluatedDescription.getDescription(), bestDescription); + if(!bestEvaluatedDescription.equals(Thing.instance)){ + bestCombinedDescription = new Union(bestEvaluatedDescription.getDescription(), bestDescription); + } bestEvaluatedDescription = learningProblem.evaluate(bestCombinedDescription); + bestEvaluatedDescriptions.add(bestEvaluatedDescription); } // remove already covered examples @@ -405,7 +409,7 @@ // return (bestNode.getCoveredNegatives() == 0); // stop if there are no more positive examples to cover - if(currentPosExamples.size()==0) { + if(stopOnFirstDefinition && currentPosExamples.size()==0) { return true; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/StableHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/StableHeuristic.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/elcopy/StableHeuristic.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -36,8 +36,8 @@ @Override public int compare(SearchTreeNode o1, SearchTreeNode o2) { - double diff = o2.getScore() - o1.getScore(); -// diff = -diff; + int diff = o2.getCoveredNegatives() - o1.getCoveredNegatives(); +// diff = Double.compare(o1.getScore(), o2.getScore()); if(diff>0) { return 1; } else if(diff<0) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -8,6 +8,11 @@ import java.io.ObjectOutputStream; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.apache.log4j.Logger; @@ -17,7 +22,6 @@ import org.dllearner.core.owl.NamedClass; import org.semanticweb.owlapi.model.OWLOntology; -import com.google.common.hash.HashCode; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; @@ -35,12 +39,13 @@ public static String cacheDirectory = "cache/relevance"; public static Map<Entity, Double> generateRelevanceMap(NamedClass cls, OWLOntology ontology, RelevanceMetric relevanceMetric, boolean cached){ + logger.info("Relevance Metric: " + relevanceMetric.getClass().getSimpleName()); Map<Entity, Double> relevanceMap = null; File folder = new File(cacheDirectory); folder.mkdirs(); File file = null; try { - file = new File(folder, URLEncoder.encode(cls.getName(), "UTF-8") + ".rel"); + file = new File(folder, URLEncoder.encode(cls.getName() + "-" + relevanceMetric.getClass().getSimpleName(), "UTF-8") + ".rel"); } catch (UnsupportedEncodingException e2) { e2.printStackTrace(); } @@ -69,6 +74,22 @@ return relevanceMap; } + public static Map<RelevanceMetric, Map<Entity, Double>> generateRelevanceMaps(NamedClass cls, OWLOntology ontology, List<RelevanceMetric> relevanceMetrics, boolean cached){ + Map<RelevanceMetric, Map<Entity, Double>> metric2Map = new LinkedHashMap<>(); + for (RelevanceMetric relevanceMetric : relevanceMetrics) { + try { + long start = System.currentTimeMillis(); + metric2Map.put(relevanceMetric, generateRelevanceMap(cls, ontology, relevanceMetric, cached)); + long end = System.currentTimeMillis(); + logger.info("Operation took " + (end - start) + "ms"); + + } catch (Exception e) { + e.printStackTrace(); + } + } + return metric2Map; + } + public static Map<Entity, Double> generateRelevanceMap(NamedClass cls, OWLOntology ontology, RelevanceMetric relevanceMetric){ return generateRelevanceMap(cls, ontology, relevanceMetric, false); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -4,14 +4,24 @@ package org.dllearner.algorithms.isle.index.syntactic; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.TreeSet; import java.util.Map.Entry; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.Set; +import org.apache.log4j.Logger; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; @@ -28,11 +38,14 @@ import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.utilities.owl.OWLAPIConverter; import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; import com.google.common.base.Joiner; +import com.google.common.collect.Sets; /** * @author Lorenz Buehmann @@ -40,6 +53,8 @@ */ public class SolrSyntacticIndex implements Index{ + private static final Logger logger = Logger.getLogger(SolrSyntacticIndex.class.getName()); + private SolrServer solr; private AnnotationEntityTextRetriever textRetriever; private String searchField; @@ -48,13 +63,78 @@ long totalNumberOfDocuments = -1; Map<Entity, Long> cache = new HashMap<>(); + Map<List<Entity>, Long> cache2 = new HashMap<>(); + private OWLOntology ontology; public SolrSyntacticIndex(OWLOntology ontology, String solrServerURL, String searchField) { + this.ontology = ontology; this.searchField = searchField; solr = new HttpSolrServer(solrServerURL); textRetriever = new RDFSLabelEntityTextRetriever(ontology); } + public void buildIndex(Set<NamedClass> classes){ + logger.info("Building cache..."); + + ExecutorService executor = Executors.newFixedThreadPool(6); + + final Set<OWLEntity> owlEntities = new TreeSet<OWLEntity>(); + owlEntities.addAll(ontology.getClassesInSignature()); + owlEntities.addAll(ontology.getDataPropertiesInSignature()); + owlEntities.addAll(ontology.getObjectPropertiesInSignature()); + + final Map<Set<Entity>, Long> cache = new HashMap<>(); + + + for (final NamedClass cls : classes) { + executor.submit(new Runnable() { + + @Override + public void run() { + Set<Entity> entities; + logger.info(cls); + Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); + otherEntities.remove(cls); + //fA + long fA = getNumberOfDocumentsFor(cls); + entities = new HashSet<>(); + entities.add(cls); + cache.put(entities, fA); + for (Entity entity : otherEntities) { + //fB + long fB = getNumberOfDocumentsFor(entity); + entities = new HashSet<>(); + entities.add(entity); + cache.put(entities, fB); + //fAB + long fAB = getNumberOfDocumentsFor(cls, entity); + entities = new HashSet<>(); + entities.add(cls); + entities.add(entity); + cache.put(entities, fAB); + } + } + }); + } + executor.shutdown(); + try { + executor.awaitTermination(10, TimeUnit.DAYS); + } catch (InterruptedException e) { + e.printStackTrace(); + } + try { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("frequencies.obj")); + oos.writeObject(cache); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + + /* (non-Javadoc) * @see org.dllearner.algorithms.isle.index.Index#getDocuments(org.dllearner.core.owl.Entity) */ @@ -109,7 +189,7 @@ * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity) */ @Override - public long getNumberOfDocumentsFor(Entity entity) { + public synchronized long getNumberOfDocumentsFor(Entity entity) { if(cache.containsKey(entity)){ return cache.get(entity); } @@ -146,7 +226,11 @@ * @see org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity[]) */ @Override - public long getNumberOfDocumentsFor(Entity... entities) { + public synchronized long getNumberOfDocumentsFor(Entity... entities) { + List<Entity> entitiesList = Arrays.asList(entities); + if(cache2.containsKey(entitiesList)){ + return cache2.get(entitiesList); + } Set<String> queryStringParts = new HashSet<>(); @@ -177,6 +261,7 @@ try { QueryResponse response = solr.query(query); SolrDocumentList list = response.getResults(); + cache2.put(entitiesList, list.getNumFound()); return list.getNumFound(); } catch (SolrServerException e) { e.printStackTrace(); @@ -234,6 +319,11 @@ System.out.println(n); n = index.getNumberOfDocumentsForTyped(new NamedClass("http://dbpedia.org/ontology/Person"), new NamedClass("http://dbpedia.org/ontology/birthPlace")); System.out.println(n); + + System.out.println(index.getNumberOfDocumentsFor( + new NamedClass("http://dbpedia.org/ontology/Person"), new ObjectProperty("http://dbpedia.org/ontology/birthPlace"))); + System.out.println(index.getNumberOfDocumentsFor( + new NamedClass("http://dbpedia.org/ontology/Person"), new ObjectProperty("http://dbpedia.org/ontology/birthPlace"))); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -7,7 +7,7 @@ import java.util.Map; import org.dllearner.algorithms.isle.index.Index; -import org.semanticweb.owlapi.model.OWLEntity; +import org.dllearner.core.owl.Entity; /** * @author Lorenz Buehmann @@ -20,33 +20,32 @@ public AbstractRelevanceMetric(Index index) { this.index = index; } - - public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ - Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); - double dMin = Double.MAX_VALUE; - Double dMax = Double.MIN_VALUE; - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - if( dValue < dMin ){ - dMin = dValue; + + public static Map<Entity, Double> normalizeMinMax(Map<Entity, Double> hmEntity2Score) { + Map<Entity, Double> hmEntity2Norm = new HashMap<Entity, Double>(); + + double min = Double.MAX_VALUE; + double max = Double.MIN_VALUE; + + for (Entity e : hmEntity2Score.keySet()) { + double value = hmEntity2Score.get(e); + if (value < min) { + min = value; + } else if (value > max) { + max = value; } - else if( dValue > dMax ){ - dMax = dValue; - } } // System.out.println( "min="+ dMin +" max="+ dMax ); - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - double dNorm = 0; - if( dMin == dMax ){ - dNorm = dValue; - } - else { - dNorm = ( dValue - dMin ) / ( dMax - dMin ); + for (Entity e : hmEntity2Score.keySet()) { + double value = hmEntity2Score.get(e); + double normalized = 0; + if (min == max) { + normalized = value; + normalized = 0.5; + } else { + normalized = (value - min) / (max - min); } - hmEntity2Norm.put( e, dNorm ); + hmEntity2Norm.put(e, normalized); } return hmEntity2Norm; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -17,7 +17,7 @@ } @Override - public synchronized double getRelevance(Entity entityA, Entity entityB){ + public double getRelevance(Entity entityA, Entity entityB){ long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); long nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); @@ -28,13 +28,17 @@ double pB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsB / (double) nrOfDocuments); double pAB = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsAB / (double) nrOfDocuments); + if(pAB == 0 || (pA * pB) == 0){ + return 0; + } + double pmi = Math.log(pAB / pA * pB); return pmi; } @Override - public synchronized double getNormalizedRelevance(Entity entityA, Entity entityB){ + public double getNormalizedRelevance(Entity entityA, Entity entityB){ long nrOfDocumentsA = index.getNumberOfDocumentsFor(entityA); long nrOfDocumentsB = index.getNumberOfDocumentsFor(entityB); long nrOfDocumentsAB = index.getNumberOfDocumentsFor(entityA, entityB); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -3,10 +3,12 @@ */ package org.dllearner.algorithms.isle.metrics; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -25,6 +27,7 @@ private static final Logger logger = Logger.getLogger(RelevanceUtils.class.getName()); static int maxNrOfThreads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1); + static boolean normalize = true; /** * Returns a map containing the relevance score based on the given metric between the entity and each other entity. @@ -43,8 +46,9 @@ executor.submit(new Runnable() { @Override public void run() { - double relevance = metric.getNormalizedRelevance(entity, otherEntity); - logger.info(otherEntity + ":" + relevance); +// double relevance = metric.getNormalizedRelevance(entity, otherEntity); + double relevance = metric.getRelevance(entity, otherEntity); +// logger.info(otherEntity + ":" + relevance); relevantEntities.put(otherEntity, relevance); } }); @@ -55,17 +59,21 @@ } catch (InterruptedException e) { e.printStackTrace(); } - + //normalize the values + if(normalize){ + return AbstractRelevanceMetric.normalizeMinMax(relevantEntities); + } return relevantEntities; } public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ - Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); + Set<OWLEntity> owlEntities = new TreeSet<OWLEntity>(); owlEntities.addAll(ontology.getClassesInSignature()); owlEntities.addAll(ontology.getDataPropertiesInSignature()); owlEntities.addAll(ontology.getObjectPropertiesInSignature()); Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); +// Set<Entity> otherEntities = OWLAPIConverter.getEntities(new HashSet<OWLEntity>(new ArrayList<OWLEntity>(owlEntities).subList(0, 20))); otherEntities.remove(entity); return getRelevantEntities(entity, otherEntities, metric); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/SignificantPMIRelevanceMetric.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -34,6 +34,10 @@ double N = index.getTotalNumberOfDocuments(); + if(fA == 0 || fB == 0 || fAB == 0){ + return 0; + } + double pmi = Math.log(fAB / (fA*fB/N + Math.sqrt(fA)*Math.sqrt(Math.log(delta)/-2))); return pmi; Modified: trunk/components-core/src/main/java/org/dllearner/core/EvaluatedDescription.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/EvaluatedDescription.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/core/EvaluatedDescription.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -24,6 +24,7 @@ import org.dllearner.core.owl.Description; import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.utilities.owl.ConceptComparator; import org.dllearner.utilities.owl.OWLAPIDescriptionConvertVisitor; import org.dllearner.utilities.owl.OWLAPIRenderers; import org.json.JSONException; @@ -37,12 +38,13 @@ * @author Jens Lehmann * */ -public class EvaluatedDescription implements Serializable{ +public class EvaluatedDescription implements Serializable, Comparable<EvaluatedDescription>{ /** * */ private static final long serialVersionUID = 1106431570510815033L; + private static ConceptComparator conceptComparator = new ConceptComparator(); protected Description description; protected Score score; @@ -139,4 +141,16 @@ return description.toString() + " " + dfPercent.format(getAccuracy()); } + /* (non-Javadoc) + * @see java.lang.Comparable#compareTo(java.lang.Object) + */ + @Override + public int compareTo(EvaluatedDescription o) { + int diff = Double.compare(score.getAccuracy(), o.score.getAccuracy()); + if(diff == 0){ + conceptComparator.compare(description, o.getDescription()); + } + return diff; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/ConciseBoundedDescriptionGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/ConciseBoundedDescriptionGeneratorImpl.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/ConciseBoundedDescriptionGeneratorImpl.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -61,6 +61,12 @@ qef = new QueryExecutionFactoryPaginated(qef, 10000); } + public ConciseBoundedDescriptionGeneratorImpl(Model model, int maxRecursionDepth) { + this.maxRecursionDepth = maxRecursionDepth; + + qef = new QueryExecutionFactoryModel(model); + } + public ConciseBoundedDescriptionGeneratorImpl(SparqlEndpoint endpoint, String cacheDir) { this(endpoint, cacheDir, MAX_RECURSION_DEPTH_DEFAULT); } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/SparqlEndpoint.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -504,6 +504,43 @@ } return new SparqlEndpoint(u); } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((defaultGraphURIs == null) ? 0 : defaultGraphURIs.hashCode()); + result = prime * result + ((namedGraphURIs == null) ? 0 : namedGraphURIs.hashCode()); + result = prime * result + ((url == null) ? 0 : url.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SparqlEndpoint other = (SparqlEndpoint) obj; + if (defaultGraphURIs == null) { + if (other.defaultGraphURIs != null) + return false; + } else if (!defaultGraphURIs.equals(other.defaultGraphURIs)) + return false; + if (namedGraphURIs == null) { + if (other.namedGraphURIs != null) + return false; + } else if (!namedGraphURIs.equals(other.namedGraphURIs)) + return false; + if (url == null) { + if (other.url != null) + return false; + } else if (!url.equals(other.url)) + return false; + return true; + } Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -554,7 +554,9 @@ DatatypeSomeRestriction dsr = (DatatypeSomeRestriction) description; DatatypeProperty dp = (DatatypeProperty) dsr.getRestrictedPropertyExpression(); DataRange dr = dsr.getDataRange(); - if(dr.isDatatype() && ((Datatype)dr).isTopDatatype()){ + if(dr.isDatatype() +// && ((Datatype)dr).isTopDatatype() + ){ if(dpPos.get(dp).containsKey(individual)){ return true; } else { Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -82,6 +82,7 @@ import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -657,7 +658,7 @@ String query = String.format( "SELECT ?type WHERE {<%s> a ?type . " + "FILTER NOT EXISTS{<%s> a ?moreSpecificType ." - + "?moreSpecificType <http://www.w3.org/2000/01/rdf-schema#subClassOf>+ ?type.}}", individual.getName(), individual.getName()); + + "?moreSpecificType <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?type.}}", individual.getName(), individual.getName()); ResultSet rs = executeSelectQuery(query); QuerySolution qs; while(rs.hasNext()){ @@ -858,7 +859,8 @@ Set<NamedClass> siblings = new TreeSet<NamedClass>(); String query = "SELECT ?sub WHERE { <" + cls.getName() + "> <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super ."; query += "?sub <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super ."; - query += "FILTER( !SAMETERM(?sub, <" + cls.getName() + ">)) . }"; +// query += "FILTER NOT EXISTS{?sub2 <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super. ?sub <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?super.}"; + query += "FILTER( !SAMETERM(?sub, <" + cls.getName() + ">) && !SAMETERM(?super, <http://www.w3.org/2000/01/rdf-schema#Resource>)) . }"; ResultSet rs = executeSelectQuery(query); QuerySolution qs; while(rs.hasNext()){ Modified: trunk/components-core/src/main/java/org/dllearner/refinementoperators/ELDown3.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/refinementoperators/ELDown3.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/refinementoperators/ELDown3.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -111,6 +111,8 @@ private ELDescriptionTreeComparator treeComp = new ELDescriptionTreeComparator(); private ELDescriptionEdgeComparator edgeComp = new ELDescriptionEdgeComparator(); private TreeAndRoleSetComparator mComp = new TreeAndRoleSetComparator(); + + private int maxClassExpressionDepth = 2; public ELDown3(AbstractReasonerComponent rs) { this(rs, true); @@ -180,7 +182,7 @@ refinements.addAll(refineLabel(tree, v, position)); } refinements.addAll(refineEdge(tree, v, position)); - if(v.isClassNode() && v.getLevel() < 4){ + if(v.isClassNode() && v.getLevel() <= maxClassExpressionDepth){ refinements.addAll(attachSubtree2(tree, v, position)); refinements.addAll(attachSubtreeDatatypeProperties(tree, v, position)); } @@ -655,6 +657,13 @@ } return false; } + + /** + * @param maxClassExpressionDepth the maxClassExpressionDepth to set + */ + public void setMaxClassExpressionDepth(int maxClassExpressionDepth) { + this.maxClassExpressionDepth = maxClassExpressionDepth; + } @Override public void init() throws ComponentInitException { Modified: trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -194,6 +194,7 @@ Set<Description> superClasses = sr.getSuperClasses(nc); superClasses.remove(new NamedClass(Thing.instance.getURI())); superClasses.remove(Thing.instance); + superClasses.remove(new NamedClass("http://www.w3.org/2000/01/rdf-schema#Resource")); superClasses = filterByNamespace(superClasses); logger.info("Super classes: " + superClasses); @@ -211,7 +212,7 @@ logger.info("Negative examples(" + superClassNegativeExamples.size() + "): " + superClassNegativeExamples); negativeExamples.addAll(superClassNegativeExamples); } else if(strategy == RANDOM){//get some random examples - + String query = "SELECT ?s WHERE {?s a ?type. FILTER NOT EXIST{?type rdfs:subClassOf* }}"; } } return negativeExamples; Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -9,9 +9,17 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; import java.net.URLEncoder; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Arrays; @@ -23,19 +31,27 @@ import java.util.Map; import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.prefs.Preferences; import org.apache.log4j.Logger; -import org.coode.owlapi.rdfxml.parser.DataSomeValuesFromTranslator; import org.dllearner.algorithms.celoe.CELOE; -import org.dllearner.algorithms.el.ELLearningAlgorithmDisjunctive; import org.dllearner.algorithms.elcopy.ELLearningAlgorithm; import org.dllearner.algorithms.isle.index.Index; import org.dllearner.algorithms.isle.index.RelevanceMapGenerator; import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.algorithms.isle.metrics.ChiSquareRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.DiceRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.JaccardRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.LLRRelevanceMetric; import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; import org.dllearner.algorithms.isle.metrics.RelevanceMetric; +import org.dllearner.algorithms.isle.metrics.SCIRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.SignificantPMIRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.TTestRelevanceMetric; import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; @@ -65,6 +81,8 @@ import org.dllearner.refinementoperators.RhoDRDown; import org.dllearner.utilities.PrefixCCMap; import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2; +import org.ini4j.IniPreferences; +import org.ini4j.InvalidFileFormatException; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.model.AxiomType; import org.semanticweb.owlapi.model.IRI; @@ -78,6 +96,7 @@ import com.google.common.base.Charsets; import com.google.common.collect.Sets; +import com.google.common.hash.HashCode; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import com.google.common.io.Files; @@ -108,9 +127,9 @@ private static final Logger logger = Logger.getLogger(DBpediaExperiment.class.getName()); private DecimalFormat dfPercent = new DecimalFormat("0.00%"); - HashFunction hf = Hashing.md5(); + final HashFunction hf = Hashing.md5(); - SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); String namespace = "http://dbpedia.org/ontology/"; OWLOntology schema; @@ -120,7 +139,7 @@ String cacheDirectory = "cache/isle"; String testFolder = "experiments/isle/logs/"; - private SPARQLReasoner reasoner; + private SPARQLReasoner sparqlReasoner; private AutomaticNegativeExampleFinderSPARQL2 negativeExampleFinder; final int maxNrOfPositiveExamples = 100; @@ -129,25 +148,33 @@ int maxCBDDepth = 1; //learning algorithm settings - private int maxNrOfResults = 50; + private int maxNrOfResults = 100; private int maxExecutionTimeInSeconds = 10; - private double noiseInPercentage = 70; + private double noiseInPercentage = 50; private boolean useNegation = false; private boolean useAllConstructor = false; - private RelevanceMetric relevanceMetric; - String experimentsFolder = "experiments/isle/"; File resultsFolder = new File(experimentsFolder + "result/"); private boolean useEL = true; private boolean forceLongDescriptions = true; + + private List<RelevanceMetric> relevanceMetrics; + + private PreparedStatement ps; public DBpediaExperiment() { - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cacheDirectory); - negativeExampleFinder = new AutomaticNegativeExampleFinderSPARQL2(endpoint, reasoner); + try { + endpoint = new SparqlEndpoint(new URL("http://[2001:638:902:2010:0:168:35:138]/sparql"), "http://dbpedia.org"); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + + sparqlReasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cacheDirectory); + negativeExampleFinder = new AutomaticNegativeExampleFinderSPARQL2(endpoint, sparqlReasoner); KnowledgebaseSampleGenerator.maxCBDDepth = maxCBDDepth; new File(experimentsFolder + "samples/").mkdirs(); KnowledgebaseSampleGenerator.cacheDir = experimentsFolder + "samples/"; @@ -155,18 +182,88 @@ loadSchema(); - relevanceMetric = new PMIRelevanceMetric(getSyntacticIndex()); + Index syntacticIndex = getSyntacticIndex(); + relevanceMetrics = new ArrayList<>(); + relevanceMetrics.add(new PMIRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new ChiSquareRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new DiceRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new JaccardRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new LLRRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new SCIRelevanceMetric(syntacticIndex)); + relevanceMetrics.add(new SignificantPMIRelevanceMetric(syntacticIndex, 0.5)); + relevanceMetrics.add(new TTestRelevanceMetric(syntacticIndex)); + resultsFolder.mkdirs(); + + initDBConnection(); } + /** + * Setup the database connection, create the table if not exists and prepare the INSERT statement. + */ + private void initDBConnection() { + try { + InputStream is = this.getClass().getClassLoader().getResourceAsStream("db_settings.ini"); + Preferences prefs = new IniPreferences(is); + String dbServer = prefs.node("database").get("server", null); + String dbName = prefs.node("database").get("name", null); + String dbUser = prefs.node("database").get("user", null); + String dbPass = prefs.node("database").get("pass", null); + + Class.forName("com.mysql.jdbc.Driver"); + String url = "jdbc:mysql://" + dbServer + "/" + dbName; + Connection conn = DriverManager.getConnection(url, dbUser, dbPass); + + java.sql.Statement st = conn.createStatement(); + String sql = "CREATE TABLE IF NOT EXISTS ISLE_Evaluation (" + + "id VARCHAR(100)," + + "class TEXT NOT NULL," + + "position TINYINT NOT NULL," + + "expression TEXT NOT NULL," + + "fscore DECIMAL(8,6) NOT NULL,"; + for (RelevanceMetric metric : relevanceMetrics) { + sql += metric.getClass().getSimpleName().replace("RelevanceMetric", "") + " DECIMAL(8,6) NOT NULL,"; + } + sql += "PRIMARY KEY(id)," + + "INDEX(class(200))) DEFAULT CHARSET=utf8"; + st.execute(sql); + + sql = "INSERT INTO ISLE_Evaluation (id, class, position, expression, fscore"; + for (RelevanceMetric metric : relevanceMetrics) { + sql += "," + metric.getClass().getSimpleName().replace("RelevanceMetric", ""); + } + sql += ") VALUES(?,?,?,?,?"; + for(int i = 0 ; i < relevanceMetrics.size(); i++){ + sql += ",?"; + } + sql += ")"; + ps = conn.prepareStatement(sql); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } catch (InvalidFileFormatException e) { + e.printStackTrace(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + public void run(){ Set<NamedClass> classes = getClasses(); - classes = reasoner.getMostSpecificClasses(); + classes = sparqlReasoner.getMostSpecificClasses(); List<NamedClass> classList = new ArrayList<>(classes); // Collections.reverse(classList); +// classList = classList.subList(0, 10); - for (NamedClass cls : classList) { + new SolrSyntacticIndex(schema, solrServerURL, searchField).buildIndex(classes); + + ExecutorService executor = Executors.newFixedThreadPool(6); + + for (final NamedClass cls : classList) { try { File resultsFile = new File(resultsFolder, URLEncoder.encode(cls.getName(), "UTF-8") + ".csv"); if(resultsFile.exists()){ @@ -175,12 +272,23 @@ } catch (UnsupportedEncodingException e1) { e1.printStackTrace(); } - try { - run(cls); - } catch (Exception e) { - logger.error("Error when learning class " + cls, e); - } + executor.submit(new Runnable() { + @Override + public void run() { + try { + DBpediaExperiment.this.run(cls); + } catch (Exception e) { + logger.error("Error when learning class " + cls, e); + } + } + }); } + executor.shutdown(); + try { + executor.awaitTermination(10, TimeUnit.DAYS); + } catch (InterruptedException e) { + e.printStackTrace(); + } } public void run(NamedClass cls){ @@ -198,14 +306,15 @@ SortedSet<Individual> negativeExamples = getNegativeExamples(cls, positiveExamples); //generate a sample of the knowledge base based on the examples - OWLOntology knowledgebaseSample = loadKnowledgebaseSample(Sets.union(positiveExamples, negativeExamples)); -// Map<Entity, Double> entityRelevance = RelevanceMapGenerator.generateRelevanceMap(cls, schema, relevanceMetric, true); - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - + OWLOntology knowledgebaseSample = loadKnowledgebaseSample(cls, Sets.union(positiveExamples, negativeExamples)); +// try { +// Thread.sleep(2000); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } + + return; + /** //set up the learning try { // set KB @@ -230,22 +339,6 @@ } lp.init(); - /** - Monitor mon = MonitorFactory.getTimeMonitor("time"); - Individual ex = positiveExamples.iterator().next(); - Description r = new DatatypeSomeRestriction(new DatatypeProperty("http://dbpedia.org/ontology/Astronaut/timeInSpace"), new Datatype("http://www.w3.org/2000/01/rdf-schema#Literal")); - mon.start(); - reasoner.hasType(r, ex); -// lp.getAccuracyOrTooWeak(r, 0.3d); - mon.stop(); - System.out.println(mon.getLastValue()); - r = new ObjectSomeRestriction(new ObjectProperty("http://dbpedia.org/ontology/nationality"), new NamedClass("http://dbpedia.org/ontology/Country")); - mon.start(); - reasoner.hasType(r, ex); -// lp.getAccuracyOrTooWeak(r, 0.3d); - mon.stop(); - System.out.println(mon.getLastValue()); - **/ // 1. run basic algorithm //set up the refinement operator and the allowed OWL constructs @@ -265,6 +358,7 @@ ((ELLearningAlgorithm)la).setIgnoredConcepts(Sets.newHashSet(cls)); ((ELLearningAlgorithm)la).setClassToDescribe(cls); ((ELLearningAlgorithm)la).setTreeSearchTimeSeconds(maxExecutionTimeInSeconds); + ((ELLearningAlgorithm)la).setMaxNrOfResults(maxNrOfResults); // la = new ELLearningAlgorithmDisjunctive(lp, reasoner); } else { //build CELOE la @@ -273,6 +367,7 @@ laTmp.setOperator(rop); laTmp.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); laTmp.setStartClass(startClass); + laTmp.setNoisePercentage(noiseInPercentage); new File(testFolder).mkdirs(); laTmp.setSearchTreeFile(testFolder + "searchTree.txt"); laTmp.setWriteSearchTree(true); @@ -285,19 +380,36 @@ } la.init(); la.start(); - Map<Entity, Double> entityRelevance = RelevanceMapGenerator.generateRelevanceMap(cls, schema, relevanceMetric, true); - + //compute the relevance scores + Map<RelevanceMetric, Map<Entity, Double>> entityRelevances = RelevanceMapGenerator.generateRelevanceMaps(cls, schema, relevanceMetrics, true); + //Write to DB + try { + write2DB(reasoner, lp, cls, la.getCurrentlyBestEvaluatedDescriptions(), entityRelevances); + } catch (SQLException e1) { + e1.printStackTrace(); + } + //write to CSV file int current = 1; StringBuilder sb = new StringBuilder(); + //the header line + sb.append("class expression,fmeasure"); + for (RelevanceMetric metric : relevanceMetrics) { + sb.append(",").append(metric.getClass().getSimpleName()); + } + sb.append("\n"); + //the entries for(EvaluatedDescription ed : la.getCurrentlyBestEvaluatedDescriptions().descendingSet()) { if(lp instanceof PosNegLPStandard) { double fMeasure = ((PosNegLPStandard)lp).getFMeasureOrTooWeakExact(ed.getDescription(),1); sb.append(replaceDataPropertyRanges(ed.getDescription()).toManchesterSyntaxString(reasoner.getBaseURI(), reasoner.getPrefixes()) + "," // + ((PosNegLPStandard)lp).getPredAccuracyOrTooWeakExact(ed.getDescription(),1) + "," + fMeasure); - double relevanceScore = getRelevanceScore(ed.getDescription(), entityRelevance); - sb.append(",").append(relevanceScore); - sb.append(",").append(fMeasure + relevanceScore); + for (RelevanceMetric metric : relevanceMetrics) { + double relevanceScore = getRelevanceScore(ed.getDescription(), entityRelevances.get(metric)); + sb.append(",").append(relevanceScore); + } + +// sb.append(",").append(fMeasure + relevanceScore); sb.append("\n"); } @@ -322,6 +434,7 @@ } catch (ComponentInitException e) { e.printStackTrace(); } + */ } /** @@ -414,7 +527,7 @@ private SortedSet<Individual> getPositiveExamples(NamedClass cls){ logger.info("Generating positive examples..."); - SortedSet<Individual> individuals = reasoner.getIndividuals(cls, 1000); + SortedSet<Individual> individuals = sparqlReasoner.getIndividuals(cls, 1000); List<Individual> individualsList = new ArrayList<>(individuals); // Collections.shuffle(individualsList, new Random(1234)); individuals.clear(); @@ -440,15 +553,14 @@ logger.info("Done. Number of logical axioms: " + schema.getLogicalAxiomCount()); } - private OWLOntology loadKnowledgebaseSample(Set<Individual> individuals){ + private OWLOntology loadKnowledgebaseSample(NamedClass nc, Set<Individual> individuals){ logger.info("Generating knowledge base sample..."); Model sampleModel = KnowledgebaseSampleGenerator.createKnowledgebaseSample(endpoint, namespace, individuals); sampleModel.setNsPrefix("dbo", "http://dbpedia.org/ontology/"); logger.info("Done. Size: " + sampleModel.size() + " triples"); cleanUp(sampleModel); logger.info("Clean up. Size: " + sampleModel.size() + " triples"); -// Query query = QueryFactory.create("SELECT ?p (COUNT(distinct ?s) AS ?cnt) WHERE {?s ?p ?o. ?s a <http://dbpedia.org/ontology/Cardinal>} GROUP BY ?p ORDER BY DESC(?cnt)", Syntax.syntaxARQ); -// System.out.println(ResultSetFormatter.asText(QueryExecutionFactory.create(query, sampleModel).execSelect())); + showPropertyDistribution(nc, sampleModel); try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -462,6 +574,7 @@ man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE)); man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DISJOINT_CLASSES)); man.removeAxioms(ontology, ontology.getAxioms(AxiomType.SAME_INDIVIDUAL)); +// man.removeAxioms(ontology, ontology.getAxioms(AxiomType.OBJECT_PROPERTY_RANGE)); man.removeAxiom(ontology, df.getOWLObjectPropertyDomainAxiom( df.getOWLObjectProperty(IRI.create("http://dbpedia.org/ontology/mission")), df.getOWLClass(IRI.create("http://dbpedia.org/ontology/Aircraft")))); @@ -480,6 +593,17 @@ return null; } + private void showPropertyDistribution(NamedClass cls, Model model){ + Query query = QueryFactory.create("SELECT ?p (COUNT(distinct ?s) AS ?cnt) (COUNT(distinct ?x) AS ?negCnt) WHERE {" + + "?s ?p ?o. {?p a <http://www.w3.org/2002/07/owl#ObjectProperty>} UNION {?p a <http://www.w3.org/2002/07/owl#DatatypeProperty>}" + + "?s a <" + cls.getName() + ">. " + + "OPTIONAL{?x ?p ?o1. " + + "FILTER NOT EXISTS{?x a <" + cls.getName() + ">.}}} " + + "GROUP BY ?p ORDER BY DESC(?cnt)", Syntax.syntaxARQ); + + System.out.println(ResultSetFormatter.asText(QueryExecutionFactory.create(query, model).execSelect())); + } + private void cleanUp(Model model){ String dbo = "http://dbpedia.org/ontology/"; Set<String> blackList = Sets.newHashSet( @@ -591,6 +715,33 @@ return classes; } + private synchronized void write2DB(FastInstanceChecker reasoner, AbstractLearningProblem lp, NamedClass cls, TreeSet<? extends EvaluatedDescription> evaluatedDescriptions, Map<RelevanceMetric, Map<Entity, Double>> entityRelevances) throws SQLException{ + int position = 1; + for(EvaluatedDescription ed : evaluatedDescriptions.descendingSet()) { + String clsName = cls.getName(); + String expression = replaceDataPropertyRanges(ed.getDescription()).toManchesterSyntaxString(reasoner.getBaseURI(), reasoner.getPrefixes()); + HashCode hc = hf.newHasher() + .putString(clsName, Charsets.UTF_8) + .putString(expression, Charsets.UTF_8) + .hash(); + String id = hc.toString(); + double fMeasure = ((PosNegLPStandard)lp).getAccuracyOrTooWeakExact(ed.getDescription(), noiseInPercentage/100d); + ps.setString(1, id); + ps.setString(2, cls.getName()); + ps.setInt(3, position++); + ps.setString(4, expression); + ps.setDouble(5, fMeasure); + int col = 6; + for (RelevanceMetric metric : relevanceMetrics) { + double relevanceScore = getRelevanceScore(ed.getDescription(), entityRelevances.get(metric)); + ps.setDouble(col++, relevanceScore); + } + + ps.addBatch(); + } + ps.executeBatch(); + } + public static void main(String[] args) throws Exception { // ToStringRenderer.getInstance().setRenderer(new DLSyntaxObjectRenderer()); // String cls = "http://dbpedia.org/ontology/Astronaut"; @@ -607,8 +758,11 @@ // la.setPattern(DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(pattern)); // la.start(); + long start = System.currentTimeMillis(); + new DBpediaExperiment().run(); +// new DBpediaExperiment().run(new NamedClass("http://dbpedia.org/ontology/SoccerClub")); + long end = System.currentTimeMillis(); + logger.info("Operation took " + (end - start) + "ms"); - new DBpediaExperiment().run(); -// new DBpediaExperiment().run(new NamedClass("http://dbpedia.org/ontology/Astronaut")); } } Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/KnowledgebaseSampleGenerator.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/KnowledgebaseSampleGenerator.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/KnowledgebaseSampleGenerator.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -136,7 +136,7 @@ Model cbd; for (Individual individual : individuals) { try { - Thread.sleep(500); + Thread.sleep(100); } catch (InterruptedException e) { e.printStackTrace(); } Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java 2014-02-13 11:47:52 UTC (rev 4223) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/metrics/RelevanceMetricsTest.java 2014-02-16 15:59:43 UTC (rev 4224) @@ -62,7 +62,7 @@ relevance = metric.getRelevance(entity1, entity2); System.out.println(relevance); - // dbo:Person and dbo:Animal + // dbo:Person and dbo:birthPlace entity1 = new NamedClass(DBPEDIA_NS + "Person"); entity2 = new ObjectProperty(DBPEDIA_NS + "birthPlace"); relevance = metric.getRelevance(entity1, entity2); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |