From: <lor...@us...> - 2014-01-21 12:49:55
Revision: 4213 http://sourceforge.net/p/dl-learner/code/4213 Author: lorenz_b Date: 2014-01-21 12:49:51 +0000 (Tue, 21 Jan 2014) Log Message: ----------- Continued DBpedia experiment. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaCorpusGenerator.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSyntacticIndexBasedExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/KnowledgebaseSampleGenerator.java trunk/components-core/src/test/java/org/dllearner/algorithms/qtl/QALDExperiment.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java trunk/components-core/src/main/java/org/dllearner/core/AbstractHeuristic.java trunk/components-core/src/main/java/org/dllearner/core/Heuristic.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/FixDBpediaOntology.java trunk/components-core/src/test/resources/org/dllearner/algorithms/ trunk/components-core/src/test/resources/org/dllearner/algorithms/isle/ trunk/components-core/src/test/resources/org/dllearner/algorithms/isle/dbpedia_3.9.owl Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -32,6 +32,7 @@ import org.apache.log4j.Logger; import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractHeuristic; import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; @@ -57,7 +58,6 @@ import org.dllearner.refinementoperators.LengthLimitedRefinementOperator; import org.dllearner.refinementoperators.OperatorInverter; import 
org.dllearner.refinementoperators.ReasoningBasedRefinementOperator; -import org.dllearner.refinementoperators.RefinementOperator; import org.dllearner.refinementoperators.RhoDRDown; import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; @@ -98,7 +98,7 @@ // all nodes in the search tree (used for selecting most promising node) private TreeSet<OENode> nodes; - private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime(); + private AbstractHeuristic heuristic; // = new OEHeuristicRuntime(); // root of search tree private OENode startNode; // the class with which we start the refinement process @@ -132,7 +132,7 @@ // important parameters (non-config options but internal) private double noise; - private boolean filterFollowsFromKB; + private boolean filterFollowsFromKB = false; // less important parameters // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity @@ -207,6 +207,7 @@ @SuppressWarnings("unused") private long timeLastImprovement = 0; + private boolean expandAccuracy100Nodes = false; // public CELOEConfigurator getConfigurator() { // return configurator; @@ -563,8 +564,12 @@ Iterator<OENode> it = nodes.descendingIterator(); while(it.hasNext()) { OENode node = it.next(); - if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) { - return node; + if (isExpandAccuracy100Nodes() && node.getHorizontalExpansion() < node.getDescription().getLength()) { + return node; + } else { + if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) { + return node; + } } } @@ -693,7 +698,9 @@ // System.out.println(bestEvaluatedDescriptions); } } - + +// bestEvaluatedDescriptions.add(node.getDescription(), accuracy, learningProblem); + // System.out.println(bestEvaluatedDescriptions.getSet().size()); } @@ -1040,12 +1047,12 @@ this.useMinimizer = useMinimizer; } - public OEHeuristicRuntime getHeuristic() { + public AbstractHeuristic getHeuristic() { return heuristic; } @Autowired(required=false) - public void setHeuristic(OEHeuristicRuntime heuristic) { + public void setHeuristic(AbstractHeuristic heuristic) { this.heuristic = heuristic; } @@ -1113,6 +1120,20 @@ return totalRuntimeNs; } + /** + * @return the expandAccuracy100Nodes + */ + public boolean isExpandAccuracy100Nodes() { + return expandAccuracy100Nodes; + } + + /** + * @param expandAccuracy100Nodes the expandAccuracy100Nodes to set + */ + public void setExpandAccuracy100Nodes(boolean expandAccuracy100Nodes) { + this.expandAccuracy100Nodes = expandAccuracy100Nodes; + } + public static void main(String[] args) throws Exception{ AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); ks.init(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -19,9 +19,7 @@ package org.dllearner.algorithms.celoe; -import java.util.Comparator; - -import org.dllearner.core.Component; +import org.dllearner.core.AbstractHeuristic; import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; import org.dllearner.core.config.ConfigOption; @@ -36,8 +34,9 @@ * */ 
@ComponentAnn(name = "OEHeuristicRuntime", shortName = "celoe_heuristic", version = 0.5) -public class OEHeuristicRuntime implements Component, Comparator<OENode>{ +public class OEHeuristicRuntime extends AbstractHeuristic{ + // strong penalty for long descriptions private double expansionPenaltyFactor = 0.1; // bonus for being better than parent node @@ -59,24 +58,6 @@ public void init() throws ComponentInitException { } - - @Override - public int compare(OENode node1, OENode node2) { -// System.out.println("node1 " + node1); -// System.out.println("score: " + getNodeScore(node1)); -// System.out.println("node2 " + node2); -// System.out.println("score: " + getNodeScore(node2)); - - double diff = getNodeScore(node1) - getNodeScore(node2); - - if(diff>0) { - return 1; - } else if(diff<0) { - return -1; - } else { - return conceptComparator.compare(node1.getDescription(), node2.getDescription()); - } - } public double getNodeScore(OENode node) { // accuracy as baseline Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -206,6 +206,7 @@ @SuppressWarnings("unused") private long timeLastImprovement = 0; + private boolean expandAccuracy100Nodes = false; // public CELOEConfigurator getConfigurator() { // return configurator; @@ -562,8 +563,12 @@ Iterator<OENode> it = nodes.descendingIterator(); while(it.hasNext()) { OENode node = it.next(); - if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) { + if (isExpandAccuracy100Nodes()) { return node; + } else { + if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) { + return node; + } } } @@ -1112,6 +1117,20 @@ return totalRuntimeNs; } + /** + * @return the expandAccuracy100Nodes + */ + public boolean isExpandAccuracy100Nodes() { + return expandAccuracy100Nodes; + } + + /** + * @param expandAccuracy100Nodes the expandAccuracy100Nodes to set + */ + public void setExpandAccuracy100Nodes(boolean expandAccuracy100Nodes) { + this.expandAccuracy100Nodes = expandAccuracy100Nodes; + } + public static void main(String[] args) throws Exception{ AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); ks.init(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -19,21 +19,15 @@ package org.dllearner.algorithms.isle; -import java.util.Comparator; -import java.util.HashSet; import java.util.Map; import java.util.Set; import org.dllearner.algorithms.celoe.OENode; -import org.dllearner.core.Component; -import org.dllearner.core.ComponentInitException; +import org.dllearner.core.AbstractHeuristic; import org.dllearner.core.config.ConfigOption; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.ConceptComparator; -import org.dllearner.utilities.owl.OWLAPIConverter; -import 
org.semanticweb.owlapi.model.OWLClassExpression; -import org.semanticweb.owlapi.model.OWLEntity; /** * @@ -42,7 +36,7 @@ * @author Jens Lehmann * */ -public class NLPHeuristic implements Component, Comparator<OENode>{ +public class NLPHeuristic extends AbstractHeuristic{ // strong penalty for long descriptions private double expansionPenaltyFactor = 0.1; @@ -67,29 +61,6 @@ this.entityRelevance = entityRelevance; } - @Override - public void init() throws ComponentInitException { - - } - - @Override - public int compare(OENode node1, OENode node2) { -// System.out.println("node1 " + node1); -// System.out.println("score: " + getNodeScore(node1)); -// System.out.println("node2 " + node2); -// System.out.println("score: " + getNodeScore(node2)); - - double diff = getNodeScore(node1) - getNodeScore(node2); - - if(diff>0) { - return 1; - } else if(diff<0) { - return -1; - } else { - return conceptComparator.compare(node1.getDescription(), node2.getDescription()); - } - } - public double getNodeScore(OENode node) { // accuracy as baseline double score = node.getAccuracy(); @@ -124,38 +95,6 @@ return score; } - public double getExpansionPenaltyFactor() { - return expansionPenaltyFactor; - } - - public double getGainBonusFactor() { - return gainBonusFactor; - } - - public void setGainBonusFactor(double gainBonusFactor) { - this.gainBonusFactor = gainBonusFactor; - } - - public double getNodeRefinementPenalty() { - return nodeRefinementPenalty; - } - - public void setNodeRefinementPenalty(double nodeRefinementPenalty) { - this.nodeRefinementPenalty = nodeRefinementPenalty; - } - - public void setExpansionPenaltyFactor(double expansionPenaltyFactor) { - this.expansionPenaltyFactor = expansionPenaltyFactor; - } - - public double getStartNodeBonus() { - return startNodeBonus; - } - - public void setStartNodeBonus(double startNodeBonus) { - this.startNodeBonus = startNodeBonus; - } - /** * @param entityRelevance the entityRelevance to set */ Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/RelevanceMapGenerator.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -0,0 +1,75 @@ +package org.dllearner.algorithms.isle.index; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.isle.metrics.RelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceUtils; +import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.NamedClass; +import org.semanticweb.owlapi.model.OWLOntology; + +import com.google.common.hash.HashCode; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; + +/** + * Interface for an index which is able to resolve a given entity's URI to the set of documents containing + * this entity, i.e., documents which contain words disambiguated to the given entity. 
+ * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public abstract class RelevanceMapGenerator { + + static HashFunction hf = Hashing.md5(); + private static final Logger logger = Logger.getLogger(RelevanceMapGenerator.class.getName()); + public static String cacheDirectory = "cache/relevance"; + + public static Map<Entity, Double> generateRelevanceMap(NamedClass cls, OWLOntology ontology, RelevanceMetric relevanceMetric, boolean cached){ + Map<Entity, Double> relevanceMap = null; + File folder = new File(cacheDirectory); + folder.mkdirs(); + File file = null; + try { + file = new File(folder, URLEncoder.encode(cls.getName(), "UTF-8") + ".rel"); + } catch (UnsupportedEncodingException e2) { + e2.printStackTrace(); + } + if(cached && file.exists()){ + try { + logger.info("Loading relevance map from disk..."); + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + relevanceMap = (Map<Entity, Double>) ois.readObject(); + ois.close(); + logger.info("...done."); + } catch (Exception e) { + e.printStackTrace(); + } + } else { + logger.info("Building relevance map..."); + relevanceMap = RelevanceUtils.getRelevantEntities(cls, ontology, relevanceMetric); + try { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file)); + oos.writeObject(relevanceMap); + oos.close(); + } catch (IOException e1) { + e1.printStackTrace(); + } + logger.info("...done."); + } + return relevanceMap; + } + + public static Map<Entity, Double> generateRelevanceMap(NamedClass cls, OWLOntology ontology, RelevanceMetric relevanceMetric){ + return generateRelevanceMap(cls, ontology, relevanceMetric, false); + } +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle.index.syntactic; +import java.io.File; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -26,7 +27,10 @@ import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.NamedClass; +import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; import com.google.common.base.Joiner; @@ -39,6 +43,7 @@ private SolrServer solr; private AnnotationEntityTextRetriever textRetriever; private String searchField; + private String typesField = "types"; long totalNumberOfDocuments = -1; @@ -49,7 +54,7 @@ solr = new HttpSolrServer(solrServerURL); textRetriever = new RDFSLabelEntityTextRetriever(ontology); } - + /* (non-Javadoc) * @see org.dllearner.algorithms.isle.index.Index#getDocuments(org.dllearner.core.owl.Entity) */ @@ -120,7 +125,7 @@ phrase += token.getRawForm() + " "; } phrase.trim(); - terms.add(phrase); + terms.add(quotedString(phrase)); } queryString += Joiner.on("OR").join(terms); queryString += ")"; @@ -136,7 +141,7 @@ } return -1; } - + /* (non-Javadoc) * @see 
org.dllearner.algorithms.isle.index.Index#getNumberOfDocumentsFor(org.dllearner.core.owl.Entity[]) */ @@ -158,7 +163,7 @@ phrase += token.getRawForm() + " "; } phrase.trim(); - terms.add(phrase); + terms.add(quotedString(phrase)); } queryString += Joiner.on("OR").join(terms); queryString += ")"; @@ -178,5 +183,57 @@ } return -1; } + + + public long getNumberOfDocumentsForTyped(NamedClass resourceClass, Entity entity) { + + + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + String queryString = "("; + Set<String> terms = new HashSet<>(); + for (Entry<List<Token>, Double> entry : relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + String phrase = ""; + for (Token token : tokens) { +// terms.add(token.getRawForm()); + phrase += token.getRawForm() + " "; + } + phrase.trim(); + terms.add(quotedString(phrase)); + } + queryString += Joiner.on("OR").join(terms); + queryString += ")";System.out.println(queryString); + + SolrQuery query = new SolrQuery( + searchField + ":" + queryString + " AND " + typesField + ":" + quotedString(resourceClass.getName()));//System.out.println(query); + try { + QueryResponse response = solr.query(query); + SolrDocumentList list = response.getResults(); + return list.getNumFound(); + } catch (SolrServerException e) { + e.printStackTrace(); + } + return -1; + } + + private String quotedString(String s){ + return "\"" + s.trim() + "\""; + } + + public static void main(String[] args) throws Exception { + String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + String searchField = "comment"; + OWLOntology ontology = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(new File("src/test/resources/org/dllearner/algorithms/isle/dbpedia_3.9.owl")); + SolrSyntacticIndex index = new SolrSyntacticIndex(ontology, solrServerURL, searchField); + long n = index.getNumberOfDocumentsFor(new NamedClass("http://dbpedia.org/ontology/Person"), new NamedClass("http://schema.org/Canal")); + System.out.println(n); + n = index.getNumberOfDocumentsForTyped(new NamedClass("http://dbpedia.org/ontology/Person"), new NamedClass("http://schema.org/Canal")); + System.out.println(n); + n = index.getNumberOfDocumentsForTyped(new NamedClass("http://dbpedia.org/ontology/Person"), new NamedClass("http://dbpedia.org/ontology/nationality")); + System.out.println(n); + n = index.getNumberOfDocumentsForTyped(new NamedClass("http://dbpedia.org/ontology/Person"), new NamedClass("http://dbpedia.org/ontology/birthPlace")); + System.out.println(n); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -7,6 +7,9 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; import org.dllearner.core.owl.Entity; @@ -20,38 +23,52 @@ */ public class RelevanceUtils { - private static final Logger logger = Logger.getLogger(RelevanceUtils.class.getName()); + static int maxNrOfThreads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1); - public static Map<Entity, Double> 
getRelevantEntities(Entity entity, Set<Entity> otherEntities, RelevanceMetric metric){ - Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + /** + * Returns a map containing the relevance score based on the given metric between the entity and each other entity. + * @param entity + * @param otherEntities + * @param metric + * @return + */ + public static Map<Entity, Double> getRelevantEntities(final Entity entity, Set<Entity> otherEntities, final RelevanceMetric metric){ + logger.info("Get relevant entities for " + entity); + final Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); - for (Entity otherEntity : otherEntities) { - double relevance = metric.getRelevance(entity, otherEntity); - relevantEntities.put(otherEntity, relevance); + ExecutorService executor = Executors.newFixedThreadPool(maxNrOfThreads); + + for (final Entity otherEntity : otherEntities) { + executor.submit(new Runnable() { + @Override + public void run() { + double relevance = metric.getNormalizedRelevance(entity, otherEntity); + logger.info(otherEntity + ":" + relevance); + relevantEntities.put(otherEntity, relevance); + } + }); } + executor.shutdown(); + try { + executor.awaitTermination(1, TimeUnit.DAYS); + } catch (InterruptedException e) { + e.printStackTrace(); + } return relevantEntities; } public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ - logger.info("Get relevant entities for " + entity); - Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); - Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); owlEntities.addAll(ontology.getClassesInSignature()); owlEntities.addAll(ontology.getDataPropertiesInSignature()); owlEntities.addAll(ontology.getObjectPropertiesInSignature()); + Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); - otherEntities.remove(entity); - for (Entity otherEntity : otherEntities) { - double relevance = metric.getNormalizedRelevance(entity, otherEntity); - logger.info(otherEntity + ":" + relevance); - relevantEntities.put(otherEntity, relevance); - } - return relevantEntities; + return getRelevantEntities(entity, otherEntities, metric); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -111,7 +111,7 @@ logger.info("Pattern: " + pattern); //get the maximum modal depth in the pattern axioms - int modalDepth = MaximumModalDepthDetector.getMaxModalDepth(OWLAPIAxiomConvertVisitor.convertAxiom(pattern)); + int modalDepth = MaximumModalDepthDetector.getMaxModalDepth(OWLAPIAxiomConvertVisitor.convertAxiom(pattern));modalDepth++; logger.info("Modal depth: " + modalDepth); //extract fragment @@ -119,7 +119,9 @@ //try to find instantiation of the pattern with confidence above threshold Set<OWLAxiom> instantiations = applyPattern(OWLAPIAxiomConvertVisitor.convertAxiom(pattern), dataFactory.getOWLClass(IRI.create(cls.getName())), fragment); - System.out.println(instantiations); + for (OWLAxiom instantiation : instantiations) { + System.out.println(instantiation); + } logger.info("...finished in {}ms.", 
(System.currentTimeMillis()-startTime)); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -143,10 +143,9 @@ } //if NO we have to create a new tree as LGG and compute the LGG for the all child node pairs having the same edge to the parent nodes lgg = new QueryTreeImpl<N>(tree1.getUserObject()); - if(tree1.isResourceNode() && tree2.isResourceNode()){ - lgg.setIsResourceNode(true); - - } +// if(tree1.isResourceNode() && tree2.isResourceNode()){ +// lgg.setIsResourceNode(true); +// } // if(!lgg.getUserObject().equals(tree2.getUserObject())){ // lgg.setUserObject((N)"?"); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -31,6 +31,9 @@ private List<QueryTree<N>> posExamples; private List<QueryTree<N>> negExamples; + + private double coverageWeight = 0.6; + private double specifityWeight = 0.4; public NoiseSensitiveLGG() { } @@ -53,6 +56,7 @@ logger.trace("TODO list size: " + todoList.size()); //pick best element from todo list currentElement = todoList.poll(); + //generate the LGG between the chosen tree and each uncovered positive example for (QueryTree<N> example : currentElement.getFalseNegatives()) { QueryTree<N> tree = currentElement.getTree(); //compute the LGG @@ -63,7 +67,7 @@ //evaluate the LGG EvaluatedQueryTree<N> solution = evaluate(lgg); - if(solution.getScore() > currentlyBestScore){ + if(solution.getScore() >= currentlyBestScore){ //add to todo list, if not already contained in todo list or solution list todo(solution); currentlyBestScore = solution.getScore(); @@ -83,6 +87,7 @@ } private EvaluatedQueryTree<N> evaluate(QueryTree<N> lgg){ + //1. get a score for the coverage = recall oriented //compute positive examples which are not covered by LGG Collection<QueryTree<N>> uncoveredPositiveExamples = getUncoveredTrees(lgg, posExamples); //compute negative examples which are covered by LGG @@ -94,8 +99,20 @@ ? 0 : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - double score = Heuristics.getFScore(recall, precision); + double coverageScore = recall;//Heuristics.getFScore(recall, precision); + //2. get a score for the specifity of the query, i.e. 
how many edges/nodes = precision oriented + int numberOfSpecificNodes = 0; + for (QueryTree<N> childNode : lgg.getChildrenClosure()) { + if(!childNode.getUserObject().equals("?")){ + numberOfSpecificNodes++; + } + } + double specifityScore = Math.log(numberOfSpecificNodes); + + //3.compute the total score + double score = coverageWeight * coverageScore + specifityWeight * specifityScore; + EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredPositiveExamples, coveredNegativeExamples, score); return solution; Added: trunk/components-core/src/main/java/org/dllearner/core/AbstractHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractHeuristic.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractHeuristic.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -0,0 +1,112 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package org.dllearner.core; + +import java.util.Comparator; + +import org.dllearner.algorithms.celoe.OENode; +import org.dllearner.core.config.ConfigOption; +import org.dllearner.utilities.owl.ConceptComparator; + +/** + * Search algorithm heuristic for the ontology engineering algorithm. The heuristic + * has a strong bias towards short descriptions (i.e. the algorithm is likely to be + * less suitable for learning complex descriptions). 
+ * + * @author Jens Lehmann + * + */ +@ComponentAnn(name = "OEHeuristicRuntime", shortName = "celoe_heuristic", version = 0.5) +public abstract class AbstractHeuristic extends AbstractComponent implements Heuristic, Comparator<OENode>{ + + // strong penalty for long descriptions + private double expansionPenaltyFactor = 0.1; + // bonus for being better than parent node + private double gainBonusFactor = 0.3; + // penalty if a node description has very many refinements since exploring + // such a node is computationally very expensive + private double nodeRefinementPenalty = 0.0001; + // syntactic comparison as final comparison criterion + private ConceptComparator conceptComparator = new ConceptComparator(); + + @ConfigOption(name = "startNodeBonus", defaultValue="0.1") + private double startNodeBonus = 0.1; + + public AbstractHeuristic() { + + } + + @Override + public void init() throws ComponentInitException { + + } + + @Override + public int compare(OENode node1, OENode node2) { +// System.out.println("node1 " + node1); +// System.out.println("score: " + getNodeScore(node1)); +// System.out.println("node2 " + node2); +// System.out.println("score: " + getNodeScore(node2)); + + double diff = getNodeScore(node1) - getNodeScore(node2); + + if(diff>0) { + return 1; + } else if(diff<0) { + return -1; + } else { + return conceptComparator.compare(node1.getDescription(), node2.getDescription()); + } + } + + public abstract double getNodeScore(OENode node); + + public double getExpansionPenaltyFactor() { + return expansionPenaltyFactor; + } + + public double getGainBonusFactor() { + return gainBonusFactor; + } + + public void setGainBonusFactor(double gainBonusFactor) { + this.gainBonusFactor = gainBonusFactor; + } + + public double getNodeRefinementPenalty() { + return nodeRefinementPenalty; + } + + public void setNodeRefinementPenalty(double nodeRefinementPenalty) { + this.nodeRefinementPenalty = nodeRefinementPenalty; + } + + public void setExpansionPenaltyFactor(double expansionPenaltyFactor) { + this.expansionPenaltyFactor = expansionPenaltyFactor; + } + + public double getStartNodeBonus() { + return startNodeBonus; + } + + public void setStartNodeBonus(double startNodeBonus) { + this.startNodeBonus = startNodeBonus; + } +} Added: trunk/components-core/src/main/java/org/dllearner/core/Heuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/Heuristic.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/core/Heuristic.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -0,0 +1,12 @@ +/** + * + */ +package org.dllearner.core; + +/** + * @author Lorenz Buehmann + * + */ +public interface Heuristic extends Component{ + +} Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/FastInstanceChecker.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -1016,6 +1016,17 @@ public Map<String, String> getPrefixes() { return rc.getPrefixes(); } + + public void setPrefixes(Map<String, String> prefixes) { + rc.setPrefixes(prefixes); + } + + /** + * @param baseURI the baseURI to set + */ + public void setBaseURI(String baseURI) { + rc.setBaseURI(baseURI); + } @Override public Description getDomainImpl(ObjectProperty objectProperty) { 
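
[Editor's note] The new AbstractHeuristic base class above pulls the shared Comparator<OENode> logic (score difference first, syntactic ConceptComparator as tie-breaker) and the common config options out of OEHeuristicRuntime and NLPHeuristic, so CELOE's @Autowired setHeuristic(AbstractHeuristic) setter now accepts any heuristic implementation. A minimal sketch of a custom heuristic written against that contract — the class name and scoring formula are illustrative only, not part of this commit:

    import org.dllearner.algorithms.celoe.OENode;
    import org.dllearner.core.AbstractHeuristic;

    // Hypothetical heuristic: accuracy minus a description-length penalty,
    // reusing the expansionPenaltyFactor option inherited from AbstractHeuristic.
    public class AccuracyLengthHeuristic extends AbstractHeuristic {

        @Override
        public double getNodeScore(OENode node) {
            // accuracy as baseline, as in OEHeuristicRuntime
            double score = node.getAccuracy();
            // penalize long descriptions
            score -= node.getDescription().getLength() * getExpansionPenaltyFactor();
            return score;
        }
    }

Because AbstractHeuristic already implements compare(OENode, OENode) on top of getNodeScore, a subclass only has to supply the score; it can then be plugged into the learner with celoe.setHeuristic(new AccuracyLengthHeuristic()).
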
Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/OWLAPIReasoner.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -434,6 +434,20 @@ public SortedSet<Individual> getIndividuals() { return individuals; } + + /** + * @param prefixes the prefixes to set + */ + public void setPrefixes(Map<String, String> prefixes) { + this.prefixes = prefixes; + } + + /** + * @param baseURI the baseURI to set + */ + public void setBaseURI(String baseURI) { + this.baseURI = baseURI; + } /* (non-Javadoc) * @see org.dllearner.core.Reasoner#getReasonerType() Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -1505,6 +1505,19 @@ public SortedSet<Description> getMostGeneralClasses() { return hierarchy.getMostGeneralClasses(); } + + public SortedSet<NamedClass> getMostSpecificClasses() { + SortedSet<NamedClass> classes = new TreeSet<>(conceptComparator); + String query = "SELECT ?cls WHERE {?cls a <http://www.w3.org/2002/07/owl#Class>. " + + "FILTER NOT EXISTS{?sub <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?cls. FILTER(?sub != <http://www.w3.org/2002/07/owl#Nothing>)}}"; + ResultSet rs = executeSelectQuery(query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + classes.add(new NamedClass(qs.getResource("cls").getURI())); + } + return classes; + } @Override public SortedSet<Description> getSuperClasses(Description description) { Modified: trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/main/java/org/dllearner/utilities/examples/AutomaticNegativeExampleFinderSPARQL2.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -181,6 +181,7 @@ int maxFrequency = positiveExamplesTypes.entrySet().iterator().next().getCount(); if(strategy == SIBLING){//get sibling class based examples + logger.info("Applying sibling classes strategy..."); SortedSet<Individual> siblingNegativeExamples = new TreeSet<Individual>(); //for each type of the positive examples for (NamedClass nc : positiveExamplesTypes.elementSet()) { @@ -188,17 +189,22 @@ //get sibling classes Set<NamedClass> siblingClasses = sr.getSiblingClasses(nc); siblingClasses = filterByNamespace(siblingClasses); - System.out.println("Sibling classes: " + siblingClasses); + logger.info("Sibling classes: " + siblingClasses); int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / siblingClasses.size() * strategyLimit); //get instances for each sibling class for (NamedClass siblingClass : siblingClasses) { - siblingNegativeExamples.addAll(sr.getIndividualsExcluding(siblingClass, nc, limit)); + SortedSet<Individual> individuals = sr.getIndividualsExcluding(siblingClass, nc, maxNrOfReturnedInstances); + 
individuals.removeAll(siblingNegativeExamples); + SetManipulation.stableShrink(individuals, limit); + siblingNegativeExamples.addAll(individuals); } } siblingNegativeExamples = SetManipulation.stableShrink(siblingNegativeExamples, strategyLimit); + logger.info("Negative examples(" + siblingNegativeExamples.size() + "): " + siblingNegativeExamples); negativeExamples.addAll(siblingNegativeExamples); } else if(strategy == SUPERCLASS){//get super class based examples + logger.info("Applying super class strategy..."); SortedSet<Individual> superClassNegativeExamples = new TreeSet<Individual>(); //for each type of the positive examples for (NamedClass nc : positiveExamplesTypes.elementSet()) { @@ -206,15 +212,22 @@ //get super classes Set<Description> superClasses = sr.getSuperClasses(nc); superClasses.remove(new NamedClass(Thing.instance.getURI())); + superClasses.remove(Thing.instance); superClasses = filterByNamespace(superClasses); + logger.info("Super classes: " + superClasses); int limit = (int)Math.ceil(((double)frequency / positiveExamplesTypes.size()) / superClasses.size() * strategyLimit); //get instances for each super class for (Description superClass : superClasses) { - superClassNegativeExamples.addAll(sr.getIndividualsExcluding(superClass, nc, limit)); + SortedSet<Individual> individuals = sr.getIndividualsExcluding(superClass, nc, maxNrOfReturnedInstances); + individuals.removeAll(negativeExamples); + individuals.removeAll(superClassNegativeExamples); + SetManipulation.stableShrink(individuals, limit); + superClassNegativeExamples.addAll(individuals); } } superClassNegativeExamples = SetManipulation.stableShrink(superClassNegativeExamples, strategyLimit); + logger.info("Negative examples(" + superClassNegativeExamples.size() + "): " + superClassNegativeExamples); negativeExamples.addAll(superClassNegativeExamples); } else if(strategy == RANDOM){//get some random examples Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaCorpusGenerator.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaCorpusGenerator.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaCorpusGenerator.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -25,6 +25,7 @@ import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.log4j.Logger; import org.dllearner.core.owl.NamedClass; import org.dllearner.kb.sparql.SparqlEndpoint; import org.semanticweb.owlapi.apibinding.OWLManager; @@ -45,6 +46,8 @@ */ public class DBpediaCorpusGenerator { + private static final Logger logger = Logger.getLogger(DBpediaCorpusGenerator.class.getName()); + /** * Loads DBpedia ontology from remote URL. 
*/ @@ -68,6 +71,7 @@ } public static Set<String> getDBpediaCorpusSample(String textProperty, int maxNrOfInstancesPerClass){ + logger.info("Generating DBpedia corpus based on " + textProperty + " for at most " + maxNrOfInstancesPerClass + " instances..."); Set<String> documents = new HashSet<>(); SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); @@ -115,10 +119,12 @@ } } } + logger.info("...done."); return documents; } public static Set<String> getDBpediaCorpusSample(String textProperty, Set<NamedClass> classes, int maxNrOfInstancesPerClass){ + logger.info("Generating DBpedia corpus based on " + textProperty + " for " + classes + " based on at most " + maxNrOfInstancesPerClass + " instances..."); Set<String> documents = new HashSet<>(); SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); @@ -163,6 +169,7 @@ } } } + logger.info("...done."); return documents; } Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -0,0 +1,425 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.SIBLING; +import static org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2.Strategy.SUPERCLASS; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.celoe.CELOE; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.RelevanceMapGenerator; +import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceMetric; +import org.dllearner.core.AbstractLearningProblem; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Entity; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.learningproblems.PosNegLPStandard; +import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.refinementoperators.RhoDRDown; +import org.dllearner.utilities.PrefixCCMap; +import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.apibinding.OWLManager; +import 
org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLClassExpression; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import com.google.common.base.Charsets; +import com.google.common.collect.Sets; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; +import com.google.common.io.Files; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.hp.hpl.jena.vocabulary.XSD; + +/** + * @author Lorenz Buehmann + * + */ +public class DBpediaExperiment { + + private static final Logger logger = Logger.getLogger(DBpediaExperiment.class.getName()); + + private DecimalFormat dfPercent = new DecimalFormat("0.00%"); + HashFunction hf = Hashing.md5(); + + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + String namespace = "http://dbpedia.org/ontology/"; + OWLOntology schema; + + static final String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + static final String searchField = "comment"; + + String cacheDirectory = "cache/isle"; + String testFolder = "experiments/logs/"; + + private SPARQLReasoner reasoner; + private AutomaticNegativeExampleFinderSPARQL2 negativeExampleFinder; + + final int maxNrOfPositiveExamples = 20; + final int maxNrOfNegativeExamples = 50; + boolean posOnly = false; + int maxCBDDepth = 1; + + //learning algorithm settings + private int maxNrOfResults = 50; + private int maxExecutionTimeInSeconds = 20; + private boolean useNegation = false; + private boolean useAllConstructor = false; + + private RelevanceMetric relevanceMetric; + + File resultsFolder = new File("experiments/isle/result3/"); + + + public DBpediaExperiment() { + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cacheDirectory); + negativeExampleFinder = new AutomaticNegativeExampleFinderSPARQL2(endpoint); + KnowledgebaseSampleGenerator.maxCBDDepth = maxCBDDepth; + RelevanceMapGenerator.cacheDirectory = "experiments/relevance/"; + + loadSchema(); + + relevanceMetric = new PMIRelevanceMetric(getSyntacticIndex()); + + resultsFolder.mkdirs(); + } + + public void run(){ + ExecutorService es = Executors.newFixedThreadPool(6); + Set<NamedClass> classes = getClasses(); + classes = reasoner.getMostSpecificClasses(); + List<NamedClass> classList = new ArrayList<>(classes); + Collections.reverse(classList); + + for (NamedClass cls : classList) { + try { + File resultsFile = new File(resultsFolder, URLEncoder.encode(cls.getName(), "UTF-8")); + if(resultsFile.exists()){ + continue; + } + } catch (UnsupportedEncodingException e1) { + e1.printStackTrace(); + } + try { + run(cls); + } catch (Exception e) { + logger.error("Error when learning class " + cls, e); + } + } + } + + public void run(NamedClass cls){ + logger.info("Learning description of class " + cls); + //get some positive examples + SortedSet<Individual> positiveExamples = getPositiveExamples(cls); + + //we can stop if 
there are no positive examples + if(positiveExamples.isEmpty()){ + logger.info("Empty class."); + return; + } + + //get some negative examples + SortedSet<Individual> negativeExamples = getNegativeExamples(cls, positiveExamples); + + //generate a sample of the knowledge base based on the examples + OWLOntology knowledgebaseSample = loadKnowledgebaseSample(Sets.union(positiveExamples, negativeExamples)); + + //set up the learning + try { + // set KB + KnowledgeSource ks = new OWLAPIOntology(knowledgebaseSample); + + // set reasoner + FastInstanceChecker reasoner = new FastInstanceChecker(ks); + reasoner.init(); + reasoner.setPrefixes(PrefixCCMap.getInstance()); + reasoner.setBaseURI("http://dbpedia.org/ontology/"); + + // set learning problem + AbstractLearningProblem lp; + if(posOnly){ + lp = new PosOnlyLP(reasoner); + ((PosOnlyLP)lp).setPositiveExamples(positiveExamples); + } else { +// lp = new ClassLearningProblem(reasoner); +// ((ClassLearningProblem)lp).setClassToDescribe(cls); +// ((ClassLearningProblem)lp).setEquivalence(true); + lp = new PosNegLPStandard(reasoner, positiveExamples, negativeExamples); + } + lp.init(); + + + RhoDRDown rop = new RhoDRDown(); + rop.setReasoner(reasoner); + rop.setUseNegation(useNegation); + rop.setUseAllConstructor(useAllConstructor); + rop.init(); + + // + Map<Entity, Double> entityRelevance = RelevanceMapGenerator.generateRelevanceMap(cls, schema, relevanceMetric, true); + + //get the start class for the learning algorithms +// Description startClass = getStartClass(cls, equivalence, true); + + // 1. run basic ISLE + CELOE la = new CELOE(lp, reasoner); + la.setMaxNrOfResults(maxNrOfResults); + la.setOperator(rop); + la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); +// isle.setStartClass(startClass); + new File(testFolder).mkdirs(); + la.setSearchTreeFile(testFolder + "searchTreeISLE.txt"); + la.setWriteSearchTree(true); +// isle.setTerminateOnNoiseReached(true); + la.setIgnoredConcepts(Collections.singleton(cls)); + la.setReplaceSearchTree(true); + la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); + la.setExpandAccuracy100Nodes(true); + la.init(); + la.start(); + int current = 1; + StringBuilder sb = new StringBuilder(); + for(EvaluatedDescription ed : la.getCurrentlyBestEvaluatedDescriptions().descendingSet()) { + if(lp instanceof PosNegLPStandard) { + sb.append(current + ": " + ed.getDescription().toManchesterSyntaxString(reasoner.getBaseURI(), reasoner.getPrefixes()) + "," + + ((PosNegLPStandard)lp).getPredAccuracyOrTooWeakExact(ed.getDescription(),1) + "," + + ((PosNegLPStandard)lp).getFMeasureOrTooWeakExact(ed.getDescription(),1)); + } + sb.append(",").append(getRelevanceScore(ed.getDescription(), entityRelevance)); + sb.append("\n"); + + current++; + } + try { + Files.write(sb.toString(), new File(resultsFolder, URLEncoder.encode(cls.getName(), "UTF-8")), Charsets.UTF_8); + } catch (IOException e) { + e.printStackTrace(); + } +// System.exit(0); + +// //2. run with syntactic index +// Map<Entity, Double> entityRelevance = RelevanceMapGenerator.generateRelevanceMap(cls, schema, relevanceMetric, true); +// NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); +// la.setHeuristic(heuristic); +// la.init(); +// la.start(); +// +// //3. 
run with semantic index + } catch (ComponentInitException e) { + e.printStackTrace(); + } + } + + private double getRelevanceScore(Description desc, Map<Entity, Double> entityRelevance){ + Set<Entity> entities = desc.getSignature(); + double score = 0; + for (Entity entity : entities) { + double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0;//System.out.println(entity + ":" + relevance); + if(!Double.isInfinite(relevance)){ + score += relevance; + } + } + return score; + } + + private SortedSet<Individual> getPositiveExamples(NamedClass cls){ + logger.info("Generating positive examples..."); + SortedSet<Individual> individuals = reasoner.getIndividuals(cls, maxNrOfPositiveExamples); + logger.info("Done. Got " + individuals.size() + ": " + individuals); + return individuals; + } + + private SortedSet<Individual> getNegativeExamples(NamedClass classToDescribe, Set<Individual> positiveExamples){ + logger.info("Generating positive examples..."); + SortedSet<Individual> individuals = negativeExampleFinder.getNegativeExamples(classToDescribe, positiveExamples, Arrays.asList(SUPERCLASS, SIBLING), maxNrOfNegativeExamples); + logger.info("Done. Got " + individuals.size() + ": " + individuals); + return individuals; + } + + private void loadSchema(){ + logger.info("Loading schema..."); + try { + schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(new File("src/test/resources/org/dllearner/algorithms/isle/dbpedia_3.9.owl")); + } catch (OWLOntologyCreationException e1) { + e1.printStackTrace(); + } + logger.info("Done. Number of logical axioms: " + schema.getLogicalAxiomCount()); + } + + private OWLOntology loadKnowledgebaseSample(Set<Individual> individuals){ + logger.info("Generating knowledge base sample..."); + Model sampleModel = KnowledgebaseSampleGenerator.createKnowledgebaseSample(endpoint, namespace, individuals); + sampleModel.setNsPrefix("dbo", "http://dbpedia.org/ontology/"); + logger.info("Done. 
Size: " + sampleModel.size() + " triples"); + cleanUp(sampleModel); + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + sampleModel.write(baos, "TURTLE", null); + OWLOntologyManager man = OWLManager.createOWLOntologyManager(); + OWLDataFactory df = man.getOWLDataFactory(); + OWLOntology ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray())); + man.addAxioms(ontology, schema.getAxioms()); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_DATA_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_OBJECT_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.SAME_INDIVIDUAL)); + man.removeAxiom(ontology, df.getOWLObjectPropertyDomainAxiom( + df.getOWLObjectProperty(IRI.create("http://dbpedia.org/ontology/mission")), + df.getOWLClass(IRI.create("http://dbpedia.org/ontology/Aircraft")))); + return ontology; + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + private void cleanUp(Model model){ + // filter out triples with String literals, as therein often occur + // some syntax errors and they are not relevant for learning + List<Statement> statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + RDFNode object = st.getObject(); + if (object.isLiteral()) { + // statementsToRemove.add(st); + Literal lit = object.asLiteral(); + if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.xstring)) { + st.changeObject("shortened", "en"); + } else if (lit.getDatatype().getURI().equals(XSD.gYear.getURI())) { + model.add(model.createStatement(st.getSubject(), st.getPredicate(), model.createTypedLiteral(1111, XSDDatatype.XSDgYear))); + statementsToRemove.add(st); + } else if (lit.getDatatype().getURI().equals(XSD.gYearMonth.getURI())) { + statementsToRemove.add(st); + } + } + //remove statements like <x a owl:Class> + if (st.getPredicate().equals(RDF.type)) { + if (object.equals(RDFS.Class.asNode()) || object.equals(OWL.Class.asNode()) || object.equals(RDFS.Literal.asNode()) + || object.equals(RDFS.Resource)) { + statementsToRemove.add(st); + } + } + + //remove unwanted properties + String dbo = "http://dbpedia.org/ontology/"; + Set<String> blackList = Sets.newHashSet(dbo + "wikiPageDisambiguates",dbo + "wikiPageExternalLink", + dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID", + dbo + "wikiPageWikiLink"); + for(String bl: blackList){ + if (st.getPredicate().getURI().equals(bl)) { + statementsToRemove.add(st); + } + } + } + + model.remove(statementsToRemove); + + statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + Property predicate = st.getPredicate(); + if (predicate.equals(RDF.type)) { + Resource object = st.getObject().asResource(); + if (!object.getURI().startsWith(namespace) && !object.getURI().startsWith(OWL.NS)) { + statementsToRemove.add(st); + } else if (object.equals(OWL.FunctionalProperty.asNode())) { + statementsToRemove.add(st); + } + } else if (!predicate.equals(RDFS.subClassOf) && !predicate.equals(OWL.sameAs) && !predicate.asResource().getURI().startsWith(namespace)) { + statementsToRemove.add(st); + } + } + model.remove(statementsToRemove); + } + + private Index 
getSyntacticIndex(){ + return new SolrSyntacticIndex(schema, solrServerURL, searchField); + } + + private Index getSemanticIndex(){ + return null; + } + + /** + * Get the classes on which the experiment is applied. + * @return + */ + private Set<NamedClass> getClasses(){ + Set<NamedClass> classes = new HashSet<NamedClass>(); + + for(OWLClass cls : schema.getClassesInSignature()){ + classes.add(new NamedClass(cls.toStringID())); + } + + return classes; + } + + public static void main(String[] args) throws Exception { +// ToStringRenderer.getInstance().setRenderer(new DLSyntaxObjectRenderer()); +// String cls = "http://dbpedia.org/ontology/Astronaut"; +// OWLDataFactory df = new OWLDataFactoryImpl(); +// OWLAxiom pattern = df.getOWLSubClassOfAxiom( +// df.getOWLClass(IRI.create("http://dllearner.org/pattern/A")), +// df.getOWLObjectIntersectionOf( +// df.getOWLClass(IRI.create("http://dllearner.org/pattern/B")), +// df.getOWLObjectSomeValuesFrom( +// df.getOWLObjectProperty(IRI.create("http://dllearner.org/pattern/p")), +// df.getOWLClass(IRI.create("http://dllearner.org/pattern/C"))))); +// PatternBasedAxiomLearningAlgorithm la = new PatternBasedAxiomLearningAlgorithm(new SparqlEndpointKS(SparqlEndpoint.getEndpointDBpedia()), "cache", FragmentExtractionStrategy.INDIVIDUALS); +// la.setClass(new NamedClass(cls)); +// la.setPattern(DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(pattern)); +// la.start(); + + +// new DBpediaExperiment().run(); + new DBpediaExperiment().run(new NamedClass("http://dbpedia.org/ontology/Sales")); + } +} Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java 2014-01-21 12:49:22 UTC (rev 4212) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java 2014-01-21 12:49:51 UTC (rev 4213) @@ -3,29 +3,25 @@ */ package org.dllearner.algorithms.isle; -import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; +import java.io.File; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; -import org.apache.commons.compress.compressors.CompressorException; -import org.apache.commons.compress.compressors.CompressorInputStream; -import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.dllearner.algorithms.isle.index.Index; -import o... [truncated message content] |
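
[Editor's note] The parallelized RelevanceUtils.getRelevantEntities in this revision submits one metric.getNormalizedRelevance call per entity to a fixed thread pool and collects the scores into a shared map. Below is a minimal sketch of that fan-out/aggregate pattern, assuming a ConcurrentHashMap for the shared result (the committed code collects into a plain HashMap, which is not thread-safe under concurrent writes); the class and the stand-in scoring expression are hypothetical, not the project's code:

    import java.util.Map;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    // Simplified fan-out mirroring RelevanceUtils.getRelevantEntities:
    // compute one score per item in parallel, aggregate into a thread-safe map.
    public class ParallelScoring {

        static int maxNrOfThreads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);

        public static Map<String, Double> score(Set<String> items) throws InterruptedException {
            final Map<String, Double> scores = new ConcurrentHashMap<String, Double>();
            ExecutorService executor = Executors.newFixedThreadPool(maxNrOfThreads);
            for (final String item : items) {
                executor.submit(new Runnable() {
                    @Override
                    public void run() {
                        // stand-in for metric.getNormalizedRelevance(entity, otherEntity)
                        scores.put(item, (double) item.length());
                    }
                });
            }
            executor.shutdown();
            // block until all submitted tasks finish (the commit waits up to one day)
            executor.awaitTermination(1, TimeUnit.DAYS);
            return scores;
        }
    }
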