From: <lor...@us...> - 2013-07-08 13:51:34
|
Revision: 4015 http://sourceforge.net/p/dl-learner/code/4015 Author: lorenz_b Date: 2013-07-08 13:51:31 +0000 (Mon, 08 Jul 2013) Log Message: ----------- First refactoring of ISLE algorithm. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevances.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -25,7 +25,6 @@ import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; import org.dllearner.core.config.ConfigOption; -import org.dllearner.core.config.DoubleEditor; import org.dllearner.utilities.owl.ConceptComparator; /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -19,6 +19,7 @@ package org.dllearner.algorithms.isle; +import java.io.File; import java.text.DecimalFormat; import java.util.Collection; import java.util.Iterator; @@ -35,11 +36,10 @@ import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; import org.dllearner.core.EvaluatedDescription; -import org.dllearner.core.options.BooleanConfigOption; -import org.dllearner.core.options.CommonConfigOptions; -import org.dllearner.core.options.ConfigOption; +import org.dllearner.core.config.ConfigOption; import org.dllearner.core.owl.ClassHierarchy; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; @@ -51,16 +51,20 @@ import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.refinementoperators.CustomHierarchyRefinementOperator; +import org.dllearner.refinementoperators.CustomStartRefinementOperator; import org.dllearner.refinementoperators.LengthLimitedRefinementOperator; import org.dllearner.refinementoperators.OperatorInverter; -import org.dllearner.refinementoperators.RefinementOperator; +import org.dllearner.refinementoperators.ReasoningBasedRefinementOperator; import org.dllearner.refinementoperators.RhoDRDown; +import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; import org.dllearner.utilities.owl.ConceptComparator; import org.dllearner.utilities.owl.ConceptTransformation; import org.dllearner.utilities.owl.DescriptionMinimizer; import org.dllearner.utilities.owl.EvaluatedDescriptionSet; import org.dllearner.utilities.owl.PropertyContext; +import org.springframework.beans.factory.annotation.Autowired; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -71,9 +75,11 @@ * @author Jens Lehmann * */ +@ComponentAnn(name="ISLE", shortName="isle", version=0.5, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.") public class ISLE extends AbstractCELA { private static Logger logger = Logger.getLogger(CELOE.class); +// private CELOEConfigurator configurator; private boolean isRunning = false; private boolean stop = false; @@ -83,13 +89,17 @@ private LengthLimitedRefinementOperator operator; private DescriptionMinimizer minimizer; + @ConfigOption(name="useMinimizer", defaultValue="true", description="Specifies whether returned expressions should be minimised by removing those parts, which are not needed. (Basically the minimiser tries to find the shortest expression which is equivalent to the learned expression). Turning this feature off may improve performance.") + private boolean useMinimizer = true; // all nodes in the search tree (used for selecting most promising node) private TreeSet<OENode> nodes; +// private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime(); private NLPHeuristic heuristic = new NLPHeuristic(); // root of search tree private OENode startNode; // the class with which we start the refinement process + @ConfigOption(name = "startClass", defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.") private Description startClass; // all descriptions in the search tree plus those which were too weak (for fast redundancy check) @@ -99,6 +109,7 @@ // if true, then each solution is evaluated exactly instead of approximately // private boolean exactBestDescriptionEvaluation = false; + @ConfigOption(name = "singleSuggestionMode", defaultValue="false", description="Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.") private boolean singleSuggestionMode; private Description bestDescription; private double bestAccuracy = Double.MIN_VALUE; @@ -115,11 +126,16 @@ private long nanoStartTime; - // important parameters + // important parameters (non-config options but internal) private double noise; - private double maxDepth; - private boolean filterFollowsFromKB; + + private boolean filterFollowsFromKB; + // less important parameters + // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity + // but it can also suppress quite useful expressions + private boolean forceMutualDifference = false; + // utility variables private String baseURI; private Map<String, String> prefixes; @@ -130,80 +146,165 @@ private int expressionTests = 0; private int minHorizExp = 0; private int maxHorizExp = 0; + + // TODO: turn those into config options + + // important: do not initialise those with empty sets + // null = no settings for allowance / ignorance + // empty set = allow / ignore nothing (it is often not desired to allow no class!) + Set<NamedClass> allowedConcepts = null; + Set<NamedClass> ignoredConcepts = null; - private double noisePercentage = 0.0; + @ConfigOption(name = "writeSearchTree", defaultValue="false", description="specifies whether to write a search tree") + private boolean writeSearchTree = false; + @ConfigOption(name = "searchTreeFile", defaultValue="log/searchTree.txt", description="file to use for the search tree") + private String searchTreeFile = "log/searchTree.txt"; + + @ConfigOption(name = "replaceSearchTree", defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree") + private boolean replaceSearchTree = false; + + @ConfigOption(name = "maxNrOfResults", defaultValue="10", description="Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions).") private int maxNrOfResults = 10; - private boolean filterDescriptionsFollowingFromKB = true; + @ConfigOption(name = "noisePercentage", defaultValue="0.0", description="the (approximated) percentage of noise within the examples") + private double noisePercentage = 0.0; - private long maxExecutionTimeInSeconds = 10; + @ConfigOption(name = "filterDescriptionsFollowingFromKB", defaultValue="false", description="If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.") + private boolean filterDescriptionsFollowingFromKB = false; + @ConfigOption(name = "reuseExistingDescription", defaultValue="false", description="If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.") private boolean reuseExistingDescription = false; + + @ConfigOption(name = "maxClassExpressionTests", defaultValue="0", description="The maximum number of candidate hypothesis the algorithm is allowed to test (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") + private int maxClassExpressionTests = 0; + + @ConfigOption(name = "maxClassExpressionTestsAfterImprovement", defaultValue="0", description = "The maximum number of candidate hypothesis the algorithm is allowed after an improvement in accuracy (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") + private int maxClassExpressionTestsAfterImprovement = 0; + @ConfigOption(defaultValue = "10", name = "maxExecutionTimeInSeconds", description = "maximum execution of the algorithm in seconds") + private int maxExecutionTimeInSeconds = 10; + + @ConfigOption(defaultValue = "0", name = "maxExecutionTimeInSecondsAfterImprovement", description = "maximum execution of the algorithm in seconds") + private int maxExecutionTimeInSecondsAfterImprovement = 0; + + @ConfigOption(name = "terminateOnNoiseReached", defaultValue="false", description="specifies whether to terminate when noise criterion is met") + private boolean terminateOnNoiseReached = false; + + @ConfigOption(name = "maxDepth", defaultValue="7", description="maximum depth of description") + private double maxDepth = 7; + + @ConfigOption(name = "stopOnFirstDefinition", defaultValue="false", description="algorithm will terminate immediately when a correct definition is found") + private boolean stopOnFirstDefinition = false; + + private int expressionTestCountLastImprovement; + + + @SuppressWarnings("unused") + private long timeLastImprovement = 0; + +// public CELOEConfigurator getConfigurator() { +// return configurator; +// } + + public ISLE() { + + } + public ISLE(AbstractLearningProblem problem, AbstractReasonerComponent reasoner) { super(problem, reasoner); +// configurator = new CELOEConfigurator(this); } public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() { Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>(); problems.add(AbstractLearningProblem.class); return problems; - } - - public static Collection<ConfigOption<?>> createConfigOptions() { - Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>(); - options.add(CommonConfigOptions.useAllConstructor()); - options.add(CommonConfigOptions.useExistsConstructor()); - options.add(CommonConfigOptions.useHasValueConstructor()); - options.add(CommonConfigOptions.useDataHasValueConstructor()); - options.add(CommonConfigOptions.valueFreqencyThreshold()); - options.add(CommonConfigOptions.useCardinalityRestrictions()); - options.add(CommonConfigOptions.cardinalityLimit()); - // by default, we do not use negation (should be configurable in GUI) - options.add(CommonConfigOptions.useNegation(false)); - options.add(CommonConfigOptions.useBooleanDatatypes()); - options.add(CommonConfigOptions.useDoubleDatatypes()); - options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10)); - options.add(CommonConfigOptions.getNoisePercentage()); - options.add(CommonConfigOptions.getMaxDepth(7)); - options.add(CommonConfigOptions.maxNrOfResults(10)); - options.add(new BooleanConfigOption("singleSuggestionMode", "Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.", false)); - options.add(CommonConfigOptions.getInstanceBasedDisjoints()); - options.add(new BooleanConfigOption("filterDescriptionsFollowingFromKB", "If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.", false)); - options.add(new BooleanConfigOption("reuseExistingDescription", "If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.", false)); - return options; } public static String getName() { - return "ISLE"; + return "CELOE"; } @Override public void init() throws ComponentInitException { + + if(maxExecutionTimeInSeconds != 0 && maxExecutionTimeInSecondsAfterImprovement != 0) { + maxExecutionTimeInSeconds = Math.min(maxExecutionTimeInSeconds, maxExecutionTimeInSecondsAfterImprovement); + } + + // compute used concepts/roles from allowed/ignored + // concepts/roles + Set<NamedClass> usedConcepts; +// Set<NamedClass> allowedConcepts = configurator.getAllowedConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getAllowedConcepts()); +// Set<NamedClass> ignoredConcepts = configurator.getIgnoredConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getIgnoredConcepts()); + if(allowedConcepts != null) { + // sanity check to control if no non-existing concepts are in the list + Helper.checkConcepts(reasoner, allowedConcepts); + usedConcepts = allowedConcepts; + } else if(ignoredConcepts != null) { + usedConcepts = Helper.computeConceptsUsingIgnoreList(reasoner, ignoredConcepts); + } else { + usedConcepts = Helper.computeConcepts(reasoner); + } + // copy class hierarchy and modify it such that each class is only // reachable via a single path - ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone(); +// ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone(); + ClassHierarchy classHierarchy = reasoner.getClassHierarchy().cloneAndRestrict(usedConcepts); classHierarchy.thinOutSubsumptionHierarchy(); + + // if no one injected a heuristic, we use a default one + if(heuristic == null) { + heuristic = new NLPHeuristic(); + } minimizer = new DescriptionMinimizer(reasoner); - startClass = Thing.instance; + // start at owl:Thing by default + if(startClass == null) { + startClass = Thing.instance; + } // singleSuggestionMode = configurator.getSingleSuggestionMode(); - + /* // create refinement operator -// operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator); - // create refinement operator if(operator == null) { operator = new RhoDRDown(); ((RhoDRDown)operator).setStartClass(startClass); - ((RhoDRDown)operator).setSubHierarchy(classHierarchy); ((RhoDRDown)operator).setReasoner(reasoner); - ((RhoDRDown)operator).init(); - } + } + ((RhoDRDown)operator).setSubHierarchy(classHierarchy); + ((RhoDRDown)operator).setObjectPropertyHierarchy(reasoner.getObjectPropertyHierarchy()); + ((RhoDRDown)operator).setDataPropertyHierarchy(reasoner.getDatatypePropertyHierarchy()); + ((RhoDRDown)operator).init(); + */ + // create a refinement operator and pass all configuration + // variables to it + if(operator == null) { + // we use a default operator and inject the class hierarchy for now + operator = new RhoDRDown(); + if(operator instanceof CustomStartRefinementOperator) { + ((CustomStartRefinementOperator)operator).setStartClass(startClass); + } + if(operator instanceof ReasoningBasedRefinementOperator) { + ((ReasoningBasedRefinementOperator)operator).setReasoner(reasoner); + } + operator.init(); + } + if(operator instanceof CustomHierarchyRefinementOperator) { + ((CustomHierarchyRefinementOperator)operator).setClassHierarchy(classHierarchy); + ((CustomHierarchyRefinementOperator)operator).setObjectPropertyHierarchy(reasoner.getObjectPropertyHierarchy()); + ((CustomHierarchyRefinementOperator)operator).setDataPropertyHierarchy(reasoner.getDatatypePropertyHierarchy()); + } + +// operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator); baseURI = reasoner.getBaseURI(); prefixes = reasoner.getPrefixes(); + if(writeSearchTree) { + File f = new File(searchTreeFile ); + Files.clearFile(f); + } bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults); @@ -211,12 +312,18 @@ // we put important parameters in class variables noise = noisePercentage/100d; +// System.out.println("noise " + noise); // maxDepth = configurator.getMaxDepth(); // (filterFollowsFromKB is automatically set to false if the problem // is not a class learning problem - filterFollowsFromKB = filterDescriptionsFollowingFromKB - && isClassLearningProblem; + filterFollowsFromKB = filterDescriptionsFollowingFromKB && isClassLearningProblem; +// Set<Description> concepts = operator.refine(Thing.instance, 5); +// for(Description concept : concepts) { +// System.out.println(concept); +// } +// System.out.println("refinements of thing: " + concepts.size()); + // actions specific to ontology engineering if(isClassLearningProblem) { ClassLearningProblem problem = (ClassLearningProblem) learningProblem; @@ -230,7 +337,7 @@ // superfluous to add super classes in this case) if(isEquivalenceProblem) { Set<Description> existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe); - if(reuseExistingDescription && (existingDefinitions.size() > 0)) { + if(reuseExistingDescription && (existingDefinitions.size() > 0)) { // the existing definition is reused, which in the simplest case means to // use it as a start class or, if it is already too specific, generalise it @@ -246,7 +353,10 @@ LinkedList<Description> startClassCandidates = new LinkedList<Description>(); startClassCandidates.add(existingDefinition); - ((RhoDRDown)operator).setDropDisjuncts(true); + // hack for RhoDRDown + if(operator instanceof RhoDRDown) { + ((RhoDRDown)operator).setDropDisjuncts(true); + } LengthLimitedRefinementOperator upwardOperator = (LengthLimitedRefinementOperator) new OperatorInverter(operator); // use upward refinement until we find an appropriate start class @@ -279,7 +389,9 @@ // System.out.println("existing def: " + existingDefinition); // System.out.println(reasoner.getIndividuals(existingDefinition)); - ((RhoDRDown)operator).setDropDisjuncts(false); + if(operator instanceof RhoDRDown) { + ((RhoDRDown)operator).setDropDisjuncts(false); + } } else { Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe); @@ -322,6 +434,10 @@ return bestEvaluatedDescriptions.getSet(); } + public double getCurrentlyBestAccuracy() { + return bestEvaluatedDescriptions.getBest().getAccuracy(); + } + @Override public void start() { // System.out.println(configurator.getMaxExecutionTimeInSeconds()); @@ -339,10 +455,13 @@ int loop = 0; while (!terminationCriteriaSatisfied()) { +// System.out.println("loop " + loop); if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) { highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy(); - logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); + expressionTestCountLastImprovement = expressionTests; + timeLastImprovement = System.nanoTime(); + logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); } // chose best node according to heuristics @@ -358,12 +477,16 @@ // for(Description refinement : refinements) { // System.out.println("refinement: " + refinement); // } +// if((loop+1) % 500 == 0) { +// System.out.println(getMinimumHorizontalExpansion() + " - " + getMaximumHorizontalExpansion()); +// System.exit(0); +// } while(refinements.size() != 0) { // pick element from set Description refinement = refinements.pollFirst(); int length = refinement.getLength(); - + // we ignore all refinements with lower length and too high depth // (this also avoids duplicate node children) if(length > horizExp && refinement.getDepth() <= maxDepth) { @@ -385,6 +508,24 @@ updateMinMaxHorizExp(nextNode); + // writing the search tree (if configured) + if (writeSearchTree) { + String treeString = "best node: " + bestEvaluatedDescriptions.getBest() + "\n"; + if (refinements.size() > 1) { + treeString += "all expanded nodes:\n"; + for (Description n : refinements) { + treeString += " " + n + "\n"; + } + } + treeString += startNode.toTreeString(baseURI); + treeString += "\n"; + + if (replaceSearchTree) + Files.createFile(new File(searchTreeFile), treeString); + else + Files.appendToFile(new File(searchTreeFile), treeString); + } + // System.out.println(loop); loop++; } @@ -392,7 +533,7 @@ if (stop) { logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n"); } else { - logger.info("Algorithm terminated successfully ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n"); + logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(System.nanoTime()-nanoStartTime) + ", "+expressionTests+" descriptions tested, " + nodes.size() + " nodes in the search tree).\n"); logger.info(reasoner.toString()); } @@ -445,7 +586,7 @@ // returns true if node was added and false otherwise private boolean addNode(Description description, OENode parentNode) { -// System.out.println(description); +// System.out.println("d: " + description); // redundancy check (return if redundant) boolean nonRedundant = descriptions.add(description); @@ -498,6 +639,8 @@ return true; } +// System.out.println("description " + description + " accuracy " + accuracy); + // maybe add to best descriptions (method keeps set size fixed); // we need to make sure that this does not get called more often than // necessary since rewriting is expensive @@ -510,30 +653,42 @@ (accuracy >= accThreshold && description.getLength() < worst.getDescriptionLength())); } +// System.out.println(isCandidate); + // System.out.println("Test4 " + new Date()); if(isCandidate) { + Description niceDescription = rewriteNode(node); ConceptTransformation.transformToOrderedForm(niceDescription, descriptionComparator); // Description niceDescription = node.getDescription(); // another test: none of the other suggested descriptions should be // a subdescription of this one unless accuracy is different + // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing + // A is not a candidate; on the other hand this suppresses many meaningless extensions of A boolean shorterDescriptionExists = false; - for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { - if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { - shorterDescriptionExists = true; - break; - } + if(forceMutualDifference) { + for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { + if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { +// System.out.println("shorter: " + ed.getDescription()); + shorterDescriptionExists = true; + break; + } + } } +// System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription); + if(!shorterDescriptionExists) { if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) { +// System.out.println("Test2"); bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem); // System.out.println("acc: " + accuracy); // System.out.println(bestEvaluatedDescriptions); } } +// System.out.println(bestEvaluatedDescriptions.getSet().size()); } // System.out.println("Test5 " + new Date()); @@ -630,14 +785,26 @@ private Description rewriteNode(OENode node) { Description description = node.getDescription(); // minimize description (expensive!) - also performes some human friendly rewrites - Description niceDescription = minimizer.minimizeClone(description); + Description niceDescription; + if(useMinimizer) { + niceDescription = minimizer.minimizeClone(description); + } else { + niceDescription = description; + } // replace \exists r.\top with \exists r.range(r) which is easier to read for humans ConceptTransformation.replaceRange(niceDescription, reasoner); return niceDescription; } private boolean terminationCriteriaSatisfied() { - return stop || ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l)); + return + stop || + (maxClassExpressionTestsAfterImprovement != 0 && (expressionTests - expressionTestCountLastImprovement >= maxClassExpressionTestsAfterImprovement)) || + (maxClassExpressionTests != 0 && (expressionTests >= maxClassExpressionTests)) || + (maxExecutionTimeInSecondsAfterImprovement != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSecondsAfterImprovement*1000000000l))) || + (maxExecutionTimeInSeconds != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l))) || + (terminateOnNoiseReached && (100*getCurrentlyBestAccuracy()>=100-noisePercentage)) || + (stopOnFirstDefinition && (getCurrentlyBestAccuracy() >= 1)); } private void reset() { @@ -740,6 +907,196 @@ */ public int getClassExpressionTests() { return expressionTests; + } + + public LengthLimitedRefinementOperator getOperator() { + return operator; + } + + @Autowired(required=false) + public void setOperator(LengthLimitedRefinementOperator operator) { + this.operator = operator; + } + + public Description getStartClass() { + return startClass; + } + + public void setStartClass(Description startClass) { + this.startClass = startClass; + } + + public Set<NamedClass> getAllowedConcepts() { + return allowedConcepts; + } + + public void setAllowedConcepts(Set<NamedClass> allowedConcepts) { + this.allowedConcepts = allowedConcepts; + } + + public Set<NamedClass> getIgnoredConcepts() { + return ignoredConcepts; + } + + public void setIgnoredConcepts(Set<NamedClass> ignoredConcepts) { + this.ignoredConcepts = ignoredConcepts; + } + + public boolean isWriteSearchTree() { + return writeSearchTree; + } + + public void setWriteSearchTree(boolean writeSearchTree) { + this.writeSearchTree = writeSearchTree; + } + + public String getSearchTreeFile() { + return searchTreeFile; + } + + public void setSearchTreeFile(String searchTreeFile) { + this.searchTreeFile = searchTreeFile; + } + + public int getMaxNrOfResults() { + return maxNrOfResults; + } + + public void setMaxNrOfResults(int maxNrOfResults) { + this.maxNrOfResults = maxNrOfResults; + } + + public double getNoisePercentage() { + return noisePercentage; + } + + public void setNoisePercentage(double noisePercentage) { + this.noisePercentage = noisePercentage; + } + + public boolean isFilterDescriptionsFollowingFromKB() { + return filterDescriptionsFollowingFromKB; + } + + public void setFilterDescriptionsFollowingFromKB(boolean filterDescriptionsFollowingFromKB) { + this.filterDescriptionsFollowingFromKB = filterDescriptionsFollowingFromKB; + } + + public boolean isReplaceSearchTree() { + return replaceSearchTree; + } + + public void setReplaceSearchTree(boolean replaceSearchTree) { + this.replaceSearchTree = replaceSearchTree; + } + + public int getMaxClassDescriptionTests() { + return maxClassExpressionTests; + } + + public void setMaxClassDescriptionTests(int maxClassDescriptionTests) { + this.maxClassExpressionTests = maxClassDescriptionTests; + } + + public int getMaxExecutionTimeInSeconds() { + return maxExecutionTimeInSeconds; + } + + public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) { + this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; + } + + public boolean isTerminateOnNoiseReached() { + return terminateOnNoiseReached; + } + + public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) { + this.terminateOnNoiseReached = terminateOnNoiseReached; + } + + public boolean isReuseExistingDescription() { + return reuseExistingDescription; + } + + public void setReuseExistingDescription(boolean reuseExistingDescription) { + this.reuseExistingDescription = reuseExistingDescription; + } + + public boolean isUseMinimizer() { + return useMinimizer; + } + + public void setUseMinimizer(boolean useMinimizer) { + this.useMinimizer = useMinimizer; + } + + public NLPHeuristic getHeuristic() { + return heuristic; + } + + @Autowired(required=false) + public void setHeuristic(NLPHeuristic heuristic) { + this.heuristic = heuristic; + } + + public int getMaxClassExpressionTestsWithoutImprovement() { + return maxClassExpressionTestsAfterImprovement; + } + + public void setMaxClassExpressionTestsWithoutImprovement( + int maxClassExpressionTestsWithoutImprovement) { + this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsWithoutImprovement; + } + + public int getMaxExecutionTimeInSecondsAfterImprovement() { + return maxExecutionTimeInSecondsAfterImprovement; + } + + public void setMaxExecutionTimeInSecondsAfterImprovement( + int maxExecutionTimeInSecondsAfterImprovement) { + this.maxExecutionTimeInSecondsAfterImprovement = maxExecutionTimeInSecondsAfterImprovement; } + public boolean isSingleSuggestionMode() { + return singleSuggestionMode; + } + + public void setSingleSuggestionMode(boolean singleSuggestionMode) { + this.singleSuggestionMode = singleSuggestionMode; + } + + public int getMaxClassExpressionTests() { + return maxClassExpressionTests; + } + + public void setMaxClassExpressionTests(int maxClassExpressionTests) { + this.maxClassExpressionTests = maxClassExpressionTests; + } + + public int getMaxClassExpressionTestsAfterImprovement() { + return maxClassExpressionTestsAfterImprovement; + } + + public void setMaxClassExpressionTestsAfterImprovement( + int maxClassExpressionTestsAfterImprovement) { + this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsAfterImprovement; + } + + public double getMaxDepth() { + return maxDepth; + } + + public void setMaxDepth(double maxDepth) { + this.maxDepth = maxDepth; + } + + + public boolean isStopOnFirstDefinition() { + return stopOnFirstDefinition; + } + + public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { + this.stopOnFirstDefinition = stopOnFirstDefinition; + } + } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,95 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + + +/** + * @author Lorenz Buehmann + * + */ +public class LabelEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + private OWLDataFactory df = new OWLDataFactoryImpl(); + + private OWLAnnotationProperty label = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + public LabelEntityTextRetriever(OWLOntology ontology) { + this.ontology = ontology; + } + + public LabelEntityTextRetriever(OWLAPIOntology ontology) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. + * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, label); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,145 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + + +public abstract class LuceneBasedRelevance implements Relevance{ + + private EntityTextRetriever textRetriever; + private LuceneSearcher searcher; + private OWLOntology ontology; + private Set<OWLEntity> entities; + +// public void printScores() throws Exception { +// for( OWLClass c: m_classes ) +// { +// Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); +// // normalization per class? +// hmEntity2Score = normalize( hmEntity2Score ); +// for( OWLEntity e : hmEntity2Score.keySet() ) +// { +// double dScore = hmEntity2Score.get(e); +// System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); +// } +// } +// m_searcher.close(); +// } + + public LuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { + this.searcher = searcher; + this.ontology = ontology; + this.textRetriever = textRetriever; + + entities = new HashSet<OWLEntity>(); + entities.addAll(ontology.getClassesInSignature()); + entities.addAll(ontology.getObjectPropertiesInSignature()); + entities.addAll(ontology.getDataPropertiesInSignature()); + } + + public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ + Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); + double dMin = Double.MAX_VALUE; + Double dMax = Double.MIN_VALUE; + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + if( dValue < dMin ){ + dMin = dValue; + } + else if( dValue > dMax ){ + dMax = dValue; + } + } + // System.out.println( "min="+ dMin +" max="+ dMax ); + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + double dNorm = 0; + if( dMin == dMax ){ + dNorm = dValue; + } + else { + dNorm = ( dValue - dMin ) / ( dMax - dMin ); + } + hmEntity2Norm.put( e, dNorm ); + } + return hmEntity2Norm; + } + + @Override + public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception { + // computes relevance of entity for this class + // conditional probability: P(C,E)=f(C,E)/f(E) + // PMI(C,E)=log( P(C,E) / P(C) ) + Map<Entity, Double> hmEntity2Score = new HashMap<Entity, Double>(); + Map<String, Double> relevantText = textRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String text = entry.getKey(); + Double value = entry.getValue(); + + String sClass = text; + int nrOfDocumentsA = searcher.count(sClass); + int nrOfDocuments = searcher.indexSize(); + + for (OWLEntity otherEntity : entities) { + + Map<String, Double> otherRelevantText = textRetriever.getRelevantText(OWLAPIConverter + .getEntity(otherEntity)); + + for (Entry<String, Double> entry2 : otherRelevantText.entrySet()) { + String otherText = entry2.getKey(); + Double otherValue = entry2.getValue(); + + String sEntity = otherText; + int nrOfDocumentsB = searcher.count(sEntity); + int nrOfDocumentsAB = searcher.count(sClass + " AND " + sEntity); + // double dPEntity = (double)iEntity / (double)iAll; + + double score = computeScore(nrOfDocuments, nrOfDocumentsA, nrOfDocumentsB, nrOfDocumentsAB); + if (!Double.isNaN(score)){// && !Double.isInfinite(score)) { + hmEntity2Score.put(OWLAPIConverter.getEntity(otherEntity), score); + } + } + } + } + + return hmEntity2Score; + } + + /** + * Computes the score which is returned in {@link org.dllearner.algorithms.isle.LuceneBasedRelevance#getEntityRelevance} + * @return + */ + public abstract double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB); + +} \ No newline at end of file Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -40,13 +40,14 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class LuceneSearcher { - private String INDEX = "index"; - private String FIELD = "contents"; + private String INDEX = "/home/me/DBpedia-Lucene-Index"; + private String FIELD = "short-abstract"; private IndexReader m_reader = null; private IndexSearcher m_searcher = null; @@ -61,12 +62,12 @@ LuceneSearcher searcher = new LuceneSearcher(); List<Document> docs = searcher.search( sQuery ); System.out.println( "\nquery='"+ sQuery +"' all="+ searcher.indexSize() +" hits="+ docs.size() ); - for( Document doc : docs ) - { -// String sDoc = doc.toString(); - float score = searcher.getScore( doc ); - System.out.println( "score="+ score +" doc="+ doc ); - } +// for( Document doc : docs ) +// { +//// String sDoc = doc.toString(); +// float score = searcher.getScore( doc ); +// System.out.println( "score="+ score +" doc="+ doc ); +// } } @SuppressWarnings("deprecation") @@ -77,6 +78,28 @@ m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); } + public LuceneSearcher(IndexReader indexReader) throws Exception { + m_reader = indexReader; + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + + public LuceneSearcher(Directory directory, String seachField) throws Exception { + this.FIELD = seachField; + m_reader = DirectoryReader.open(directory); + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + + public LuceneSearcher(String indexDirectory) throws Exception { + m_reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + public void close() throws Exception { m_reader.close(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -20,8 +20,13 @@ package org.dllearner.algorithms.isle; import java.util.Comparator; +import java.util.Map; import org.dllearner.algorithms.celoe.OENode; +import org.dllearner.core.Component; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.config.ConfigOption; +import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.ConceptComparator; /** @@ -31,7 +36,8 @@ * @author Jens Lehmann * */ -public class NLPHeuristic implements Comparator<OENode> { +public class NLPHeuristic implements Component, Comparator<OENode>{ + // strong penalty for long descriptions private double expansionPenaltyFactor = 0.1; // bonus for being better than parent node @@ -42,7 +48,23 @@ // syntactic comparison as final comparison criterion private ConceptComparator conceptComparator = new ConceptComparator(); + @ConfigOption(name = "startNodeBonus", defaultValue="0.1") + private double startNodeBonus = 0.1; + + private Map<Entity, Double> entityRelevance; + + public NLPHeuristic() {} + + public NLPHeuristic(Map<Entity,Double> entityRelevance) { + this.entityRelevance = entityRelevance; + } + @Override + public void init() throws ComponentInitException { + + } + + @Override public int compare(OENode node1, OENode node2) { // System.out.println("node1 " + node1); // System.out.println("score: " + getNodeScore(node1)); @@ -67,6 +89,9 @@ if(!node.isRoot()) { double parentAccuracy = node.getParent().getAccuracy(); score += (parentAccuracy - score) * gainBonusFactor; + // the root node also gets a bonus to possibly spawn useful disjunctions + } else { + score += startNodeBonus; } // penalty for horizontal expansion score -= node.getHorizontalExpansion() * expansionPenaltyFactor; @@ -77,6 +102,48 @@ public double getExpansionPenaltyFactor() { return expansionPenaltyFactor; + } + + public double getGainBonusFactor() { + return gainBonusFactor; + } + + public void setGainBonusFactor(double gainBonusFactor) { + this.gainBonusFactor = gainBonusFactor; + } + + public double getNodeRefinementPenalty() { + return nodeRefinementPenalty; + } + + public void setNodeRefinementPenalty(double nodeRefinementPenalty) { + this.nodeRefinementPenalty = nodeRefinementPenalty; + } + + public void setExpansionPenaltyFactor(double expansionPenaltyFactor) { + this.expansionPenaltyFactor = expansionPenaltyFactor; + } + + public double getStartNodeBonus() { + return startNodeBonus; + } + + public void setStartNodeBonus(double startNodeBonus) { + this.startNodeBonus = startNodeBonus; } + + /** + * @param entityRelevance the entityRelevance to set + */ + public void setEntityRelevance(Map<Entity, Double> entityRelevance) { + this.entityRelevance = entityRelevance; + } + + /** + * @return the entityRelevance + */ + public Map<Entity, Double> getEntityRelevance() { + return entityRelevance; + } } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,141 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneIndex { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneIndex(OWLOntology ontology, String searchField) throws IOException { + this.ontology = ontology; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + buildIndex(); + } + + public OWLOntologyLuceneIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + buildIndex(); + } + + /** + * @return the ontology + */ + public OWLOntology getOntology() { + return ontology; + } + + /** + * @return the directory + */ + public Directory getDirectory() { + return directory; + } + + /** + * @param annotationProperty the annotationProperty to set + */ + public void setAnnotationProperty(OWLAnnotationProperty annotationProperty) { + this.annotationProperty = annotationProperty; + } + + /** + * @param annotationProperty the annotationProperty to set + */ + public void setAnnotationProperty(String annotationPropertyIRI) { + this.annotationProperty = df.getOWLAnnotationProperty(IRI.create(annotationPropertyIRI)); + } + + public void buildIndex() throws IOException{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." ); + + Set<Document> luceneDocuments = new HashSet<Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + Document luceneDocument = new Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import org.semanticweb.owlapi.model.OWLOntology; + + +public class PMILuceneBasedRelevance extends LuceneBasedRelevance{ + + /** + * @param ontology + * @param searcher + * @param textRetriever + */ + public PMILuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { + super(ontology, searcher, textRetriever); + + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.LuceneBasedRelevance#computeScore(int, int, int, int) + */ + @Override + public double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB) { + double dPClass = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); + double dPClassEntity = nrOfDocumentsB == 0 ? 0 : (double) nrOfDocumentsAB / (double) nrOfDocumentsB; + double pmi = Math.log(dPClassEntity / dPClass); + return pmi; + } +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,108 @@ +package org.dllearner.algorithms.isle; + + +import java.io.*; +import java.util.*; + +public class PMIRelevance { + + private LuceneSearcher m_searcher = null; + + private Set<String> m_classes; + private Set<String> m_individuals; + + + public static void main( String args[] ) throws Exception { + PMIRelevance relevance = new PMIRelevance( args[0], args[1] ); + relevance.printScores(); + } + + public void printScores() throws Exception { + for( String sInd: m_individuals ) + { + Map<String,Double> hmClass2Score = getClassRelevance( sInd ); + for( String sClass : hmClass2Score.keySet() ) + { + double dScore = hmClass2Score.get( sClass ); + if( dScore > 0 ){ + System.out.println( "PMI( "+ sInd +" , "+ sClass +" ) = "+ dScore ); + } + } + } + /* for( String sClass: m_classes ) + { + Map<String,Double> hmInd2Score = getIndividualRelevance( sClass ); + for( String sInd : hmInd2Score.keySet() ) + { + double dScore = hmInd2Score.get( sInd ); + if( dScore > 0 ){ + System.out.println( "P( "+ sClass +" | "+ sInd +" ) = "+ dScore ); + } + } + } */ + m_searcher.close(); + } + + public PMIRelevance( String sClasses, String sIndividuals ) throws Exception { + m_searcher = new LuceneSearcher(); + m_classes = read( sClasses ); + m_individuals = read( sIndividuals ); + } + + public Map<String,Double> getClassRelevance( String sIndividual ) throws Exception { + // computes relevance of classes for this individual + // conditional probability: P(I|C)=f(I,C)/f(C) + // PMI(I,C)=log( P(I|C) / P(I) ) + Map<String,Double> hmClass2Score = new HashMap<String,Double>(); + int iInd = m_searcher.count( sIndividual ); + int iAll = m_searcher.indexSize(); + double dPInd = (double) iInd / (double) iAll; + for( String sClass: m_classes ) + { + int iClass = m_searcher.count( sClass ); + int iIndClass = m_searcher.count( sIndividual +" AND "+ sClass ); + double dPIndClass = (double) iIndClass / (double)iClass; + double dPMI = Math.log( dPIndClass / dPInd ); + hmClass2Score.put( sClass, dPMI ); + } + return hmClass2Score; + } + + public Map<String,Double> getIndividualRelevance( String sClass ) throws Exception { + // computes relevance of individuals for this class + // conditional probability: P(C|I)=f(C,I)/f(I) + // PMI(C|I)=log( P(C|I) / P(C) ) + Map<String,Double> hmInd2Score = new HashMap<String,Double>(); + int iClass = m_searcher.count( sClass ); + int iAll = m_searcher.indexSize(); + double dPClass = (double) iClass / (double) iAll; + for( String sInd: m_individuals ) + { + int iInd = m_searcher.count( sInd ); + int iIndClass = m_searcher.count( sClass +" AND "+ sInd ); + double dPClassInd = (double) iIndClass / (double)iInd; + double dPMI = Math.log( dPClassInd / dPClass ); + hmInd2Score.put( sInd, dPMI ); + } + return hmInd2Score; + } + + private static Set<String> read( String sFile ) throws Exception { + File file = new File( sFile ); + Set<String> lines = new HashSet<String>(); + BufferedReader reader = null; + try { + reader = new BufferedReader( new FileReader( file ) ); + String sLine = null; + while( ( sLine = reader.readLine() ) != null ) { + lines.add( sLine.trim() ); + } + } + finally { + if( reader != null ) { + reader.close(); + } + } + return lines; + } +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,165 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLNamedObject; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.O... [truncated message content] |