From: <lor...@us...> - 2013-09-04 15:39:12
|
Revision: 4068 http://sourceforge.net/p/dl-learner/code/4068 Author: lorenz_b Date: 2013-09-04 15:39:08 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added method to get relevant text for all entities. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java trunk/examples/isle/father_labeled.owl trunk/pom.xml trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -314,11 +314,6 @@ <artifactId>jwnl</artifactId> <version>1.4.1.RC2</version> </dependency> - <dependency> - <groupId>com.google.collections</groupId> - <artifactId>google-collections</artifactId> - <version>1.0</version> - </dependency> </dependencies> <dependencyManagement> <dependencies> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -34,6 +34,7 @@ import org.dllearner.algorithms.celoe.CELOE; import org.dllearner.algorithms.celoe.OENode; import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.ComponentAnn; @@ -47,10 +48,12 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Restriction; import org.dllearner.core.owl.Thing; +import org.dllearner.kb.OWLFile; import org.dllearner.learningproblems.ClassLearningProblem; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.reasoning.FastInstanceChecker; import org.dllearner.refinementoperators.CustomHierarchyRefinementOperator; import org.dllearner.refinementoperators.CustomStartRefinementOperator; import org.dllearner.refinementoperators.LengthLimitedRefinementOperator; @@ -94,8 +97,7 @@ // all nodes in the search tree (used for selecting most promising node) private TreeSet<OENode> nodes; -// private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime(); - private NLPHeuristic heuristic = new NLPHeuristic(); + private NLPHeuristic heuristic; // = new OEHeuristicRuntime(); // root of search tree private OENode startNode; // the class with which we start the refinement process @@ -146,10 +148,11 @@ private int expressionTests = 0; private int minHorizExp = 0; private int maxHorizExp = 0; - private long totalRuntimeNs; + private long totalRuntimeNs = 0; // TODO: turn those into config options + // important: do not initialise those with empty sets // null = no settings for allowance / ignorance // empty set = allow 
/ ignore nothing (it is often not desired to allow no class!) @@ -896,6 +899,10 @@ } } + public TreeSet<OENode> getNodes() { + return nodes; + } + public int getMaximumHorizontalExpansion() { return maxHorizExp; } @@ -1099,14 +1106,30 @@ public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { this.stopOnFirstDefinition = stopOnFirstDefinition; - } - + } + public long getTotalRuntimeNs() { return totalRuntimeNs; } - - public TreeSet<OENode> getNodes() { - return nodes; + + public static void main(String[] args) throws Exception{ + AbstractKnowledgeSource ks = new OWLFile("../examples/family/father_oe.owl"); + ks.init(); + + AbstractReasonerComponent rc = new FastInstanceChecker(ks); + rc.init(); + + ClassLearningProblem lp = new ClassLearningProblem(rc); + lp.setClassToDescribe(new NamedClass("http://example.com/father#father")); + lp.init(); + + CELOE alg = new CELOE(lp, rc); + alg.setMaxExecutionTimeInSeconds(10); + alg.init(); + + alg.start(); + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -108,17 +108,18 @@ //the NLP based scoring - Description expression = node.getExpression(); - OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); - Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); - double sum = 0; - for (Entity entity : entities) { - double relevance = entityRelevance.containsKey(entity) ? 
entityRelevance.get(entity) : 0; - if(!Double.isInfinite(relevance)){ - sum += relevance; - } - } - score += nlpBonusFactor * sum; +// Description expression = node.getExpression(); +//// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); +//// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); +// Set<Entity> entities = expression.getSignature(); +// double sum = 0; +// for (Entity entity : entities) { +// double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0; +// if(!Double.isInfinite(relevance)){ +// sum += relevance; +// } +// } +// score += nlpBonusFactor * sum; return score; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -75,5 +75,25 @@ } return labels; } + + private Set<String> getRelatedWordPhrases(Entity entity){ + //add the labels if exist + Set<String> relatedWordPhrases = new HashSet<String>(); + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotation.getProperty().equals(annotationProperty)){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + relatedWordPhrases.add(val.getLiteral()); + } + } + } + //add the short form of the URI if no labels are available + if(relatedWordPhrases.isEmpty()){ + relatedWordPhrases.add(sfp.getShortForm(IRI.create(entity.getURI()))); + } + return relatedWordPhrases; + } 
} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -28,10 +28,12 @@ Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); - double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); - double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); - double pmi = Math.log(dPClassEntity / dPClass); + double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 0 : ((double) documentsAB.size() / (double) nrOfDocuments); + double pmi = Math.log(pAB / pA * pB); + return pmi; } @@ -42,11 +44,15 @@ Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); - double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); - double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); - double pmi = Math.log(dPClassEntity / dPClass); + double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); + double pAB = nrOfDocuments == 0 ? 
0 : ((double) documentsAB.size() / (double) nrOfDocuments); - double pAB = (double) documentsAB.size() / (double) nrOfDocuments; + if(pA * pB == 0){ + return 0; + } + double pmi = Math.log(pAB / pA * pB); + double normalizedPMI = (pmi/-Math.log(pAB) + 1)/2; return normalizedPMI; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -40,7 +40,7 @@ Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); for (Entity otherEntity : otherEntities) { - double relevance = metric.getRelevance(entity, otherEntity); + double relevance = metric.getNormalizedRelevance(entity, otherEntity); relevantEntities.put(otherEntity, relevance); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle.textretrieval; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -90,4 +91,26 @@ return textWithWeight; } + + /** + * Returns for each entity in the ontology all relevant text, i.e. eitherthe annotations or the short form of the IRI as fallback. 
+ * @return + */ + public Map<Entity, Set<String>> getRelevantText() { + Map<Entity, Set<String>> entity2RelevantText = new HashMap<Entity, Set<String>>(); + + Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + Map<String, Double> relevantText; + for (OWLEntity owlEntity : schemaEntities) { + Entity entity = OWLAPIConverter.getEntity(owlEntity); + relevantText = getRelevantText(entity); + entity2RelevantText.put(entity, relevantText.keySet()); + } + + return entity2RelevantText; + } } Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/Description.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -19,9 +19,11 @@ package org.dllearner.core.owl; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; /** * A class description is sometimes also called "complex class" or "concept". @@ -211,6 +213,36 @@ } /** + * Returns all named entities. 
+ * @return + */ + public Set<Entity> getSignature(){ + Set<Entity> entities = new HashSet<Entity>(); + if(this instanceof NamedClass){ + entities.add((NamedClass)this); + } else if(this instanceof Thing){ + entities.add(new NamedClass(Thing.uri)); + } else if(this instanceof Nothing){ + entities.add(new NamedClass(Nothing.uri)); + } else if(this instanceof Restriction){ + PropertyExpression propertyExpression = ((Restriction)this).getRestrictedPropertyExpression(); + if(propertyExpression instanceof ObjectProperty){ + entities.add((ObjectProperty)propertyExpression); + } else if(propertyExpression instanceof DatatypeProperty){ + entities.add((DatatypeProperty)propertyExpression); + } + entities.addAll(getChild(0).getSignature()); + + } else { + for (Description child : children) { + entities.addAll(child.getSignature()); + } + } + + return entities; + } + + /** * Returns a manchester syntax string of this description. For a * reference, see * <a href="http://www.co-ode.org/resources/reference/manchester_syntax">here</a> Modified: trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/Nothing.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -19,6 +19,7 @@ package org.dllearner.core.owl; +import java.net.URI; import java.util.Map; /** @@ -35,6 +36,8 @@ private static final long serialVersionUID = -3053885252153066318L; public static final Nothing instance = new Nothing(); + public static final URI uri = URI.create("http://www.w3.org/2002/07/owl#Thing"); + public String toString(String baseURI, Map<String,String> prefixes) { return "BOTTOM"; } @@ -52,7 +55,14 @@ // in Protege 4.0 only Nothing //return "owl:Nothing"; return "Nothing"; - } + } + + /** + * @return the uri + */ + public static URI getURI() { + return uri; 
+ } public int getLength() { return 1; Modified: trunk/examples/isle/father_labeled.owl =================================================================== --- trunk/examples/isle/father_labeled.owl 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/examples/isle/father_labeled.owl 2013-09-04 15:39:08 UTC (rev 4068) @@ -58,7 +58,7 @@ <!-- http://example.com/father#father --> <owl:Class rdf:about="&father;father"> - <rdfs:label xml:lang="en">person which has at least 1 child</rdfs:label> + <rdfs:label xml:lang="en">male person which has at least 1 child</rdfs:label> <rdfs:subClassOf rdf:resource="&father;male"/> </owl:Class> Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -164,7 +164,7 @@ <dependency> <groupId>org.semanticweb.hermit</groupId> <artifactId>hermit</artifactId> - <version>1.3.3</version> + <version>1.3.8</version> </dependency> <!-- SOLR Dependency --> Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/pom.xml 2013-09-04 15:39:08 UTC (rev 4068) @@ -139,6 +139,13 @@ </exclusions> </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.0</version> +</dependency> + </dependencies> <build> Modified: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternDetectionEvaluation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -13,6 +13,8 @@ import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -39,6 +41,8 @@ import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.UnloadableImportException; +import com.google.common.math.IntMath; + import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; @@ -57,6 +61,11 @@ private boolean formatNumbers = true; private int numberOfRowsPerTable = 25; + private int minOntologies = 5; + + private Map<OWLAxiom, Integer> winsorizedFrequencies = new HashMap<OWLAxiom, Integer>(); + private int percentileInPercent = 95; + public OWLAxiomPatternDetectionEvaluation() { initDBConnection(); @@ -311,12 +320,13 @@ String latexTable = "\\begin{table}\n"; latexTable += "\\begin{tabular}{lrrr}\n"; latexTable += "\\toprule\n"; - latexTable += "Pattern & Frequency & \\#Ontologies\\\\\\midrule\n"; + latexTable += "Pattern & Frequency & Winsorised Frequency & \\#Ontologies\\\\\\midrule\n"; for (Entry<OWLAxiom, Pair<Integer, Integer>> entry : topN.entrySet()) { OWLAxiom axiom = entry.getKey(); Integer frequency = entry.getValue().getKey(); Integer df = entry.getValue().getValue(); + Integer winsorizedFrequency = winsorizedFrequencies.get(axiom); if(axiom != null){ String axiomColumn = axiomRenderer.render(axiom); @@ -330,7 +340,7 @@ } if(formatNumbers){ - latexTable += axiomColumn + " & " + "\\num{" + frequency + "} & " + df + "\\\\\n"; + latexTable += axiomColumn + " & \\num{" + frequency + "} & \\num{" + winsorizedFrequency + "} & "+ df + "\\\\\n"; } else { latexTable += axiomColumn + " & " + frequency + " & " + df + "\\\\\n"; } @@ -347,13 +357,13 @@ LatexWriter w = new LatexWriter(sw); LatexObjectVisitor renderer = new LatexObjectVisitor(w, df); String latexTable = "\\begin{table}\n"; - latexTable += "\\begin{tabular}{rlrr"; + latexTable += 
"\\begin{tabular}{rlrrr"; for (int i = 0; i < repositories.size(); i++) { latexTable += "r"; } latexTable += "}\n"; latexTable += "\\toprule\n"; - latexTable += " & Pattern & Frequency & \\#Ontologies"; + latexTable += " & Pattern & Frequency & Winsorized Frequency & \\#Ontologies"; for (OntologyRepository repository : repositories) { latexTable += " & " + repository.getName(); } @@ -366,6 +376,7 @@ OWLAxiom axiom = entry.getValue().keySet().iterator().next(); Integer frequency = entry.getValue().values().iterator().next().getKey(); Integer df = entry.getValue().values().iterator().next().getValue(); + int winsorizedFrequency = winsorizedFrequencies.get(axiom); if(axiom != null){ String axiomColumn = axiomRenderer.render(axiom); @@ -378,7 +389,7 @@ } if(formatNumbers){ - latexTable += i + ". & " + axiomColumn + " & " + "\\num{" + frequency + "} & " + df; + latexTable += i + ". & " + axiomColumn + " & \\num{" + frequency + "} & \\num{" + winsorizedFrequency + "} & "+ df; for (OntologyRepository repository : repositories) { int rank = 0; boolean contained = false; @@ -433,21 +444,62 @@ ps = conn.prepareStatement("SELECT P.id, pattern,SUM(occurrences),COUNT(ontology_id) FROM " + "Ontology_Pattern OP, Pattern P, Ontology O WHERE " + "(P.id=OP.pattern_id AND O.id=OP.ontology_id AND P.axiom_type=?) " + - "GROUP BY P.id ORDER BY SUM(`OP`.`occurrences`) DESC LIMIT ?"); + "GROUP BY P.id HAVING COUNT(ontology_id)>=? 
ORDER BY SUM(`OP`.`occurrences`) DESC LIMIT ?"); ps.setString(1, axiomType.name()); - ps.setInt(2, n); + ps.setInt(2, minOntologies); + ps.setInt(3, n); rs = ps.executeQuery(); while(rs.next()){ + int patternID = rs.getInt(1); + OWLAxiom axiom = asOWLAxiom(rs.getString(2)); Map<OWLAxiom, Pair<Integer, Integer>> m = new LinkedHashMap<OWLAxiom, Pair<Integer,Integer>>(); - m.put(asOWLAxiom(rs.getString(2)), new Pair<Integer, Integer>(rs.getInt(3), rs.getInt(4))); - topN.put(rs.getInt(1), m); + m.put(axiom, new Pair<Integer, Integer>(rs.getInt(3), rs.getInt(4))); + topN.put(patternID, m); + + //get winsorized frequency + ps = conn.prepareStatement("SELECT occurrences FROM " + + "Ontology_Pattern WHERE " + + "(pattern_id=?) "); + ps.setInt(1, patternID); + ResultSet rs2 = ps.executeQuery(); + System.out.println("Pattern ID:" + patternID); + System.out.println(axiom); + + List<Integer> values = new ArrayList<Integer>(); + while(rs2.next()){ + values.add(rs2.getInt(1)); + } + winsorize(values); + int sum = 0; + for (Integer val : values) { + sum += val; + } + winsorizedFrequencies.put(axiom, sum); } + + + } catch(SQLException e){ e.printStackTrace(); } return topN; } + private void winsorize(List<Integer> values){ + //compute 95th percentile + int percentile = (int) Math.round(percentileInPercent/100d * values.size() + 1/2d); + //sort values + Collections.sort(values);System.out.println(values); + //get the value at percentile rank + int max = values.get(percentile-1); + //set all values after to max + for (int i = percentile; i < values.size(); i++) { + values.set(i, max); + } + System.out.println(percentile); + System.out.println(values); + } + private Map<Integer, Map<OWLAxiom, Pair<Integer, Integer>>> getTopNAxiomPatternsWithId(OntologyRepository repository, AxiomTypeCategory axiomType, int n){ Map<Integer, Map<OWLAxiom, Pair<Integer, Integer>>> topN = new LinkedHashMap<Integer, Map<OWLAxiom, Pair<Integer, Integer>>>(); PreparedStatement ps; @@ -569,6 +621,5 @@ new 
OWLAxiomPatternDetectionEvaluation().run(analyzeRepositories, Arrays.asList( new TONESRepository(), new BioPortalRepository(), new OxfordRepository())); } - } Modified: trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java 2013-09-04 15:23:29 UTC (rev 4067) +++ trunk/scripts/src/main/java/org/dllearner/scripts/pattern/OWLAxiomPatternUsageEvaluation.java 2013-09-04 15:39:08 UTC (rev 4068) @@ -42,6 +42,7 @@ import joptsimple.OptionSet; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.log4j.Logger; import org.coode.owlapi.turtle.TurtleOntologyFormat; import org.dllearner.core.EvaluatedAxiom; @@ -51,6 +52,7 @@ import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.QueryEngineHTTP; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.learningproblems.AxiomScore; @@ -92,7 +94,6 @@ import com.clarkparsia.pellet.owlapiv3.PelletReasonerFactory; import com.google.common.base.Charsets; import com.google.common.base.Joiner; -import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -117,7 +118,6 @@ import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; import com.hp.hpl.jena.vocabulary.RDF; import com.jamonapi.Monitor; @@ -165,7 +165,12 @@ private File 
samplesDir; private File instantiationsDir; + private DescriptiveStatistics fragmentStatistics = new DescriptiveStatistics(100); + + + private int nrOfEarlyTerminations = 0; + public OWLAxiomPatternUsageEvaluation() { try { BZip2CompressorInputStream is = new BZip2CompressorInputStream(new URL(ontologyURL).openStream()); @@ -374,7 +379,9 @@ e.printStackTrace(); } } -// System.exit(0); + logger.info("Early terminations: " + nrOfEarlyTerminations ); + logger.info(fragmentStatistics.getMin() + "--" + fragmentStatistics.getMax() + "--" + fragmentStatistics.getMean()); + System.exit(0); Monitor patternTimeMon = MonitorFactory.getTimeMonitor("pattern-runtime"); //for each pattern @@ -694,28 +701,39 @@ long startTime = System.currentTimeMillis(); int offset = 0; boolean hasMoreResults = true; - while(hasMoreResults && (System.currentTimeMillis() - startTime)<= maxFragmentExtractionTime){ + long remainingTime = maxFragmentExtractionTime - (System.currentTimeMillis() - startTime); + while(hasMoreResults && remainingTime > 0){ query.setOffset(offset); logger.info(query); - Model m = executeConstructQuery(query); + Model m = executeConstructQuery(query, remainingTime); fragment.add(m); + remainingTime = maxFragmentExtractionTime - (System.currentTimeMillis() - startTime); if(m.size() == 0){ hasMoreResults = false; + if(remainingTime > 0){ + logger.info("No more triples left. 
Early termination..."); + nrOfEarlyTerminations++; + } + } offset += queryLimit; - try { - Thread.sleep(500); - } catch (InterruptedException e) { - e.printStackTrace(); - } +// try { +// Thread.sleep(500); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } } - logger.info("...got " + fragment.size() + " triples."); try { fragment.write(new FileOutputStream(file), "TURTLE"); } catch (FileNotFoundException e) { e.printStackTrace(); } filterModel(fragment); + logger.info("...got " + fragment.size() + " triples "); + ResultSet rs = QueryExecutionFactory.create("SELECT (COUNT(DISTINCT ?s) AS ?cnt) WHERE {?s a <" + cls.getName() + ">. }", fragment).execSelect(); + int nrOfInstances = rs.next().getLiteral("cnt").getInt(); + logger.info("with " + nrOfInstances + " instances of class " + cls.getName()); + fragmentStatistics.addValue(nrOfInstances); return fragment; } @@ -1281,6 +1299,45 @@ return rs; } + protected Model executeConstructQuery(Query query, long timeout) { + if(ks.isRemote()){ + SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint(); + ExtractionDBCache cache = ks.getCache(); + Model model = null; + try { +// if(cache != null){ +// try { +// model = cache.executeConstructQuery(endpoint, query.toString()); +// } catch (UnsupportedEncodingException e) { +// e.printStackTrace(); +// } catch (SQLException e) { +// e.printStackTrace(); +// } +// } else { + QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), + query); + queryExecution.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + queryExecution.setNamedGraphURIs(endpoint.getNamedGraphURIs()); + queryExecution.setTimeout(timeout, timeout); + model = queryExecution.execConstruct(); +// } + logger.debug("Got " + model.size() + " triples."); + return model; + } catch (QueryExceptionHTTP e) { + if(e.getCause() instanceof SocketTimeoutException){ + logger.warn("Got timeout"); + } else { + logger.error("Exception executing query", e); + } + return 
ModelFactory.createDefaultModel(); + } + } else { + QueryExecution queryExecution = QueryExecutionFactory.create(query, ((LocalModelBasedSparqlEndpointKS)ks).getModel()); + Model model = queryExecution.execConstruct(); + return model; + } + } + protected Model executeConstructQuery(Query query) { if(ks.isRemote()){ SparqlEndpoint endpoint = ((SparqlEndpointKS) ks).getEndpoint(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |