You can subscribe to this list here.
2007 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(120) |
Sep
(36) |
Oct
(116) |
Nov
(17) |
Dec
(44) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2008 |
Jan
(143) |
Feb
(192) |
Mar
(74) |
Apr
(84) |
May
(105) |
Jun
(64) |
Jul
(49) |
Aug
(120) |
Sep
(159) |
Oct
(156) |
Nov
(51) |
Dec
(28) |
2009 |
Jan
(17) |
Feb
(55) |
Mar
(33) |
Apr
(57) |
May
(54) |
Jun
(28) |
Jul
(6) |
Aug
(16) |
Sep
(38) |
Oct
(30) |
Nov
(26) |
Dec
(52) |
2010 |
Jan
(7) |
Feb
(91) |
Mar
(65) |
Apr
(2) |
May
(14) |
Jun
(25) |
Jul
(38) |
Aug
(48) |
Sep
(80) |
Oct
(70) |
Nov
(75) |
Dec
(77) |
2011 |
Jan
(68) |
Feb
(53) |
Mar
(51) |
Apr
(35) |
May
(65) |
Jun
(101) |
Jul
(29) |
Aug
(230) |
Sep
(95) |
Oct
(49) |
Nov
(110) |
Dec
(63) |
2012 |
Jan
(41) |
Feb
(42) |
Mar
(25) |
Apr
(46) |
May
(51) |
Jun
(44) |
Jul
(45) |
Aug
(29) |
Sep
(12) |
Oct
(9) |
Nov
(17) |
Dec
(2) |
2013 |
Jan
(12) |
Feb
(14) |
Mar
(7) |
Apr
(16) |
May
(54) |
Jun
(27) |
Jul
(11) |
Aug
(5) |
Sep
(85) |
Oct
(27) |
Nov
(37) |
Dec
(32) |
2014 |
Jan
(8) |
Feb
(29) |
Mar
(5) |
Apr
(3) |
May
(22) |
Jun
(3) |
Jul
(4) |
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <lor...@us...> - 2013-09-16 00:14:36
|
Revision: 4110 http://sourceforge.net/p/dl-learner/code/4110 Author: lorenz_b Date: 2013-09-16 00:14:34 +0000 (Mon, 16 Sep 2013) Log Message: ----------- Updated Pellet deps. Modified Paths: -------------- trunk/components-core/pom.xml trunk/interfaces/pom.xml trunk/pom.xml Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-14 06:51:26 UTC (rev 4109) +++ trunk/components-core/pom.xml 2013-09-16 00:14:34 UTC (rev 4110) @@ -105,20 +105,9 @@ <version>3.4.4</version> </dependency> - <!-- THIS IS FROM THE UNIBAS REPO --> <dependency> - <groupId>com.owldl</groupId> + <groupId>com.clarkparsia</groupId> <artifactId>pellet</artifactId> - <exclusions> - <exclusion> <!-- declare the exclusion here --> - <groupId>org.mortbay.jetty</groupId> - <artifactId>org.mortbay.jetty</artifactId> - </exclusion> - <exclusion> - <artifactId>owlapi</artifactId> - <groupId>net.sourceforge.owlapi</groupId> - </exclusion> - </exclusions> </dependency> <dependency> Modified: trunk/interfaces/pom.xml =================================================================== --- trunk/interfaces/pom.xml 2013-09-14 06:51:26 UTC (rev 4109) +++ trunk/interfaces/pom.xml 2013-09-16 00:14:34 UTC (rev 4110) @@ -525,6 +525,10 @@ <groupId>net.sourceforge.owlapi</groupId> <artifactId>owlapi</artifactId> </exclusion> + <exclusion> + <groupId>com.owldl</groupId> + <artifactId>pellet</artifactId> + </exclusion> </exclusions> </dependency> <dependency> Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2013-09-14 06:51:26 UTC (rev 4109) +++ trunk/pom.xml 2013-09-16 00:14:34 UTC (rev 4110) @@ -20,7 +20,7 @@ <slf4j.version>1.6.4</slf4j.version> <log4j.version>1.2.16</log4j.version> - <solr.version>4.1.0</solr.version> + <solr.version>4.4.0</solr.version> </properties> <modules> @@ -143,16 +143,9 @@ </dependency> <dependency> - <groupId>com.owldl</groupId> + <groupId>com.clarkparsia</groupId> <artifactId>pellet</artifactId> - <version>2.3.0</version> - <exclusions> - <!--Excluding this because it has the same classpath as the new Apache Jena and can cause problems--> - <exclusion> - <groupId>com.hp.hpl.jena</groupId> - <artifactId>jena</artifactId> - </exclusion> - </exclusions> + <version>2.3.1</version> </dependency> <dependency> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-14 06:51:28
|
Revision: 4109 http://sourceforge.net/p/dl-learner/code/4109 Author: lorenz_b Date: 2013-09-14 06:51:26 +0000 (Sat, 14 Sep 2013) Log Message: ----------- Extended fragment extraction. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-14 06:45:10 UTC (rev 4108) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-14 06:51:26 UTC (rev 4109) @@ -14,9 +14,6 @@ import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; -import cern.colt.Arrays; -import cern.colt.list.AbstractCollection; - import edu.stanford.nlp.util.Sets; /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-14 06:45:20
|
Revision: 4108 http://sourceforge.net/p/dl-learner/code/4108 Author: lorenz_b Date: 2013-09-14 06:45:10 +0000 (Sat, 14 Sep 2013) Log Message: ----------- Extended fragment extraction. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl2.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/NBR.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java trunk/components-core/src/test/java/org/dllearner/algorithms/qtl/LGGTest.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractionStrategy.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/IndividualBasedFragmentExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/MaximumModalDepthDetector.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/TimeBasedFragmentExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGGMultithreaded.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/pom.xml 2013-09-14 06:45:10 UTC (rev 4108) @@ -314,6 +314,11 @@ <artifactId>jwnl</artifactId> <version>1.4.1.RC2</version> </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.1.1</version> + </dependency> </dependencies> <dependencyManagement> <dependencies> Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractionStrategy.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractionStrategy.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractionStrategy.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,20 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +/** + * @author Lorenz Buehmann + * + */ +public enum FragmentExtractionStrategy { + + /** + * Extract a fragment based on a given number of examples. + */ + INDIVIDUALS, + /** + * Extract as much information as possible in a given time. 
+ */ + TIME +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractor.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/FragmentExtractor.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,23 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +import org.dllearner.core.owl.NamedClass; + +import com.hp.hpl.jena.rdf.model.Model; + +/** + * @author Lorenz Buehmann + * + */ +public interface FragmentExtractor { + + /** + * @param cls + * @param maxFragmentDepth + * @return + */ + Model extractFragment(NamedClass cls, int maxFragmentDepth); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/IndividualBasedFragmentExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/IndividualBasedFragmentExtractor.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/IndividualBasedFragmentExtractor.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,109 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +import java.sql.SQLException; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2; +import org.aksw.jena_sparql_api.cache.extra.CacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheExImpl; +import org.aksw.jena_sparql_api.core.QueryExecutionFactory; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; +import org.dllearner.kb.sparql.QueryExecutionFactoryHttp; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** + * @author Lorenz Buehmann + * + */ +public class IndividualBasedFragmentExtractor implements FragmentExtractor{ + + public static final FragmentExtractionStrategy extractionStrategy = FragmentExtractionStrategy.INDIVIDUALS; + private QueryExecutionFactory qef; + + private long maxNrOfIndividuals; + private long startTime; + + private ConciseBoundedDescriptionGenerator cbdGen; + + public IndividualBasedFragmentExtractor(SparqlEndpointKS ks, String cacheDir, int maxNrOfIndividuals) { + this.maxNrOfIndividuals = maxNrOfIndividuals; + + SparqlEndpoint endpoint = ks.getEndpoint(); + + qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()); + if(cacheDir != null){ + try { + long timeToLive = TimeUnit.DAYS.toMillis(30); + CacheCoreEx cacheBackend = CacheCoreH2.create(cacheDir, timeToLive, true); + CacheEx cacheFrontend = new CacheExImpl(cacheBackend); + qef = new QueryExecutionFactoryCacheEx(qef, cacheFrontend); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + cbdGen = new 
ConciseBoundedDescriptionGeneratorImpl(endpoint, cacheDir); + } + + public IndividualBasedFragmentExtractor(SparqlEndpointKS ks, int maxNrOfIndividuals) { + this(ks, null, maxNrOfIndividuals); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.pattern.FragmentExtractor#extractFragment(org.dllearner.core.owl.NamedClass) + */ + @Override + public Model extractFragment(NamedClass cls, int maxFragmentDepth) { + startTime = System.currentTimeMillis(); + Model fragment = ModelFactory.createDefaultModel(); + + //get some random individuals + Set<Individual> individuals = getRandomIndividuals(cls); + + //get for each individual the CBD + Model cbd; + for (Individual ind : individuals) { + cbd = cbdGen.getConciseBoundedDescription(ind.getName(), maxFragmentDepth); + fragment.add(cbd); + } + return fragment; + } + + private Set<Individual> getRandomIndividuals(NamedClass cls){ + Set<Individual> individuals = new HashSet<Individual>(); + + String query = "SELECT ?s WHERE {?s a <" + cls.getName() + ">} LIMIT " + maxNrOfIndividuals; + QueryExecution qe = qef.createQueryExecution(query); + ResultSet rs = qe.execSelect(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + if(qs.get("s").isURIResource()){ + individuals.add(new Individual(qs.getResource("s").getURI())); + } + + } + qe.close(); + + return individuals; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/MaximumModalDepthDetector.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/MaximumModalDepthDetector.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/MaximumModalDepthDetector.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,503 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationPropertyDomainAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationPropertyRangeAxiom; +import org.semanticweb.owlapi.model.OWLAsymmetricObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLAxiomVisitor; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLClassAssertionAxiom; +import org.semanticweb.owlapi.model.OWLClassExpression; +import org.semanticweb.owlapi.model.OWLClassExpressionVisitor; +import org.semanticweb.owlapi.model.OWLDataAllValuesFrom; +import org.semanticweb.owlapi.model.OWLDataExactCardinality; +import org.semanticweb.owlapi.model.OWLDataHasValue; +import org.semanticweb.owlapi.model.OWLDataMaxCardinality; +import org.semanticweb.owlapi.model.OWLDataMinCardinality; +import org.semanticweb.owlapi.model.OWLDataPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLDataPropertyDomainAxiom; +import org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom; +import org.semanticweb.owlapi.model.OWLDataSomeValuesFrom; +import org.semanticweb.owlapi.model.OWLDatatypeDefinitionAxiom; +import org.semanticweb.owlapi.model.OWLDeclarationAxiom; +import org.semanticweb.owlapi.model.OWLDifferentIndividualsAxiom; +import org.semanticweb.owlapi.model.OWLDisjointClassesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointDataPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLDisjointUnionAxiom; +import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom; +import 
org.semanticweb.owlapi.model.OWLEquivalentDataPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLEquivalentObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLFunctionalDataPropertyAxiom; +import org.semanticweb.owlapi.model.OWLFunctionalObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLHasKeyAxiom; +import org.semanticweb.owlapi.model.OWLInverseFunctionalObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLInverseObjectPropertiesAxiom; +import org.semanticweb.owlapi.model.OWLIrreflexiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLNegativeDataPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLNegativeObjectPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLObjectAllValuesFrom; +import org.semanticweb.owlapi.model.OWLObjectComplementOf; +import org.semanticweb.owlapi.model.OWLObjectExactCardinality; +import org.semanticweb.owlapi.model.OWLObjectHasSelf; +import org.semanticweb.owlapi.model.OWLObjectHasValue; +import org.semanticweb.owlapi.model.OWLObjectIntersectionOf; +import org.semanticweb.owlapi.model.OWLObjectMaxCardinality; +import org.semanticweb.owlapi.model.OWLObjectMinCardinality; +import org.semanticweb.owlapi.model.OWLObjectOneOf; +import org.semanticweb.owlapi.model.OWLObjectPropertyAssertionAxiom; +import org.semanticweb.owlapi.model.OWLObjectPropertyDomainAxiom; +import org.semanticweb.owlapi.model.OWLObjectPropertyRangeAxiom; +import org.semanticweb.owlapi.model.OWLObjectSomeValuesFrom; +import org.semanticweb.owlapi.model.OWLObjectUnionOf; +import org.semanticweb.owlapi.model.OWLReflexiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLSameIndividualAxiom; +import org.semanticweb.owlapi.model.OWLSubAnnotationPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubClassOfAxiom; +import org.semanticweb.owlapi.model.OWLSubDataPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubObjectPropertyOfAxiom; +import org.semanticweb.owlapi.model.OWLSubPropertyChainOfAxiom; +import org.semanticweb.owlapi.model.OWLSymmetricObjectPropertyAxiom; +import org.semanticweb.owlapi.model.OWLTransitiveObjectPropertyAxiom; +import org.semanticweb.owlapi.model.SWRLRule; + +import arq.cmdline.ModAlgebra; + +/** + * @author Lorenz Buehmann + * + */ +public class MaximumModalDepthDetector implements OWLAxiomVisitor, OWLClassExpressionVisitor{ + + int maxModalDepth; + + public static int getMaxModalDepth(OWLAxiom axiom){ + MaximumModalDepthDetector depthDetector = new MaximumModalDepthDetector(); + axiom.accept(depthDetector); + return depthDetector.maxModalDepth; + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAnnotationAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom) + */ + @Override + public void visit(OWLAnnotationAssertionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAnnotationAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSubAnnotationPropertyOfAxiom) + */ + @Override + public void visit(OWLSubAnnotationPropertyOfAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAnnotationAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLAnnotationPropertyDomainAxiom) + */ + @Override + public void visit(OWLAnnotationPropertyDomainAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAnnotationAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLAnnotationPropertyRangeAxiom) + */ + @Override + public void visit(OWLAnnotationPropertyRangeAxiom axiom) { + } + + + + /* 
(non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDeclarationAxiom) + */ + @Override + public void visit(OWLDeclarationAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSubClassOfAxiom) + */ + @Override + public void visit(OWLSubClassOfAxiom axiom) { + OWLClassExpression subClass = axiom.getSubClass(); + subClass.accept(this); + int tmp = maxModalDepth; + maxModalDepth = 1; + OWLClassExpression superClass = axiom.getSuperClass(); + superClass.accept(this); + maxModalDepth = Math.max(tmp, maxModalDepth); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLNegativeObjectPropertyAssertionAxiom) + */ + @Override + public void visit(OWLNegativeObjectPropertyAssertionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLAsymmetricObjectPropertyAxiom) + */ + @Override + public void visit(OWLAsymmetricObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLReflexiveObjectPropertyAxiom) + */ + @Override + public void visit(OWLReflexiveObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDisjointClassesAxiom) + */ + @Override + public void visit(OWLDisjointClassesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDataPropertyDomainAxiom) + */ + @Override + public void visit(OWLDataPropertyDomainAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLObjectPropertyDomainAxiom) + */ + @Override + public void visit(OWLObjectPropertyDomainAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLEquivalentObjectPropertiesAxiom) + */ + @Override + public void visit(OWLEquivalentObjectPropertiesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLNegativeDataPropertyAssertionAxiom) + */ + @Override + public void visit(OWLNegativeDataPropertyAssertionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDifferentIndividualsAxiom) + */ + @Override + public void visit(OWLDifferentIndividualsAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDisjointDataPropertiesAxiom) + */ + @Override + public void visit(OWLDisjointDataPropertiesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDisjointObjectPropertiesAxiom) + */ + @Override + public void visit(OWLDisjointObjectPropertiesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLObjectPropertyRangeAxiom) + */ + @Override + public void visit(OWLObjectPropertyRangeAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLObjectPropertyAssertionAxiom) + */ + @Override + public void visit(OWLObjectPropertyAssertionAxiom 
axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLFunctionalObjectPropertyAxiom) + */ + @Override + public void visit(OWLFunctionalObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSubObjectPropertyOfAxiom) + */ + @Override + public void visit(OWLSubObjectPropertyOfAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDisjointUnionAxiom) + */ + @Override + public void visit(OWLDisjointUnionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSymmetricObjectPropertyAxiom) + */ + @Override + public void visit(OWLSymmetricObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDataPropertyRangeAxiom) + */ + @Override + public void visit(OWLDataPropertyRangeAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLFunctionalDataPropertyAxiom) + */ + @Override + public void visit(OWLFunctionalDataPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLEquivalentDataPropertiesAxiom) + */ + @Override + public void visit(OWLEquivalentDataPropertiesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLClassAssertionAxiom) + */ + @Override + public void visit(OWLClassAssertionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom) + */ + @Override + public void visit(OWLEquivalentClassesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDataPropertyAssertionAxiom) + */ + @Override + public void visit(OWLDataPropertyAssertionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLTransitiveObjectPropertyAxiom) + */ + @Override + public void visit(OWLTransitiveObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLIrreflexiveObjectPropertyAxiom) + */ + @Override + public void visit(OWLIrreflexiveObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSubDataPropertyOfAxiom) + */ + @Override + public void visit(OWLSubDataPropertyOfAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLInverseFunctionalObjectPropertyAxiom) + */ + @Override + public void visit(OWLInverseFunctionalObjectPropertyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSameIndividualAxiom) + */ + @Override + public void visit(OWLSameIndividualAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLSubPropertyChainOfAxiom) + */ + @Override + public void visit(OWLSubPropertyChainOfAxiom axiom) { + } + + /* (non-Javadoc) + * @see 
org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLInverseObjectPropertiesAxiom) + */ + @Override + public void visit(OWLInverseObjectPropertiesAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLHasKeyAxiom) + */ + @Override + public void visit(OWLHasKeyAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.OWLDatatypeDefinitionAxiom) + */ + @Override + public void visit(OWLDatatypeDefinitionAxiom axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLAxiomVisitor#visit(org.semanticweb.owlapi.model.SWRLRule) + */ + @Override + public void visit(SWRLRule axiom) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLClass) + */ + @Override + public void visit(OWLClass ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectIntersectionOf) + */ + @Override + public void visit(OWLObjectIntersectionOf ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectUnionOf) + */ + @Override + public void visit(OWLObjectUnionOf ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectComplementOf) + */ + @Override + public void visit(OWLObjectComplementOf ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectSomeValuesFrom) + */ + @Override + public void visit(OWLObjectSomeValuesFrom ce) { + maxModalDepth++; + ce.getFiller().accept(this); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectAllValuesFrom) + */ + @Override + public void visit(OWLObjectAllValuesFrom ce) { + maxModalDepth++; + ce.getFiller().accept(this); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectHasValue) + */ + @Override + public void visit(OWLObjectHasValue ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectMinCardinality) + */ + @Override + public void visit(OWLObjectMinCardinality ce) { + maxModalDepth++; + ce.getFiller().accept(this); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectExactCardinality) + */ + @Override + public void visit(OWLObjectExactCardinality ce) { + maxModalDepth++; + ce.getFiller().accept(this); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectMaxCardinality) + */ + @Override + public void visit(OWLObjectMaxCardinality ce) { + maxModalDepth++; + ce.getFiller().accept(this); + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectHasSelf) + */ + @Override + public void visit(OWLObjectHasSelf ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLObjectOneOf) + */ + @Override + public void visit(OWLObjectOneOf ce) { + } + + /* (non-Javadoc) + 
* @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataSomeValuesFrom) + */ + @Override + public void visit(OWLDataSomeValuesFrom ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataAllValuesFrom) + */ + @Override + public void visit(OWLDataAllValuesFrom ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataHasValue) + */ + @Override + public void visit(OWLDataHasValue ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataMinCardinality) + */ + @Override + public void visit(OWLDataMinCardinality ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataExactCardinality) + */ + @Override + public void visit(OWLDataExactCardinality ce) { + } + + /* (non-Javadoc) + * @see org.semanticweb.owlapi.model.OWLClassExpressionVisitor#visit(org.semanticweb.owlapi.model.OWLDataMaxCardinality) + */ + @Override + public void visit(OWLDataMaxCardinality ce) { + } + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/PatternBasedAxiomLearningAlgorithm.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,273 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.dllearner.core.AbstractAxiomLearningAlgorithm; +import org.dllearner.core.Score; +import org.dllearner.core.owl.Axiom; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.owl.DLLearnerAxiomConvertVisitor; +import org.dllearner.utilities.owl.OWLAPIAxiomConvertVisitor; +import org.dllearner.utilities.owl.OWLClassExpressionToSPARQLConverter; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLAxiom; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLClassExpression; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLEquivalentClassesAxiom; +import org.semanticweb.owlapi.model.OWLSubClassOfAxiom; +import org.semanticweb.owlapi.model.PrefixManager; +import org.semanticweb.owlapi.util.DefaultPrefixManager; +import org.semanticweb.owlapi.util.OWLObjectDuplicator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; 
+import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.vocabulary.RDF; + +/** + * @author Lorenz Buehmann + * + */ +public class PatternBasedAxiomLearningAlgorithm extends AbstractAxiomLearningAlgorithm{ + + private static final Logger logger = LoggerFactory.getLogger(PatternBasedAxiomLearningAlgorithm.class); + + + private Axiom pattern; + private NamedClass cls; + + private FragmentExtractor fragmentExtractor; + private OWLClassExpressionToSPARQLConverter converter = new OWLClassExpressionToSPARQLConverter(); + private OWLDataFactory dataFactory = new OWLDataFactoryImpl(); + + private OWLAnnotationProperty confidenceProperty = dataFactory.getOWLAnnotationProperty(IRI.create("http://dl-learner.org/pattern/confidence")); + + + private double threshold = 0.4; + + public PatternBasedAxiomLearningAlgorithm(SparqlEndpointKS ks, FragmentExtractionStrategy extractionStrategy) { + this(ks, null, extractionStrategy); + } + + public PatternBasedAxiomLearningAlgorithm(SparqlEndpointKS ks, String cacheDir, FragmentExtractionStrategy extractionStrategy) { + this.ks = ks; + + if(extractionStrategy == FragmentExtractionStrategy.TIME){ + fragmentExtractor = new TimeBasedFragmentExtractor(ks, cacheDir, 20, TimeUnit.SECONDS); + } else if(extractionStrategy == FragmentExtractionStrategy.INDIVIDUALS){ + fragmentExtractor = new IndividualBasedFragmentExtractor(ks, cacheDir, 20); + } + } + + /** + * @param pattern the pattern to set + */ + public void setPattern(Axiom pattern) { + this.pattern = pattern; + } + + /** + * @param cls the cls to set + */ + public void setClass(NamedClass cls) { + this.cls = cls; + } + + @Override + public void start() { + logger.info("Start learning..."); + + startTime = System.currentTimeMillis(); + + logger.info("Pattern: " + pattern); + + //get the maximum modal depth in the pattern axioms + int modalDepth = MaximumModalDepthDetector.getMaxModalDepth(OWLAPIAxiomConvertVisitor.convertAxiom(pattern)); + logger.info("Modal depth: " + modalDepth); + + //extract fragment + Model fragment = fragmentExtractor.extractFragment(cls, modalDepth); + + //try to find instantiation of the pattern with confidence above threshold + Set<OWLAxiom> instantiations = applyPattern(OWLAPIAxiomConvertVisitor.convertAxiom(pattern), dataFactory.getOWLClass(IRI.create(cls.getName())), fragment); + System.out.println(instantiations); + + logger.info("...finished in {}ms.", (System.currentTimeMillis()-startTime)); + } + + private Set<OWLAxiom> applyPattern(OWLAxiom pattern, OWLClass cls, Model fragment) { + Map<OWLAxiom, Score> axioms2Score = new HashMap<OWLAxiom, Score>(); + + OWLClassExpression patternSubClass = null; + OWLClassExpression patternSuperClass = null; + + if(pattern.isOfType(AxiomType.EQUIVALENT_CLASSES)){ + Set<OWLSubClassOfAxiom> subClassOfAxioms = ((OWLEquivalentClassesAxiom)pattern).asOWLSubClassOfAxioms(); + for (OWLSubClassOfAxiom axiom : subClassOfAxioms) { + if(!axiom.getSubClass().isAnonymous()){ + patternSubClass = axiom.getSubClass(); + patternSuperClass = axiom.getSuperClass(); + break; + } + } + } else if(pattern.isOfType(AxiomType.SUBCLASS_OF)){ + patternSubClass = ((OWLSubClassOfAxiom) pattern).getSubClass(); + patternSuperClass = ((OWLSubClassOfAxiom) pattern).getSuperClass(); + } else { + logger.warn("Pattern " + pattern + " not supported yet."); + return Collections.emptySet(); + } + + 
Set<OWLEntity> signature = patternSuperClass.getSignature(); + signature.remove(patternSubClass.asOWLClass()); + Query query = converter.asQuery("?x", dataFactory.getOWLObjectIntersectionOf(cls, patternSuperClass), signature); + logger.info("Running query\n" + query); + Map<OWLEntity, String> variablesMapping = converter.getVariablesMapping(); + com.hp.hpl.jena.query.ResultSet rs = QueryExecutionFactory.create(query, fragment).execSelect(); + QuerySolution qs; + Set<String> resources = new HashSet<String>(); + Multiset<OWLAxiom> instantiations = HashMultiset.create(); + while (rs.hasNext()) { + qs = rs.next(); + resources.add(qs.getResource("x").getURI()); + // get the IRIs for each variable + Map<OWLEntity, IRI> entity2IRIMap = new HashMap<OWLEntity, IRI>(); + entity2IRIMap.put(patternSubClass.asOWLClass(), cls.getIRI()); + boolean skip = false; + for (OWLEntity entity : signature) { + String var = variablesMapping.get(entity); + if(qs.get(var) == null){ + logger.warn("Variable " + var + " is not bound."); + skip = true; + break; + } + if(qs.get(var).isLiteral()){ + skip = true; + break; + } + Resource resource = qs.getResource(var); + if(entity.isOWLObjectProperty() && resource.hasURI(RDF.type.getURI())){ + skip = true; + break; + } + entity2IRIMap.put(entity, IRI.create(resource.getURI())); + } + if(!skip){ + // instantiate the pattern + OWLObjectDuplicator duplicator = new OWLObjectDuplicator(entity2IRIMap, dataFactory); + OWLAxiom patternInstantiation = duplicator.duplicateObject(pattern); + instantiations.add(patternInstantiation); + } + } + // compute the score + int total = resources.size(); + for (OWLAxiom axiom : instantiations.elementSet()) { + int frequency = instantiations.count(axiom); +// System.out.println(axiom + ":" + frequency); + Score score = computeScore(total, Math.min(total, frequency)); + axioms2Score.put(axiom, score); + } + + return asAnnotatedAxioms(axioms2Score); + } + + private Set<OWLAxiom> asAnnotatedAxioms(Map<OWLAxiom, Score> axioms2Score){ + Set<OWLAxiom> annotatedAxioms = new HashSet<OWLAxiom>(); + for (Entry<OWLAxiom, Score> entry : axioms2Score.entrySet()) { + OWLAxiom axiom = entry.getKey(); + Score score = entry.getValue(); + if(score.getAccuracy() >= threshold){ + annotatedAxioms.add(axiom.getAnnotatedAxiom( + Collections.singleton(dataFactory.getOWLAnnotation(confidenceProperty, dataFactory.getOWLLiteral(score.getAccuracy()))))); + + } + } + return annotatedAxioms; + } + + public static void main(String[] args) throws Exception { + OWLDataFactoryImpl df = new OWLDataFactoryImpl(); + PrefixManager pm = new DefaultPrefixManager("http://dllearner.org/pattern#"); + + Model model = ModelFactory.createDefaultModel(); + String triples = + "<http://ex.org/a> a <http://ex.org/A>."+ + "<http://ex.org/a> <http://ex.org/p> <http://ex.org/y1>."+ + "<http://ex.org/y1> a <http://ex.org/B>."+ + + "<http://ex.org/b> a <http://ex.org/A>."+ + "<http://ex.org/b> <http://ex.org/p> <http://ex.org/y2>."+ + "<http://ex.org/y2> a <http://ex.org/B>."+ + + "<http://ex.org/c> a <http://ex.org/A>." + ; + InputStream is = new ByteArrayInputStream( triples.getBytes("UTF-8")); + model.read(is, null, "TURTLE"); + + String query = "SELECT DISTINCT ?x WHERE { " + + "?x a <http://ex.org/A> .}"; + + ResultSet rs = QueryExecutionFactory.create(query, model).execSelect(); + System.out.println(ResultSetFormatter.asText(rs)); + + query = "SELECT DISTINCT ?p0 ?cls0 ?x WHERE { " + + "?x a <http://ex.org/A> ." 
+ + "?x ?p0 ?s0 " + + " { SELECT ?x ?p0 ?cls0 (count(?s1) AS ?cnt1)" + + " WHERE" + + " { ?x ?p0 ?s1 ." + + " ?s1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?cls0" + + " }" + + " GROUP BY ?x ?cls0 ?p0" + + " }" + + " { SELECT ?x ?p0 (count(?s2) AS ?cnt2)" + + " WHERE" + + " { ?x ?p0 ?s2 }" + + " GROUP BY ?x ?p0" + + " }" + + " FILTER ( ?cnt1 = ?cnt2 ) }"; + + rs = QueryExecutionFactory.create(query, model).execSelect(); + System.out.println(ResultSetFormatter.asText(rs)); + + SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); +// endpoint = SparqlEndpoint.getEndpointDBpediaLOD2Cloud(); +// endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + NamedClass cls = new NamedClass("http://dbpedia.org/ontology/SoccerPlayer"); + OWLAxiom pattern = df.getOWLSubClassOfAxiom(df.getOWLClass("A", pm), + df.getOWLObjectAllValuesFrom(df.getOWLObjectProperty("p", pm), df.getOWLClass("B", pm))); + + PatternBasedAxiomLearningAlgorithm la = new PatternBasedAxiomLearningAlgorithm(new SparqlEndpointKS(endpoint), "cache", FragmentExtractionStrategy.INDIVIDUALS); + la.setClass(cls); + la.setPattern(DLLearnerAxiomConvertVisitor.getDLLearnerAxiom(pattern)); + la.start(); + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/TimeBasedFragmentExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/TimeBasedFragmentExtractor.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/TimeBasedFragmentExtractor.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -0,0 +1,119 @@ +/** + * + */ +package org.dllearner.algorithms.pattern; + +import java.sql.SQLException; +import java.util.concurrent.TimeUnit; + +import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2; +import org.aksw.jena_sparql_api.cache.extra.CacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheExImpl; +import org.aksw.jena_sparql_api.core.QueryExecutionFactory; +import org.aksw.jena_sparql_api.pagination.core.PaginationUtils; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.QueryExecutionFactoryHttp; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.query.ParameterizedSparqlString; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** + * @author Lorenz Buehmann + * + */ +public class TimeBasedFragmentExtractor implements FragmentExtractor{ + + public static final FragmentExtractionStrategy extractionStrategy = FragmentExtractionStrategy.TIME; + private SparqlEndpointKS ks; + private QueryExecutionFactory qef; + + private long maxExecutionTimeInMilliseconds; + private long startTime; + + public TimeBasedFragmentExtractor(SparqlEndpointKS ks, String cacheDir, int maxExecutionTimeInMilliseconds, TimeUnit timeUnit) { + this.ks = ks; + this.maxExecutionTimeInMilliseconds = timeUnit.toMillis(maxExecutionTimeInMilliseconds); + + SparqlEndpoint endpoint = ks.getEndpoint(); + + qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()); + if(cacheDir != null){ + try { + long timeToLive = TimeUnit.DAYS.toMillis(30); + CacheCoreEx cacheBackend = CacheCoreH2.create(cacheDir, timeToLive, true); + CacheEx cacheFrontend = new CacheExImpl(cacheBackend); + qef = 
new QueryExecutionFactoryCacheEx(qef, cacheFrontend); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + } + + public TimeBasedFragmentExtractor(SparqlEndpointKS ks, int maxExecutionTimeInMilliseconds, TimeUnit timeUnit) { + this(ks, null, maxExecutionTimeInMilliseconds, timeUnit); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.pattern.FragmentExtractor#extractFragment(org.dllearner.core.owl.NamedClass) + */ + @Override + public Model extractFragment(NamedClass cls, int maxFragmentDepth) { + startTime = System.currentTimeMillis(); + Model fragment = ModelFactory.createDefaultModel(); + + Query query = buildConstructQuery(cls, maxFragmentDepth); + + long pageSize = PaginationUtils.adjustPageSize(qef, 10000); + query.setLimit(pageSize); + int offset = 0; + while(getRemainingRuntime() > 0){ + query.setOffset(offset);System.out.println(query); + Model model = qef.createQueryExecution(query).execConstruct(); + fragment.add(model); + offset += pageSize; + } + return fragment; + } + + private Query buildConstructQuery(NamedClass cls, int depth){ + StringBuilder sb = new StringBuilder(); + int maxVarCnt = 0; + sb.append("CONSTRUCT {\n"); + sb.append("?s").append("?p0 ").append("?o0").append(".\n"); + for(int i = 1; i < depth-1; i++){ + sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + maxVarCnt++; + } + sb.append("?o").append(maxVarCnt).append(" a ?type.\n"); + sb.append("}\n"); + sb.append("WHERE {\n"); + sb.append("?s a ?cls."); + sb.append("?s").append("?p0 ").append("?o0").append(".\n"); + for(int i = 1; i < depth-1; i++){ + sb.append("OPTIONAL{\n"); + sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); + } + sb.append("OPTIONAL{?o").append(maxVarCnt).append(" a ?type}.\n"); + for(int i = 1; i < depth-1; i++){ + sb.append("}"); + } + + sb.append("}\n"); + ParameterizedSparqlString template = new ParameterizedSparqlString(sb.toString()); + template.setIri("cls", cls.getName()); + return template.asQuery(); + } + + private long getRemainingRuntime(){ + return maxExecutionTimeInMilliseconds - (System.currentTimeMillis() - startTime); + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -59,11 +59,11 @@ boolean isLiteralNode(); - void setLiteralNode(boolean isLiteralNode); + void setIsLiteralNode(boolean isLiteralNode); boolean isResourceNode(); - void setResourceNode(boolean isResourceNode); + void setIsResourceNode(boolean isResourceNode); boolean isVarNode(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -70,6 +70,10 @@ */ public class QueryTreeImpl<N> implements QueryTree<N>{ + 
enum NodeType{ + RESOURCE, LITERAL, BLANK, VARIABLE; + } + private N userObject; private QueryTreeImpl<N> parent; @@ -119,8 +123,8 @@ for(QueryTree<N> child : tree.getChildren()){ subTree = new QueryTreeImpl<N>(child); subTree.setId(child.getId()); - subTree.setLiteralNode(child.isLiteralNode()); - subTree.setResourceNode(child.isResourceNode()); + subTree.setIsLiteralNode(child.isLiteralNode()); + subTree.setIsResourceNode(child.isResourceNode()); addChild(subTree, tree.getEdge(child)); } } @@ -175,11 +179,11 @@ } @Override - public void setLiteralNode(boolean isLiteralNode) { + public void setIsLiteralNode(boolean isLiteralNode) { this.isLiteralNode = isLiteralNode; } - public void setBlankNode(boolean isBlankNode) { + public void setIsBlankNode(boolean isBlankNode) { this.isBlankNode = isBlankNode; } @@ -193,7 +197,7 @@ } @Override - public void setResourceNode(boolean isResourceNode) { + public void setIsResourceNode(boolean isResourceNode) { this.isResourceNode = isResourceNode; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -243,7 +243,7 @@ subTree = new QueryTreeImpl<String>(sb.toString()); // subTree = new QueryTreeImpl<String>(lit.toString()); subTree.setId(nodeId++); - subTree.setLiteralNode(true); + subTree.setIsLiteralNode(true); if(lit.getDatatype() == XSDDatatype.XSDinteger || lit.getDatatype() == XSDDatatype.XSDdouble || lit.getDatatype() == XSDDatatype.XSDdate @@ -258,13 +258,13 @@ } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ if(!tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ subTree = new QueryTreeImpl<String>(st.getObject().toString()); - subTree.setResourceNode(true); + subTree.setIsResourceNode(true); tree.addChild(subTree, st.getPredicate().toString()); if(depth < maxDepth){ fillTree(subTree, resource2Statements, depth); } if(object.isAnon()){ - subTree.setBlankNode(true); + subTree.setIsBlankNode(true); } } @@ -272,7 +272,7 @@ if(depth < maxDepth && !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ subTree = new QueryTreeImpl<String>(st.getObject().toString()); - subTree.setResourceNode(true); + subTree.setIsResourceNode(true); tree.addChild(subTree, st.getPredicate().toString()); fillTree(subTree, resource2Statements, depth); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl2.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl2.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -243,13 +243,13 @@ subTree = new QueryTreeImpl<String>(sb.toString()); // subTree = new QueryTreeImpl<String>(lit.toString()); subTree.setId(nodeId++); - subTree.setLiteralNode(true); + subTree.setIsLiteralNode(true); tree.addChild(subTree, st.getPredicate().toString()); } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ if(tree.getUserObjectPathToRoot().size() < 3 && 
!tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ subTree = new QueryTreeImpl<String>(st.getObject().toString()); - subTree.setResourceNode(true); + subTree.setIsResourceNode(true); tree.addChild(subTree, st.getPredicate().toString()); fillTree(subTree, resource2Statements); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/NBR.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/NBR.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/NBR.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -558,8 +558,8 @@ genTree.addChange(new QueryTreeChange(node.getId(), ChangeType.REPLACE_LABEL)); genTree2Sum.put(genTree, sum(matrix.get(node))); node.setUserObject(label); - node.setLiteralNode(isLiteralNode); - node.setResourceNode(!isLiteralNode); + node.setIsLiteralNode(isLiteralNode); + node.setIsResourceNode(!isLiteralNode); } } } @@ -636,8 +636,8 @@ genTree.addChange(new QueryTreeChange(child.getId(), ChangeType.REPLACE_LABEL)); gens.add(genTree); child.setUserObject(label); - child.setLiteralNode(isLiteralNode); - child.setResourceNode(!isLiteralNode); + child.setIsLiteralNode(isLiteralNode); + child.setIsResourceNode(!isLiteralNode); for(QueryTree<N> c : child.getChildren()){ N oldLabel = node2Label.get(c.getId()); if(oldLabel != null){ @@ -814,8 +814,8 @@ } this.nodeId++; subTree = createFilteredTree(child); - subTree.setLiteralNode(child.isLiteralNode()); - subTree.setResourceNode(child.isResourceNode()); + subTree.setIsLiteralNode(child.isLiteralNode()); + subTree.setIsResourceNode(child.isResourceNode()); filteredTree.addChild((QueryTreeImpl<N>)subTree, tree.getEdge(child)); } return filteredTree; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -7,12 +7,14 @@ public class EvaluatedQueryTree<N> implements Comparable<EvaluatedQueryTree<N>>{ private QueryTree<N> tree; - private Collection<QueryTree<N>> uncoveredExamples; + private Collection<QueryTree<N>> falseNegatives; + private Collection<QueryTree<N>> falsePositives; private double score; - public EvaluatedQueryTree(QueryTree<N> tree, Collection<QueryTree<N>> uncoveredExamples, double score) { + public EvaluatedQueryTree(QueryTree<N> tree, Collection<QueryTree<N>> falseNegatives, Collection<QueryTree<N>> falsePositives, double score) { this.tree = tree; - this.uncoveredExamples = uncoveredExamples; + this.falseNegatives = falseNegatives; + this.falsePositives = falsePositives; this.score = score; } @@ -20,10 +22,20 @@ return tree; } - public Collection<QueryTree<N>> getUncoveredExamples() { - return uncoveredExamples; + /** + * @return the falseNegatives + */ + public Collection<QueryTree<N>> getFalseNegatives() { + return falseNegatives; } + /** + * @return the falsePositives + */ + public Collection<QueryTree<N>> getFalsePositives() { + return falsePositives; + } + public double getScore() { return score; } @@ -39,4 +51,12 @@ return 1; } } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + 
@Override + public String toString() { + return "QueryTree(Score:" + score + ")\n" + tree.getStringRepresentation(); + } } \ No newline at end of file Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -48,6 +48,8 @@ private Logger logger = Logger.getLogger(LGGGeneratorImpl.class); private int nodeId; + + private int calls = 0; @Override public QueryTree<N> getLGG(QueryTree<N> tree1, QueryTree<N> tree2) { @@ -58,11 +60,13 @@ public QueryTree<N> getLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters) { nodeId = 0; + calls = 0; Monitor mon = MonitorFactory.getTimeMonitor("LGG"); mon.start(); QueryTree<N> lgg = computeLGG(tree1, tree2, learnFilters); mon.stop(); addNumbering(lgg); + System.out.println("Calls needed: " + calls); return lgg; } @@ -97,13 +101,13 @@ } Monitor mon = MonitorFactory.getTimeMonitor("LGG"); mon.start(); - QueryTree<N> lgg = computeLGG(treeList.get(0), treeList.get(1), learnFilters); + QueryTree<N> lgg = getLGG(treeList.get(0), treeList.get(1), learnFilters); if(logger.isDebugEnabled()){ logger.debug("LGG for 1 and 2:\n" + lgg.getStringRepresentation()); } for(int i = 2; i < treeList.size(); i++){ - lgg = computeLGG(lgg, treeList.get(i), learnFilters); + lgg = getLGG(lgg, treeList.get(i), learnFilters); if(logger.isDebugEnabled()){ logger.debug("LGG for 1-" + (i+1) + ":\n" + lgg.getStringRepresentation()); } @@ -118,7 +122,8 @@ return lgg; } - private QueryTree<N> computeLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters){System.out.println("call"); + private QueryTree<N> computeLGG(QueryTree<N> tree1, QueryTree<N> tree2, boolean learnFilters){ + calls++; if(logger.isDebugEnabled()){ logger.debug("Computing LGG for"); logger.debug(tree1.getStringRepresentation()); @@ -126,9 +131,20 @@ logger.debug(tree2.getStringRepresentation()); } - QueryTree<N> lgg = new QueryTreeImpl<N>(tree1.getUserObject()); + + QueryTree<N> lgg; + //firstly, we check if both root nodes are resource nodes and have the same URI, i.e. the trees describe the same resource + //if YES all child nodes should be also the same and we can just return one of the two tree as LGG + if(tree1.isResourceNode() && tree2.isResourceNode() && tree1.getUserObject().equals(tree2.getUserObject())){ + if(logger.isDebugEnabled()){ + logger.debug("Early termination. 
Tree 1(" + tree1 + ") and tree 2(" + tree2 + ") describe the same resource."); + } + return tree1; + } + //if NO we have to create a new tree as LGG and compute the LGG for the all child node pairs having the same edge to the parent nodes + lgg = new QueryTreeImpl<N>(tree1.getUserObject()); if(tree1.isResourceNode() && tree2.isResourceNode()){ - lgg.setResourceNode(true); + lgg.setIsResourceNode(true); } @@ -153,8 +169,8 @@ // } if(!lgg.sameType(tree2) || !lgg.getUserObject().equals(tree2.getUserObject())){ lgg.setUserObject((N)"?"); - lgg.setLiteralNode(false); - lgg.setResourceNode(false); + lgg.setIsLiteralNode(false); + lgg.setIsResourceNode(false); } if(tree1.isLiteralNode() && tree2.isLiteralNode()){ @@ -165,14 +181,14 @@ ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree1).getLiterals()); ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree2).getLiterals()); } - lgg.setLiteralNode(true); + lgg.setIsLiteralNode(true); } Set<QueryTreeImpl<N>> addedChildren; QueryTreeImpl<N> lggChild; for(Object edge : new TreeSet<Object>(tree1.getEdges())){ if(logger.isTraceEnabled()){ - logger.trace("Regarding egde: " + edge); + logger.trace("Analyzing egde: " + edge); } addedChildren = new HashSet<QueryTreeImpl<N>>(); for(QueryTree<N> child1 : tree1.getChildren(edge)){ Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-09-10 15:52:48 UTC (rev 4107) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-09-14 06:45:10 UTC (rev 4108) @@ -2,6 +2,7 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.PriorityQueue; import java.util.Queue; @@ -15,6 +16,8 @@ import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; +import edu.stanford.nlp.util.Sets; + public class NoiseSensitiveLGG<N> { @@ -25,31 +28,50 @@ private Queue<EvaluatedQueryTree<N>> todoList; private SortedSet<EvaluatedQueryTree<N>> solutions; + private double currentlyBestScore = 0d; + public NoiseSensitiveLGG() { } - public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> trees){ + public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> posExampleTrees){ + return computeLGG(posExampleTrees, Collections.<QueryTree<N>>emptyList()); + } + + public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> posExamples, List<QueryTree<N>> negExamples){ + currentlyBestScore = 0d; Monitor subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); - init(trees); + init(posExamples, negExamples); EvaluatedQueryTree<N> currentElement; do{ logger.trace("TODO list size: " + todoList.size()); //pick best element from todo list currentElement = todoList.poll(); - for (QueryTree<N> example : currentElement.getUncoveredExamples()) { + for (QueryTree<N> example : currentElement.getFalseNegatives()) { QueryTree<N> tree = currentElement.getTree(); //compute the LGG lggMon.start(); QueryTree<N> lgg = lggGenerator.getLGG(tree, example); lggMon.stop(); - //compute examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredExamples = getUncoveredTrees(lgg, trees); + //compute positive examples which are not covered by LGG + Collection<QueryTree<N>> uncoveredPositiveExamples = 
getUncoveredTrees(lgg, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<N>> coveredNegativeExamples = getCoveredTrees(lgg, negExamples); //compute score - double score = Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); - //add to todo list, if not already contained in todo list or solution list - EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredExamples, score); - todo(solution); + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double score = Heuristics.getFScore(recall, precision); + if(score > currentlyBestScore){ + //add to todo list, if not already contained in todo list or solution list + EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredPositiveExamples, coveredNegativeExamples, score); + todo(solution); + currentlyBestScore = score; + } + } solutions.add(currentElement); // todoList.remove(currentElement); @@ -63,6 +85,12 @@ return new ArrayList<EvaluatedQueryTree<N>>(solutions); } + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ private Collection<QueryTree<N>> getUncoveredTrees(QueryTree<N> tree, List<QueryTree<N>> allTrees){ Collection<QueryTree<N>> uncoveredTrees = new ArrayList<QueryTree<N>>(); for (QueryTree<N> queryTree : allTrees) { @@ -74,14 +102,36 @@ return uncoveredTrees; } - private void init(List<QueryTree<N>> trees){ + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ + private Collection<QueryTree<N>> getCoveredTrees(QueryTree<N> tree, List<QueryTree<N>> trees){ + Collection<QueryTree<N>> coveredTrees = new ArrayList<QueryTree<N>>(); + for (QueryTree<N> queryTree : trees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(subsumed){ + coveredTrees.add(queryTree); + } + } + return coveredTrees; + } + + /** + * Initializes the todo list with all distinct trees contained in the given list {@code trees}. + * Firstly, distinct trees are computed and afterwards, for each tree a score is computed. + * @param trees + */ + private void init(List<QueryTree<N>> posExamples, List<QueryTree<N>> negExamples){ todoList = new PriorityQueue<EvaluatedQueryTree<N>>(); solutions = new TreeSet<EvaluatedQueryTree<N>>(); // EvaluatedQueryTree<N> dummy = new EvaluatedQueryTree<N>(new QueryTreeImpl<N>((N)"TOP"), trees, 0d); // todoList.add(dummy); //compute distinct trees Collection<QueryTree<N>> distinctTrees = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> quer... [truncated message content] |
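The scoring step in the diff above replaces the Wald confidence interval of the single-list version with an F-measure over positive and negative coverage, and the todo list only grows when a candidate beats the currently best score. A minimal, dependency-free sketch of the score computation (class and method names here are illustrative, and it assumes Heuristics.getFScore(recall, precision) is the standard balanced F1 measure):

    // Illustrative sketch of the scoring in NoiseSensitiveLGG.computeLGG above.
    public class FMeasureScoringSketch {

        // Balanced F1: harmonic mean of precision and recall (0 if both are 0).
        static double getFScore(double recall, double precision) {
            return (precision + recall == 0) ? 0d : 2 * precision * recall / (precision + recall);
        }

        static double score(int posTotal, int uncoveredPos, int coveredNeg) {
            int coveredPos = posTotal - uncoveredPos;          // positive trees subsumed by the LGG
            double recall = coveredPos / (double) posTotal;
            double precision = (coveredPos + coveredNeg == 0)
                    ? 0
                    : coveredPos / (double) (coveredPos + coveredNeg);
            return getFScore(recall, precision);
        }

        public static void main(String[] args) {
            // 10 positive trees, 2 of them not covered by the LGG, 1 negative tree covered:
            System.out.println(score(10, 2, 1)); // recall 0.8, precision 8/9, F1 ~0.84
        }
    }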
From: <and...@us...> - 2013-09-10 15:52:52
|
Revision: 4107 http://sourceforge.net/p/dl-learner/code/4107 Author: andremelo Date: 2013-09-10 15:52:48 +0000 (Tue, 10 Sep 2013) Log Message: ----------- - Adding the method to EntitityCandidateGenerator interface: HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations) - Adding first version of the postprocessing from the trie implementation Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle; +import java.util.HashMap; import java.util.Set; import org.dllearner.algorithms.isle.index.Annotation; @@ -22,4 +23,7 @@ } public abstract Set<Entity> getCandidates(Annotation annotation); + + + public abstract HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -1,5 +1,6 @@ package org.dllearner.algorithms.isle.index; +import java.util.HashMap; import java.util.HashSet; import java.util.Set; @@ -40,8 +41,9 @@ public AnnotatedDocument processDocument(TextDocument document){ Set<Annotation> annotations = linguisticAnnotator.annotate(document); Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); - for (Annotation annotation : annotations) { - Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); + HashMap<Annotation,Set<Entity>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); + for (Annotation annotation : candidatesMap.keySet()) { + Set<Entity> candidateEntities = candidatesMap.get(annotation); if (candidateEntities == null || candidateEntities.size() == 0) { continue; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle.index; +import java.util.HashMap; import java.util.HashSet; import java.util.Set; @@ -39,4 +40,13 @@ return allEntities; } + @Override + public HashMap<Annotation, 
Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<Entity>> result = new HashMap<Annotation, Set<Entity>>(); + for (Annotation annotation: annotations) + result.put(annotation, getCandidates(annotation)); + + return result; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-10 15:49:18 UTC (rev 4106) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-10 15:52:48 UTC (rev 4107) @@ -1,11 +1,24 @@ package org.dllearner.algorithms.isle.index; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.regex.Pattern; import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.StopWordFilter; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import cern.colt.Arrays; +import cern.colt.list.AbstractCollection; + +import edu.stanford.nlp.util.Sets; + /** * Generates candidates using a entity candidates prefix trie * @author Andre Melo @@ -13,7 +26,9 @@ */ public class TrieEntityCandidateGenerator extends EntityCandidateGenerator{ - EntityCandidatesTrie candidatesTrie; + final EntityCandidatesTrie candidatesTrie; + final StopWordFilter stopWordFilter = new StopWordFilter(); + int window = 10; public TrieEntityCandidateGenerator(OWLOntology ontology, EntityCandidatesTrie candidatesTrie) { super(ontology); @@ -24,4 +39,103 @@ return candidatesTrie.getCandidateEntities(annotation.getToken()); } + /** + * Postprocess the annotations generated by annotate + * The objective is to merge annotations which are likely to belong to the same entity + * @param annotations : set of annotations + * @param window : maximum distance between the annotations + * @return + */ + public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) { + Set<Annotation> annotations = candidatesMap.keySet(); + List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations); + + // Sort annotations by offset in ascending order + Collections.sort(sortedAnnotations, new Comparator<Annotation>(){ + public int compare(Annotation a1,Annotation a2){ + return Integer.compare(a1.getOffset(), a2.getOffset()); + } + }); + + int windowStart = 0; + int windowEnd = 0; + for (int i=0; i<sortedAnnotations.size(); i++) { + + Annotation annotation_i = sortedAnnotations.get(i); + int begin_i = annotation_i.getOffset(); + int end_i = begin_i + annotation_i.getLength()-1; + String token_i = annotation_i.getToken(); + Set<Entity> candidates_i = getCandidates(annotation_i); + Set<Entity> newCandidates_i = new HashSet<Entity>(); + + // Determine the annotations contained in the window + while ((sortedAnnotations.get(windowStart).getOffset()+sortedAnnotations.get(windowStart).getLength()-1)<(begin_i-window)) + windowStart++; + while (windowEnd<sortedAnnotations.size() && sortedAnnotations.get(windowEnd).getOffset()<(end_i+window)) + windowEnd++; + + // For every annotation in the window (defined by the number of characters between offsets) + for (int j=windowStart; j<sortedAnnotations.size() && j<windowEnd; j++) { + 
if (j!=i) { + Annotation annotation_j = sortedAnnotations.get(j); + String token_j = annotation_j.getToken(); + Set<Entity> candidates_j = getCandidates(annotation_j); + Set<Entity> intersection = Sets.intersection(candidates_i, candidates_j); + Set<Entity> newCandidates_ij = new HashSet<Entity>(); + for (Entity commonEntity: intersection) { + if (!(stopWordFilter.isStopWord(token_i) && stopWordFilter.isStopWord(token_j))) { + if (!token_i.contains(token_j) && !token_j.contains(token_i)) { + newCandidates_ij.add(commonEntity); + //System.out.println("common("+token_i+","+token_j+")="+commonEntity); + } + } + } + if (!newCandidates_ij.isEmpty()) { + Annotation mergedAnnotation = mergeAnnotations(annotation_i,annotation_j); + // If there's no punctuation in the merged annotation + if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getToken())) { + candidatesMap.put(mergedAnnotation, newCandidates_ij); + candidatesMap.remove(annotation_i); + candidatesMap.remove(annotation_j); + } + + newCandidates_i.addAll(newCandidates_ij); + } + } + } + + // Deletes annotation if it's a stop word and doesn't have any matching annotation in the window + if (stopWordFilter.isStopWord(token_i)) { + if (newCandidates_i.isEmpty()) + candidatesMap.remove(annotation_i); + } + } + + + + } + + private Annotation mergeAnnotations(Annotation annotation_i, Annotation annotation_j) { + int offset; + int length; + if (annotation_i.getOffset() < annotation_j.getOffset()) { + offset = annotation_i.getOffset(); + length = annotation_j.getOffset() - offset + annotation_j.getLength(); + } else { + offset = annotation_j.getOffset(); + length = annotation_i.getOffset() - offset + annotation_i.getLength(); + } + return new Annotation(annotation_i.getReferencedDocument(), offset, length); + } + + @Override + public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<Entity>> candidatesMap = new HashMap<Annotation, Set<Entity>>(); + for (Annotation annotation: annotations) + candidatesMap.put(annotation, getCandidates(annotation)); + + postProcess(candidatesMap, window, stopWordFilter); + + return candidatesMap; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
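postProcess above merges nearby annotations that share candidate entities, and mergeAnnotations composes a single annotation spanning both tokens, whichever one starts first in the text. A worked example of the offset/length arithmetic, using a simplified stand-in for the project's Annotation class (the real one additionally carries the referenced document):

    // Simplified stand-in for Annotation, just enough to show the merge arithmetic.
    public class AnnotationMergeSketch {
        final int offset, length;

        AnnotationMergeSketch(int offset, int length) {
            this.offset = offset;
            this.length = length;
        }

        // The merged span starts at the earlier offset and ends at the end of the later annotation.
        static AnnotationMergeSketch merge(AnnotationMergeSketch a, AnnotationMergeSketch b) {
            AnnotationMergeSketch first = (a.offset < b.offset) ? a : b;
            AnnotationMergeSketch second = (a.offset < b.offset) ? b : a;
            return new AnnotationMergeSketch(first.offset,
                    second.offset - first.offset + second.length);
        }

        public static void main(String[] args) {
            String text = "... the has child property ...";
            AnnotationMergeSketch has = new AnnotationMergeSketch(8, 3);    // "has"
            AnnotationMergeSketch child = new AnnotationMergeSketch(12, 5); // "child"
            AnnotationMergeSketch merged = merge(has, child);
            // Prints "has child" (offset 8, length 9):
            System.out.println(text.substring(merged.offset, merged.offset + merged.length));
        }
    }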
From: <and...@us...> - 2013-09-10 15:49:21
|
Revision: 4106
http://sourceforge.net/p/dl-learner/code/4106
Author: andremelo
Date: 2013-09-10 15:49:18 +0000 (Tue, 10 Sep 2013)
Log Message:
-----------
Adding isStopWord method

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java	2013-09-10 10:23:36 UTC (rev 4105)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java	2013-09-10 15:49:18 UTC (rev 4106)
@@ -52,5 +52,9 @@
 			}
 		}
 	}
+
+	public boolean isStopWord(String token) {
+		return stopWords.contains(token);
+	}
 }
|
From: <and...@us...> - 2013-09-10 10:23:39
|
Revision: 4105 http://sourceforge.net/p/dl-learner/code/4105 Author: andremelo Date: 2013-09-10 10:23:36 +0000 (Tue, 10 Sep 2013) Log Message: ----------- Simplifying getLongestMatch method Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-09 11:21:51 UTC (rev 4104) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-10 10:23:36 UTC (rev 4105) @@ -119,26 +119,25 @@ } public CharSequence getLongestMatch(CharSequence s) { - Node<T> deepestWithValue = root; Node<T> current = root; int i; + int end = 0; for (i = 0; i < s.length(); i++) { int nodeIndex = s.charAt(i) - rangeOffset; - if (nodeIndex < 0 || rangeSize <= nodeIndex) { + if (nodeIndex < 0 || rangeSize <= nodeIndex) return null; - } + current = current.next[nodeIndex]; - if (current == null) { + if (current == null) break; - } - if (current.value != null) { - deepestWithValue = current; - } + + if (current.value != null) + end = i; } - if (i<=1 || deepestWithValue==root || deepestWithValue.value==null) + if (end==0) return null; - else - return s.subSequence(0, i - 1); + else + return s.subSequence(0, end+1); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
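The simplified getLongestMatch walks the trie along the input and remembers the end index of the last node that actually carries a value, so descending part-way into a longer entry no longer leaks trailing characters into the match. The same logic in a self-contained form, using a map-based mini trie in place of PrefixTrie's child array (a sentinel of -1 instead of 0 additionally keeps a single-character match at position 0 distinguishable from "no match"):

    import java.util.HashMap;
    import java.util.Map;

    // Self-contained illustration of the longest-match logic of PrefixTrie.getLongestMatch.
    public class LongestMatchSketch {
        static class Node {
            final Map<Character, Node> next = new HashMap<Character, Node>();
            boolean hasValue;
        }

        final Node root = new Node();

        void put(String key) {
            Node cur = root;
            for (char c : key.toCharArray()) {
                Node child = cur.next.get(c);
                if (child == null) {
                    child = new Node();
                    cur.next.put(c, child);
                }
                cur = child;
            }
            cur.hasValue = true;
        }

        String getLongestMatch(String s) {
            Node cur = root;
            int end = -1; // inclusive end index of the longest complete key seen so far
            for (int i = 0; i < s.length(); i++) {
                cur = cur.next.get(s.charAt(i));
                if (cur == null) break;
                if (cur.hasValue) end = i;
            }
            return (end < 0) ? null : s.substring(0, end + 1);
        }

        public static void main(String[] args) {
            LongestMatchSketch trie = new LongestMatchSketch();
            trie.put("has");
            trie.put("has child");
            System.out.println(trie.getLongestMatch("has children")); // "has child"
            System.out.println(trie.getLongestMatch("hash"));         // "has"
            System.out.println(trie.getLongestMatch("ha"));           // null
        }
    }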
From: <dfl...@us...> - 2013-09-09 11:21:55
|
Revision: 4104 http://sourceforge.net/p/dl-learner/code/4104 Author: dfleischhacker Date: 2013-09-09 11:21:51 +0000 (Mon, 09 Sep 2013) Log Message: ----------- Add test for entity linking and make use of lemmatizing optional in SimpleSemanticIndex Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-09 11:03:04 UTC (rev 4103) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-09 11:21:51 UTC (rev 4104) @@ -23,20 +23,42 @@ /** * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and + * {@code syntacticIndex} to query for documents containing these labels. This consutrctor initializes with + * full lemmatizing enabled. + * + * @param ontology ontology to retrieve entity labels from + * @param syntacticIndex index to query for documents containing the labels + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this(ontology, syntacticIndex, true); + } + + /** + * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and * {@code syntacticIndex} to query for documents containing these labels. * * @param ontology ontology to retrieve entity labels from * @param syntacticIndex index to query for documents containing the labels + * @param useWordNormalization whether word normalization should be used or not */ - public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, boolean useWordNormalization) { super(ontology); - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), - ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + SimpleEntityCandidatesTrie trie; + if (useWordNormalization) { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + } + else { + trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.DummyNameGenerator()); + } // trie.printTrie(); + TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie); + linguisticAnnotator.setNormalizeWords(useWordNormalization); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), - new TrieLinguisticAnnotator(trie))); + linguisticAnnotator)); } Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-09 11:03:04 UTC (rev 4103) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-09 11:21:51 UTC (rev 4104) @@ -3,16 +3,11 @@ */ package org.dllearner.algorithms.isle; -import java.io.File; 
-import java.io.IOException; -import java.text.DecimalFormat; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; +import com.google.common.io.Files; import org.dllearner.algorithms.celoe.CELOE; -import org.dllearner.algorithms.isle.index.AnnotatedDocument; -import org.dllearner.algorithms.isle.index.TextDocument; +import org.dllearner.algorithms.isle.index.*; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex; import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator; @@ -36,12 +31,14 @@ import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; -import com.google.common.base.Charsets; -import com.google.common.base.Joiner; -import com.google.common.io.Files; +import java.io.File; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; /** * Some tests for the ISLE algorithm. @@ -160,6 +157,39 @@ isle.start(); } + @Test + public void testEntityLinkingWithLemmatizing() throws Exception { + EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology, + new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); + LinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect); + WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology); + EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect); + SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator); + + Set<TextDocument> docs = createDocuments(); + for (TextDocument doc : docs) { + AnnotatedDocument annotated = semanticAnnotator.processDocument(doc); + System.out.println(annotated); + } + } + + @Test + public void testEntityLinkingWithSimpleStringMatching() throws Exception { + EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology, + new SimpleEntityCandidatesTrie.DummyNameGenerator()); + TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect); + linguisticAnnotator.setNormalizeWords(false); + WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology); + EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect); + SemanticAnnotator semanticAnnotator = new SemanticAnnotator(wsd, ecg, linguisticAnnotator); + + Set<TextDocument> docs = createDocuments(); + for (TextDocument doc : docs) { + AnnotatedDocument annotated = semanticAnnotator.processDocument(doc); + System.out.println(annotated); + } + } + @Test public void compareISLE() throws Exception { KnowledgeSource ks = new OWLAPIOntology(ontology); @@ -170,7 +200,7 @@ lp.setClassToDescribe(cls); lp.init(); - semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex); + semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex, false); semanticIndex.buildIndex(createDocuments()); relevance = new PMIRelevanceMetric(semanticIndex); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-09 11:03:06
|
Revision: 4103
http://sourceforge.net/p/dl-learner/code/4103
Author: dfleischhacker
Date: 2013-09-09 11:03:04 +0000 (Mon, 09 Sep 2013)
Log Message:
-----------
Prevent trailing space in generated annotations

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java

Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java	2013-09-09 10:43:46 UTC (rev 4102)
+++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java	2013-09-09 11:03:04 UTC (rev 4103)
@@ -138,7 +138,7 @@
 		if (i<=1 || deepestWithValue==root || deepestWithValue.value==null)
 			return null;
 		else
-			return s.subSequence(0, i);
+			return s.subSequence(0, i - 1);
 	}
 
 	/**
|
From: <lor...@us...> - 2013-09-09 10:43:49
|
Revision: 4102 http://sourceforge.net/p/dl-learner/code/4102 Author: lorenz_b Date: 2013-09-09 10:43:46 +0000 (Mon, 09 Sep 2013) Log Message: ----------- Added class to compute the cosine similarity for 2 documents using the Lucene API. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-09 10:18:57 UTC (rev 4101) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-09 10:43:46 UTC (rev 4102) @@ -57,7 +57,7 @@ @ConfigOption(name = "startNodeBonus", defaultValue="0.1") private double startNodeBonus = 0.1; - private double nlpBonusFactor = 0.0001; + private double nlpBonusFactor = 1; private Map<Entity, Double> entityRelevance; Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/VSMCosineDocumentSimilarity.java 2013-09-09 10:43:46 UTC (rev 4102) @@ -0,0 +1,238 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.RealVector; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Version; + +/** + * Imagine an N-dimensional space where N is the number of unique words in a pair of texts. Each of the two texts + * can be treated like a vector in this N-dimensional space. The distance between the two vectors is an indication + * of the similarity of the two texts. The cosine of the angle between the two vectors is the most common distance measure. 
+ * @author Lorenz Buehmann + * + */ +public class VSMCosineDocumentSimilarity { + + enum TermWeighting { + TF, TF_IDF + } + + public static final String CONTENT = "Content"; + public static final FieldType TYPE_STORED = new FieldType(); + + private final Set<String> terms = new HashSet<String>(); + private final RealVector v1; + private final RealVector v2; + + static { + TYPE_STORED.setIndexed(true); + TYPE_STORED.setTokenized(true); + TYPE_STORED.setStored(true); + TYPE_STORED.setStoreTermVectors(true); + TYPE_STORED.setStoreTermVectorPositions(true); + TYPE_STORED.freeze(); + } + + public VSMCosineDocumentSimilarity(String s1, String s2, TermWeighting termWeighting) throws IOException { + //create the index + Directory directory = createIndex(s1, s2); + IndexReader reader = DirectoryReader.open(directory); + //generate the document vectors + if(termWeighting == TermWeighting.TF){//based on term frequency only + //compute the term frequencies for document 1 + Map<String, Integer> f1 = getTermFrequencies(reader, 0); + //compute the term frequencies for document 2 + Map<String, Integer> f2 = getTermFrequencies(reader, 1); + reader.close(); + //map both documents to vector objects + v1 = getTermVectorInteger(f1); + v2 = getTermVectorInteger(f2); + } else if(termWeighting == TermWeighting.TF_IDF){//based on tf*idf weighting + //compute the term frequencies for document 1 + Map<String, Double> f1 = getTermWeights(reader, 0); + //compute the term frequencies for document 2 + Map<String, Double> f2 = getTermWeights(reader, 1); + reader.close(); + //map both documents to vector objects + v1 = getTermVectorDouble(f1); + v2 = getTermVectorDouble(f2); + } else { + v1 = null; + v2 = null; + } + } + + public VSMCosineDocumentSimilarity(String s1, String s2) throws IOException { + this(s1, s2, TermWeighting.TF_IDF); + } + + /** + * Returns the cosine document similarity between document {@code doc1} and {@code doc2} using TF-IDF as weighting for each term. + * The resulting similarity ranges from -1 meaning exactly opposite, to 1 meaning exactly the same, + * with 0 usually indicating independence, and in-between values indicating intermediate similarity or dissimilarity. + * @param s1 + * @param s2 + * @return + * @throws IOException + */ + public static double getCosineSimilarity(String doc1, String doc2) + throws IOException { + return new VSMCosineDocumentSimilarity(doc1, doc2).getCosineSimilarity(); + } + + /** + * Returns the cosine document similarity between document {@code doc1} and {@code doc2} based on {@code termWeighting} to compute the weight + * for each term in the documents. + * The resulting similarity ranges from -1 meaning exactly opposite, to 1 meaning exactly the same, + * with 0 usually indicating independence, and in-between values indicating intermediate similarity or dissimilarity. + * @param s1 + * @param s2 + * @return + * @throws IOException + */ + public static double getCosineSimilarity(String doc1, String doc2, TermWeighting termWeighting) + throws IOException { + return new VSMCosineDocumentSimilarity(doc1, doc2, termWeighting).getCosineSimilarity(); + } + + /** + * Create a in-memory Lucene index for both documents. 
+ * @param s1 + * @param s2 + * @return + * @throws IOException + */ + private Directory createIndex(String s1, String s2) throws IOException { + Directory directory = new RAMDirectory(); + Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_43); + IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, iwc); + addDocument(writer, s1); + addDocument(writer, s2); + writer.close(); + return directory; + } + + /** + * Add the document to the Lucene index. + * @param writer + * @param content + * @throws IOException + */ + private void addDocument(IndexWriter writer, String content) throws IOException { + Document doc = new Document(); + Field field = new Field(CONTENT, content, TYPE_STORED); + doc.add(field); + writer.addDocument(doc); + } + + /** + * Get the frequency of each term contained in the document. + * @param reader + * @param docId + * @return + * @throws IOException + */ + private Map<String, Integer> getTermFrequencies(IndexReader reader, int docId) + throws IOException { + Terms vector = reader.getTermVector(docId, CONTENT); + TermsEnum termsEnum = vector.iterator(null); + Map<String, Integer> frequencies = new HashMap<String, Integer>(); + BytesRef text = null; + while ((text = termsEnum.next()) != null) { + String term = text.utf8ToString(); + int freq = (int) termsEnum.totalTermFreq(); + frequencies.put(term, freq); + terms.add(term); + } + return frequencies; + } + + /** + * Get the weight(tf*idf) of each term contained in the document. + * @param reader + * @param docId + * @return + * @throws IOException + */ + private Map<String, Double> getTermWeights(IndexReader reader, int docId) + throws IOException { + Terms vector = reader.getTermVector(docId, CONTENT); + TermsEnum termsEnum = vector.iterator(null); + Map<String, Double> weights = new HashMap<String, Double>(); + BytesRef text = null; + while ((text = termsEnum.next()) != null) { + String term = text.utf8ToString(); + //get the term frequency + int tf = (int) termsEnum.totalTermFreq(); + //get the document frequency + int df = reader.docFreq(new Term(CONTENT, text)); + //compute the inverse document frequency + double idf = getIDF(reader.numDocs(), df); + //compute tf*idf + double weight = tf * idf; + + weights.put(term, weight); + terms.add(term); + } + return weights; + } + + private double getIDF(int totalNumberOfDocuments, int documentFrequency){ + return 1 + Math.log(totalNumberOfDocuments/documentFrequency); + } + + private double getCosineSimilarity() { + return (v1.dotProduct(v2)) / (v1.getNorm() * v2.getNorm()); + } + + private RealVector getTermVectorInteger(Map<String, Integer> map) { + RealVector vector = new ArrayRealVector(terms.size()); + int i = 0; + for (String term : terms) { + int value = map.containsKey(term) ? map.get(term) : 0; + vector.setEntry(i++, value); + } + return vector.mapDivide(vector.getL1Norm()); + } + + private RealVector getTermVectorDouble(Map<String, Double> map) { + RealVector vector = new ArrayRealVector(terms.size()); + int i = 0; + for (String term : terms) { + double value = map.containsKey(term) ? 
map.get(term) : 0d; + vector.setEntry(i++, value); + } + return vector.mapDivide(vector.getL1Norm()); + } + + public static void main(String[] args) throws Exception { + double cosineSimilarity = VSMCosineDocumentSimilarity.getCosineSimilarity("The king is here", "The salad is cold"); + System.out.println(cosineSimilarity); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
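For the two sentences in the main method the result can be checked by hand: SimpleAnalyzer lowercases and splits on non-letters, so over the vocabulary {the, king, is, here, salad, cold} the term-frequency vectors are (1,1,1,1,0,0) and (1,0,1,0,1,1), and the cosine is 2 / (2 * 2) = 0.5. The L1 normalization at the end of the class only rescales each vector and leaves the cosine unchanged. A dependency-free cross-check, which the class above should reproduce when constructed with TermWeighting.TF:

    // Hand-rolled cosine over raw term-frequency vectors, as a sanity check
    // for the Lucene-based implementation above (TF weighting).
    public class CosineCheck {
        static double cosine(double[] a, double[] b) {
            double dot = 0, na = 0, nb = 0;
            for (int i = 0; i < a.length; i++) {
                dot += a[i] * b[i];
                na += a[i] * a[i];
                nb += b[i] * b[i];
            }
            return dot / (Math.sqrt(na) * Math.sqrt(nb));
        }

        public static void main(String[] args) {
            // Vocabulary: [the, king, is, here, salad, cold]
            double[] d1 = {1, 1, 1, 1, 0, 0}; // "The king is here"
            double[] d2 = {1, 0, 1, 0, 1, 1}; // "The salad is cold"
            System.out.println(cosine(d1, d2)); // 0.5
        }
    }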
From: <dfl...@us...> - 2013-09-09 10:19:00
|
Revision: 4101 http://sourceforge.net/p/dl-learner/code/4101 Author: dfleischhacker Date: 2013-09-09 10:18:57 +0000 (Mon, 09 Sep 2013) Log Message: ----------- Remove constructor defaulting to DummyNameGenerator Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-09 10:12:56 UTC (rev 4100) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-09 10:18:57 UTC (rev 4101) @@ -13,16 +13,16 @@ PrefixTrie<Set<Entity>> trie; EntityTextRetriever entityTextRetriever; - /** - * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the - * actual ontology strings are added and no expansion is done. - * - * @param entityTextRetriever the text retriever to use - * @param ontology the ontology to get strings from - */ - public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this(entityTextRetriever, ontology, new DummyNameGenerator()); - } +// /** +// * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the +// * actual ontology strings are added and no expansion is done. +// * +// * @param entityTextRetriever the text retriever to use +// * @param ontology the ontology to get strings from +// */ +// public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { +// this(entityTextRetriever, ontology, new DummyNameGenerator()); +// } /** * Initialize the trie with strings from the provided ontology and use the given entity name generator This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-09 10:12:59
|
Revision: 4100
http://sourceforge.net/p/dl-learner/code/4100
Author: dfleischhacker
Date: 2013-09-09 10:12:56 +0000 (Mon, 09 Sep 2013)
Log Message:
-----------
Improve document content cleanup

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-09-09 10:12:21 UTC (rev 4099)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-09-09 10:12:56 UTC (rev 4100)
@@ -18,9 +18,10 @@
 	 */
 	public TextDocument(String content) {
 		this.rawContent = content;
-		this.content = content.replaceAll("[^A-Za-z ]", " ");
+		this.content = content.toLowerCase();
+		this.content = this.content.replaceAll("[^a-z ]", " ");
 		this.content = this.content.replaceAll("\\s{2,}", " ");
-		this.content = content.toLowerCase();
+		this.content = this.content.trim();
 	}
 
 	@Override
|
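With the steps in this order, a string like "A&B  Company #1" first becomes "a&b  company #1" by lowercasing, then "a b  company   " after replacing everything outside [a-z ] with spaces, and finally "a b company" after collapsing whitespace runs and trimming.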
From: <dfl...@us...> - 2013-09-09 10:12:23
|
Revision: 4099
http://sourceforge.net/p/dl-learner/code/4099
Author: dfleischhacker
Date: 2013-09-09 10:12:21 +0000 (Mon, 09 Sep 2013)
Log Message:
-----------
Add possibility to switch off word normalization in annotator

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-09-09 10:11:41 UTC (rev 4098)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java	2013-09-09 10:12:21 UTC (rev 4099)
@@ -4,19 +4,21 @@
 import java.util.Set;
 
 /**
- * Annotates a document using a prefix trie
+ * Annotates a document using a prefix trie.
  *
  * @author Andre Melo
 */
 public class TrieLinguisticAnnotator implements LinguisticAnnotator {
 	EntityCandidatesTrie candidatesTrie;
+	private boolean normalizeWords = true;
 
 	public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) {
 		this.candidatesTrie = candidatesTrie;
 	}
 
 	/**
-	 * Generates annotation based on trie's longest matching strings
+	 * Generates annotations based on the trie's longest matching strings. By default, the document's contents are
+	 * normalized using a lemmatizer. The normalization step can be disabled via {@link #setNormalizeWords(boolean)}.
 	 *
 	 * @param document the document to get annotations for
 	 * @return the set of annotation for the given document
 	 */
 	@Override
 	public Set<Annotation> annotate(Document document) {
@@ -26,8 +28,14 @@
 		String content = document.getContent();
 		Set<Annotation> annotations = new HashSet<Annotation>();
 		for (int i = 0; i < content.length(); i++) {
+			if (Character.isWhitespace(content.charAt(i))) {
+				continue;
+			}
 			String unparsed = content.substring(i);
-			String match = candidatesTrie.getLongestMatch(LinguisticUtil.getInstance().getNormalizedForm(unparsed));
+			if (normalizeWords) {
+				unparsed = LinguisticUtil.getInstance().getNormalizedForm(unparsed);
+			}
+			String match = candidatesTrie.getLongestMatch(unparsed);
 			if (match != null && !match.isEmpty()) {
 				Annotation annotation = new Annotation(document, i, match.length());
 				annotations.add(annotation);
@@ -37,4 +45,11 @@
 		return annotations;
 	}
 
+	/**
+	 * Sets whether the document's contents should be normalized or not.
+	 * @param enabled if true normalizing is enabled, otherwise disabled
+	 */
+	public void setNormalizeWords(boolean enabled) {
+		normalizeWords = enabled;
+	}
 }
|
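A short usage sketch of the new switch, mirroring the wiring of the r4104 test testEntityLinkingWithSimpleStringMatching shown further up; it assumes an already loaded OWLOntology in the variable ontology:

    // Annotator with word normalization disabled: the trie is matched against
    // the raw (lowercased) document text instead of lemmatized forms.
    EntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(
            new RDFSLabelEntityTextRetriever(ontology), ontology,
            new SimpleEntityCandidatesTrie.DummyNameGenerator());
    TrieLinguisticAnnotator annotator = new TrieLinguisticAnnotator(trie);
    annotator.setNormalizeWords(false);
    Set<Annotation> annotations = annotator.annotate(new TextDocument("..."));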
From: <dfl...@us...> - 2013-09-09 10:11:44
|
Revision: 4098
http://sourceforge.net/p/dl-learner/code/4098
Author: dfleischhacker
Date: 2013-09-09 10:11:41 +0000 (Mon, 09 Sep 2013)
Log Message:
-----------
Improve some documentation

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-09-09 10:10:55 UTC (rev 4097)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java	2013-09-09 10:11:41 UTC (rev 4098)
@@ -5,6 +5,7 @@
 
 /**
+ * A (non-semantic) annotation which represents an entity in a document by its offset and length.
  * @author Lorenz Buehmann
  *
 */

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java	2013-09-09 10:10:55 UTC (rev 4097)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticAnnotator.java	2013-09-09 10:11:41 UTC (rev 4098)
@@ -6,11 +6,15 @@
 import java.util.Set;
 
 /**
+ * Interface for generating (non-semantic) annotations for documents.
  * @author Lorenz Buehmann
- *
*/
 public interface LinguisticAnnotator {
-	
+	/**
+	 * Returns the set of annotations for the given document.
+	 * @param document the document to get annotations for
+	 * @return set of annotations for the given document
+	 */
 	Set<Annotation> annotate(Document document);
 }
|
From: <dfl...@us...> - 2013-09-09 10:10:58
|
Revision: 4097
http://sourceforge.net/p/dl-learner/code/4097
Author: dfleischhacker
Date: 2013-09-09 10:10:55 +0000 (Mon, 09 Sep 2013)
Log Message:
-----------
Fix NPE

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-09-06 14:10:41 UTC (rev 4096)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-09-09 10:10:55 UTC (rev 4097)
@@ -6,7 +6,6 @@
 import org.dllearner.algorithms.isle.WordNet;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 
 /**
@@ -27,6 +26,15 @@
         return instance;
     }
 
+    public LinguisticUtil() {
+        try {
+            lemmatizer = new DefaultLemmatizer();
+        }
+        catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
     /**
      * Processes the given string and puts camelCased words into single words.
      * @param camelCase the word containing camelcase to split
@@ -119,7 +127,7 @@
         boolean first = true;
 
         ArrayList<String> singleWords = new ArrayList<String>();
-        Collections.addAll(singleWords, word.split(" "));
+        Collections.addAll(singleWords, word.trim().split(" "));
 
         for (String w : singleWords) {
             try {
@@ -129,20 +137,29 @@
                 else {
                     res.append(" ");
                 }
-                if (lemmatizer == null) {
-                    res.append(w);
-                }
-                else {
-                    res.append(lemmatizer.lemmatize(w));
-                }
+                res.append(lemmatizeSingleWord(w));
             }
             catch (Exception e) {
-                e.printStackTrace();
+                throw new RuntimeException(e);
             }
         }
         return res.toString();
     }
 
+    private String lemmatizeSingleWord(String word) {
+        try {
+            if (lemmatizer == null) {
+                return word;
+            }
+            else {
+                return lemmatizer.lemmatize(word);
+            }
+        }
+        catch (NullPointerException e) {
+            return word;
+        }
+    }
+
     public static void main(String[] args) {
         System.out.println(LinguisticUtil.getInstance().getNormalizedForm("going"));
         for (String s : LinguisticUtil.getInstance().getWordsFromCamelCase("thisIsAClassWith1Name123")) {
|
From: <dfl...@us...> - 2013-09-06 14:10:46
|
Revision: 4096 http://sourceforge.net/p/dl-learner/code/4096 Author: dfleischhacker Date: 2013-09-06 14:10:41 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Remove debugging output Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 13:31:43 UTC (rev 4095) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 14:10:41 UTC (rev 4096) @@ -52,7 +52,6 @@ } addEntry(text, entity); for (String alternativeText : nameGenerator.getAlternativeText(text)) { - System.out.println("New alternative text for " + text + " --> " + alternativeText); addEntry(alternativeText, entity); } // Adds also composing words, e.g. for "has child", "has" and "child" are also added @@ -60,7 +59,6 @@ for (String subtext : text.split(" ")) { addEntry(subtext, entity); for (String alternativeText : nameGenerator.getAlternativeText(subtext)) { - System.out.println("New alternative text for " + subtext + " --> " + alternativeText); addEntry(alternativeText, entity); } //System.out.println("trie.add("+subtext+","++")"); @@ -169,8 +167,8 @@ ArrayList<String> res = new ArrayList<String>(); res.add(LinguisticUtil.getInstance().getNormalizedForm(word)); - for (String w : LinguisticUtil.getInstance() - .getTopSynonymsForWord(LinguisticUtil.getInstance().getNormalizedForm(word), maxNumberOfSenses)) { + for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord( + LinguisticUtil.getInstance().getNormalizedForm(word), maxNumberOfSenses)) { res.add(w.replaceAll("_", " ")); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-06 13:31:47
|
Revision: 4095 http://sourceforge.net/p/dl-learner/code/4095 Author: dfleischhacker Date: 2013-09-06 13:31:43 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Make LinguisticUtil singleton Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -6,23 +6,25 @@ import org.dllearner.algorithms.isle.WordNet; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; /** * Provides shortcuts to commonly used linguistic operations * @author Daniel Fleischhacker */ public class LinguisticUtil { + private static LinguisticUtil instance; + private static final WordNet wn = new WordNet(); private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; private static Lemmatizer lemmatizer; - static { - try { - lemmatizer = new DefaultLemmatizer(); + public static LinguisticUtil getInstance() { + if (instance == null) { + instance = new LinguisticUtil(); } - catch (Exception e) { - e.printStackTrace(); - } + return instance; } /** @@ -30,7 +32,7 @@ * @param camelCase the word containing camelcase to split * @return all words as camelcase contained in the given word */ - public static String[] getWordsFromCamelCase(String camelCase) { + public String[] getWordsFromCamelCase(String camelCase) { ArrayList<String> resultingWords = new ArrayList<String>(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < camelCase.length(); i++) { @@ -66,7 +68,7 @@ * @param underScored word to split at underscores * @return words contained in given word */ - public static String[] getWordsFromUnderscored(String underScored) { + public String[] getWordsFromUnderscored(String underScored) { return underScored.split("_"); } @@ -77,7 +79,7 @@ * @param word the word to retrieve synonyms for * @return synonyms for the given word */ - public static String[] getSynonymsForWord(String word) { + public String[] getSynonymsForWord(String word) { ArrayList<String> synonyms = new ArrayList<String>(); for (POS pos : RELEVANT_POS) { @@ -94,7 +96,7 @@ * @param n the number of senses to get lemmas for * @return synonyms for the given word */ - public static String[] getTopSynonymsForWord(String word, int n) { + public String[] getTopSynonymsForWord(String word, int n) { ArrayList<String> synonyms = new ArrayList<String>(); for (POS pos : RELEVANT_POS) { @@ -104,30 +106,48 @@ } /** - * Returns the normalized form of the given word. This method is only able to work with single words! If there is an - * error normalizing the given word, the word itself is returned. + * Returns the normalized form of the given word. 
If the word contains spaces, each part separated by spaces is + * normalized independently and joined afterwards. If there is an error normalizing the given word, the word itself + * is returned. * * @param word the word to get normalized form for * @return normalized form of the word or the word itself on an error */ - public static String getNormalizedForm(String word) { - try { - if (lemmatizer == null) { - return word; + public String getNormalizedForm(String word) { + StringBuilder res = new StringBuilder(); + + boolean first = true; + + ArrayList<String> singleWords = new ArrayList<String>(); + Collections.addAll(singleWords, word.split(" ")); + + for (String w : singleWords) { + try { + if (first) { + first = false; + } + else { + res.append(" "); + } + if (lemmatizer == null) { + res.append(w); + } + else { + res.append(lemmatizer.lemmatize(w)); + } } - return lemmatizer.lemmatize(word); + catch (Exception e) { + e.printStackTrace(); + } } - catch (Exception e) { - e.printStackTrace(); - } - return word; + return res.toString(); } public static void main(String[] args) { - System.out.println(getNormalizedForm("going")); - for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { + System.out.println(LinguisticUtil.getInstance().getNormalizedForm("going")); + for (String s : LinguisticUtil.getInstance().getWordsFromCamelCase("thisIsAClassWith1Name123")) { System.out.println(s); - for (String w : getSynonymsForWord(s)) { + for (String w : LinguisticUtil.getInstance().getSynonymsForWord(s)) { System.out.println(" --> " + w); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -42,7 +42,10 @@ Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); for (Annotation annotation : annotations) { Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); - SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); + if (candidateEntities == null || candidateEntities.size() == 0) { + continue; + } + SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); if(semanticAnnotation != null){ semanticAnnotations.add(semanticAnnotation); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -45,14 +45,14 @@ for (Entity entity : relevantText.keySet()) { for (String text : relevantText.get(entity)) { - text = StringUtils.join(LinguisticUtil.getWordsFromCamelCase(text), " "); - text = StringUtils.join(LinguisticUtil.getWordsFromUnderscored(text), " "); + text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromCamelCase(text), " "); + text = StringUtils.join(LinguisticUtil.getInstance().getWordsFromUnderscored(text), " "); if 
(text.trim().isEmpty()) { continue; } addEntry(text, entity); for (String alternativeText : nameGenerator.getAlternativeText(text)) { -// System.out.println("New alternative text for " + text + " --> " + alternativeText); + System.out.println("New alternative text for " + text + " --> " + alternativeText); addEntry(alternativeText, entity); } // Adds also composing words, e.g. for "has child", "has" and "child" are also added @@ -60,7 +60,7 @@ for (String subtext : text.split(" ")) { addEntry(subtext, entity); for (String alternativeText : nameGenerator.getAlternativeText(subtext)) { -// System.out.println("New alternative text for " + subtext + " --> " + alternativeText); + System.out.println("New alternative text for " + subtext + " --> " + alternativeText); addEntry(alternativeText, entity); } //System.out.println("trie.add("+subtext+","++")"); @@ -146,7 +146,7 @@ @Override public List<String> getAlternativeText(String word) { - return Arrays.asList(LinguisticUtil.getTopSynonymsForWord(word, maxNumberOfSenses)); + return Arrays.asList(LinguisticUtil.getInstance().getTopSynonymsForWord(word, maxNumberOfSenses)); } } @@ -167,10 +167,10 @@ @Override public List<String> getAlternativeText(String word) { ArrayList<String> res = new ArrayList<String>(); - res.add(LinguisticUtil.getNormalizedForm(word)); + res.add(LinguisticUtil.getInstance().getNormalizedForm(word)); - for (String w : LinguisticUtil - .getTopSynonymsForWord(LinguisticUtil.getNormalizedForm(word), maxNumberOfSenses)) { + for (String w : LinguisticUtil.getInstance() + .getTopSynonymsForWord(LinguisticUtil.getInstance().getNormalizedForm(word), maxNumberOfSenses)) { res.add(w.replaceAll("_", " ")); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -27,7 +27,7 @@ Set<Annotation> annotations = new HashSet<Annotation>(); for (int i = 0; i < content.length(); i++) { String unparsed = content.substring(i); - String match = candidatesTrie.getLongestMatch(unparsed); + String match = candidatesTrie.getLongestMatch(LinguisticUtil.getInstance().getNormalizedForm(unparsed)); if (match != null && !match.isEmpty()) { Annotation annotation = new Annotation(document, i, match.length()); annotations.add(annotation); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-06 12:48:08 UTC (rev 4094) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-06 13:31:43 UTC (rev 4095) @@ -91,8 +91,8 @@ if(textWithWeight.isEmpty() && useShortFormFallback){ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromCamelCase(shortForm)); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)).trim(); + shortForm = Joiner.on(" ").join(LinguisticUtil.getInstance().getWordsFromCamelCase(shortForm)); + shortForm = Joiner.on(" 
").join(LinguisticUtil.getInstance().getWordsFromUnderscored(shortForm)).trim(); textWithWeight.put(shortForm, weight); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-06 12:48:13
|
Revision: 4094
http://sourceforge.net/p/dl-learner/code/4094
Author: dfleischhacker
Date: 2013-09-06 12:48:08 +0000 (Fri, 06 Sep 2013)
Log Message:
-----------
Normalize documents to all lowercase

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-09-06 11:36:33 UTC (rev 4093)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java	2013-09-06 12:48:08 UTC (rev 4094)
@@ -20,6 +20,7 @@
 		this.rawContent = content;
 		this.content = content.replaceAll("[^A-Za-z ]", " ");
 		this.content = this.content.replaceAll("\\s{2,}", " ");
+		this.content = content.toLowerCase();
 	}
 
 	@Override
|
From: <dfl...@us...> - 2013-09-06 11:36:39
|
Revision: 4093 http://sourceforge.net/p/dl-learner/code/4093 Author: dfleischhacker Date: 2013-09-06 11:36:33 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Extend ontology words by synonyms Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -93,7 +93,7 @@ // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); if (iw != null) { Synset[] synsets = iw.getSenses(); - for (int i = 0; i < n; i++) { + for (int i = 0; i < Math.min(n, synsets.length); i++) { for (Word word : synsets[i].getWords()) { String c = word.getLemma(); if (!c.equals(s) && !c.contains(" ")) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -36,9 +36,13 @@ for (int i = 0; i < camelCase.length(); i++) { // we just ignore characters not matching the defined pattern char curChar = camelCase.charAt(i); - if (!Character.isLetter(curChar)) { + if (Character.isWhitespace(curChar)) { + sb.append(" "); continue; } + else if (!Character.isLetter(curChar)) { + continue; + } if (Character.isUpperCase(curChar)) { // found a new upper case letter resultingWords.add(sb.toString()); sb = new StringBuilder(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -1,5 +1,6 @@ package org.dllearner.algorithms.isle.index; +import org.apache.commons.lang.StringUtils; import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.datastructures.PrefixTrie; @@ -11,28 +12,62 @@ PrefixTrie<Set<Entity>> trie; EntityTextRetriever entityTextRetriever; - + + /** + * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the + * actual ontology strings are added and no expansion is done. 
+ * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + */ public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this.entityTextRetriever = entityTextRetriever; - buildTrie(ontology); + this(entityTextRetriever, ontology, new DummyNameGenerator()); } + + /** + * Initialize the trie with strings from the provided ontology and use the given entity name generator + * for generating alternative words. + * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + * @param nameGenerator the name generator to use for generating alternative words + */ + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology, + NameGenerator nameGenerator) { + this.entityTextRetriever = entityTextRetriever; + buildTrie(ontology, nameGenerator); + } - public void buildTrie(OWLOntology ontology) { + public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) { this.trie = new PrefixTrie<Set<Entity>>(); Map<Entity, Set<String>> relevantText = entityTextRetriever.getRelevantText(ontology); for (Entity entity : relevantText.keySet()) { + for (String text : relevantText.get(entity)) { - addEntry(text, entity); - // Adds also composing words, e.g. for "has child", "has" and "child" are also added - if (text.contains(" ")) { - for (String subtext : text.split(" ")) { - addEntry(subtext, entity); - //System.out.println("trie.add("+subtext+","++")"); - } - } - } - } + text = StringUtils.join(LinguisticUtil.getWordsFromCamelCase(text), " "); + text = StringUtils.join(LinguisticUtil.getWordsFromUnderscored(text), " "); + if (text.trim().isEmpty()) { + continue; + } + addEntry(text, entity); + for (String alternativeText : nameGenerator.getAlternativeText(text)) { +// System.out.println("New alternative text for " + text + " --> " + alternativeText); + addEntry(alternativeText, entity); + } + // Adds also composing words, e.g. for "has child", "has" and "child" are also added + if (text.contains(" ")) { + for (String subtext : text.split(" ")) { + addEntry(subtext, entity); + for (String alternativeText : nameGenerator.getAlternativeText(subtext)) { +// System.out.println("New alternative text for " + subtext + " --> " + alternativeText); + addEntry(alternativeText, entity); + } + //System.out.println("trie.add("+subtext+","++")"); + } + } + } + } } @Override @@ -62,7 +97,7 @@ public String toString() { String output = ""; Map<String,Set<Entity>> trieMap = trie.toMap(); - List<String> termsList = new ArrayList(trieMap.keySet()); + List<String> termsList = new ArrayList<String>(trieMap.keySet()); Collections.sort(termsList); for (String key : termsList) { output += key + ":\n"; @@ -78,4 +113,68 @@ } + public static interface NameGenerator { + /** + * Returns a list of possible alternative words for the given word + * + * @param text the text to return alternative words for + * @return alternative words for given word + */ + List<String> getAlternativeText(String text); + } + + public static class DummyNameGenerator implements NameGenerator { + @Override + public List<String> getAlternativeText(String word) { + return Collections.singletonList(word); + } + } + + /** + * Generates alternative texts by using WordNet synonyms. 
+ */ + public static class WordNetNameGenerator implements NameGenerator { + private int maxNumberOfSenses = 5; + + /** + * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses. + * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from + */ + public WordNetNameGenerator(int maxNumberOfSenses) { + this.maxNumberOfSenses = maxNumberOfSenses; + } + + @Override + public List<String> getAlternativeText(String word) { + return Arrays.asList(LinguisticUtil.getTopSynonymsForWord(word, maxNumberOfSenses)); + } + } + + /** + * Generates alternative texts by using WordNet synonym and lemmatizing of the original words + */ + public static class LemmatizingWordNetNameGenerator implements NameGenerator { + private int maxNumberOfSenses = 5; + + /** + * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses. + * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from + */ + public LemmatizingWordNetNameGenerator(int maxNumberOfSenses) { + this.maxNumberOfSenses = maxNumberOfSenses; + } + + @Override + public List<String> getAlternativeText(String word) { + ArrayList<String> res = new ArrayList<String>(); + res.add(LinguisticUtil.getNormalizedForm(word)); + + for (String w : LinguisticUtil + .getTopSynonymsForWord(LinguisticUtil.getNormalizedForm(word), maxNumberOfSenses)) { + res.add(w.replaceAll("_", " ")); + } + + return res; + } + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -5,36 +5,36 @@ /** * Annotates a document using a prefix trie + * * @author Andre Melo - * */ public class TrieLinguisticAnnotator implements LinguisticAnnotator { - - EntityCandidatesTrie candidatesTrie; - - public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { - this.candidatesTrie = candidatesTrie; - } - - /** - * Generates annotation based on trie's longest matching strings - * @param document - * @return - */ - @Override - public Set<Annotation> annotate(Document document) { - String content = document.getContent(); - Set<Annotation> annotations = new HashSet<Annotation>(); - for (int i=0; i<content.length(); i++) { - String unparsed = content.substring(i); - String match = candidatesTrie.getLongestMatch(unparsed); - if (match!=null && !match.isEmpty()) { - Annotation annotation = new Annotation(document, i, match.length()); - annotations.add(annotation); - i += match.length()-1; - } - } - return annotations; - } + EntityCandidatesTrie candidatesTrie; + public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { + this.candidatesTrie = candidatesTrie; + } + + /** + * Generates annotation based on trie's longest matching strings + * + * @param document the document to get annotations for + * @return the set of annotation for the given document + */ + @Override + public Set<Annotation> annotate(Document document) { + String content = document.getContent(); + Set<Annotation> annotations = new HashSet<Annotation>(); + for (int i = 0; i < content.length(); i++) { + String unparsed = content.substring(i); + String match = candidatesTrie.getLongestMatch(unparsed); 
+ if (match != null && !match.isEmpty()) { + Annotation annotation = new Annotation(document, i, match.length()); + annotations.add(annotation); + i += match.length() - 1; + } + } + return annotations; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-06 10:01:53 UTC (rev 4092) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-06 11:36:33 UTC (rev 4093) @@ -30,7 +30,8 @@ */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology); - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), + ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5)); // trie.printTrie(); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
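The NameGenerator contract introduced in r4093 is small enough to sketch end to end. In the self-contained sketch below, the hard-coded synonym table is an invented stand-in for the WordNet lookups behind WordNetNameGenerator and LemmatizingWordNetNameGenerator; the expansion logic mirrors buildTrie's behaviour of indexing the full label, each whitespace-separated token, and every alternative the generator produces.

import java.util.*;

public class NameGeneratorSketch {
    interface NameGenerator {
        List<String> getAlternativeText(String text);
    }

    static class MapBackedNameGenerator implements NameGenerator {
        private final Map<String, List<String>> synonyms;
        MapBackedNameGenerator(Map<String, List<String>> synonyms) { this.synonyms = synonyms; }
        @Override
        public List<String> getAlternativeText(String text) {
            List<String> alternatives = synonyms.get(text);
            return alternatives != null ? alternatives : Collections.singletonList(text);
        }
    }

    public static void main(String[] args) {
        Map<String, List<String>> synonyms = new HashMap<String, List<String>>();
        synonyms.put("child", Arrays.asList("kid", "offspring"));
        synonyms.put("has", Arrays.asList("possesses"));
        NameGenerator gen = new MapBackedNameGenerator(synonyms);

        // mirrors buildTrie: index "has child", then "has" and "child", plus all alternatives
        String text = "has child";
        Set<String> trieEntries = new LinkedHashSet<String>();
        trieEntries.add(text);
        trieEntries.addAll(gen.getAlternativeText(text));
        for (String token : text.split(" ")) {
            trieEntries.add(token);
            trieEntries.addAll(gen.getAlternativeText(token));
        }
        System.out.println(trieEntries); // [has child, has, possesses, child, kid, offspring]
    }
}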
From: <dfl...@us...> - 2013-09-06 10:01:56
|
Revision: 4092 http://sourceforge.net/p/dl-learner/code/4092 Author: dfleischhacker Date: 2013-09-06 10:01:53 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Add methods to get top n synonyms for words Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -1,78 +1,115 @@ package org.dllearner.algorithms.isle; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; -import net.didion.jwnl.data.IndexWord; -import net.didion.jwnl.data.POS; -import net.didion.jwnl.data.PointerTarget; -import net.didion.jwnl.data.PointerUtils; -import net.didion.jwnl.data.Synset; -import net.didion.jwnl.data.Word; +import net.didion.jwnl.data.*; import net.didion.jwnl.data.list.PointerTargetNode; import net.didion.jwnl.data.list.PointerTargetNodeList; import net.didion.jwnl.dictionary.Dictionary; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + public class WordNet { - - public Dictionary dict; - - public WordNet() { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(String configPath) { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(InputStream propertiesStream) { - try { - JWNL.initialize(propertiesStream); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public List<String> getBestSynonyms(POS pos, String s) { - - List<String> synonyms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + + public Dictionary dict; + + public WordNet() { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(String configPath) { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(InputStream propertiesStream) { + try { + JWNL.initialize(propertiesStream); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public static void main(String[] args) { + System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); + System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); + } + + public List<String> getBestSynonyms(POS pos, String s) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, 
s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { - synonyms.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return synonyms; - } + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { + synonyms.add(c); + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + + /** + * Returns the lemmas for the top {@code n} synsets of the given POS for the string {@code s}. + * + * @param pos the part of speech to retrieve synonyms for + * @param s the string to retrieve synonyms for + * @param n the number of synonyms to retrieve + * @return list of the lemmas of the top n synonyms of s + */ + public List<String> getTopSynonyms(POS pos, String s, int n) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (int i = 0; i < n; i++) { + for (Word word : synsets[i].getWords()) { + String c = word.getLemma(); + if (!c.equals(s) && !c.contains(" ")) { + synonyms.add(c); + } + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + public List<String> getAllSynonyms(POS pos, String s) { List<String> synonyms = new ArrayList<String>(); try { @@ -96,120 +133,124 @@ return synonyms; } - public List<String> getSisterTerms(POS pos, String s){ - List<String> sisterTerms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + public List<String> getSisterTerms(POS pos, String s) { + List<String> sisterTerms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - //System.out.println(synsets[0]); - PointerTarget[] pointerArr = synsets[0].getTargets(); - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return sisterTerms; - } - - public List<String> getAttributes(String s) { - - List<String> result = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { - result.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - - return result; - } - - public static void main(String[] args) { - System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); - System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); - } - - /** - * Funktion returns a List of Hypo and Hypernyms of a given string - * @param s Word for which you want to get Hypo and Hypersyms - * @return List of Hypo and Hypernyms - * @throws JWNLException - */ - public 
List<String> getRelatedNouns(String s) { - List<String> result = new ArrayList<String>(); - IndexWord word = null; - Synset sense=null; - try{ - word=dict.getIndexWord(POS.NOUN,s); - if(word!=null){ - sense = word.getSense(1); - //Synset sense = word.getSense(1); - - PointerTargetNodeList relatedListHypernyms = null; - PointerTargetNodeList relatedListHyponyms = null; - try { - relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - relatedListHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - Iterator i = relatedListHypernyms.iterator(); - while (i.hasNext()) { - PointerTargetNode related = (PointerTargetNode) i.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - - Iterator j = relatedListHyponyms.iterator(); - while (j.hasNext()) { - PointerTargetNode related = (PointerTargetNode) j.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - } - }catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - - - return result; - } - + if (iw != null) { + Synset[] synsets = iw.getSenses(); + //System.out.println(synsets[0]); + PointerTarget[] pointerArr = synsets[0].getTargets(); + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return sisterTerms; + } + + public List<String> getAttributes(String s) { + + List<String> result = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { + result.add(c); + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return result; + } + + /** + * Funktion returns a List of Hypo and Hypernyms of a given string + * + * @param s Word for which you want to get Hypo and Hypersyms + * @return List of Hypo and Hypernyms + * @throws JWNLException + */ + public List<String> getRelatedNouns(String s) { + List<String> result = new ArrayList<String>(); + IndexWord word = null; + Synset sense = null; + try { + word = dict.getIndexWord(POS.NOUN, s); + if (word != null) { + sense = word.getSense(1); + //Synset sense = word.getSense(1); + + PointerTargetNodeList relatedListHypernyms = null; + PointerTargetNodeList relatedListHyponyms = null; + try { + relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + try { + relatedListHyponyms = 
PointerUtils.getInstance().getDirectHyponyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + Iterator i = relatedListHypernyms.iterator(); + while (i.hasNext()) { + PointerTargetNode related = (PointerTargetNode) i.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + + Iterator j = relatedListHyponyms.iterator(); + while (j.hasNext()) { + PointerTargetNode related = (PointerTargetNode) j.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + } + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + return result; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -83,6 +83,23 @@ } /** + * Returns an array of the lemmas of the top {@code n} synonyms for the given word. Only synonyms for the POS in + * {@link #RELEVANT_POS} are returned. + * + * @param word the word to retrieve synonyms for + * @param n the number of senses to get lemmas for + * @return synonyms for the given word + */ + public static String[] getTopSynonymsForWord(String word, int n) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getTopSynonyms(pos, word, n)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + /** * Returns the normalized form of the given word. This method is only able to work with single words! If there is an * error normalizing the given word, the word itself is returned. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
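A note on the getTopSynonyms loop added in r4092: iterating for (int i = 0; i < n; i++) over synsets[i] throws an ArrayIndexOutOfBoundsException for any word with fewer than n senses; r4093 (above in this digest) adds the Math.min(n, synsets.length) guard. The sketch below shows the aggregation with the guard in place, using a plain String[][] of invented sense data as a stand-in for JWNL's Synset[] and Word[].

import java.util.*;

public class TopSynonymsSketch {
    static List<String> getTopSynonyms(String word, String[][] senses, int n) {
        List<String> synonyms = new ArrayList<String>();
        for (int i = 0; i < Math.min(n, senses.length); i++) { // guard from r4093
            for (String lemma : senses[i]) {
                // same filter as in WordNet.getTopSynonyms: skip the word itself and multi-word lemmas
                if (!lemma.equals(word) && !lemma.contains(" ")) {
                    synonyms.add(lemma);
                }
            }
        }
        return synonyms;
    }

    public static void main(String[] args) {
        // invented sense data; each inner array holds the lemmas of one synset
        String[][] sensesOfLearn = { { "learn", "larn", "acquire" }, { "learn", "hear", "get word" } };
        System.out.println(getTopSynonyms("learn", sensesOfLearn, 5)); // [larn, acquire, hear]
    }
}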
From: <dfl...@us...> - 2013-09-05 13:59:50
|
Revision: 4091 http://sourceforge.net/p/dl-learner/code/4091 Author: dfleischhacker Date: 2013-09-05 13:59:47 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Reuse computed match instead of recomputing it Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 10:17:34 UTC (rev 4090) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 13:59:47 UTC (rev 4091) @@ -1,18 +1,12 @@ package org.dllearner.algorithms.isle.index; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.datastructures.PrefixTrie; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.*; + public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { PrefixTrie<Set<Entity>> trie; @@ -62,7 +56,7 @@ @Override public String getLongestMatch(String s) { CharSequence match = trie.getLongestMatch(s); - return (match!=null) ? trie.getLongestMatch(s).toString() : null; + return (match!=null) ? match.toString() : null; } public String toString() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-05 10:17:37
|
Revision: 4090 http://sourceforge.net/p/dl-learner/code/4090 Author: lorenz_b Date: 2013-09-05 10:17:34 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Bugfixes. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/test/isle/swore/corpus/customer_requirement.txt Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -108,13 +108,13 @@ // the NLP based scoring - Description expression = node.getExpression();System.out.println(expression); + Description expression = node.getExpression();//System.out.println(expression); // OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); // Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); Set<Entity> entities = expression.getSignature(); double sum = 0; for (Entity entity : entities) { - double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0;System.out.println(entity + ":" + relevance); + double relevance = entityRelevance.containsKey(entity) ? 
entityRelevance.get(entity) : 0;//System.out.println(entity + ":" + relevance); if(!Double.isInfinite(relevance)){ sum += relevance; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -6,6 +6,7 @@ import java.util.HashSet; import java.util.Set; +import org.apache.log4j.Logger; import org.dllearner.algorithms.isle.index.Annotation; import org.dllearner.algorithms.isle.index.SemanticAnnotation; import org.dllearner.core.owl.Entity; @@ -28,6 +29,9 @@ */ public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{ + + private static final Logger logger = Logger.getLogger(SimpleWordSenseDisambiguation.class.getName()); + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); private OWLDataFactory df = new OWLDataFactoryImpl(); private OWLAnnotationProperty annotationProperty = df.getRDFSLabel(); @@ -44,17 +48,21 @@ */ @Override public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { - String token = annotation.getToken(); + logger.debug("Linguistic annotations:\n" + annotation); + logger.debug("Candidate entities:" + candidateEntities); + String token = annotation.getToken().trim(); //check if annotation token matches label of entity or the part behind #(resp. /) for (Entity entity : candidateEntities) { Set<String> labels = getLabels(entity); for (String label : labels) { if(label.equals(token)){ + logger.debug("Disambiguated entity: " + entity); return new SemanticAnnotation(annotation, entity); } } String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); if(annotation.equals(shortForm)){ + logger.debug("Disambiguated entity: " + entity); return new SemanticAnnotation(annotation, entity); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -36,6 +36,8 @@ private SyntacticIndex syntacticIndex; private Map<Entity, Set<AnnotatedDocument>> index; private OWLOntology ontology; + + private int size = 0; public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { @@ -77,6 +79,7 @@ } logger.info("Annotated document:" + annotatedDocument); } + size = documents.size(); logger.info("...done."); } @@ -145,6 +148,6 @@ * @return the total number of documents contained in the index */ public int getSize() { - return index.size(); + return size; } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 
09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -31,7 +31,7 @@ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology); SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); - trie.printTrie(); +// trie.printTrie(); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -43,18 +43,27 @@ Set<AnnotatedDocument> documentsB = index.getDocuments(entityB); Set<AnnotatedDocument> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); +// System.out.println("A:" + documentsA.size()); +// System.out.println("B:" + documentsB.size()); +// System.out.println("AB:" + documentsAB.size()); +// System.out.println(nrOfDocuments); double pA = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); double pB = nrOfDocuments == 0 ? 0 : ((double) documentsB.size() / (double) nrOfDocuments); double pAB = nrOfDocuments == 0 ? 0 : ((double) documentsAB.size() / (double) nrOfDocuments); - if(pA * pB == 0){ + if(pAB == 0 || pA * pB == 0){ return 0; } - double pmi = Math.log(pAB / pA * pB); + double pmi = Math.log(pAB / (pA * pB)); - double normalizedPMI = (pmi/-Math.log(pAB) + 1)/2; + double denominator = -Math.log(pAB); + if(denominator == 0){ + return 0; + } + double normalizedPMI = (pmi/denominator + 1)/2; + return normalizedPMI; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-09-05 10:17:34 UTC (rev 4090) @@ -31,6 +31,7 @@ } public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ + System.out.println(entity); Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); @@ -39,8 +40,10 @@ owlEntities.addAll(ontology.getObjectPropertiesInSignature()); Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); + otherEntities.remove(entity); for (Entity otherEntity : otherEntities) { double relevance = metric.getNormalizedRelevance(entity, otherEntity); + System.out.println(otherEntity + ":" + relevance); relevantEntities.put(otherEntity, relevance); } Modified: trunk/test/isle/swore/corpus/customer_requirement.txt =================================================================== --- trunk/test/isle/swore/corpus/customer_requirement.txt 2013-09-05 09:26:44 UTC (rev 4089) +++ trunk/test/isle/swore/corpus/customer_requirement.txt 2013-09-05 10:17:34 UTC (rev 4090) @@ -1 +1 @@ -A customer requirement is 
usually desired by at least one customer and usually specificed or captured via requirements engineering systems for later inspection by software developers and maintainers. The goal of involving customer in the requirements elicitation process is to improve the quality of the software (see also: requirements elicitation, open source software, E-Government). +A customer requirement is usually desired by at least one customer and usually specified or captured via requirements engineering systems for later inspection by software developers and maintainers. The goal of involving customer in the requirements elicitation process is to improve the quality of the software (see also: requirements elicitation, open source software, E-Government). This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
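The PMI fix in r4090 above is worth spelling out. The old expression Math.log(pAB / pA * pB) parses as (pAB / pA) * pB, while the intended quantity is log(pAB / (pA * pB)); the normalization (pmi / -log(pAB) + 1) / 2 then maps the result into [0, 1], with 0.5 meaning the two entities co-occur exactly as often as independence would predict. A minimal worked sketch with invented document counts:

public class PmiSketch {
    static double normalizedRelevance(int docsA, int docsB, int docsAB, int nrOfDocuments) {
        if (nrOfDocuments == 0) return 0;
        double pA = (double) docsA / nrOfDocuments;
        double pB = (double) docsB / nrOfDocuments;
        double pAB = (double) docsAB / nrOfDocuments;
        if (pAB == 0 || pA * pB == 0) return 0;
        double pmi = Math.log(pAB / (pA * pB)); // fixed grouping of the denominator
        double denominator = -Math.log(pAB);
        if (denominator == 0) return 0;         // pAB == 1: both entities occur in every document
        return (pmi / denominator + 1) / 2;     // in [0, 1]; 0.5 = independence
    }

    public static void main(String[] args) {
        // 100 documents: A in 20, B in 10, both in 8 -- strong positive association
        System.out.println(normalizedRelevance(20, 10, 8, 100)); // ~0.77
        // both in 2 (= 100 * 0.2 * 0.1) -- counts look independent
        System.out.println(normalizedRelevance(20, 10, 2, 100)); // 0.5
    }
}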
From: <and...@us...> - 2013-09-05 09:26:49
|
Revision: 4089 http://sourceforge.net/p/dl-learner/code/4089 Author: andremelo Date: 2013-09-05 09:26:44 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Adding contains to PrefixTrie and fixing addEntry in SimpleEntityCandidatesTrie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 09:21:08 UTC (rev 4088) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 09:26:44 UTC (rev 4089) @@ -43,8 +43,10 @@ @Override public void addEntry(String s, Entity e) { - Set<Entity> candidates = trie.get(s); - if (candidates==null) + Set<Entity> candidates; + if (trie.contains(s)) + candidates = trie.get(s); + else candidates = new HashSet<Entity>(); candidates.add(e); @@ -69,7 +71,7 @@ List<String> termsList = new ArrayList(trieMap.keySet()); Collections.sort(termsList); for (String key : termsList) { - output += key + ": ("; + output += key + ":\n"; for (Entity candidate: trieMap.get(key)) { output += "\t"+candidate+"\n"; } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 09:21:08 UTC (rev 4088) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 09:26:44 UTC (rev 4089) @@ -82,6 +82,21 @@ current.value = value; return oldValue; } + + public boolean contains(CharSequence s) { + Node<T> current = root; + for (int i = 0; i < s.length(); i++) { + int nodeIndex = s.charAt(i) - rangeOffset; + if (nodeIndex < 0 || rangeSize <= nodeIndex) { + return false; + } + current = current.next[nodeIndex]; + if (current == null) { + return false; + } + } + return (current.value!=null); + } /** {@inheritDoc} */ public T get(CharSequence s) { @@ -120,10 +135,10 @@ deepestWithValue = current; } } - if (i<=1 || deepestWithValue==root) + if (i<=1 || deepestWithValue==root || deepestWithValue.value==null) return null; else - return s.subSequence(1, i); + return s.subSequence(0, i); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
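Two behavioural points of the r4089 fix are easy to miss: getLongestMatch must return the longest *stored* prefix (hence the deepestWithValue.value == null check and subSequence(0, i) instead of (1, i)), and matching is purely character-prefix based, so token boundaries are not respected. The set-based helper below is an invented stand-in for the PrefixTrie, kept only to make the semantics concrete:

import java.util.*;

public class LongestStoredPrefixSketch {
    // longest stored string that is a prefix of s, or null if none
    static String longestMatch(Set<String> stored, String s) {
        String best = null;
        for (String key : stored) {
            if (s.startsWith(key) && (best == null || key.length() > best.length())) {
                best = key;
            }
        }
        return best;
    }

    public static void main(String[] args) {
        Set<String> stored = new HashSet<String>(Arrays.asList("has", "has child"));
        System.out.println(longestMatch(stored, "has children here")); // has child
        System.out.println(longestMatch(stored, "hash tables"));       // has -- prefix, not token, match
        System.out.println(longestMatch(stored, "none of these"));     // null
    }
}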
From: <jen...@us...> - 2013-09-05 09:21:11
|
Revision: 4088 http://sourceforge.net/p/dl-learner/code/4088 Author: jenslehmann Date: 2013-09-05 09:21:08 +0000 (Thu, 05 Sep 2013) Log Message: ----------- ISLE test refined Modified Paths: -------------- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-05 09:10:33 UTC (rev 4087) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-05 09:21:08 UTC (rev 4088) @@ -44,8 +44,10 @@ import com.google.common.io.Files; /** + * Some tests for the ISLE algorithm. + * * @author Lorenz Buehmann - * + * @author Jens Lehmann */ public class ISLETest { @@ -188,6 +190,7 @@ // run standard CELOE as reference CELOE celoe = new CELOE(lp, reasoner); +// celoe.setHeuristic(heuristic); celoe.setSearchTreeFile(testFolder + "searchTreeCELOE.txt"); celoe.setWriteSearchTree(true); celoe.setTerminateOnNoiseReached(true); @@ -203,7 +206,13 @@ System.out.println("expressions tested: " + isle.getClassExpressionTests() + " vs. " + celoe.getClassExpressionTests()); System.out.println("search tree nodes: " + isle.getNodes().size() + " vs. " + celoe.getNodes().size()); System.out.println("runtime: " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs()) + " vs. " + Helper.prettyPrintNanoSeconds(celoe.getTotalRuntimeNs())); - + + // only ISLE +// System.out.println("accuracy: " + df.format(100*isle.getCurrentlyBestAccuracy())+"%"); +// System.out.println("expressions tested: " + isle.getClassExpressionTests()); +// System.out.println("search tree nodes: " + isle.getNodes().size()); +// System.out.println("runtime: " + Helper.prettyPrintNanoSeconds(isle.getTotalRuntimeNs())); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-05 09:10:36
|
Revision: 4087 http://sourceforge.net/p/dl-learner/code/4087 Author: lorenz_b Date: 2013-09-05 09:10:33 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Added printing. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -81,7 +81,7 @@ @ComponentAnn(name="ISLE", shortName="isle", version=0.5, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.") public class ISLE extends AbstractCELA { - private static Logger logger = Logger.getLogger(CELOE.class); + private static Logger logger = Logger.getLogger(ISLE.class); // private CELOEConfigurator configurator; private boolean isRunning = false; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -107,19 +107,19 @@ score -= node.getRefinementCount() * nodeRefinementPenalty; - //the NLP based scoring -// Description expression = node.getExpression(); -//// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); -//// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); -// Set<Entity> entities = expression.getSignature(); -// double sum = 0; -// for (Entity entity : entities) { -// double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0; -// if(!Double.isInfinite(relevance)){ -// sum += relevance; -// } -// } -// score += nlpBonusFactor * sum; +// the NLP based scoring + Description expression = node.getExpression();System.out.println(expression); +// OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); +// Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); + Set<Entity> entities = expression.getSignature(); + double sum = 0; + for (Entity entity : entities) { + double relevance = entityRelevance.containsKey(entity) ? 
entityRelevance.get(entity) : 0;System.out.println(entity + ":" + relevance); + if(!Double.isInfinite(relevance)){ + sum += relevance; + } + } + score += nlpBonusFactor * sum; return score; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -75,6 +75,7 @@ } existingAnnotatedDocuments.add(annotatedDocument); } + logger.info("Annotated document:" + annotatedDocument); } logger.info("...done."); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -31,6 +31,7 @@ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology); SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + trie.printTrie(); setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-05 08:52:49 UTC (rev 4086) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-09-05 09:10:33 UTC (rev 4087) @@ -82,7 +82,7 @@ if (annotation.getValue() instanceof OWLLiteral) { OWLLiteral val = (OWLLiteral) annotation.getValue(); if (val.hasLang(language)) { - String label = val.getLiteral(); + String label = val.getLiteral().trim(); textWithWeight.put(label, weight); } } @@ -92,7 +92,7 @@ if(textWithWeight.isEmpty() && useShortFormFallback){ String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromCamelCase(shortForm)); - shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)); + shortForm = Joiner.on(" ").join(LinguisticUtil.getWordsFromUnderscored(shortForm)).trim(); textWithWeight.put(shortForm, weight); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
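For reference, the scoring change re-enabled in NLPHeuristic by r4087 adds nlpBonusFactor times the summed relevance of all entities in a node's class expression, skipping infinite values. The sketch below isolates just that arithmetic; the factor, base score, and entity names are invented for illustration, while the infinity guard and the score += nlpBonusFactor * sum step follow the diff above.

import java.util.HashMap;
import java.util.Map;

public class NlpBonusSketch {
    public static void main(String[] args) {
        double nlpBonusFactor = 0.1; // invented weight for the NLP term
        double score = 0.72;         // invented accuracy-based score before the bonus

        Map<String, Double> entityRelevance = new HashMap<String, Double>();
        entityRelevance.put("Customer", 0.8);
        entityRelevance.put("hasRequirement", Double.POSITIVE_INFINITY); // skipped below
        entityRelevance.put("Software", 0.3);

        double sum = 0;
        for (double relevance : entityRelevance.values()) {
            if (!Double.isInfinite(relevance)) { // same guard as in getNodeScore
                sum += relevance;
            }
        }
        score += nlpBonusFactor * sum;
        System.out.println(score); // ~0.83
    }
}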
From: <and...@us...> - 2013-09-05 08:52:51
|
Revision: 4086 http://sourceforge.net/p/dl-learner/code/4086 Author: andremelo Date: 2013-09-05 08:52:49 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Fixing getLongestMatch method and adding toString to SimpleEntityCandidatesTrie Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 08:42:59 UTC (rev 4085) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-09-05 08:52:49 UTC (rev 4086) @@ -63,17 +63,23 @@ return (match!=null) ? trie.getLongestMatch(s).toString() : null; } - public void printTrie() { - System.out.println("Printing tree content:"); + public String toString() { + String output = ""; Map<String,Set<Entity>> trieMap = trie.toMap(); List<String> termsList = new ArrayList(trieMap.keySet()); Collections.sort(termsList); for (String key : termsList) { - System.out.println(key); + output += key + ": ("; for (Entity candidate: trieMap.get(key)) { - System.out.println("\t"+candidate); + output += "\t"+candidate+"\n"; } } + return output; } + + public void printTrie() { + System.out.println(this.toString()); + + } } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 08:42:59 UTC (rev 4085) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-05 08:52:49 UTC (rev 4086) @@ -104,6 +104,7 @@ } public CharSequence getLongestMatch(CharSequence s) { + Node<T> deepestWithValue = root; Node<T> current = root; int i; for (i = 0; i < s.length(); i++) { @@ -115,8 +116,11 @@ if (current == null) { break; } + if (current.value != null) { + deepestWithValue = current; + } } - if (i<=1) + if (i<=1 || deepestWithValue==root) return null; else return s.subSequence(1, i); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
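Note that the return s.subSequence(1, i) introduced here silently drops the first character of every match; r4089 (above in this digest) corrects it to subSequence(0, i). A two-line comparison makes the off-by-one visible:

public class SubSequenceOffByOne {
    public static void main(String[] args) {
        CharSequence s = "child of mine";
        int i = 5; // the scan stopped after consuming the stored key "child"
        System.out.println(s.subSequence(1, i)); // hild  -- r4086 behaviour
        System.out.println(s.subSequence(0, i)); // child -- the r4089 fix
    }
}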