You can subscribe to this list here.
2007 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(120) |
Sep
(36) |
Oct
(116) |
Nov
(17) |
Dec
(44) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2008 |
Jan
(143) |
Feb
(192) |
Mar
(74) |
Apr
(84) |
May
(105) |
Jun
(64) |
Jul
(49) |
Aug
(120) |
Sep
(159) |
Oct
(156) |
Nov
(51) |
Dec
(28) |
2009 |
Jan
(17) |
Feb
(55) |
Mar
(33) |
Apr
(57) |
May
(54) |
Jun
(28) |
Jul
(6) |
Aug
(16) |
Sep
(38) |
Oct
(30) |
Nov
(26) |
Dec
(52) |
2010 |
Jan
(7) |
Feb
(91) |
Mar
(65) |
Apr
(2) |
May
(14) |
Jun
(25) |
Jul
(38) |
Aug
(48) |
Sep
(80) |
Oct
(70) |
Nov
(75) |
Dec
(77) |
2011 |
Jan
(68) |
Feb
(53) |
Mar
(51) |
Apr
(35) |
May
(65) |
Jun
(101) |
Jul
(29) |
Aug
(230) |
Sep
(95) |
Oct
(49) |
Nov
(110) |
Dec
(63) |
2012 |
Jan
(41) |
Feb
(42) |
Mar
(25) |
Apr
(46) |
May
(51) |
Jun
(44) |
Jul
(45) |
Aug
(29) |
Sep
(12) |
Oct
(9) |
Nov
(17) |
Dec
(2) |
2013 |
Jan
(12) |
Feb
(14) |
Mar
(7) |
Apr
(16) |
May
(54) |
Jun
(27) |
Jul
(11) |
Aug
(5) |
Sep
(85) |
Oct
(27) |
Nov
(37) |
Dec
(32) |
2014 |
Jan
(8) |
Feb
(29) |
Mar
(5) |
Apr
(3) |
May
(22) |
Jun
(3) |
Jul
(4) |
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <dfl...@us...> - 2013-09-03 15:26:21
|
Revision: 4035 http://sourceforge.net/p/dl-learner/code/4035 Author: dfleischhacker Date: 2013-09-03 15:26:18 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Add WordSenseDisambiguation class Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordSenseDisambiguation.java 2013-09-03 15:26:18 UTC (rev 4035) @@ -0,0 +1,36 @@ +package org.dllearner.algorithms.isle; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.util.Set; + +/** + * Abstract class for + * + * @author Daniel Fleischhacker + */ +public abstract class WordSenseDisambiguation { + OWLOntology ontology; + +  /** + * Initialize the word sense disambiguation to use the given ontology. + * + * @param ontology the ontology to disambiguate on + */ + public WordSenseDisambiguation(OWLOntology ontology) { + this.ontology = ontology; + } + + /** + * Chooses the correct entity for the given annotation from a set of candidate entities. + * + * @param annotation the annotation to find entity for + * @param candidateEntities the set of candidate entities + * @return semantic annotation containing the given annotation and the chosen entity + */ + public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities); +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-03 15:17:51
|
Revision: 4034 http://sourceforge.net/p/dl-learner/code/4034 Author: lorenz_b Date: 2013-09-03 15:17:48 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Added annotation. Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java 2013-09-03 15:17:48 UTC (rev 4034) @@ -0,0 +1,53 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class SemanticAnnotation extends Annotation{ + + private Entity entity; + + public SemanticAnnotation(Document getReferencedDocument, Entity entity, int offset, int length) { + super(getReferencedDocument, offset, length); + this.entity = entity; + } + + public Entity getEntity() { + return entity; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((entity == null) ? 0 : entity.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + SemanticAnnotation other = (SemanticAnnotation) obj; + if (entity == null) { + if (other.entity != null) + return false; + } else if (!entity.equals(other.entity)) + return false; + return true; + } + + + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-03 15:16:27
|
Revision: 4033 http://sourceforge.net/p/dl-learner/code/4033 Author: lorenz_b Date: 2013-09-03 15:16:21 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Added annotation. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java 2013-09-03 14:43:56 UTC (rev 4032) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java 2013-09-03 15:16:21 UTC (rev 4033) @@ -23,7 +23,7 @@ * Returns all annotations of the document. * @return */ - Set<Annotation> getAnnotations(); + Set<SemanticAnnotation> getAnnotations(); /** * Returns the annotation at the given position(offset) of given length. @@ -31,7 +31,7 @@ * @param length * @return */ - Annotation getAnnotation(int offset, int length); + SemanticAnnotation getAnnotation(int offset, int length); /** * Returns the number of occurrences of the given entity in this document. 
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java 2013-09-03 14:43:56 UTC (rev 4032) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java 2013-09-03 15:16:21 UTC (rev 4033) @@ -15,16 +15,16 @@ public class AnnotatedTextDocument implements AnnotatedDocument{ private TextDocument document; - private Set<Annotation> annotations; + private Set<SemanticAnnotation> annotations; private Set<Entity> entities; - public AnnotatedTextDocument(TextDocument document, Set<Annotation> annotations) { + public AnnotatedTextDocument(TextDocument document, Set<SemanticAnnotation> annotations) { this.document = document; this.annotations = annotations; entities = new HashSet<Entity>(); - for (Annotation annotation : annotations) { + for (SemanticAnnotation annotation : annotations) { entities.add(annotation.getEntity()); } } @@ -57,7 +57,7 @@ * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getAnnotations() */ @Override - public Set<Annotation> getAnnotations() { + public Set<SemanticAnnotation> getAnnotations() { return annotations; } @@ -65,8 +65,8 @@ * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getAnnotation(int, int) */ @Override - public Annotation getAnnotation(int offset, int length) { - for (Annotation annotation : annotations) { + public SemanticAnnotation getAnnotation(int offset, int length) { + for (SemanticAnnotation annotation : annotations) { if(annotation.getOffset() == offset && annotation.getLength() == length){ return annotation; } @@ -80,7 +80,7 @@ @Override public int getEntityFrequency(Entity entity) { int cnt = 0; - for (Annotation annotation : annotations) { + for (SemanticAnnotation annotation : annotations) { if(annotation.getEntity().equals(entity)){ 
cnt++; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-09-03 14:43:56 UTC (rev 4032) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-09-03 15:16:21 UTC (rev 4033) @@ -3,7 +3,6 @@ */ package org.dllearner.algorithms.isle.index; -import org.dllearner.core.owl.Entity; /** * @author Lorenz Buehmann @@ -12,13 +11,11 @@ public class Annotation { private Document getReferencedDocument; - private Entity entity; private int offset; private int length; - public Annotation(Document getReferencedDocument, Entity entity, int offset, int length) { + public Annotation(Document getReferencedDocument, int offset, int length) { this.getReferencedDocument = getReferencedDocument; - this.entity = entity; this.offset = offset; this.length = length; } @@ -27,10 +24,6 @@ return getReferencedDocument; } - public Entity getEntity() { - return entity; - } - public int getOffset() { return offset; } @@ -43,7 +36,6 @@ public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((entity == null) ? 0 : entity.hashCode()); result = prime * result + ((getReferencedDocument == null) ? 
0 : getReferencedDocument.hashCode()); result = prime * result + length; result = prime * result + offset; @@ -59,11 +51,6 @@ if (getClass() != obj.getClass()) return false; Annotation other = (Annotation) obj; - if (entity == null) { - if (other.entity != null) - return false; - } else if (!entity.equals(other.entity)) - return false; if (getReferencedDocument == null) { if (other.getReferencedDocument != null) return false; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 14:43:56 UTC (rev 4032) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 15:16:21 UTC (rev 4033) @@ -7,7 +7,8 @@ * * @author Daniel Fleischhacker */ -public abstract class SemanticAnnotator { +public class SemanticAnnotator { + OWLOntology ontology; /** @@ -25,5 +26,7 @@ * @param document the document to annotate * @return the given document extended with annotations */ - public abstract AnnotatedDocument processDocument(Document document); + public AnnotatedDocument processDocument(Document document){ + return null; + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 14:43:56 UTC (rev 4032) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 15:16:21 UTC (rev 4033) @@ -43,7 +43,7 @@ */ @Override public Set<AnnotatedDocument> getDocuments(Entity entity) { - Set<Document> documents = new HashSet<Document>(); + Set<AnnotatedDocument> documents = new 
HashSet<AnnotatedDocument>(); Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); for (Entry<String, Double> entry : relevantText.entrySet()) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-03 14:43:59
|
Revision: 4032 http://sourceforge.net/p/dl-learner/code/4032 Author: dfleischhacker Date: 2013-09-03 14:43:56 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Refactor SemanticAnnotator with predefined constructor Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 14:32:34 UTC (rev 4031) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 14:43:56 UTC (rev 4032) @@ -1,14 +1,29 @@ package org.dllearner.algorithms.isle.index; +import org.semanticweb.owlapi.model.OWLOntology; + /** * Provides methods to annotate documents. + * + * @author Daniel Fleischhacker */ -public interface SemanticAnnotator { +public abstract class SemanticAnnotator { + OWLOntology ontology; + /** + * Initialize this semantic annotator to use the entities from the provided ontology. + * + * @param ontology the ontology to use entities from + */ + public SemanticAnnotator(OWLOntology ontology) { + this.ontology = ontology; + } + + /** * Processes the given document and returns the annotated version of this document. * * @param document the document to annotate * @return the given document extended with annotations */ - AnnotatedDocument processDocument(Document document); + public abstract AnnotatedDocument processDocument(Document document); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-03 14:32:38
|
Revision: 4031 http://sourceforge.net/p/dl-learner/code/4031 Author: dfleischhacker Date: 2013-09-03 14:32:34 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Add method to process document to SemanticAnnotator Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 14:28:00 UTC (rev 4030) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-03 14:32:34 UTC (rev 4031) @@ -1,10 +1,14 @@ package org.dllearner.algorithms.isle.index; /** - * + * Provides methods to annotate documents. */ public interface SemanticAnnotator { /** - * Processes the given document and return + * Processes the given document and returns the annotated version of this document. + * + * @param document the document to annotate + * @return the given document extended with annotations */ + AnnotatedDocument processDocument(Document document); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-03 14:28:03
|
Revision: 4030 http://sourceforge.net/p/dl-learner/code/4030 Author: lorenz_b Date: 2013-09-03 14:28:00 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Implemented AnnotatedDocument interface. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedTextDocument.java 2013-09-03 14:28:00 UTC (rev 4030) @@ -0,0 +1,91 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.HashSet; +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class AnnotatedTextDocument implements AnnotatedDocument{ + + private TextDocument document; + private Set<Annotation> annotations; + private Set<Entity> entities; + + + public AnnotatedTextDocument(TextDocument document, Set<Annotation> annotations) { + this.document = document; + this.annotations = annotations; + + entities = new HashSet<Entity>(); + for (Annotation annotation : annotations) { + entities.add(annotation.getEntity()); + } + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Document#getContent() + */ + @Override + public String getContent() { + return document.getContent(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.Document#getRawContent() + */ + @Override + public String getRawContent() { + return document.getRawContent(); + } + + /* (non-Javadoc) + * @see 
org.dllearner.algorithms.isle.index.AnnotatedDocument#getContainedEntities() + */ + @Override + public Set<Entity> getContainedEntities() { + return entities; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getAnnotations() + */ + @Override + public Set<Annotation> getAnnotations() { + return annotations; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getAnnotation(int, int) + */ + @Override + public Annotation getAnnotation(int offset, int length) { + for (Annotation annotation : annotations) { + if(annotation.getOffset() == offset && annotation.getLength() == length){ + return annotation; + } + } + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.index.AnnotatedDocument#getEntityFrequency(org.dllearner.core.owl.Entity) + */ + @Override + public int getEntityFrequency(Entity entity) { + int cnt = 0; + for (Annotation annotation : annotations) { + if(annotation.getEntity().equals(entity)){ + cnt++; + } + } + return cnt; + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 14:19:50 UTC (rev 4029) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-03 14:28:00 UTC (rev 4030) @@ -3,6 +3,7 @@ */ package org.dllearner.algorithms.isle.index.semantic.simple; +import org.dllearner.algorithms.isle.index.AnnotatedDocument; import org.dllearner.algorithms.isle.index.Document; import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; @@ -41,7 +42,7 @@ * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) */ 
@Override - public Set<Document> getDocuments(Entity entity) { + public Set<AnnotatedDocument> getDocuments(Entity entity) { Set<Document> documents = new HashSet<Document>(); Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-09-03 14:19:53
|
Revision: 4029 http://sourceforge.net/p/dl-learner/code/4029 Author: dfleischhacker Date: 2013-09-03 14:19:50 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Refactor AnnotatedDocument and SemanticIndex Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java 2013-09-03 13:53:43 UTC (rev 4028) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java 2013-09-03 14:19:50 UTC (rev 4029) @@ -11,7 +11,7 @@ * @author Lorenz Buehmann * */ -public interface AnnotatedDocument { +public interface AnnotatedDocument extends Document { /** * Returns a set of entities which are contained in the document. @@ -33,4 +33,11 @@ */ Annotation getAnnotation(int offset, int length); + /** + * Returns the number of occurrences of the given entity in this document. 
+ * + * @param entity the entity to get frequency for + * @return number of occurrences of given entity in this document + */ + int getEntityFrequency(Entity entity); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-03 13:53:43 UTC (rev 4028) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-03 14:19:50 UTC (rev 4029) @@ -1,5 +1,6 @@ package org.dllearner.algorithms.isle.index.semantic; +import org.dllearner.algorithms.isle.index.AnnotatedDocument; import org.dllearner.algorithms.isle.index.Document; import org.dllearner.core.owl.Entity; @@ -14,12 +15,12 @@ */ public interface SemanticIndex { /** - * Returns the set of documents which reference the given entity using one of its surface forms. + * Returns the set of annotated documents which reference the given entity using one of its surface forms. * * @param entity entity to retrieve documents * @return documents referencing given entity */ - public Set<Document> getDocuments(Entity entity); + public Set<AnnotatedDocument> getDocuments(Entity entity); /** * Returns the number of documents for the given entity. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-09-03 13:53:45
|
Revision: 4028 http://sourceforge.net/p/dl-learner/code/4028 Author: lorenz_b Date: 2013-09-03 13:53:43 +0000 (Tue, 03 Sep 2013) Log Message: ----------- Added annotation class and interface for annotated document. Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/AnnotatedDocument.java 2013-09-03 13:53:43 UTC (rev 4028) @@ -0,0 +1,36 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public interface AnnotatedDocument { + + /** + * Returns a set of entities which are contained in the document. + * @return + */ + Set<Entity> getContainedEntities(); + + /** + * Returns all annotations of the document. + * @return + */ + Set<Annotation> getAnnotations(); + + /** + * Returns the annotation at the given position(offset) of given length. 
+ * @param offset + * @param length + * @return + */ + Annotation getAnnotation(int offset, int length); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-09-03 13:53:43 UTC (rev 4028) @@ -0,0 +1,81 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class Annotation { + + private Document getReferencedDocument; + private Entity entity; + private int offset; + private int length; + + public Annotation(Document getReferencedDocument, Entity entity, int offset, int length) { + this.getReferencedDocument = getReferencedDocument; + this.entity = entity; + this.offset = offset; + this.length = length; + } + + public Document getGetReferencedDocument() { + return getReferencedDocument; + } + + public Entity getEntity() { + return entity; + } + + public int getOffset() { + return offset; + } + + public int getLength() { + return length; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((entity == null) ? 0 : entity.hashCode()); + result = prime * result + ((getReferencedDocument == null) ? 
0 : getReferencedDocument.hashCode()); + result = prime * result + length; + result = prime * result + offset; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Annotation other = (Annotation) obj; + if (entity == null) { + if (other.entity != null) + return false; + } else if (!entity.equals(other.entity)) + return false; + if (getReferencedDocument == null) { + if (other.getReferencedDocument != null) + return false; + } else if (!getReferencedDocument.equals(other.getReferencedDocument)) + return false; + if (length != other.length) + return false; + if (offset != other.offset) + return false; + return true; + } + + + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-08-27 11:57:27
|
Revision: 4027 http://sourceforge.net/p/dl-learner/code/4027 Author: lorenz_b Date: 2013-08-27 11:57:25 +0000 (Tue, 27 Aug 2013) Log Message: ----------- Added new constructor. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-08-19 09:54:07 UTC (rev 4026) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-08-27 11:57:25 UTC (rev 4027) @@ -34,7 +34,6 @@ import java.util.TreeSet; import java.util.concurrent.TimeUnit; -import org.aksw.commons.util.strings.StringUtils; import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx; import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx; import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2; @@ -43,7 +42,6 @@ import org.aksw.jena_sparql_api.core.QueryExecutionFactory; import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp; import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel; -import org.aksw.jena_sparql_api.pagination.core.QueryExecutionFactoryPaginated; import org.dllearner.core.ComponentAnn; import org.dllearner.core.IndividualReasoner; import org.dllearner.core.SchemaReasoner; @@ -145,7 +143,24 @@ qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel()); } } + + public SPARQLReasoner(SparqlEndpointKS ks, CacheCoreEx cacheBackend) { + this.ks = ks; + classPopularityMap = new HashMap<NamedClass, Integer>(); + objectPropertyPopularityMap = new HashMap<ObjectProperty, Integer>(); + + if(ks.isRemote()){ + SparqlEndpoint endpoint = ks.getEndpoint(); + qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()); + CacheEx cacheFrontend = new CacheExImpl(cacheBackend); + qef = new 
QueryExecutionFactoryCacheEx(qef, cacheFrontend); +// qef = new QueryExecutionFactoryPaginated(qef, 10000); + } else { + qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel()); + } + } + public SPARQLReasoner(SparqlEndpointKS ks, ExtractionDBCache cache) { this(ks, cache.getCacheDirectory()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-08-19 09:54:10
|
Revision: 4026 http://sourceforge.net/p/dl-learner/code/4026 Author: dfleischhacker Date: 2013-08-19 09:54:07 +0000 (Mon, 19 Aug 2013) Log Message: ----------- TR API: More restructuring of class structure Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,99 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TotalHitCountCollector; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * @author Lorenz Buehmann - * - */ -public class LuceneSyntacticIndex implements SyntacticIndex { - - private IndexSearcher searcher; - private QueryParser parser; - private IndexReader indexReader; - private String searchField; - - public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { - this.indexReader 
= indexReader; - this.searchField = searchField; - searcher = new IndexSearcher(indexReader); - StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); - parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); - } - - public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { - this(DirectoryReader.open(directory), searchField); - } - - public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { - this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) - */ - @Override - public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { - Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); - try { - Query query = parser.parse(searchString); - ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; - for (int i = 0; i < result.length; i++) { - Document doc = searcher.doc(result[i].doc); - documents.add(new TextDocument(doc.get(searchField))); - } - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return documents; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() - */ - @Override - public int getSize() { - return indexReader.numDocs(); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) - */ - @Override - public int count(String searchString) { - try { - Query query = parser.parse(searchString); - TotalHitCountCollector results = new TotalHitCountCollector(); - searcher.search(query, results); - return results.getTotalHits(); - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return -1; - } - -} Deleted: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,94 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.*; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * Creates a Lucene Index for the labels if classes and properties. 
- * @author Lorenz Buehmann - * - */ -public class OWLOntologyLuceneSyntacticIndexCreator { - - private Directory directory = new RAMDirectory(); - private OWLOntology ontology; - private Set<OWLEntity> schemaEntities; - - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - private String language = "en"; - private String searchField; - - public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { - this.ontology = ontology; - this.annotationProperty = annotationProperty; - this.searchField = searchField; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - } - - public SyntacticIndex buildIndex() throws Exception{ - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter(directory, indexWriterConfig); - System.out.println( "Creating index ..." 
); - - Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>(); - FieldType stringType = new FieldType(StringField.TYPE_STORED); - stringType.setStoreTermVectors(false); - FieldType textType = new FieldType(TextField.TYPE_STORED); - textType.setStoreTermVectors(false); - - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - label = val.getLiteral(); - } - } - } - - if(label != null){ - org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); - luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); - luceneDocument.add(new Field(searchField, label, textType)); - luceneDocuments.add(luceneDocument); - } - - } - writer.addDocuments(luceneDocuments); - - System.out.println("Done."); - writer.close(); - - return new LuceneSyntacticIndex(directory, searchField); - } - - - -} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,10 @@ +package org.dllearner.algorithms.isle.index; + +/** + * + */ +public interface SemanticAnnotator { + /** + * Processes the given document and return + */ +} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,37 +0,0 @@ -package org.dllearner.algorithms.isle.index; - -import org.dllearner.core.owl.Entity; - -import java.util.Set; - -/** - * Interface for an index which is able to resolve a given entity's URI to the set of documents containing - * this entity, i.e., documents which contain words disambiguated to the given entity. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public interface SemanticIndex { - /** - * Returns the set of documents which reference the given entity using one of its surface forms. - * - * @param entity entity to retrieve documents - * @return documents referencing given entity - */ - public Set<Document> getDocuments(Entity entity); - - /** - * Returns the number of documents for the given entity. - * - * @param entity entity to return number of referencing documents for - * @return number of documents for the given entity in this index - */ - public int count(Entity entity); - - /** - * Returns the total number of documents contained in the index. - * - * @return the total number of documents contained in the index - */ - public int getSize(); -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,22 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -/** - * This gets a syntactic index and returns a semantic index by applying WSD etc. 
- * @author Lorenz Buehmann - * - */ -public class SemanticIndexCreator { - - private SyntacticIndex syntacticIndex; - - public SemanticIndexCreator(SyntacticIndex syntacticIndex) { - this.syntacticIndex = syntacticIndex; - } - - public SemanticIndex createSemanticIndex(){ - return null; - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,67 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -/** - * @author Lorenz Buehmann - * - */ -public class SimpleSemanticIndex implements SemanticIndex { - - private SyntacticIndex syntacticIndex; - private RDFSLabelEntityTextRetriever labelRetriever; - - /** - * - */ - public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - this.syntacticIndex = syntacticIndex; - labelRetriever = new RDFSLabelEntityTextRetriever(ontology); - } - - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) - */ - @Override - public Set<Document> getDocuments(Entity entity) { - Set<Document> documents = new HashSet<Document>(); - Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String label = entry.getKey(); - documents.addAll(syntacticIndex.getDocuments(label)); - } - - return 
documents; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) - */ - @Override - public int count(Entity entity) { - return getDocuments(entity).size(); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() - */ - @Override - public int getSize() { - return syntacticIndex.getSize(); - } - - - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,41 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import java.util.Set; - -/** - * Interface for a syntactic index, e.g., a basic string-based inverted index. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public interface SyntacticIndex { - - /** - * Returns a set of documents based on how the underlying index is processing the given - * search string. - * - * @param searchString query specifying the documents to retrieve - * @return set of documents retrieved based on the given query string - */ - Set<Document> getDocuments(String searchString); - - /** - * Returns the number of documents based on how the underlying index is processing the - * given search string. - * - * @param searchString query specifying the documents to include in the number of documents - * @return number of documents retrieved based on the given query string - */ - int count(String searchString); - - /** - * Returns the total number of documents contained in the index. 
- * - * @return the total number of documents contained in the index - */ - int getSize(); - -} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,38 @@ +package org.dllearner.algorithms.isle.index.semantic; + +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.core.owl.Entity; + +import java.util.Set; + +/** + * Interface for an index which is able to resolve a given entity's URI to the set of documents containing + * this entity, i.e., documents which contain words disambiguated to the given entity. + * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public interface SemanticIndex { + /** + * Returns the set of documents which reference the given entity using one of its surface forms. + * + * @param entity entity to retrieve documents + * @return documents referencing given entity + */ + public Set<Document> getDocuments(Entity entity); + + /** + * Returns the number of documents for the given entity. + * + * @param entity entity to return number of referencing documents for + * @return number of documents for the given entity in this index + */ + public int count(Entity entity); + + /** + * Returns the total number of documents contained in the index. 
+ * + * @return the total number of documents contained in the index + */ + public int getSize(); +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,18 @@ +package org.dllearner.algorithms.isle.index.semantic; + +import java.io.File; + +/** + * Provides methods for creating semantic indexes. + * + * @author Daniel Fleischhacker + */ +public interface SemanticIndexFactory { + /** + * Returns a newly created semantic index for the collection of files contained in the given {@code directory}. + * + * @param inputDirectory directory containing files to create index from + * @return semantic index for the files in the given input directory + */ + public SemanticIndex createIndex(File inputDirectory); +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,73 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.semantic.simple; + +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.core.owl.Entity; +import 
org.semanticweb.owlapi.model.OWLOntology; + +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +/** + * A semantic index which returns all documents which contain at least one of the labels assigned to a specific + * entity in a provided ontology. + * + * @author Lorenz Buehmann + */ +public class SimpleSemanticIndex implements SemanticIndex { + private SyntacticIndex syntacticIndex; + private RDFSLabelEntityTextRetriever labelRetriever; + + /** + * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and + * {@code syntacticIndex} to query for documents containing these labels. + * + * @param ontology ontology to retrieve entity labels from + * @param syntacticIndex index to query for documents containing the labels + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + labelRetriever = new RDFSLabelEntityTextRetriever(ontology); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<Document> getDocuments(Entity entity) { + Set<Document> documents = new HashSet<Document>(); + Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String label = entry.getKey(); + documents.addAll(syntacticIndex.getDocuments(label)); + } + + return documents; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) + */ + @Override + public int count(Entity entity) { + return getDocuments(entity).size(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() + */ + @Override + public int getSize() { + return syntacticIndex.getSize(); + } + + +} Added: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,40 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.semantic.simple; + +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndexFactory; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.io.File; + +/** + * This gets a syntactic index and returns a semantic index by applying WSD etc. + * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public class SimpleSemanticIndexFactory implements SemanticIndexFactory { + private OWLOntology ontology; + private SyntacticIndex syntacticIndex; + + /** + * Initializes a semantic index factory for creating simple semantic indexes. Simple semantic indexes use + * the labels assigned to an entity in {@code ontology} as its surface forms and return the all documents + * from the given syntactic index which contain at least one of these surface forms. 
+ * + * @param syntacticIndex the syntactic index in which occurrences of the labels are searched + * @param ontology the ontology retrieve the entities' labels from + */ + public SimpleSemanticIndexFactory(SyntacticIndex syntacticIndex, OWLOntology ontology) { + this.syntacticIndex = syntacticIndex; + this.ontology = ontology; + } + + @Override + public SemanticIndex createIndex(File inputDirectory) { + return new SimpleSemanticIndex(ontology, syntacticIndex); + } +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,100 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; +import org.dllearner.algorithms.isle.index.TextDocument; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public class 
LuceneSyntacticIndex implements SyntacticIndex { + + private IndexSearcher searcher; + private QueryParser parser; + private IndexReader indexReader; + private String searchField; + + public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + this.indexReader = indexReader; + this.searchField = searchField; + searcher = new IndexSearcher(indexReader); + StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); + parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + } + + public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { + this(DirectoryReader.open(directory), searchField); + } + + public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) + */ + @Override + public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { + Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); + try { + Query query = parser.parse(searchString); + ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(new TextDocument(doc.get(searchField))); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return documents; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + */ + @Override + public int getSize() { + return indexReader.numDocs(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) + */ + @Override + public int count(String searchString) { + try { + Query query = parser.parse(searchString); + 
TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + return results.getTotalHits(); + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return -1; + } + +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,94 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.*; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** + * Creates a Lucene Index for the labels if classes and properties. 
+ * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneSyntacticIndexCreator { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + } + + public SyntacticIndex buildIndex() throws Exception{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." 
); + + Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + + return new LuceneSyntacticIndex(directory, searchField); + } + + + +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,43 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.dllearner.algorithms.isle.index.Document; + +import java.util.Set; + +/** + * Interface for a syntactic index, e.g., a basic string-based inverted index. 
+ * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public interface SyntacticIndex { + + /** + * Returns a set of documents based on how the underlying index is processing the given + * search string. + * + * @param searchString query specifying the documents to retrieve + * @return set of documents retrieved based on the given query string + */ + Set<Document> getDocuments(String searchString); + + /** + * Returns the number of documents based on how the underlying index is processing the + * given search string. + * + * @param searchString query specifying the documents to include in the number of documents + * @return number of documents retrieved based on the given query string + */ + int count(String searchString); + + /** + * Returns the total number of documents contained in the index. + * + * @return the total number of documents contained in the index + */ + int getSize(); + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,12 +3,12 @@ */ package org.dllearner.algorithms.isle.metrics; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.semanticweb.owlapi.model.OWLEntity; + import java.util.HashMap; import java.util.Map; -import org.dllearner.algorithms.isle.index.SemanticIndex; -import org.semanticweb.owlapi.model.OWLEntity; - /** * @author Lorenz Buehmann * Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,12 +3,12 @@ */ package org.dllearner.algorithms.isle.metrics; -import java.util.Set; - -import org.dllearner.algorithms.isle.index.SemanticIndex; +import com.google.common.collect.Sets; +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.core.owl.Entity; -import com.google.common.collect.Sets; +import java.util.Set; /** * @author Lorenz Buehmann @@ -22,9 +22,9 @@ @Override public double getRelevance(Entity entityA, Entity entityB){ - Set<String> documentsA = index.getDocuments(entityA); - Set<String> documentsB = index.getDocuments(entityB); - Set<String> documentsAB = Sets.intersection(documentsA, documentsB); + Set<Document> documentsA = index.getDocuments(entityA); + Set<Document> documentsB = index.getDocuments(entityB); + Set<Document> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); double dPClass = nrOfDocuments == 0 ? 
0 : ((double) documentsA.size() / (double) nrOfDocuments); Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,13 +3,11 @@ */ package org.dllearner.algorithms.isle; -import java.io.File; -import java.util.Map; - -import org.dllearner.algorithms.isle.index.OWLOntologyLuceneSyntacticIndexCreator; -import org.dllearner.algorithms.isle.index.SemanticIndex; -import org.dllearner.algorithms.isle.index.SimpleSemanticIndex; -import org.dllearner.algorithms.isle.index.SyntacticIndex; +import com.google.common.base.Joiner; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex; +import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator; +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; import org.dllearner.algorithms.isle.metrics.RelevanceMetric; import org.dllearner.algorithms.isle.metrics.RelevanceUtils; @@ -28,10 +26,10 @@ import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; -import com.google.common.base.Joiner; +import java.io.File; +import java.util.Map; /** * @author Lorenz Buehmann This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-08-19 09:53:17
|
Revision: 4025 http://sourceforge.net/p/dl-learner/code/4025 Author: dfleischhacker Date: 2013-08-19 09:53:14 +0000 (Mon, 19 Aug 2013) Log Message: ----------- TR API: Add equals and hashCode to TextDocument Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-08-19 09:52:57 UTC (rev 4024) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-08-19 09:53:14 UTC (rev 4025) @@ -8,6 +8,12 @@ public class TextDocument implements Document { private String content; + + /** + * Initializes a text document with the given content. + * + * @param content content of this text document + */ public TextDocument(String content) { this.content = content; } @@ -26,4 +32,27 @@ public String getRawContent() { return content; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + TextDocument that = (TextDocument) o; + + if (!content.equals(that.content)) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return content.hashCode(); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-08-19 09:53:00
|
Revision: 4024 http://sourceforge.net/p/dl-learner/code/4024 Author: dfleischhacker Date: 2013-08-19 09:52:57 +0000 (Mon, 19 Aug 2013) Log Message: ----------- TR API: Document instead of String for documents Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Document.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Document.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Document.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Document.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -0,0 +1,24 @@ +package org.dllearner.algorithms.isle.index; + +/** + * Interface for classes representing documents. + * + * @author Daniel Fleischhacker + */ +public interface Document { + /** + * Returns the cleaned content of this document represented as a string. This returns the cleaned content, + * thus markup and other structure is removed. The raw content can be retrieved using {@link #getRawContent}. + * Methods for retrieving more specialized content formats might be implemented by the actual implementations. 
+ * + * @return this document's text content + */ + public String getContent(); + + /** + * Returns the uncleaned content, i.e., as originally retrieved, of this document represented as string. + * + * @return uncleaned content of this document + */ + public String getRawContent(); +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-15 09:42:17 UTC (rev 4023) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -3,11 +3,6 @@ */ package org.dllearner.algorithms.isle.index; -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; @@ -22,6 +17,11 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -41,26 +41,26 @@ parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); } - public LuceneSyntacticIndex(Directory directory, String seachField) throws Exception { - this(DirectoryReader.open(directory), seachField); + public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { + this(DirectoryReader.open(directory), searchField); } - public LuceneSyntacticIndex(String indexDirectory, String seachField) throws Exception { - this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), seachField); + public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new 
File(indexDirectory))), searchField); } /* (non-Javadoc) * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) */ @Override - public Set<String> getDocuments(String searchString) { - Set<String> documents = new HashSet<String>(); + public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { + Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); try { Query query = parser.parse(searchString); ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; for (int i = 0; i < result.length; i++) { Document doc = searcher.doc(result[i].doc); - documents.add(doc.get(searchField)); + documents.add(new TextDocument(doc.get(searchField))); } } catch (ParseException e) { e.printStackTrace(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-15 09:42:17 UTC (rev 4023) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -3,13 +3,8 @@ */ package org.dllearner.algorithms.isle.index; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; @@ -19,16 +14,14 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.OWLAnnotation; -import 
org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.*; import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + /** * Creates a Lucene Index for the labels if classes and properties. * @author Lorenz Buehmann @@ -61,8 +54,8 @@ IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter writer = new IndexWriter(directory, indexWriterConfig); System.out.println( "Creating index ..." ); - - Set<Document> luceneDocuments = new HashSet<Document>(); + + Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>(); FieldType stringType = new FieldType(StringField.TYPE_STORED); stringType.setStoreTermVectors(false); FieldType textType = new FieldType(TextField.TYPE_STORED); @@ -81,7 +74,7 @@ } if(label != null){ - Document luceneDocument = new Document(); + org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); luceneDocument.add(new Field(searchField, label, textType)); luceneDocuments.add(luceneDocument); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-15 09:42:17 UTC (rev 4023) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -1,35 +1,37 @@ -/** - * - */ package org.dllearner.algorithms.isle.index; 
+import org.dllearner.core.owl.Entity; + import java.util.Set; -import org.dllearner.core.owl.Entity; - /** - * This class + * Interface for an index which is able to resolve a given entity's URI to the set of documents containing + * this entity, i.e., documents which contain words disambiguated to the given entity. + * * @author Lorenz Buehmann - * + * @author Daniel Fleischhacker */ public interface SemanticIndex { + /** + * Returns the set of documents which reference the given entity using one of its surface forms. + * + * @param entity entity to retrieve documents + * @return documents referencing given entity + */ + public Set<Document> getDocuments(Entity entity); - /** - * This method returns a set of documents for the given entity. - * @param entity - * @return - */ - Set<String> getDocuments(Entity entity); - /** - * This method returns the number of documents for the given entity. - * @param entity - * @return - */ - int count(Entity entity); - /** - * This methods returns the total number of documents contained in the index. - * @return the total number of documents contained in the index - */ - int getSize(); + /** + * Returns the number of documents for the given entity. + * + * @param entity entity to return number of referencing documents for + * @return number of documents for the given entity in this index + */ + public int count(Entity entity); + /** + * Returns the total number of documents contained in the index. 
+ * + * @return the total number of documents contained in the index + */ + public int getSize(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-15 09:42:17 UTC (rev 4023) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -3,20 +3,20 @@ */ package org.dllearner.algorithms.isle.index; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - /** * @author Lorenz Buehmann * */ -public class SimpleSemanticIndex implements SemanticIndex{ +public class SimpleSemanticIndex implements SemanticIndex { private SyntacticIndex syntacticIndex; private RDFSLabelEntityTextRetriever labelRetriever; @@ -34,8 +34,8 @@ * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) */ @Override - public Set<String> getDocuments(Entity entity) { - Set<String> documents = new HashSet<String>(); + public Set<Document> getDocuments(Entity entity) { + Set<Document> documents = new HashSet<Document>(); Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); for (Entry<String, Double> entry : relevantText.entrySet()) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-15 09:42:17 UTC (rev 4023) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -1,32 +1,41 @@ /** - * + * */ package org.dllearner.algorithms.isle.index; import java.util.Set; /** + * Interface for a syntactic index, e.g., a basic string-based inverted index. + * * @author Lorenz Buehmann - * + * @author Daniel Fleischhacker */ public interface SyntacticIndex { - /** - * This method returns a set of documents based on how the underlying index is processing the given search string. - * @param searchString - * @return - */ - Set<String> getDocuments(String searchString); - /** - * This method returns the number of documents based on how the underlying index is processing the given search string. - * @param searchString - * @return - */ - int count(String searchString); - /** - * This methods returns the total number of documents contained in the index. - * @return the total number of documents contained in the index - */ - int getSize(); - + /** + * Returns a set of documents based on how the underlying index is processing the given + * search string. + * + * @param searchString query specifying the documents to retrieve + * @return set of documents retrieved based on the given query string + */ + Set<Document> getDocuments(String searchString); + + /** + * Returns the number of documents based on how the underlying index is processing the + * given search string. + * + * @param searchString query specifying the documents to include in the number of documents + * @return number of documents retrieved based on the given query string + */ + int count(String searchString); + + /** + * Returns the total number of documents contained in the index. 
+ * + * @return the total number of documents contained in the index + */ + int getSize(); + } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TextDocument.java 2013-08-19 09:52:57 UTC (rev 4024) @@ -0,0 +1,29 @@ +package org.dllearner.algorithms.isle.index; + +/** + * A simple text document without further formatting or markup. + * + * @author Daniel Fleischhacker + */ +public class TextDocument implements Document { + private String content; + + public TextDocument(String content) { + this.content = content; + } + + @Override + public String getContent() { + return content; + } + + /** + * The text content of this document. Returns the same data as {@link #getContent()}. + * + * @return text content of this document + */ + @Override + public String getRawContent() { + return content; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-08-15 09:42:20
|
Revision: 4023 http://sourceforge.net/p/dl-learner/code/4023 Author: lorenz_b Date: 2013-08-15 09:42:17 +0000 (Thu, 15 Aug 2013) Log Message: ----------- Added convenience method to get axioms above threshold. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2013-07-29 08:12:30 UTC (rev 4022) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2013-08-15 09:42:17 UTC (rev 4023) @@ -220,6 +220,14 @@ return bestAxioms; } + public List<Axiom> getCurrentlyBestAxioms(double accuracyThreshold) { + List<Axiom> bestAxioms = new ArrayList<Axiom>(); + for(EvaluatedAxiom evAx : getCurrentlyBestEvaluatedAxioms(accuracyThreshold)){ + bestAxioms.add(evAx.getAxiom()); + } + return bestAxioms; + } + public List<EvaluatedAxiom> getCurrentlyBestEvaluatedAxioms() { return currentlyBestAxioms; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-29 08:12:38
|
Revision: 4022 http://sourceforge.net/p/dl-learner/code/4022 Author: lorenz_b Date: 2013-07-29 08:12:30 +0000 (Mon, 29 Jul 2013) Log Message: ----------- Cont. ISLE Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -67,7 +67,7 @@ } catch (IOException e) { e.printStackTrace(); } - return null; + return documents; } /* (non-Javadoc) Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -3,9 +3,14 @@ */ package org.dllearner.algorithms.isle.index; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; +import 
org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; /** * @author Lorenz Buehmann @@ -13,13 +18,32 @@ */ public class SimpleSemanticIndex implements SemanticIndex{ + private SyntacticIndex syntacticIndex; + private RDFSLabelEntityTextRetriever labelRetriever; + /** + * + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + labelRetriever = new RDFSLabelEntityTextRetriever(ontology); + } + + /* (non-Javadoc) * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) */ @Override public Set<String> getDocuments(Entity entity) { - return null; + Set<String> documents = new HashSet<String>(); + Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String label = entry.getKey(); + documents.addAll(syntacticIndex.getDocuments(label)); + } + + return documents; } /* (non-Javadoc) @@ -27,7 +51,7 @@ */ @Override public int count(Entity entity) { - return 0; + return getDocuments(entity).size(); } /* (non-Javadoc) @@ -35,7 +59,7 @@ */ @Override public int getSize() { - return 0; + return syntacticIndex.getSize(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -8,6 +8,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Logger; import org.dllearner.algorithms.qtl.datastructures.QueryTree; import 
org.dllearner.learningproblems.Heuristics; @@ -16,6 +17,9 @@ public class NoiseSensitiveLGG<N> { + + private static final Logger logger = Logger.getLogger(NoiseSensitiveLGG.class.getName()); + private LGGGenerator<N> lggGenerator = new LGGGeneratorImpl<N>(); private Queue<EvaluatedQueryTree<N>> todoList; @@ -29,7 +33,8 @@ Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); init(trees); EvaluatedQueryTree<N> currentElement; - do{System.out.println("TODO list size: " + todoList.size()); + do{ + logger.trace("TODO list size: " + todoList.size()); //pick best element from todo list currentElement = todoList.poll(); for (QueryTree<N> example : currentElement.getUncoveredExamples()) { @@ -39,33 +44,36 @@ QueryTree<N> lgg = lggGenerator.getLGG(tree, example); lggMon.stop(); //compute examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredExamples = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> queryTree : trees) { - subMon.start(); - boolean subsumed = queryTree.isSubsumedBy(lgg); - subMon.stop(); - if(!subsumed){ - uncoveredExamples.add(queryTree); - } - } + Collection<QueryTree<N>> uncoveredExamples = getUncoveredTrees(lgg, trees); //compute score double score = Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); //add to todo list, if not already contained in todo list or solution list EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredExamples, score); todo(solution); } - System.out.println("LGG time: " + lggMon.getTotal() + "ms"); - System.out.println("Avg. LGG time: " + lggMon.getAvg() + "ms"); - System.out.println("#LGG computations: " + lggMon.getHits()); - System.out.println("Subsumption test time: " + subMon.getTotal() + "ms"); - System.out.println("Avg. 
subsumption test time: " + subMon.getAvg() + "ms"); - System.out.println("#Subsumption tests: " + subMon.getHits()); solutions.add(currentElement); // todoList.remove(currentElement); } while(!terminationCriteriaSatisfied()); + logger.trace("LGG time: " + lggMon.getTotal() + "ms"); + logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); + logger.trace("#LGG computations: " + lggMon.getHits()); + logger.trace("Subsumption test time: " + subMon.getTotal() + "ms"); + logger.trace("Avg. subsumption test time: " + subMon.getAvg() + "ms"); + logger.trace("#Subsumption tests: " + subMon.getHits()); return new ArrayList<EvaluatedQueryTree<N>>(solutions); } + private Collection<QueryTree<N>> getUncoveredTrees(QueryTree<N> tree, List<QueryTree<N>> allTrees){ + Collection<QueryTree<N>> uncoveredTrees = new ArrayList<QueryTree<N>>(); + for (QueryTree<N> queryTree : allTrees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(!subsumed){ + uncoveredTrees.add(queryTree); + } + } + return uncoveredTrees; + } + private void init(List<QueryTree<N>> trees){ todoList = new PriorityQueue<EvaluatedQueryTree<N>>(); solutions = new TreeSet<EvaluatedQueryTree<N>>(); @@ -76,9 +84,11 @@ for (QueryTree<N> queryTree : trees) {//System.out.println(queryTree.getStringRepresentation()); boolean distinct = true; for (QueryTree<N> otherTree : distinctTrees) { - if(queryTree.isSubsumedBy(otherTree)){ - distinct = false; - break; + if(!queryTree.equals(otherTree)){ + if(queryTree.isSameTreeAs(otherTree)){ + distinct = false; + break; + } } } if(distinct){ @@ -86,9 +96,8 @@ } } for (QueryTree<N> queryTree : distinctTrees) { - Collection<QueryTree<N>> uncoveredExamples = new ArrayList<QueryTree<N>>(distinctTrees); - uncoveredExamples.remove(queryTree); - double score = (trees.size() - uncoveredExamples.size()) / (double)trees.size(); + Collection<QueryTree<N>> uncoveredExamples = getUncoveredTrees(queryTree, trees); + double score = 
Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); todoList.add(new EvaluatedQueryTree<N>(queryTree, uncoveredExamples, score)); } } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeNTriplesAlt) ; + httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, Lang.NTRIPLES.getName()) ; + model.read(in, null, Lang.TURTLE.getName()) ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -82,6 +82,8 @@ private Set<? 
extends OWLEntity> variableEntities = new HashSet<OWLEntity>(); private VariablesMapping mapping; + private boolean ignoreGenericTypeStatements = true; + private OWLClassExpression expr; public OWLClassExpressionToSPARQLConverter(VariablesMapping mapping) { this.mapping = mapping; @@ -96,6 +98,7 @@ } public String convert(String rootVariable, OWLClassExpression expr){ + this.expr = expr; reset(); variables.push(rootVariable); expr.accept(this); @@ -261,7 +264,9 @@ @Override public void visit(OWLClass ce) { - sparql += triple(variables.peek(), "a", render(ce)); + if(ce.equals(expr) || (ignoreGenericTypeStatements && !ce.isOWLThing())){ + sparql += triple(variables.peek(), "a", render(ce)); + } } @Override @@ -577,7 +582,9 @@ @Override public void visit(OWLDatatype node) { - sparql += "FILTER(DATATYPE(" + variables.peek() + "=<" + node.getIRI().toString() + ">))"; + if(ignoreGenericTypeStatements && !node.isRDFPlainLiteral() && !node.isTopDatatype()){ + sparql += "FILTER(DATATYPE(" + variables.peek() + "=<" + node.getIRI().toString() + ">))"; + } } @Override Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -6,6 +6,15 @@ import java.io.File; import java.util.Map; +import org.dllearner.algorithms.isle.index.OWLOntologyLuceneSyntacticIndexCreator; +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.dllearner.algorithms.isle.index.SimpleSemanticIndex; +import org.dllearner.algorithms.isle.index.SyntacticIndex; +import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceUtils; +import 
org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.KnowledgeSource; import org.dllearner.core.owl.Entity; @@ -16,9 +25,12 @@ import org.junit.Before; import org.junit.Test; import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + import com.google.common.base.Joiner; /** @@ -29,10 +41,10 @@ private OWLOntologyManager manager; private OWLOntology ontology; + private OWLDataFactory df = new OWLDataFactoryImpl(); private NamedClass cls; private EntityTextRetriever textRetriever; - private LuceneSearcher searcher; - private Relevance relevance; + private RelevanceMetric relevance; private String searchField = "label"; /** @@ -43,9 +55,9 @@ ontology = manager.loadOntologyFromOntologyDocument(new File("../examples/isle/father_labeled.owl")); cls = new NamedClass("http://example.com/father#father"); textRetriever = new RDFSLabelEntityTextRetriever(ontology); - OWLOntologyLuceneIndex index = new OWLOntologyLuceneIndex(ontology, searchField); - searcher = new LuceneSearcher(index.getDirectory(), searchField); - relevance = new PMILuceneBasedRelevance(ontology, searcher, textRetriever); + SyntacticIndex syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex(); + SemanticIndex semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex); + relevance = new PMIRelevanceMetric(semanticIndex); } /** @@ -66,7 +78,7 @@ @Test public void testEntityRelevance() throws Exception { System.out.println("Relevant entities for entity " + cls + ":"); - Map<Entity, Double> entityRelevance = relevance.getEntityRelevance(cls); + Map<Entity, Double> 
entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); System.out.println(Joiner.on("\n").join(entityRelevance.entrySet())); } @@ -80,7 +92,7 @@ lp.setClassToDescribe(cls); lp.init(); - Map<Entity, Double> entityRelevance = relevance.getEntityRelevance(cls); + Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); ISLE isle = new ISLE(lp, reasoner); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-17 11:44:44
|
Revision: 4021 http://sourceforge.net/p/dl-learner/code/4021 Author: lorenz_b Date: 2013-07-17 11:44:41 +0000 (Wed, 17 Jul 2013) Log Message: ----------- Refactored ISLE components. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSCommentEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/RDFSLabelEntityTextRetriever.java Removed 
Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,93 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.kb.OWLAPIOntology; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLEntity; -import 
org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.util.IRIShortFormProvider; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; - - -/** - * @author Lorenz Buehmann - * - */ -public class AnnotationEntityTextRetriever implements EntityTextRetriever{ - - private OWLOntology ontology; - private OWLOntologyManager manager; - - private String language = "en"; - private double weight = 1d; - - private boolean useShortFormFallback = true; - private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - - private OWLAnnotationProperty[] properties; - - public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology; - this.properties = properties; - } - - public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { - this.ontology = ontology.createOWLOntology(manager); - } - - /** - * @param language the language to set - */ - public void setLanguage(String language) { - this.language = language; - } - - /** - * Whether to use the short form of the IRI as fallback, if no label is given. 
- * @param useShortFormFallback the useShortFormFallback to set - */ - public void setUseShortFormFallback(boolean useShortFormFallback) { - this.useShortFormFallback = useShortFormFallback; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) - */ - @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); - - OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); - - for (OWLAnnotationProperty property : properties) { - Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - String label = val.getLiteral(); - textWithWeight.put(label, weight); - } - } - } - } - - if(textWithWeight.isEmpty() && useShortFormFallback){ - textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); - } - - return textWithWeight; - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle; import java.util.Map; +import java.util.Set; import org.dllearner.core.owl.Entity; @@ -17,7 +18,7 @@ * Extracts all entities contained in the working text with some confidence value. * @return */ - Map<Entity, Double> extractEntities(); + Map<Entity, Set<String>> extractEntities(); /** * Extracts all entities of the given <code>type</code> contained in the working text with some confidence value. 
Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - -/** - * Interface for methods, which retrieve relevant texts given an entity - * in an ontology. An entity text retriever can do simple operations such - * as converting the URI into text or retrieving an rdfs:label, but could - * also search web pages for textual explanations of an entity. - * - * @author Jens Lehmann - * - */ -public interface EntityTextRetriever { - - /** - * The method retrieves a string or a set of strings, which is weighted by - * importance with respect to the entity. For instance, an rdfs:label of - * an entity can be given more weight than an rdfs:comment, which in turn - * can be more important than a description retrieved from a web page. 
- * - * @param entity The entity to handle. - * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. - */ - public Map<String, Double> getRelevantText(Entity entity); - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,145 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLOntology; - - -public abstract class LuceneBasedRelevance implements Relevance{ - - private EntityTextRetriever textRetriever; - private LuceneSearcher searcher; - private OWLOntology ontology; - private Set<OWLEntity> entities; - -// public void printScores() throws Exception { -// for( OWLClass c: m_classes ) -// { -// Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); -// // normalization per class? -// hmEntity2Score = normalize( hmEntity2Score ); -// for( OWLEntity e : hmEntity2Score.keySet() ) -// { -// double dScore = hmEntity2Score.get(e); -// System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); -// } -// } -// m_searcher.close(); -// } - - public LuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - this.searcher = searcher; - this.ontology = ontology; - this.textRetriever = textRetriever; - - entities = new HashSet<OWLEntity>(); - entities.addAll(ontology.getClassesInSignature()); - entities.addAll(ontology.getObjectPropertiesInSignature()); - entities.addAll(ontology.getDataPropertiesInSignature()); - } - - public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ - Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); - double dMin = Double.MAX_VALUE; - Double dMax = Double.MIN_VALUE; - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - if( dValue < dMin ){ - dMin = dValue; - } - else if( dValue > dMax ){ - dMax = dValue; - } - } - // System.out.println( "min="+ dMin +" max="+ dMax ); - for( OWLEntity e : 
hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - double dNorm = 0; - if( dMin == dMax ){ - dNorm = dValue; - } - else { - dNorm = ( dValue - dMin ) / ( dMax - dMin ); - } - hmEntity2Norm.put( e, dNorm ); - } - return hmEntity2Norm; - } - - @Override - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception { - // computes relevance of entity for this class - // conditional probability: P(C,E)=f(C,E)/f(E) - // PMI(C,E)=log( P(C,E) / P(C) ) - Map<Entity, Double> hmEntity2Score = new HashMap<Entity, Double>(); - Map<String, Double> relevantText = textRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String text = entry.getKey(); - Double value = entry.getValue(); - - String sClass = text; - int nrOfDocumentsA = searcher.count(sClass); - int nrOfDocuments = searcher.indexSize(); - - for (OWLEntity otherEntity : entities) { - - Map<String, Double> otherRelevantText = textRetriever.getRelevantText(OWLAPIConverter - .getEntity(otherEntity)); - - for (Entry<String, Double> entry2 : otherRelevantText.entrySet()) { - String otherText = entry2.getKey(); - Double otherValue = entry2.getValue(); - - String sEntity = otherText; - int nrOfDocumentsB = searcher.count(sEntity); - int nrOfDocumentsAB = searcher.count(sClass + " AND " + sEntity); - // double dPEntity = (double)iEntity / (double)iAll; - - double score = computeScore(nrOfDocuments, nrOfDocumentsA, nrOfDocumentsB, nrOfDocumentsAB); - if (!Double.isNaN(score)){// && !Double.isInfinite(score)) { - hmEntity2Score.put(OWLAPIConverter.getEntity(otherEntity), score); - } - } - } - } - - return hmEntity2Score; - } - - /** - * Computes the score which is returned in {@link org.dllearner.algorithms.isle.LuceneBasedRelevance#getEntityRelevance} - * @return - */ - public abstract double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB); - -} \ No newline at end of file Deleted: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneDocument.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,43 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileReader; - -import org.apache.lucene.document.DateTools; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - - -public class LuceneDocument { - - public static Document Document( File f ) throws java.io.FileNotFoundException { - Document doc = new Document(); - doc.add( new Field( "path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED ) ); - doc.add( new Field( "modified", - DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add( new Field( "contents", new FileReader(f) ) ); - return doc; - } -} - Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,100 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Date; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - - -public class LuceneIndexer { - - static final File INDEX = new File( "index" ); - - public static void main( String[] args ) { - if( INDEX.exists() ) - { - System.out.println("<delete index!>"); - System.exit(1); - } -// final File docDir = new File( args[0] ); -// LuceneIndexer indexer = new LuceneIndexer( docDir ); - } - - @SuppressWarnings("deprecation") - public LuceneIndexer( File docDir ){ - System.out.println( "LuceneIndex: "+ docDir ); - Date start = new Date(); - try { - - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), indexWriterConfig); - System.out.println( "Creating index ..." ); - index( writer, docDir ); - System.out.println( "Optimizing index ..." 
); - writer.close(); - Date end = new Date(); - System.out.println( end.getTime() - start.getTime() + " total milliseconds" ); - } - catch (IOException e) { - e.printStackTrace(); - } - } - - private void index( IndexWriter writer, File file ) throws IOException { - // System.out.println( "LuceneIndexer.index: "+ file ); - if( file.canRead() ) - { - if( file.isDirectory() ) - { - String[] files = file.list(); - if( files != null ) - { - for( int i = 0; i < files.length; i++ ) { - index( writer, new File( file, files[i] ) ); - } - } - } - else { - // System.out.println( "Indexer.index: adding " + file ); - try { - writer.addDocument( LuceneDocument.Document( file ) ); - } - catch (FileNotFoundException fnfe) { - fnfe.printStackTrace(); - } - } - } - else { - System.out.println( "<cannot read file!>" ); - } - } - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,176 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - -public class LuceneSearcher { - - private String INDEX = "/home/me/DBpedia-Lucene-Index"; - private String FIELD = "short-abstract"; - - private IndexReader m_reader = null; - private IndexSearcher m_searcher = null; - private Analyzer m_analyzer = null; - private QueryParser m_parser = null; - - private Map<Document,Float> m_results = null; - - - public static void main( String[] args ) throws Exception { - String sQuery = args[0]; - LuceneSearcher searcher = new LuceneSearcher(); - List<Document> docs = searcher.search( sQuery ); - System.out.println( "\nquery='"+ sQuery +"' all="+ searcher.indexSize() +" hits="+ docs.size() ); -// for( Document doc : docs ) -// { -//// String sDoc = doc.toString(); -// float score = searcher.getScore( doc ); -// System.out.println( "score="+ score +" doc="+ doc ); -// } - } - - @SuppressWarnings("deprecation") - public LuceneSearcher() throws Exception { - m_reader = DirectoryReader.open( FSDirectory.open( new File( INDEX ) )); - m_searcher = new IndexSearcher( m_reader ); 
- m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(IndexReader indexReader) throws Exception { - m_reader = indexReader; - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(Directory directory, String seachField) throws Exception { - this.FIELD = seachField; - m_reader = DirectoryReader.open(directory); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public LuceneSearcher(String indexDirectory) throws Exception { - m_reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); - m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_43); - m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); - } - - public void close() throws Exception { - m_reader.close(); - } - - public int indexSize(){ - return m_reader.numDocs(); - } - - public List<Document> search( String sQuery ) throws Exception { - m_results = new HashMap<Document,Float>(); - Query query = m_parser.parse( sQuery ); - search( query ); - // m_reader.close(); - return getDocuments(); - } - - public int count( String sQuery ) throws Exception { - return search( sQuery ).size(); - } - - public List<Document> getDocuments(){ - List<Document> docs = new ArrayList<Document>(); - for( Document doc: m_results.keySet() ){ - docs.add( doc ); - } - Collections.sort( docs, new Comparator<Document>(){ - public int compare( Document d1, Document d2 ){ - float s1 = getScore( d1 ); - float s2 = getScore( d2 ); - if( s1 > s2 ) return -1; - else if( s1 < s2 ) return 1; - return 0; - } - @Override - public boolean equals( Object obj ){ - return false; - } - } ); - return 
docs; - } - - public float getScore( Document doc ){ - return m_results.get( doc ); - } - - private void search( Query query ) throws IOException { - @SuppressWarnings("unused") - Collector collector = new Collector() - { - private Scorer scorer; - private int docBase; - private Map<Document,Float> results = new HashMap<Document,Float>(); - - @Override - public void collect(int doc) throws IOException { - // System.out.println("doc=" + doc + docBase + " score=" + scorer.score()); - m_results.put( m_searcher.doc( doc ), scorer.score() ); - } - @Override - public boolean acceptsDocsOutOfOrder() { - return true; - } - @Override - public void setScorer(Scorer scorer) throws IOException { - this.scorer = scorer; - } - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = context.docBase; - } - }; - m_searcher.search( query, collector ); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,142 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import 
org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -/** - * Creates a Lucene Index for the labels if classes and properties. - * @author Lorenz Buehmann - * - */ -public class OWLOntologyLuceneIndex { - - private Directory directory = new RAMDirectory(); - private OWLOntology ontology; - private Set<OWLEntity> schemaEntities; - - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - private String language = "en"; - private String searchField; - - public OWLOntologyLuceneIndex(OWLOntology ontology, String searchField) throws IOException { - this.ontology = ontology; - this.searchField = searchField; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - public OWLOntologyLuceneIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty) throws IOException { - this.ontology = ontology; - this.annotationProperty = annotationProperty; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - - buildIndex(); - } - - /** 
- * @return the ontology - */ - public OWLOntology getOntology() { - return ontology; - } - - /** - * @return the directory - */ - public Directory getDirectory() { - return directory; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(OWLAnnotationProperty annotationProperty) { - this.annotationProperty = annotationProperty; - } - - /** - * @param annotationProperty the annotationProperty to set - */ - public void setAnnotationProperty(String annotationPropertyIRI) { - this.annotationProperty = df.getOWLAnnotationProperty(IRI.create(annotationPropertyIRI)); - } - - public void buildIndex() throws IOException{ - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter(directory, indexWriterConfig); - System.out.println( "Creating index ..." ); - - Set<Document> luceneDocuments = new HashSet<Document>(); - FieldType stringType = new FieldType(StringField.TYPE_STORED); - stringType.setStoreTermVectors(false); - FieldType textType = new FieldType(TextField.TYPE_STORED); - textType.setStoreTermVectors(false); - - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - label = val.getLiteral(); - } - } - } - - if(label != null){ - Document luceneDocument = new Document(); - luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); - luceneDocument.add(new Field(searchField, label, textType)); - luceneDocuments.add(luceneDocument); - } - - } - writer.addDocuments(luceneDocuments); - - System.out.println("Done."); - writer.close(); - } - - - -} Deleted: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,48 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -package org.dllearner.algorithms.isle; - -import org.semanticweb.owlapi.model.OWLOntology; - - -public class PMILuceneBasedRelevance extends LuceneBasedRelevance{ - - /** - * @param ontology - * @param searcher - * @param textRetriever - */ - public PMILuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { - super(ontology, searcher, textRetriever); - - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.LuceneBasedRelevance#computeScore(int, int, int, int) - */ - @Override - public double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB) { - double dPClass = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); - double dPClassEntity = nrOfDocumentsB == 0 ? 
0 : (double) nrOfDocumentsAB / (double) nrOfDocumentsB; - double pmi = Math.log(dPClassEntity / dPClass); - return pmi; - } -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSCommentEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSCommentEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } - - public RDFSCommentEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,26 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import org.dllearner.kb.OWLAPIOntology; -import 
org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class RDFSLabelEntityTextRetriever extends AnnotationEntityTextRetriever{ - - public RDFSLabelEntityTextRetriever(OWLOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } - - public RDFSLabelEntityTextRetriever(OWLAPIOntology ontology) { - super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-16 05:25:41 UTC (rev 4020) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -1,31 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.util.Map; - -import org.dllearner.core.owl.Entity; - - -public interface Relevance { - - public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception; -} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,99 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; + +/** + * @author Lorenz Buehmann + * + */ +public class LuceneSyntacticIndex implements SyntacticIndex { + + private IndexSearcher searcher; + private QueryParser parser; + private IndexReader indexReader; + private String searchField; + + public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + this.indexReader = indexReader; + this.searchField = searchField; + searcher = new IndexSearcher(indexReader); + StandardAnalyzer analyzer = new 
StandardAnalyzer( Version.LUCENE_43); + parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + } + + public LuceneSyntacticIndex(Directory directory, String seachField) throws Exception { + this(DirectoryReader.open(directory), seachField); + } + + public LuceneSyntacticIndex(String indexDirectory, String seachField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), seachField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) + */ + @Override + public Set<String> getDocuments(String searchString) { + Set<String> documents = new HashSet<String>(); + try { + Query query = parser.parse(searchString); + ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(doc.get(searchField)); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + */ + @Override + public int getSize() { + return indexReader.numDocs(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) + */ + @Override + public int count(String searchString) { + try { + Query query = parser.parse(searchString); + TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + return results.getTotalHits(); + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return -1; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,101 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * Creates a Lucene Index for the labels if classes and properties. 
+ * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneSyntacticIndexCreator { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + } + + public SyntacticIndex buildIndex() throws Exception{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." 
); + + Set<Document> luceneDocuments = new HashSet<Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + Document luceneDocument = new Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + + return new LuceneSyntacticIndex(directory, searchField); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,35 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * This class + * @author Lorenz Buehmann + * + */ +public interface SemanticIndex { + + /** + * This method returns a set of documents for the given entity. + * @param entity + * @return + */ + Set<String> getDocuments(Entity entity); + /** + * This method returns the number of documents for the given entity. 
+ * @param entity + * @return + */ + int count(Entity entity); + /** + * This methods returns the total number of documents contained in the index. + * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,22 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +/** + * This gets a syntactic index and returns a semantic index by applying WSD etc. + * @author Lorenz Buehmann + * + */ +public class SemanticIndexCreator { + + private SyntacticIndex syntacticIndex; + + public SemanticIndexCreator(SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + } + + public SemanticIndex createSemanticIndex(){ + return null; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,43 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public class SimpleSemanticIndex implements SemanticIndex{ + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<String> getDocuments(Entity entity) { + return null; + } + + /* 
(non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) + */ + @Override + public int count(Entity entity) { + return 0; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() + */ + @Override + public int getSize() { + return 0; + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,32 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public interface SyntacticIndex { + + /** + * This method returns a set of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + Set<String> getDocuments(String searchString); + /** + * This method returns the number of documents based on how the underlying index is processing the given search string. + * @param searchString + * @return + */ + int count(String searchString); + /** + * This methods returns the total number of documents contained in the index. 
+ * @return the total number of documents contained in the index + */ + int getSize(); + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,54 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.Map; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.semanticweb.owlapi.model.OWLEntity; + +/** + * @author Lorenz Buehmann + * + */ +public abstract class AbstractRelevanceMetric implements RelevanceMetric { + + protected SemanticIndex index; + + public AbstractRelevanceMetric(SemanticIndex index) { + this.index = index; + } + + public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ + Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); + double dMin = Double.MAX_VALUE; + Double dMax = Double.MIN_VALUE; + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + if( dValue < dMin ){ + dMin = dValue; + } + else if( dValue > dMax ){ + dMax = dValue; + } + } + // System.out.println( "min="+ dMin +" max="+ dMax ); + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + double dNorm = 0; + if( dMin == dMax ){ + dNorm = dValue; + } + else { + dNorm = ( dValue - dMin ) / ( dMax - dMin ); + } + hmEntity2Norm.put( e, dNorm ); + } + return hmEntity2Norm; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,37 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.Set; + +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.dllearner.core.owl.Entity; + +import com.google.common.collect.Sets; + +/** + * @author Lorenz Buehmann + * + */ +public class PMIRelevanceMetric extends AbstractRelevanceMetric { + + public PMIRelevanceMetric(SemanticIndex index) { + super(index); + } + + @Override + public double getRelevance(Entity entityA, Entity entityB){ + Set<String> documentsA = index.getDocuments(entityA); + Set<String> documentsB = index.getDocuments(entityB); + Set<String> documentsAB = Sets.intersection(documentsA, documentsB); + int nrOfDocuments = index.getSize(); + + double dPClass = nrOfDocuments == 0 ? 0 : ((double) documentsA.size() / (double) nrOfDocuments); + double dPClassEntity = documentsB.size() == 0 ? 0 : (double) documentsAB.size() / (double) documentsB.size(); + double pmi = Math.log(dPClassEntity / dPClass); + + return pmi; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceMetric.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. 
+ * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle.metrics; + +import org.dllearner.core.owl.Entity; + + +public interface RelevanceMetric { + /** + * @param entity1 + * @param entity2 + * @return + */ + double getRelevance(Entity entity1, Entity entity2); +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/RelevanceUtils.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,50 @@ +/** + * + */ +package org.dllearner.algorithms.isle.metrics; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class RelevanceUtils { + + public static Map<Entity, Double> getRelevantEntities(Entity entity, Set<Entity> otherEntities, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + 
for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + + public static Map<Entity, Double> getRelevantEntities(Entity entity, OWLOntology ontology, RelevanceMetric metric){ + Map<Entity, Double> relevantEntities = new HashMap<Entity, Double>(); + + Set<OWLEntity> owlEntities = new HashSet<OWLEntity>(); + owlEntities.addAll(ontology.getClassesInSignature()); + owlEntities.addAll(ontology.getDataPropertiesInSignature()); + owlEntities.addAll(ontology.getObjectPropertiesInSignature()); + Set<Entity> otherEntities = OWLAPIConverter.getEntities(owlEntities); + + for (Entity otherEntity : otherEntities) { + double relevance = metric.getRelevance(entity, otherEntity); + relevantEntities.put(otherEntity, relevance); + } + + return relevantEntities; + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/AnnotationEntityTextRetriever.java 2013-07-17 11:44:41 UTC (rev 4021) @@ -0,0 +1,93 @@ +/** + * + */ +package org.dllearner.algorithms.isle.textretrieval; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import 
org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + + +/** + * @author Lorenz Buehmann + * + */ +public class AnnotationEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private OWLAnnotationProperty[] properties; + + public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology; + this.properties = properties; + } + + public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. 
+ * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + for (OWLAnnotationProperty property : properties) { + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/textretrieval/EntityTe... [truncated message content] |
From: <dc...@us...> - 2013-07-16 05:25:44
|
Revision: 4020 http://sourceforge.net/p/dl-learner/code/4020 Author: dcherix Date: 2013-07-16 05:25:41 +0000 (Tue, 16 Jul 2013) Log Message: ----------- Added a simple possibility to configure via a webpage for sparqr Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/server/Rest.java trunk/interfaces/src/main/webapp/WEB-INF/web.xml Added Paths: ----------- trunk/interfaces/src/main/java/org/dllearner/server/ConfGeneratorServlet.java trunk/interfaces/src/main/resources/config.template Added: trunk/interfaces/src/main/java/org/dllearner/server/ConfGeneratorServlet.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/ConfGeneratorServlet.java (rev 0) +++ trunk/interfaces/src/main/java/org/dllearner/server/ConfGeneratorServlet.java 2013-07-16 05:25:41 UTC (rev 4020) @@ -0,0 +1,221 @@ +/** + * + */ +package org.dllearner.server; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URLDecoder; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletRequestWrapper; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author didier + * + */ +public class ConfGeneratorServlet extends HttpServlet { + + private Logger logger = LoggerFactory.getLogger(ConfGeneratorServlet.class); + + private String template; + + public ConfGeneratorServlet() { + BufferedReader input = new BufferedReader(new InputStreamReader(ConfGeneratorServlet.class + .getClassLoader().getResourceAsStream("config.template"))); + StringBuilder builder = new StringBuilder(); + try 
{ + while (input.ready()) { + builder.append(input.readLine()); + builder.append("\n"); + } + } catch (IOException e) { + logger.error("", ExceptionUtils.getRootCause(e)); + } finally { + try { + input.close(); + } catch (IOException e) { + logger.error("", ExceptionUtils.getRootCause(e)); + } + } + template = builder.toString(); + } + + /* + * (non-Javadoc) + * + * @see + * javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest + * , javax.servlet.http.HttpServletResponse) + */ + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, + IOException { + handle(req, resp); + } + + /* + * (non-Javadoc) + * + * @see + * javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest + * , javax.servlet.http.HttpServletResponse) + */ + @Override + protected void doPost(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + handle(req, resp); + } + + /** + * @param req + * @param resp + */ + private void handle(HttpServletRequest req, HttpServletResponse resp) { + String tmp = template; + try { + String[] pos, neg = null; + if (!Rest.isSet("pos", req)) { + System.out.println("blub"); + req.getRequestDispatcher("/WEB-INF/sparqr.html").forward(req, resp); + return; + } else { + pos = URLDecoder.decode(req.getParameter("pos"), "UTF-8").split(","); + if (Rest.isSet("neg", req)) { + neg = URLDecoder.decode(req.getParameter("neg"), "UTF-8").split(","); + } + } + StringBuilder posStr = new StringBuilder(); + StringBuilder negStr = new StringBuilder(); + StringBuilder instances = new StringBuilder(); + + if (neg != null) { + tmp=tmp.replace("<LPTYPE>", "\"posNegStandard\""); + for (int i = 0; i < neg.length; i++) { + if (i > 0) { + negStr.append(",\n"); + instances.append(",\n"); + } + negStr.append("\""); + negStr.append(neg[i].replaceAll("\"|\n|\\s", "")); + negStr.append("\""); + instances.append("\""); + instances.append(neg[i].replaceAll("\"|\n|\\s", 
"")); + instances.append("\""); + } + } else { + tmp=tmp.replace("<LPTYPE>", "\"posOnlyLP\""); + tmp=tmp.replace("lp.negativeExamples = {\n<NEGATIVES>\n} ", ""); + } + + for (int i = 0; i < pos.length; i++) { + if (i > 0) { + posStr.append(",\n"); + } + if (instances.length() > 0) { + instances.append(",\n"); + } + posStr.append("\""); + posStr.append(pos[i].replaceAll("\"|\n|\\s", "")); + posStr.append("\""); + instances.append("\""); + instances.append(pos[i].replaceAll("\"|\n|\\s", "")); + instances.append("\""); + } + tmp=tmp.replace("<INSTANCES>", instances.toString()); + tmp=tmp.replace("<POSITIVES>", posStr.toString()); + tmp=tmp.replace("<NEGATIVES>", negStr.toString()); + Map<String, String[]> additionalParams = new HashMap<String, String[]>(); + additionalParams.put("conf", new String[]{tmp}); + System.out.println(tmp); + ModifiableWrappedRequest request = new ModifiableWrappedRequest(req, additionalParams); + request.getRequestDispatcher("/rest").forward(request, resp); + } catch (ServletException e) { + logger.error("", ExceptionUtils.getRootCause(e)); + try { + resp.sendError(500, ExceptionUtils.getRootCause(e).toString()); + } catch (IOException e1) { + logger.error("", ExceptionUtils.getRootCause(e1)); + } + } catch (IOException e) { + logger.error("", ExceptionUtils.getRootCause(e)); + try { + resp.sendError(500, ExceptionUtils.getRootCause(e).toString()); + } catch (IOException e1) { + logger.error("", ExceptionUtils.getRootCause(e1)); + } + } + } + + public class ModifiableWrappedRequest extends HttpServletRequestWrapper + { + private final Map<String, String[]> modifiableParameters; + private Map<String, String[]> allParameters = null; + + /** + * Create a new request wrapper that will merge additional parameters into + * the request object without prematurely reading parameters from the + * original request. 
+ * + * @param request + * @param additionalParams + */ + public ModifiableWrappedRequest(final HttpServletRequest request, + final Map<String, String[]> additionalParams) + { + super(request); + modifiableParameters = new TreeMap<String, String[]>(); + modifiableParameters.putAll(additionalParams); + } + + @Override + public String getParameter(final String name) + { + String[] strings = getParameterMap().get(name); + if (strings != null) + { + return strings[0]; + } + return super.getParameter(name); + } + + @Override + public Map<String, String[]> getParameterMap() + { + if (allParameters == null) + { + allParameters = new TreeMap<String, String[]>(); + allParameters.putAll(super.getParameterMap()); + allParameters.putAll(modifiableParameters); + } + //Return an unmodifiable collection because we need to uphold the interface contract. + return Collections.unmodifiableMap(allParameters); + } + + @Override + public Enumeration<String> getParameterNames() + { + return Collections.enumeration(getParameterMap().keySet()); + } + + @Override + public String[] getParameterValues(final String name) + { + return getParameterMap().get(name); + } + } + +} Property changes on: trunk/interfaces/src/main/java/org/dllearner/server/ConfGeneratorServlet.java ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Modified: trunk/interfaces/src/main/java/org/dllearner/server/Rest.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/Rest.java 2013-07-15 16:32:53 UTC (rev 4019) +++ trunk/interfaces/src/main/java/org/dllearner/server/Rest.java 2013-07-16 05:25:41 UTC (rev 4020) @@ -23,6 +23,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.*; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; @@ -58,10 
+59,10 @@ int limit = 5; if (!isSet("conf", httpServletRequest)) { // throw new IllegalArgumentException("Missing parameter: conf is required. "); - httpServletResponse.sendError(400, "Missing parameter: conf is required. "); + httpServletRequest.getRequestDispatcher("/WEB-INF/sparqr.html").forward(httpServletRequest, httpServletResponse); return; } else { - conf = httpServletRequest.getParameter("conf"); + conf = URLDecoder.decode(httpServletRequest.getParameter("conf")); if (isSet("limit", httpServletRequest)) { limit = Integer.parseInt(httpServletRequest.getParameter("limit")); } Added: trunk/interfaces/src/main/resources/config.template =================================================================== --- trunk/interfaces/src/main/resources/config.template (rev 0) +++ trunk/interfaces/src/main/resources/config.template 2013-07-16 05:25:41 UTC (rev 4020) @@ -0,0 +1,47 @@ +sparql.type = "sparqls" +sparql.endpointURL = "http://dbpedia.org/sparql" +sparql.defaultGraphURI = "http://dbpedia.org" +sparql.recursionDepth = 2 +sparql.ontologySchemaUrls = {"http://downloads.dbpedia.org/3.6/dbpedia_3.6.owl" } +sparql.aboxfilter = "FILTER ( +!isLiteral(?o) && +!regex(str(?p), 'http://dbpedia.org/property/website') && +!regex(str(?p), 'http://dbpedia.org/property/wikipage') && +!regex(str(?p), 'http://dbpedia.org/property/wikiPageUsesTemplate') && +!regex(str(?p), 'http://dbpedia.org/property/reference') && +!regex(str(?p), 'http://www.w3.org/2004/02/skos/core') && +!regex(str(?p), 'http://www.w3.org/2002/07/owl#sameAs') && +!regex(str(?p), 'http://xmlns.com/foaf/0.1/') && +!regex(str(?p), 'http://dbpedia.org/property/wordnet_type') && +!regex(str(?p), 'http://dbpedia.org/property/wikilink') && +regex(str(?o), '^http://dbpedia.org/resource/') +) " +sparql.tboxfilter = "FILTER ( !regex(str(?class), '^http://upload.wikimedia.org/wikipedia') && +!regex(str(?class), '^http://dbpedia.org/resource/Template') && +!regex(str(?class), '^http://dbpedia.org/resource/Category:') && 
+!regex(str(?class), '^http://umbel.org/umbel/') && +!regex(str(?class), '^http://dbpedia.org/class/yago') + ) . " +sparql.instances = { +<INSTANCES> + } +reasoner.type = "fast instance checker" +reasoner.sources = {sparql} +lp.type = <LPTYPE> +lp.positiveExamples = { +<POSITIVES> +} +lp.negativeExamples = { +<NEGATIVES> +} +lp.reasoner = reasoner +op.type = "rho" +op.useNegation = false +op.useAllConstructor = false +op.useCardinalityRestrictions = false +op.useHasValueConstructor = true +op.reasoner = reasoner +alg.type = "ocel" +alg.reasoner = reasoner +alg.maxExecutionTimeInSeconds = 30 +alg.noisePercentage = 10.0 \ No newline at end of file Modified: trunk/interfaces/src/main/webapp/WEB-INF/web.xml =================================================================== --- trunk/interfaces/src/main/webapp/WEB-INF/web.xml 2013-07-15 16:32:53 UTC (rev 4019) +++ trunk/interfaces/src/main/webapp/WEB-INF/web.xml 2013-07-16 05:25:41 UTC (rev 4020) @@ -14,8 +14,18 @@ <servlet-name>rest</servlet-name> <url-pattern>/rest</url-pattern> </servlet-mapping> + + <servlet> + <servlet-name>conf</servlet-name> + <servlet-class>org.dllearner.server.ConfGeneratorServlet</servlet-class> + </servlet> + <servlet-mapping> + <servlet-name>conf</servlet-name> + <url-pattern>/conf</url-pattern> + </servlet-mapping> + <servlet> <servlet-name>NKEGeizhals</servlet-name> <servlet-class>org.dllearner.server.NKEGeizhals</servlet-class> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dc...@us...> - 2013-07-15 16:32:57
|
Revision: 4019 http://sourceforge.net/p/dl-learner/code/4019 Author: dcherix Date: 2013-07-15 16:32:53 +0000 (Mon, 15 Jul 2013) Log Message: ----------- Cahnges in Rest to return http error stats on exceptions Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/server/Rest.java Modified: trunk/interfaces/src/main/java/org/dllearner/server/Rest.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/server/Rest.java 2013-07-12 09:03:21 UTC (rev 4018) +++ trunk/interfaces/src/main/java/org/dllearner/server/Rest.java 2013-07-15 16:32:53 UTC (rev 4019) @@ -57,7 +57,9 @@ String conf = null; int limit = 5; if (!isSet("conf", httpServletRequest)) { - throw new IllegalArgumentException("Missing parameter: conf is required. "); +// throw new IllegalArgumentException("Missing parameter: conf is required. "); + httpServletResponse.sendError(400, "Missing parameter: conf is required. "); + return; } else { conf = httpServletRequest.getParameter("conf"); if (isSet("limit", httpServletRequest)) { @@ -116,6 +118,8 @@ learningResult.put("success", "0"); learningResult.put("error", msg); learningResult.put("stacktrace", ExceptionUtils.getRootCause(e)); + result.put("learningresult", learningResult); + httpServletResponse.sendError(500, result.toJSONString()); } result.put("learningresult", learningResult); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-12 09:03:25
|
Revision: 4018 http://sourceforge.net/p/dl-learner/code/4018 Author: lorenz_b Date: 2013-07-12 09:03:21 +0000 (Fri, 12 Jul 2013) Log Message: ----------- Cont. ISLE. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java trunk/components-core/src/main/java/org/dllearner/core/owl/Entity.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/AnnotationEntityTextRetriever.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -0,0 +1,93 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import 
org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + + +/** + * @author Lorenz Buehmann + * + */ +public class AnnotationEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + private OWLAnnotationProperty[] properties; + + public AnnotationEntityTextRetriever(OWLOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology; + this.properties = properties; + } + + public AnnotationEntityTextRetriever(OWLAPIOntology ontology, OWLAnnotationProperty... properties) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. 
+ * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + for (OWLAnnotationProperty property : properties) { + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, property); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityExtraction.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -0,0 +1,28 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.Map; + +import org.dllearner.core.owl.Entity; + +/** + * @author Lorenz Buehmann + * + */ +public interface EntityExtraction { + + /** + * Extracts all entities contained in the working text with some confidence value. + * @return + */ + Map<Entity, Double> extractEntities(); + + /** + * Extracts all entities of the given <code>type</code> contained in the working text with some confidence value. 
+ * @return + */ + Map<Entity, Double> extractEntities(Entity.Type type); + +} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -1,95 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import org.dllearner.core.owl.Entity; -import org.dllearner.kb.OWLAPIOntology; -import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.util.IRIShortFormProvider; -import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; - -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - - -/** - * @author Lorenz Buehmann - * - */ -public class LabelEntityTextRetriever implements EntityTextRetriever{ - - private OWLOntology ontology; - private OWLOntologyManager manager; - private OWLDataFactory df = new OWLDataFactoryImpl(); - - private OWLAnnotationProperty label = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - - private String language = "en"; - private double weight = 1d; - - private boolean useShortFormFallback = true; - private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); - - 
public LabelEntityTextRetriever(OWLOntology ontology) { - this.ontology = ontology; - } - - public LabelEntityTextRetriever(OWLAPIOntology ontology) { - this.ontology = ontology.createOWLOntology(manager); - } - - /** - * @param language the language to set - */ - public void setLanguage(String language) { - this.language = language; - } - - /** - * Whether to use the short form of the IRI as fallback, if no label is given. - * @param useShortFormFallback the useShortFormFallback to set - */ - public void setUseShortFormFallback(boolean useShortFormFallback) { - this.useShortFormFallback = useShortFormFallback; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) - */ - @Override - public Map<String, Double> getRelevantText(Entity entity) { - Map<String, Double> textWithWeight = new HashMap<String, Double>(); - - OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); - - Set<OWLAnnotation> annotations = e.getAnnotations(ontology, label); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - String label = val.getLiteral(); - textWithWeight.put(label, weight); - } - } - } - - if(textWithWeight.isEmpty() && useShortFormFallback){ - textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); - } - - return textWithWeight; - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -32,6 +32,7 @@ import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; /** + * 
Creates a Lucene Index for the labels if classes and properties. * @author Lorenz Buehmann * */ Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -1,108 +0,0 @@ -package org.dllearner.algorithms.isle; - - -import java.io.*; -import java.util.*; - -public class PMIRelevance { - - private LuceneSearcher m_searcher = null; - - private Set<String> m_classes; - private Set<String> m_individuals; - - - public static void main( String args[] ) throws Exception { - PMIRelevance relevance = new PMIRelevance( args[0], args[1] ); - relevance.printScores(); - } - - public void printScores() throws Exception { - for( String sInd: m_individuals ) - { - Map<String,Double> hmClass2Score = getClassRelevance( sInd ); - for( String sClass : hmClass2Score.keySet() ) - { - double dScore = hmClass2Score.get( sClass ); - if( dScore > 0 ){ - System.out.println( "PMI( "+ sInd +" , "+ sClass +" ) = "+ dScore ); - } - } - } - /* for( String sClass: m_classes ) - { - Map<String,Double> hmInd2Score = getIndividualRelevance( sClass ); - for( String sInd : hmInd2Score.keySet() ) - { - double dScore = hmInd2Score.get( sInd ); - if( dScore > 0 ){ - System.out.println( "P( "+ sClass +" | "+ sInd +" ) = "+ dScore ); - } - } - } */ - m_searcher.close(); - } - - public PMIRelevance( String sClasses, String sIndividuals ) throws Exception { - m_searcher = new LuceneSearcher(); - m_classes = read( sClasses ); - m_individuals = read( sIndividuals ); - } - - public Map<String,Double> getClassRelevance( String sIndividual ) throws Exception { - // computes relevance of classes for this individual - // conditional probability: P(I|C)=f(I,C)/f(C) - // 
PMI(I,C)=log( P(I|C) / P(I) ) - Map<String,Double> hmClass2Score = new HashMap<String,Double>(); - int iInd = m_searcher.count( sIndividual ); - int iAll = m_searcher.indexSize(); - double dPInd = (double) iInd / (double) iAll; - for( String sClass: m_classes ) - { - int iClass = m_searcher.count( sClass ); - int iIndClass = m_searcher.count( sIndividual +" AND "+ sClass ); - double dPIndClass = (double) iIndClass / (double)iClass; - double dPMI = Math.log( dPIndClass / dPInd ); - hmClass2Score.put( sClass, dPMI ); - } - return hmClass2Score; - } - - public Map<String,Double> getIndividualRelevance( String sClass ) throws Exception { - // computes relevance of individuals for this class - // conditional probability: P(C|I)=f(C,I)/f(I) - // PMI(C|I)=log( P(C|I) / P(C) ) - Map<String,Double> hmInd2Score = new HashMap<String,Double>(); - int iClass = m_searcher.count( sClass ); - int iAll = m_searcher.indexSize(); - double dPClass = (double) iClass / (double) iAll; - for( String sInd: m_individuals ) - { - int iInd = m_searcher.count( sInd ); - int iIndClass = m_searcher.count( sClass +" AND "+ sInd ); - double dPClassInd = (double) iIndClass / (double)iInd; - double dPMI = Math.log( dPClassInd / dPClass ); - hmInd2Score.put( sInd, dPMI ); - } - return hmInd2Score; - } - - private static Set<String> read( String sFile ) throws Exception { - File file = new File( sFile ); - Set<String> lines = new HashSet<String>(); - BufferedReader reader = null; - try { - reader = new BufferedReader( new FileReader( file ) ); - String sLine = null; - while( ( sLine = reader.readLine() ) != null ) { - lines.add( sLine.trim() ); - } - } - finally { - if( reader != null ) { - reader.close(); - } - } - return lines; - } -} \ No newline at end of file Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -1,165 +0,0 @@ -/** - * Copyright (C) 2007-2011, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - - -package org.dllearner.algorithms.isle; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import org.semanticweb.owlapi.apibinding.OWLManager; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLClass; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLNamedObject; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyManager; - - -public class PMIRelevances { - - private LuceneSearcher m_searcher = null; - - private OWLOntologyManager m_manager; - private OWLOntology m_ontology; - - private Set<OWLEntity> m_entities; - private Set<OWLClass> m_classes; - - - public static void main( String args[] ) throws Exception { - PMIRelevances relevances = new PMIRelevances( args[0] ); - relevances.printScores(); - } - - public void printScores() throws Exception { - for( OWLClass c: m_classes ) - { - Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); - // normalization per class? 
- hmEntity2Score = normalize( hmEntity2Score ); - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dScore = hmEntity2Score.get(e); - System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); - } - } - m_searcher.close(); - } - - public PMIRelevances( String sOntologyURI ) throws Exception { - m_searcher = new LuceneSearcher(); - loadOntology( sOntologyURI ); - } - - public Map<OWLEntity,Double> normalize( Map<OWLEntity,Double> hmEntity2Score ){ - Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); - double dMin = Double.MAX_VALUE; - Double dMax = Double.MIN_VALUE; - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - if( dValue < dMin ){ - dMin = dValue; - } - else if( dValue > dMax ){ - dMax = dValue; - } - } - // System.out.println( "min="+ dMin +" max="+ dMax ); - for( OWLEntity e : hmEntity2Score.keySet() ) - { - double dValue = hmEntity2Score.get(e); - double dNorm = 0; - if( dMin == dMax ){ - dNorm = dValue; - } - else { - dNorm = ( dValue - dMin ) / ( dMax - dMin ); - } - hmEntity2Norm.put( e, dNorm ); - } - return hmEntity2Norm; - } - - public Map<OWLEntity,Double> getEntityRelevance( OWLClass c ) throws Exception { - // computes relevance of entity for this class - // conditional probability: P(C,E)=f(C,E)/f(E) - // PMI(C,E)=log( P(C,E) / P(C) ) - Map<OWLEntity,Double> hmEntity2Score = new HashMap<OWLEntity,Double>(); - String sClass = getLabel(c); - int iClass = m_searcher.count( sClass ); - int iAll = m_searcher.indexSize(); - double dPClass = (double) iClass / (double) iAll; - for( OWLEntity e: m_entities ) - { - String sEntity = getLabel(e); - int iEntity = m_searcher.count( sEntity ); - int iEntityClass = m_searcher.count( sClass +" AND "+ sEntity ); -// double dPEntity = (double)iEntity / (double)iAll; - double dPClassEntity = (double) iEntityClass / (double)iEntity; - double dPMI = Math.log( dPClassEntity / dPClass ); - if( !Double.isNaN( dPMI ) && 
!Double.isInfinite( dPMI ) ){ - hmEntity2Score.put( e, dPMI ); - } - } - return hmEntity2Score; - } - - /* private String getLabel( OWLEntity e ){ - System.out.println( "getLabel: "+ e ); - OWLDataFactory factory = m_manager.getOWLDataFactory(); - OWLAnnotationProperty label = factory.getOWLAnnotationProperty( OWLRDFVocabulary.RDFS_LABEL.getIRI() ); - Set<OWLAnnotation> anns = e.getAnnotations( m_ontology, label ); - for( OWLAnnotation annotation: anns ) - { - System.out.println( "annotation="+ annotation ); - if( annotation.getValue() instanceof OWLLiteral ) - { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if( !val.isOWLTypedLiteral() ){ - if (val.asOWLStringLiteral().getLang().equals("en")) { - return val.getLiteral(); - } - } - return val.getLiteral(); - } - } - return null; - } */ - - private String getLabel( OWLEntity e ){ - if( e instanceof OWLNamedObject ){ - String sIRI = ((OWLNamedObject)e).getIRI().toString(); - return sIRI.substring( sIRI.indexOf( "#" )+1 ); - } - return null; - } - - private void loadOntology( String sOntologyURI ) throws Exception { - m_manager = OWLManager.createOWLOntologyManager(); - IRI ontologyIRI = IRI.create( sOntologyURI ); - m_ontology = m_manager.loadOntology( ontologyIRI ); - m_classes = m_ontology.getClassesInSignature(); - m_entities = m_ontology.getSignature(); - System.out.println( "classes="+ m_classes.size() +" entities="+ m_entities.size() ); - // m_manager.removeOntology( ontology ); - } -} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSCommentEntityTextRetriever.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -0,0 +1,26 @@ +/** + * + */ +package 
org.dllearner.algorithms.isle; + +import org.dllearner.kb.OWLAPIOntology; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + + +/** + * @author Lorenz Buehmann + * + */ +public class RDFSCommentEntityTextRetriever extends AnnotationEntityTextRetriever{ + + public RDFSCommentEntityTextRetriever(OWLOntology ontology) { + super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); + } + + public RDFSCommentEntityTextRetriever(OWLAPIOntology ontology) { + super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI())); + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/RDFSLabelEntityTextRetriever.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -0,0 +1,26 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import org.dllearner.kb.OWLAPIOntology; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + + +/** + * @author Lorenz Buehmann + * + */ +public class RDFSLabelEntityTextRetriever extends AnnotationEntityTextRetriever{ + + public RDFSLabelEntityTextRetriever(OWLOntology ontology) { + super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); + } + + public RDFSLabelEntityTextRetriever(OWLAPIOntology ontology) { + super(ontology, new OWLDataFactoryImpl().getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI())); + } +} Modified: 
trunk/components-core/src/main/java/org/dllearner/core/owl/Entity.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/owl/Entity.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/main/java/org/dllearner/core/owl/Entity.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -28,6 +28,10 @@ * */ public interface Entity extends NamedKBElement { + + public enum Type{ + CLASS, OBJECT_PROPERTY, DATA_PROPERTY; + } public URI getURI(); Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-11 12:24:23 UTC (rev 4017) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-12 09:03:21 UTC (rev 4018) @@ -3,15 +3,10 @@ */ package org.dllearner.algorithms.isle; -import static org.junit.Assert.*; - import java.io.File; import java.util.Map; -import java.util.Map.Entry; -import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; -import org.dllearner.core.ComponentInitException; import org.dllearner.core.KnowledgeSource; import org.dllearner.core.owl.Entity; import org.dllearner.core.owl.NamedClass; @@ -39,21 +34,28 @@ private LuceneSearcher searcher; private Relevance relevance; private String searchField = "label"; - + /** - * @throws java.lang.Exception + * */ - @Before - public void setUp() throws Exception { + public ISLETest() throws Exception{ manager = OWLManager.createOWLOntologyManager(); ontology = manager.loadOntologyFromOntologyDocument(new File("../examples/isle/father_labeled.owl")); cls = new NamedClass("http://example.com/father#father"); - textRetriever = new LabelEntityTextRetriever(ontology); + textRetriever = new RDFSLabelEntityTextRetriever(ontology); OWLOntologyLuceneIndex index = new 
OWLOntologyLuceneIndex(ontology, searchField); searcher = new LuceneSearcher(index.getDirectory(), searchField); relevance = new PMILuceneBasedRelevance(ontology, searcher, textRetriever); } + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception{ + + } + @Test public void testTextRetrieval() { System.out.println("Text for entity " + cls + ":"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-11 12:24:25
|
Revision: 4017 http://sourceforge.net/p/dl-learner/code/4017 Author: lorenz_b Date: 2013-07-11 12:24:23 +0000 (Thu, 11 Jul 2013) Log Message: ----------- Added CLI paramters for entity type filtering when doing a batch processing. Modified Paths: -------------- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java Modified: trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java =================================================================== --- trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2013-07-11 12:23:36 UTC (rev 4016) +++ trunk/interfaces/src/main/java/org/dllearner/cli/Enrichment.java 2013-07-11 12:24:23 UTC (rev 4017) @@ -256,7 +256,13 @@ private Set<OWLAxiom> learnedOWLAxioms; private Set<EvaluatedAxiom> learnedEvaluatedAxioms; + private boolean processPropertiesTypeInferred = false; + private boolean iterativeMode = false; + private boolean processObjectProperties; + private boolean processDataProperties; + private boolean processClasses; + public Enrichment(SparqlEndpoint se, Entity resource, double threshold, int nrOfAxiomsToLearn, boolean useInference, boolean verbose, int chunksize, int maxExecutionTimeInSeconds, boolean omitExistingAxioms) { @@ -302,7 +308,7 @@ dataPropertyAlgorithms.add(SubDataPropertyOfAxiomLearner.class); classAlgorithms = new LinkedList<Class<? 
extends LearningAlgorithm>>(); - classAlgorithms.add(DisjointClassesLearner.class); +// classAlgorithms.add(DisjointClassesLearner.class); // classAlgorithms.add(SimpleSubclassLearner.class); classAlgorithms.add(CELOE.class); @@ -316,11 +322,19 @@ this.allowedNamespaces = allowedNamespaces; } + /** + * @param iterativeMode the iterativeMode to set + */ + public void setIterativeMode(boolean iterativeMode) { + this.iterativeMode = iterativeMode; + } + public void start() throws ComponentInitException, IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, LearningProblemUnsupportedException, MalformedURLException { // instantiate SPARQL endpoint wrapper component SparqlEndpointKS ks = new SparqlEndpointKS(se); ks.init(); + ks.setSupportsSPARQL_1_1(!iterativeMode); // common helper objects SPARQLTasks st = new SPARQLTasks(se); @@ -340,41 +354,70 @@ if(resource == null) { // loop over all entities and call appropriate algorithms - - Set<NamedClass> classes = allowedNamespaces.isEmpty() ? reasoner.getOWLClasses() : reasoner.getOWLClasses(allowedNamespaces.iterator().next());//st.getAllClasses(); - filterByNamespaces(classes);//classes = Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Arachnid")); int entities = 0; - for(NamedClass nc : classes) { - try { - runClassLearningAlgorithms(ks, nc); - } catch (Exception e) { - e.printStackTrace(); - } - entities++; - if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { - break; - } + Set<org.dllearner.core.owl.Property> processedProperties = new HashSet<org.dllearner.core.owl.Property>(); + if(processClasses){ + Set<NamedClass> classes = allowedNamespaces.isEmpty() ? 
reasoner.getOWLClasses() : reasoner.getOWLClasses(allowedNamespaces.iterator().next());//st.getAllClasses(); + filterByNamespaces(classes);//classes = Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Arachnid")); + for(NamedClass nc : classes) { + try { + runClassLearningAlgorithms(ks, nc); + } catch (Exception e) { + e.printStackTrace(); + } + entities++; + if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { + break; + } + } } entities = 0; - Set<ObjectProperty> objectProperties = st.getAllObjectProperties(); - filterByNamespaces(objectProperties); - for(ObjectProperty property : objectProperties) { - runObjectPropertyAlgorithms(ks, property); - entities++; - if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { - break; - } + if(processObjectProperties){ + Set<ObjectProperty> objectProperties = st.getAllObjectProperties(); + filterByNamespaces(objectProperties); + for(ObjectProperty property : objectProperties) { + runObjectPropertyAlgorithms(ks, property); + entities++; + if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { + break; + } + } + processedProperties.addAll(objectProperties); } entities = 0; - Set<DatatypeProperty> dataProperties = st.getAllDataProperties(); - filterByNamespaces(dataProperties); - for(DatatypeProperty property : dataProperties) { - runDataPropertyAlgorithms(ks, property); - entities++; - if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { - break; - } + if(processDataProperties){ + Set<DatatypeProperty> dataProperties = st.getAllDataProperties(); + filterByNamespaces(dataProperties); + for(DatatypeProperty property : dataProperties) { + runDataPropertyAlgorithms(ks, property); + entities++; + if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { + break; + } + } + processedProperties.addAll(dataProperties); } + + //optionally, get all properties and infer its type + if(processPropertiesTypeInferred ){ + reasoner.precomputePopularity(); + 
Set<org.dllearner.core.owl.Property> properties = allowedNamespaces.isEmpty() ? reasoner.getProperties(true) : reasoner.getProperties(true, allowedNamespaces.iterator().next()); + properties.removeAll(processedProperties); + filterByNamespaces(properties); + for(org.dllearner.core.owl.Property property : properties) { + if(property instanceof ObjectProperty){ + runObjectPropertyAlgorithms(ks, (ObjectProperty) property); + entities++; + } else if(property instanceof DatatypeProperty){ + runDataPropertyAlgorithms(ks, (DatatypeProperty) property); + entities++; + } + + if(maxEntitiesPerType != -1 && entities > maxEntitiesPerType) { + break; + } + } + } } else { if(resource instanceof ObjectProperty) { System.out.println(resource + " appears to be an object property. Running appropriate algorithms.\n"); @@ -529,36 +572,6 @@ // } } - /*//old way to get SPARQL fragment - SparqlKnowledgeSource ks2; - AbstractReasonerComponent rc; - if(reuseKnowledgeSource) { - ks2 = ksCached; - rc = rcCached; - System.out.println("re-using previously generated knowledge base fragment"); - } else { - ks2 = new SparqlKnowledgeSource(); - ks2.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); - ks2.setUrl(ks.getEndpoint().getURL()); - ks2.setDefaultGraphURIs(new TreeSet<String>(ks.getEndpoint().getDefaultGraphURIs())); - ks2.setUseLits(false); - ks2.setUseCacheDatabase(true); - ks2.setCacheDir(cacheDir); - ks2.setRecursionDepth(2); - ks2.setCloseAfterRecursion(true); - ks2.setDissolveBlankNodes(false); - ks2.setSaveExtractedFragment(true); - startTime = System.currentTimeMillis(); - System.out.print("getting knowledge base fragment ... 
"); - ks2.init(); - runTime = System.currentTimeMillis() - startTime; - System.out.println("done in " + runTime + " ms"); - rc = new FastInstanceChecker(ks2); - rc.init(); - ksCached = ks2; - rcCached = rc; - }*/ - ClassLearningProblem lp = new ClassLearningProblem(rc); lp.setClassToDescribe(nc); lp.setEquivalence(equivalence); @@ -572,7 +585,7 @@ la.setNoisePercentage(25); la.setMaxNrOfResults(100); la.init(); - ((RhoDRDown)la.getOperator()).setUseNegation(false); +// ((RhoDRDown)la.getOperator()).setUseNegation(false); startTime = System.currentTimeMillis(); System.out.print("running CELOE (for " + (equivalence ? "equivalent classes" : "sub classes") + ") ... "); la.start(); @@ -1005,6 +1018,34 @@ public List<AlgorithmRun> getAlgorithmRuns() { return algorithmRuns; } + + /** + * @param processClasses the processClasses to set + */ + public void setProcessClasses(boolean processClasses) { + this.processClasses = processClasses; + } + + /** + * @param processDataProperties the processDataProperties to set + */ + public void setProcessDataProperties(boolean processDataProperties) { + this.processDataProperties = processDataProperties; + } + + /** + * @param processObjectProperties the processObjectProperties to set + */ + public void setProcessObjectProperties(boolean processObjectProperties) { + this.processObjectProperties = processObjectProperties; + } + + /** + * @param processPropertiesTypeInferred the processPropertiesTypeInferred to set + */ + public void setProcessPropertiesTypeInferred(boolean processPropertiesTypeInferred) { + this.processPropertiesTypeInferred = processPropertiesTypeInferred; + } public static void main(String[] args) throws IOException, ComponentInitException, IllegalArgumentException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, LearningProblemUnsupportedException { @@ -1039,6 +1080,8 @@ .ofType(Integer.class).defaultsTo(10); parser.acceptsAll(asList("i", 
"inference"), "Specifies whether to use inference. If yes, the schema will be loaded into a reasoner and used for computing the scores.").withOptionalArg().ofType(Boolean.class).defaultsTo(true); + parser.acceptsAll(asList("iterative"), + "Specifies whether to use local fragments or single query mode.").withOptionalArg().ofType(Boolean.class).defaultsTo(false); parser.acceptsAll(asList("s", "serialize"), "Specify a file where the ontology with all axioms can be written.") .withRequiredArg().ofType(File.class); parser.acceptsAll(asList("a", "annotations"), @@ -1052,7 +1095,14 @@ OptionSpec<String> allowedNamespacesOption = parser.accepts( "ns" ).withRequiredArg().ofType( String.class ) .withValuesSeparatedBy( ',' ); + parser.acceptsAll(asList("op"), + "Specifies whether to compute axiom for object properties.").withOptionalArg().ofType(Boolean.class).defaultsTo(true); + parser.acceptsAll(asList("dp"), + "Specifies whether to compute axiom for data properties.").withOptionalArg().ofType(Boolean.class).defaultsTo(true); + parser.acceptsAll(asList("cls"), + "Specifies whether compute axiom for classes.").withOptionalArg().ofType(Boolean.class).defaultsTo(true); + //username and password if endpoint is protected parser.acceptsAll(asList("u", "username"), "Specify the username.") .withOptionalArg().ofType(String.class); @@ -1150,6 +1200,7 @@ } boolean useInference = (Boolean) options.valueOf("i"); + boolean iterativeMode = (Boolean) options.valueOf("iterative"); // boolean verbose = (Boolean) options.valueOf("v"); double threshold = (Double) options.valueOf("t"); int maxNrOfResults = (Integer) options.valueOf("l"); @@ -1173,8 +1224,17 @@ //extract namespaces to which the analyzed entities will be restricted List<String> allowedNamespaces = options.valuesOf(allowedNamespacesOption); + //check which entity types we have to process + boolean processObjectProperties = (Boolean) options.valueOf("op"); + boolean processDataProperties = (Boolean) options.valueOf("dp"); + 
boolean processClasses = (Boolean) options.valueOf("cls"); + Enrichment e = new Enrichment(se, resource, threshold, maxNrOfResults, useInference, false, chunksize, maxExecutionTimeInSeconds, omitExistingAxioms); e.setAllowedNamespaces(allowedNamespaces); + e.setIterativeMode(iterativeMode); + e.setProcessObjectProperties(processObjectProperties); + e.setProcessDataProperties(processDataProperties); + e.setProcessClasses(processClasses); e.start(); SparqlEndpointKS ks = new SparqlEndpointKS(se); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-11 12:23:39
|
Revision: 4016 http://sourceforge.net/p/dl-learner/code/4016 Author: lorenz_b Date: 2013-07-11 12:23:36 +0000 (Thu, 11 Jul 2013) Log Message: ----------- Some work on ISLE. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-08 13:51:31 UTC (rev 4015) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-11 12:23:36 UTC (rev 4016) @@ -20,14 +20,20 @@ package org.dllearner.algorithms.isle; import java.util.Comparator; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.dllearner.algorithms.celoe.OENode; import org.dllearner.core.Component; import org.dllearner.core.ComponentInitException; import org.dllearner.core.config.ConfigOption; +import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.ConceptComparator; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLClassExpression; +import org.semanticweb.owlapi.model.OWLEntity; /** * @@ -51,6 +57,8 @@ @ConfigOption(name = "startNodeBonus", defaultValue="0.1") private double startNodeBonus = 0.1; + private double nlpBonusFactor = 0.0001; + private Map<Entity, Double> entityRelevance; public NLPHeuristic() {} @@ -97,6 +105,21 @@ score -= node.getHorizontalExpansion() * expansionPenaltyFactor; // penalty for having many child nodes (stuck prevention) score -= node.getRefinementCount() * 
nodeRefinementPenalty; + + + //the NLP based scoring + Description expression = node.getExpression(); + OWLClassExpression owlapiDescription = OWLAPIConverter.getOWLAPIDescription(expression); + Set<Entity> entities = OWLAPIConverter.getEntities(owlapiDescription.getSignature()); + double sum = 0; + for (Entity entity : entities) { + double relevance = entityRelevance.containsKey(entity) ? entityRelevance.get(entity) : 0; + if(!Double.isInfinite(relevance)){ + sum += relevance; + } + } + score += nlpBonusFactor * sum; + return score; } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-08 13:51:31 UTC (rev 4015) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-11 12:23:36 UTC (rev 4016) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; + httpQuery.setAccept(WebContent.contentTypeNTriplesAlt) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! 
RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, "TURTLE") ; + model.read(in, null, Lang.NTRIPLES.getName()) ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java 2013-07-08 13:51:31 UTC (rev 4015) +++ trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java 2013-07-11 12:23:36 UTC (rev 4016) @@ -19,6 +19,7 @@ package org.dllearner.utilities.owl; +import java.util.HashSet; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -152,6 +153,22 @@ throw new Error("OWL API entity conversion for " + entity + " not supported."); } + public static Set<Entity> getEntities(Set<OWLEntity> owlEntities) { + Set<Entity> entities = new HashSet<Entity>(); + for (OWLEntity entity : owlEntities) { + if(entity instanceof OWLObjectProperty) { + entities.add(convertObjectProperty((OWLObjectProperty) entity)); + } else if(entity instanceof OWLDataProperty) { + entities.add(convertDatatypeProperty((OWLDataProperty) entity)); + } else if(entity instanceof OWLClass) { + entities.add(new NamedClass(entity.toStringID())); + } else if(entity instanceof OWLNamedIndividual) { + entities.add(convertIndividual((OWLNamedIndividual) entity)); + } + } + return entities; + } + public static Description convertClass(OWLClass owlClass) { if(owlClass.isOWLThing()) { return Thing.instance; Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-08 13:51:31 UTC (rev 4015) +++ 
trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-11 12:23:36 UTC (rev 4016) @@ -69,7 +69,7 @@ } @Test - public void testISLE() throws ComponentInitException { + public void testISLE() throws Exception { KnowledgeSource ks = new OWLAPIOntology(ontology); AbstractReasonerComponent reasoner = new FastInstanceChecker(ks); reasoner.init(); @@ -78,7 +78,11 @@ lp.setClassToDescribe(cls); lp.init(); + Map<Entity, Double> entityRelevance = relevance.getEntityRelevance(cls); + NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); + ISLE isle = new ISLE(lp, reasoner); + isle.setHeuristic(heuristic); isle.init(); isle.start(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-08 13:51:34
|
Revision: 4015 http://sourceforge.net/p/dl-learner/code/4015 Author: lorenz_b Date: 2013-07-08 13:51:31 +0000 (Mon, 08 Jul 2013) Log Message: ----------- First refactoring of ISLE algorithm. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLAPIConverter.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevance.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/Relevances.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2013-07-08 13:49:40 UTC (rev 4014) +++ 
trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -25,7 +25,6 @@ import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; import org.dllearner.core.config.ConfigOption; -import org.dllearner.core.config.DoubleEditor; import org.dllearner.utilities.owl.ConceptComparator; /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/ISLE.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -19,6 +19,7 @@ package org.dllearner.algorithms.isle; +import java.io.File; import java.text.DecimalFormat; import java.util.Collection; import java.util.Iterator; @@ -35,11 +36,10 @@ import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; import org.dllearner.core.EvaluatedDescription; -import org.dllearner.core.options.BooleanConfigOption; -import org.dllearner.core.options.CommonConfigOptions; -import org.dllearner.core.options.ConfigOption; +import org.dllearner.core.config.ConfigOption; import org.dllearner.core.owl.ClassHierarchy; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; @@ -51,16 +51,20 @@ import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosNegLPStandard; import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.refinementoperators.CustomHierarchyRefinementOperator; +import org.dllearner.refinementoperators.CustomStartRefinementOperator; import org.dllearner.refinementoperators.LengthLimitedRefinementOperator; import 
org.dllearner.refinementoperators.OperatorInverter; -import org.dllearner.refinementoperators.RefinementOperator; +import org.dllearner.refinementoperators.ReasoningBasedRefinementOperator; import org.dllearner.refinementoperators.RhoDRDown; +import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; import org.dllearner.utilities.owl.ConceptComparator; import org.dllearner.utilities.owl.ConceptTransformation; import org.dllearner.utilities.owl.DescriptionMinimizer; import org.dllearner.utilities.owl.EvaluatedDescriptionSet; import org.dllearner.utilities.owl.PropertyContext; +import org.springframework.beans.factory.annotation.Autowired; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -71,9 +75,11 @@ * @author Jens Lehmann * */ +@ComponentAnn(name="ISLE", shortName="isle", version=0.5, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.") public class ISLE extends AbstractCELA { private static Logger logger = Logger.getLogger(CELOE.class); +// private CELOEConfigurator configurator; private boolean isRunning = false; private boolean stop = false; @@ -83,13 +89,17 @@ private LengthLimitedRefinementOperator operator; private DescriptionMinimizer minimizer; + @ConfigOption(name="useMinimizer", defaultValue="true", description="Specifies whether returned expressions should be minimised by removing those parts, which are not needed. (Basically the minimiser tries to find the shortest expression which is equivalent to the learned expression). 
Turning this feature off may improve performance.") + private boolean useMinimizer = true; // all nodes in the search tree (used for selecting most promising node) private TreeSet<OENode> nodes; +// private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime(); private NLPHeuristic heuristic = new NLPHeuristic(); // root of search tree private OENode startNode; // the class with which we start the refinement process + @ConfigOption(name = "startClass", defaultValue="owl:Thing", description="You can specify a start class for the algorithm. To do this, you have to use Manchester OWL syntax without using prefixes.") private Description startClass; // all descriptions in the search tree plus those which were too weak (for fast redundancy check) @@ -99,6 +109,7 @@ // if true, then each solution is evaluated exactly instead of approximately // private boolean exactBestDescriptionEvaluation = false; + @ConfigOption(name = "singleSuggestionMode", defaultValue="false", description="Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.") private boolean singleSuggestionMode; private Description bestDescription; private double bestAccuracy = Double.MIN_VALUE; @@ -115,11 +126,16 @@ private long nanoStartTime; - // important parameters + // important parameters (non-config options but internal) private double noise; - private double maxDepth; - private boolean filterFollowsFromKB; + + private boolean filterFollowsFromKB; + // less important parameters + // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity + // but it can also suppress quite useful expressions + private boolean forceMutualDifference = false; + // utility variables private String baseURI; private Map<String, String> prefixes; @@ -130,80 +146,165 @@ private int expressionTests = 0; private int minHorizExp = 0; private int maxHorizExp = 0; + + // TODO: turn those into config options + + 
// important: do not initialise those with empty sets + // null = no settings for allowance / ignorance + // empty set = allow / ignore nothing (it is often not desired to allow no class!) + Set<NamedClass> allowedConcepts = null; + Set<NamedClass> ignoredConcepts = null; - private double noisePercentage = 0.0; + @ConfigOption(name = "writeSearchTree", defaultValue="false", description="specifies whether to write a search tree") + private boolean writeSearchTree = false; + @ConfigOption(name = "searchTreeFile", defaultValue="log/searchTree.txt", description="file to use for the search tree") + private String searchTreeFile = "log/searchTree.txt"; + + @ConfigOption(name = "replaceSearchTree", defaultValue="false", description="specifies whether to replace the search tree in the log file after each run or append the new search tree") + private boolean replaceSearchTree = false; + + @ConfigOption(name = "maxNrOfResults", defaultValue="10", description="Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions).") private int maxNrOfResults = 10; - private boolean filterDescriptionsFollowingFromKB = true; + @ConfigOption(name = "noisePercentage", defaultValue="0.0", description="the (approximated) percentage of noise within the examples") + private double noisePercentage = 0.0; - private long maxExecutionTimeInSeconds = 10; + @ConfigOption(name = "filterDescriptionsFollowingFromKB", defaultValue="false", description="If true, then the results will not contain suggestions, which already follow logically from the knowledge base. 
Be careful, since this requires a potentially expensive consistency check for candidate solutions.") + private boolean filterDescriptionsFollowingFromKB = false; + @ConfigOption(name = "reuseExistingDescription", defaultValue="false", description="If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.") private boolean reuseExistingDescription = false; + + @ConfigOption(name = "maxClassExpressionTests", defaultValue="0", description="The maximum number of candidate hypothesis the algorithm is allowed to test (0 = no limit). The algorithm will stop afterwards. (The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") + private int maxClassExpressionTests = 0; + + @ConfigOption(name = "maxClassExpressionTestsAfterImprovement", defaultValue="0", description = "The maximum number of candidate hypothesis the algorithm is allowed after an improvement in accuracy (0 = no limit). The algorithm will stop afterwards. 
(The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)") + private int maxClassExpressionTestsAfterImprovement = 0; + @ConfigOption(defaultValue = "10", name = "maxExecutionTimeInSeconds", description = "maximum execution of the algorithm in seconds") + private int maxExecutionTimeInSeconds = 10; + + @ConfigOption(defaultValue = "0", name = "maxExecutionTimeInSecondsAfterImprovement", description = "maximum execution of the algorithm in seconds") + private int maxExecutionTimeInSecondsAfterImprovement = 0; + + @ConfigOption(name = "terminateOnNoiseReached", defaultValue="false", description="specifies whether to terminate when noise criterion is met") + private boolean terminateOnNoiseReached = false; + + @ConfigOption(name = "maxDepth", defaultValue="7", description="maximum depth of description") + private double maxDepth = 7; + + @ConfigOption(name = "stopOnFirstDefinition", defaultValue="false", description="algorithm will terminate immediately when a correct definition is found") + private boolean stopOnFirstDefinition = false; + + private int expressionTestCountLastImprovement; + + + @SuppressWarnings("unused") + private long timeLastImprovement = 0; + +// public CELOEConfigurator getConfigurator() { +// return configurator; +// } + + public ISLE() { + + } + public ISLE(AbstractLearningProblem problem, AbstractReasonerComponent reasoner) { super(problem, reasoner); +// configurator = new CELOEConfigurator(this); } public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() { Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? 
extends AbstractLearningProblem>>(); problems.add(AbstractLearningProblem.class); return problems; - } - - public static Collection<ConfigOption<?>> createConfigOptions() { - Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>(); - options.add(CommonConfigOptions.useAllConstructor()); - options.add(CommonConfigOptions.useExistsConstructor()); - options.add(CommonConfigOptions.useHasValueConstructor()); - options.add(CommonConfigOptions.useDataHasValueConstructor()); - options.add(CommonConfigOptions.valueFreqencyThreshold()); - options.add(CommonConfigOptions.useCardinalityRestrictions()); - options.add(CommonConfigOptions.cardinalityLimit()); - // by default, we do not use negation (should be configurable in GUI) - options.add(CommonConfigOptions.useNegation(false)); - options.add(CommonConfigOptions.useBooleanDatatypes()); - options.add(CommonConfigOptions.useDoubleDatatypes()); - options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10)); - options.add(CommonConfigOptions.getNoisePercentage()); - options.add(CommonConfigOptions.getMaxDepth(7)); - options.add(CommonConfigOptions.maxNrOfResults(10)); - options.add(new BooleanConfigOption("singleSuggestionMode", "Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.", false)); - options.add(CommonConfigOptions.getInstanceBasedDisjoints()); - options.add(new BooleanConfigOption("filterDescriptionsFollowingFromKB", "If true, then the results will not contain suggestions, which already follow logically from the knowledge base. 
Be careful, since this requires a potentially expensive consistency check for candidate solutions.", false)); - options.add(new BooleanConfigOption("reuseExistingDescription", "If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.", false)); - return options; } public static String getName() { - return "ISLE"; + return "CELOE"; } @Override public void init() throws ComponentInitException { + + if(maxExecutionTimeInSeconds != 0 && maxExecutionTimeInSecondsAfterImprovement != 0) { + maxExecutionTimeInSeconds = Math.min(maxExecutionTimeInSeconds, maxExecutionTimeInSecondsAfterImprovement); + } + + // compute used concepts/roles from allowed/ignored + // concepts/roles + Set<NamedClass> usedConcepts; +// Set<NamedClass> allowedConcepts = configurator.getAllowedConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getAllowedConcepts()); +// Set<NamedClass> ignoredConcepts = configurator.getIgnoredConcepts()==null ? 
null : CommonConfigMappings.getAtomicConceptSet(configurator.getIgnoredConcepts()); + if(allowedConcepts != null) { + // sanity check to control if no non-existing concepts are in the list + Helper.checkConcepts(reasoner, allowedConcepts); + usedConcepts = allowedConcepts; + } else if(ignoredConcepts != null) { + usedConcepts = Helper.computeConceptsUsingIgnoreList(reasoner, ignoredConcepts); + } else { + usedConcepts = Helper.computeConcepts(reasoner); + } + // copy class hierarchy and modify it such that each class is only // reachable via a single path - ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone(); +// ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone(); + ClassHierarchy classHierarchy = reasoner.getClassHierarchy().cloneAndRestrict(usedConcepts); classHierarchy.thinOutSubsumptionHierarchy(); + + // if no one injected a heuristic, we use a default one + if(heuristic == null) { + heuristic = new NLPHeuristic(); + } minimizer = new DescriptionMinimizer(reasoner); - startClass = Thing.instance; + // start at owl:Thing by default + if(startClass == null) { + startClass = Thing.instance; + } // singleSuggestionMode = configurator.getSingleSuggestionMode(); - + /* // create refinement operator -// operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator); - // create refinement operator if(operator == null) { operator = new RhoDRDown(); ((RhoDRDown)operator).setStartClass(startClass); - ((RhoDRDown)operator).setSubHierarchy(classHierarchy); ((RhoDRDown)operator).setReasoner(reasoner); - ((RhoDRDown)operator).init(); - } + } + ((RhoDRDown)operator).setSubHierarchy(classHierarchy); + ((RhoDRDown)operator).setObjectPropertyHierarchy(reasoner.getObjectPropertyHierarchy()); + ((RhoDRDown)operator).setDataPropertyHierarchy(reasoner.getDatatypePropertyHierarchy()); + ((RhoDRDown)operator).init(); + */ + // create a refinement operator and pass all configuration + // variables to it + if(operator == null) { + // we 
use a default operator and inject the class hierarchy for now + operator = new RhoDRDown(); + if(operator instanceof CustomStartRefinementOperator) { + ((CustomStartRefinementOperator)operator).setStartClass(startClass); + } + if(operator instanceof ReasoningBasedRefinementOperator) { + ((ReasoningBasedRefinementOperator)operator).setReasoner(reasoner); + } + operator.init(); + } + if(operator instanceof CustomHierarchyRefinementOperator) { + ((CustomHierarchyRefinementOperator)operator).setClassHierarchy(classHierarchy); + ((CustomHierarchyRefinementOperator)operator).setObjectPropertyHierarchy(reasoner.getObjectPropertyHierarchy()); + ((CustomHierarchyRefinementOperator)operator).setDataPropertyHierarchy(reasoner.getDatatypePropertyHierarchy()); + } + +// operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator); baseURI = reasoner.getBaseURI(); prefixes = reasoner.getPrefixes(); + if(writeSearchTree) { + File f = new File(searchTreeFile ); + Files.clearFile(f); + } bestEvaluatedDescriptions = new EvaluatedDescriptionSet(maxNrOfResults); @@ -211,12 +312,18 @@ // we put important parameters in class variables noise = noisePercentage/100d; +// System.out.println("noise " + noise); // maxDepth = configurator.getMaxDepth(); // (filterFollowsFromKB is automatically set to false if the problem // is not a class learning problem - filterFollowsFromKB = filterDescriptionsFollowingFromKB - && isClassLearningProblem; + filterFollowsFromKB = filterDescriptionsFollowingFromKB && isClassLearningProblem; +// Set<Description> concepts = operator.refine(Thing.instance, 5); +// for(Description concept : concepts) { +// System.out.println(concept); +// } +// System.out.println("refinements of thing: " + concepts.size()); + // actions specific to ontology engineering if(isClassLearningProblem) { ClassLearningProblem problem = (ClassLearningProblem) learningProblem; @@ -230,7 +337,7 @@ // superfluous to add super classes in this case) if(isEquivalenceProblem) { 
Set<Description> existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe); - if(reuseExistingDescription && (existingDefinitions.size() > 0)) { + if(reuseExistingDescription && (existingDefinitions.size() > 0)) { // the existing definition is reused, which in the simplest case means to // use it as a start class or, if it is already too specific, generalise it @@ -246,7 +353,10 @@ LinkedList<Description> startClassCandidates = new LinkedList<Description>(); startClassCandidates.add(existingDefinition); - ((RhoDRDown)operator).setDropDisjuncts(true); + // hack for RhoDRDown + if(operator instanceof RhoDRDown) { + ((RhoDRDown)operator).setDropDisjuncts(true); + } LengthLimitedRefinementOperator upwardOperator = (LengthLimitedRefinementOperator) new OperatorInverter(operator); // use upward refinement until we find an appropriate start class @@ -279,7 +389,9 @@ // System.out.println("existing def: " + existingDefinition); // System.out.println(reasoner.getIndividuals(existingDefinition)); - ((RhoDRDown)operator).setDropDisjuncts(false); + if(operator instanceof RhoDRDown) { + ((RhoDRDown)operator).setDropDisjuncts(false); + } } else { Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe); @@ -322,6 +434,10 @@ return bestEvaluatedDescriptions.getSet(); } + public double getCurrentlyBestAccuracy() { + return bestEvaluatedDescriptions.getBest().getAccuracy(); + } + @Override public void start() { // System.out.println(configurator.getMaxExecutionTimeInSeconds()); @@ -339,10 +455,13 @@ int loop = 0; while (!terminationCriteriaSatisfied()) { +// System.out.println("loop " + loop); if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) { highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy(); - logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); + 
expressionTestCountLastImprovement = expressionTests; + timeLastImprovement = System.nanoTime(); + logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription())); } // chose best node according to heuristics @@ -358,12 +477,16 @@ // for(Description refinement : refinements) { // System.out.println("refinement: " + refinement); // } +// if((loop+1) % 500 == 0) { +// System.out.println(getMinimumHorizontalExpansion() + " - " + getMaximumHorizontalExpansion()); +// System.exit(0); +// } while(refinements.size() != 0) { // pick element from set Description refinement = refinements.pollFirst(); int length = refinement.getLength(); - + // we ignore all refinements with lower length and too high depth // (this also avoids duplicate node children) if(length > horizExp && refinement.getDepth() <= maxDepth) { @@ -385,6 +508,24 @@ updateMinMaxHorizExp(nextNode); + // writing the search tree (if configured) + if (writeSearchTree) { + String treeString = "best node: " + bestEvaluatedDescriptions.getBest() + "\n"; + if (refinements.size() > 1) { + treeString += "all expanded nodes:\n"; + for (Description n : refinements) { + treeString += " " + n + "\n"; + } + } + treeString += startNode.toTreeString(baseURI); + treeString += "\n"; + + if (replaceSearchTree) + Files.createFile(new File(searchTreeFile), treeString); + else + Files.appendToFile(new File(searchTreeFile), treeString); + } + // System.out.println(loop); loop++; } @@ -392,7 +533,7 @@ if (stop) { logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n"); } else { - logger.info("Algorithm terminated successfully ("+expressionTests+" descriptions tested). 
" + nodes.size() + " nodes in the search tree.\n"); + logger.info("Algorithm terminated successfully (time: " + Helper.prettyPrintNanoSeconds(System.nanoTime()-nanoStartTime) + ", "+expressionTests+" descriptions tested, " + nodes.size() + " nodes in the search tree).\n"); logger.info(reasoner.toString()); } @@ -445,7 +586,7 @@ // returns true if node was added and false otherwise private boolean addNode(Description description, OENode parentNode) { -// System.out.println(description); +// System.out.println("d: " + description); // redundancy check (return if redundant) boolean nonRedundant = descriptions.add(description); @@ -498,6 +639,8 @@ return true; } +// System.out.println("description " + description + " accuracy " + accuracy); + // maybe add to best descriptions (method keeps set size fixed); // we need to make sure that this does not get called more often than // necessary since rewriting is expensive @@ -510,30 +653,42 @@ (accuracy >= accThreshold && description.getLength() < worst.getDescriptionLength())); } +// System.out.println(isCandidate); + // System.out.println("Test4 " + new Date()); if(isCandidate) { + Description niceDescription = rewriteNode(node); ConceptTransformation.transformToOrderedForm(niceDescription, descriptionComparator); // Description niceDescription = node.getDescription(); // another test: none of the other suggested descriptions should be // a subdescription of this one unless accuracy is different + // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing + // A is not a candidate; on the other hand this suppresses many meaningless extensions of A boolean shorterDescriptionExists = false; - for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { - if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { - shorterDescriptionExists = true; - break; - } + if(forceMutualDifference) 
{ + for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { + if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { +// System.out.println("shorter: " + ed.getDescription()); + shorterDescriptionExists = true; + break; + } + } } +// System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription); + if(!shorterDescriptionExists) { if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) { +// System.out.println("Test2"); bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem); // System.out.println("acc: " + accuracy); // System.out.println(bestEvaluatedDescriptions); } } +// System.out.println(bestEvaluatedDescriptions.getSet().size()); } // System.out.println("Test5 " + new Date()); @@ -630,14 +785,26 @@ private Description rewriteNode(OENode node) { Description description = node.getDescription(); // minimize description (expensive!) 
- also performes some human friendly rewrites - Description niceDescription = minimizer.minimizeClone(description); + Description niceDescription; + if(useMinimizer) { + niceDescription = minimizer.minimizeClone(description); + } else { + niceDescription = description; + } // replace \exists r.\top with \exists r.range(r) which is easier to read for humans ConceptTransformation.replaceRange(niceDescription, reasoner); return niceDescription; } private boolean terminationCriteriaSatisfied() { - return stop || ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l)); + return + stop || + (maxClassExpressionTestsAfterImprovement != 0 && (expressionTests - expressionTestCountLastImprovement >= maxClassExpressionTestsAfterImprovement)) || + (maxClassExpressionTests != 0 && (expressionTests >= maxClassExpressionTests)) || + (maxExecutionTimeInSecondsAfterImprovement != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSecondsAfterImprovement*1000000000l))) || + (maxExecutionTimeInSeconds != 0 && ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l))) || + (terminateOnNoiseReached && (100*getCurrentlyBestAccuracy()>=100-noisePercentage)) || + (stopOnFirstDefinition && (getCurrentlyBestAccuracy() >= 1)); } private void reset() { @@ -740,6 +907,196 @@ */ public int getClassExpressionTests() { return expressionTests; + } + + public LengthLimitedRefinementOperator getOperator() { + return operator; + } + + @Autowired(required=false) + public void setOperator(LengthLimitedRefinementOperator operator) { + this.operator = operator; + } + + public Description getStartClass() { + return startClass; + } + + public void setStartClass(Description startClass) { + this.startClass = startClass; + } + + public Set<NamedClass> getAllowedConcepts() { + return allowedConcepts; + } + + public void setAllowedConcepts(Set<NamedClass> allowedConcepts) { + this.allowedConcepts = allowedConcepts; + } + + public Set<NamedClass> 
getIgnoredConcepts() { + return ignoredConcepts; + } + + public void setIgnoredConcepts(Set<NamedClass> ignoredConcepts) { + this.ignoredConcepts = ignoredConcepts; + } + + public boolean isWriteSearchTree() { + return writeSearchTree; + } + + public void setWriteSearchTree(boolean writeSearchTree) { + this.writeSearchTree = writeSearchTree; + } + + public String getSearchTreeFile() { + return searchTreeFile; + } + + public void setSearchTreeFile(String searchTreeFile) { + this.searchTreeFile = searchTreeFile; + } + + public int getMaxNrOfResults() { + return maxNrOfResults; + } + + public void setMaxNrOfResults(int maxNrOfResults) { + this.maxNrOfResults = maxNrOfResults; + } + + public double getNoisePercentage() { + return noisePercentage; + } + + public void setNoisePercentage(double noisePercentage) { + this.noisePercentage = noisePercentage; + } + + public boolean isFilterDescriptionsFollowingFromKB() { + return filterDescriptionsFollowingFromKB; + } + + public void setFilterDescriptionsFollowingFromKB(boolean filterDescriptionsFollowingFromKB) { + this.filterDescriptionsFollowingFromKB = filterDescriptionsFollowingFromKB; + } + + public boolean isReplaceSearchTree() { + return replaceSearchTree; + } + + public void setReplaceSearchTree(boolean replaceSearchTree) { + this.replaceSearchTree = replaceSearchTree; + } + + public int getMaxClassDescriptionTests() { + return maxClassExpressionTests; + } + + public void setMaxClassDescriptionTests(int maxClassDescriptionTests) { + this.maxClassExpressionTests = maxClassDescriptionTests; + } + + public int getMaxExecutionTimeInSeconds() { + return maxExecutionTimeInSeconds; + } + + public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) { + this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; + } + + public boolean isTerminateOnNoiseReached() { + return terminateOnNoiseReached; + } + + public void setTerminateOnNoiseReached(boolean terminateOnNoiseReached) { + this.terminateOnNoiseReached = 
terminateOnNoiseReached; + } + + public boolean isReuseExistingDescription() { + return reuseExistingDescription; + } + + public void setReuseExistingDescription(boolean reuseExistingDescription) { + this.reuseExistingDescription = reuseExistingDescription; + } + + public boolean isUseMinimizer() { + return useMinimizer; + } + + public void setUseMinimizer(boolean useMinimizer) { + this.useMinimizer = useMinimizer; + } + + public NLPHeuristic getHeuristic() { + return heuristic; + } + + @Autowired(required=false) + public void setHeuristic(NLPHeuristic heuristic) { + this.heuristic = heuristic; + } + + public int getMaxClassExpressionTestsWithoutImprovement() { + return maxClassExpressionTestsAfterImprovement; + } + + public void setMaxClassExpressionTestsWithoutImprovement( + int maxClassExpressionTestsWithoutImprovement) { + this.maxClassExpressionTestsAfterImprovement = maxClassExpressionTestsWithoutImprovement; + } + + public int getMaxExecutionTimeInSecondsAfterImprovement() { + return maxExecutionTimeInSecondsAfterImprovement; + } + + public void setMaxExecutionTimeInSecondsAfterImprovement( + int maxExecutionTimeInSecondsAfterImprovement) { + this.maxExecutionTimeInSecondsAfterImprovement = maxExecutionTimeInSecondsAfterImprovement; } + public boolean isSingleSuggestionMode() { + return singleSuggestionMode; + } + + public void setSingleSuggestionMode(boolean singleSuggestionMode) { + this.singleSuggestionMode = singleSuggestionMode; + } + + public int getMaxClassExpressionTests() { + return maxClassExpressionTests; + } + + public void setMaxClassExpressionTests(int maxClassExpressionTests) { + this.maxClassExpressionTests = maxClassExpressionTests; + } + + public int getMaxClassExpressionTestsAfterImprovement() { + return maxClassExpressionTestsAfterImprovement; + } + + public void setMaxClassExpressionTestsAfterImprovement( + int maxClassExpressionTestsAfterImprovement) { + this.maxClassExpressionTestsAfterImprovement = 
maxClassExpressionTestsAfterImprovement; + } + + public double getMaxDepth() { + return maxDepth; + } + + public void setMaxDepth(double maxDepth) { + this.maxDepth = maxDepth; + } + + + public boolean isStopOnFirstDefinition() { + return stopOnFirstDefinition; + } + + public void setStopOnFirstDefinition(boolean stopOnFirstDefinition) { + this.stopOnFirstDefinition = stopOnFirstDefinition; + } + } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LabelEntityTextRetriever.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,95 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.kb.OWLAPIOntology; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + + +/** + * @author Lorenz Buehmann + * + */ +public class LabelEntityTextRetriever implements EntityTextRetriever{ + + private OWLOntology ontology; + private OWLOntologyManager manager; + private OWLDataFactory df = new OWLDataFactoryImpl(); + + 
private OWLAnnotationProperty label = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + + private String language = "en"; + private double weight = 1d; + + private boolean useShortFormFallback = true; + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + + public LabelEntityTextRetriever(OWLOntology ontology) { + this.ontology = ontology; + } + + public LabelEntityTextRetriever(OWLAPIOntology ontology) { + this.ontology = ontology.createOWLOntology(manager); + } + + /** + * @param language the language to set + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Whether to use the short form of the IRI as fallback, if no label is given. + * @param useShortFormFallback the useShortFormFallback to set + */ + public void setUseShortFormFallback(boolean useShortFormFallback) { + this.useShortFormFallback = useShortFormFallback; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.EntityTextRetriever#getRelevantText(org.dllearner.core.owl.Entity) + */ + @Override + public Map<String, Double> getRelevantText(Entity entity) { + Map<String, Double> textWithWeight = new HashMap<String, Double>(); + + OWLEntity e = OWLAPIConverter.getOWLAPIEntity(entity); + + Set<OWLAnnotation> annotations = e.getAnnotations(ontology, label); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + String label = val.getLiteral(); + textWithWeight.put(label, weight); + } + } + } + + if(textWithWeight.isEmpty() && useShortFormFallback){ + textWithWeight.put(sfp.getShortForm(IRI.create(entity.getURI())), weight); + } + + return textWithWeight; + } +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneBasedRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,145 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLOntology; + + +public abstract class LuceneBasedRelevance implements Relevance{ + + private EntityTextRetriever textRetriever; + private LuceneSearcher searcher; + private OWLOntology ontology; + private Set<OWLEntity> entities; + +// public void printScores() throws Exception { +// for( OWLClass c: m_classes ) +// { +// Map<OWLEntity,Double> hmEntity2Score = getEntityRelevance(c); +// // normalization per class? 
+// hmEntity2Score = normalize( hmEntity2Score ); +// for( OWLEntity e : hmEntity2Score.keySet() ) +// { +// double dScore = hmEntity2Score.get(e); +// System.out.println( "P( "+ getLabel(c) +", "+ getLabel(e) +" ) = "+ dScore ); +// } +// } +// m_searcher.close(); +// } + + public LuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { + this.searcher = searcher; + this.ontology = ontology; + this.textRetriever = textRetriever; + + entities = new HashSet<OWLEntity>(); + entities.addAll(ontology.getClassesInSignature()); + entities.addAll(ontology.getObjectPropertiesInSignature()); + entities.addAll(ontology.getDataPropertiesInSignature()); + } + + public Map<OWLEntity,Double> normalizeMinMax( Map<OWLEntity,Double> hmEntity2Score ){ + Map<OWLEntity,Double> hmEntity2Norm = new HashMap<OWLEntity,Double>(); + double dMin = Double.MAX_VALUE; + Double dMax = Double.MIN_VALUE; + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + if( dValue < dMin ){ + dMin = dValue; + } + else if( dValue > dMax ){ + dMax = dValue; + } + } + // System.out.println( "min="+ dMin +" max="+ dMax ); + for( OWLEntity e : hmEntity2Score.keySet() ) + { + double dValue = hmEntity2Score.get(e); + double dNorm = 0; + if( dMin == dMax ){ + dNorm = dValue; + } + else { + dNorm = ( dValue - dMin ) / ( dMax - dMin ); + } + hmEntity2Norm.put( e, dNorm ); + } + return hmEntity2Norm; + } + + @Override + public Map<Entity,Double> getEntityRelevance(Entity entity) throws Exception { + // computes relevance of entity for this class + // conditional probability: P(C,E)=f(C,E)/f(E) + // PMI(C,E)=log( P(C,E) / P(C) ) + Map<Entity, Double> hmEntity2Score = new HashMap<Entity, Double>(); + Map<String, Double> relevantText = textRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String text = entry.getKey(); + Double value = entry.getValue(); + + String sClass = text; + int 
nrOfDocumentsA = searcher.count(sClass); + int nrOfDocuments = searcher.indexSize(); + + for (OWLEntity otherEntity : entities) { + + Map<String, Double> otherRelevantText = textRetriever.getRelevantText(OWLAPIConverter + .getEntity(otherEntity)); + + for (Entry<String, Double> entry2 : otherRelevantText.entrySet()) { + String otherText = entry2.getKey(); + Double otherValue = entry2.getValue(); + + String sEntity = otherText; + int nrOfDocumentsB = searcher.count(sEntity); + int nrOfDocumentsAB = searcher.count(sClass + " AND " + sEntity); + // double dPEntity = (double)iEntity / (double)iAll; + + double score = computeScore(nrOfDocuments, nrOfDocumentsA, nrOfDocumentsB, nrOfDocumentsAB); + if (!Double.isNaN(score)){// && !Double.isInfinite(score)) { + hmEntity2Score.put(OWLAPIConverter.getEntity(otherEntity), score); + } + } + } + } + + return hmEntity2Score; + } + + /** + * Computes the score which is returned in {@link org.dllearner.algorithms.isle.LuceneBasedRelevance#getEntityRelevance} + * @return + */ + public abstract double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB); + +} \ No newline at end of file Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -40,13 +40,14 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class LuceneSearcher { - private String INDEX = "index"; - private String FIELD = "contents"; + private String INDEX = 
"/home/me/DBpedia-Lucene-Index"; + private String FIELD = "short-abstract"; private IndexReader m_reader = null; private IndexSearcher m_searcher = null; @@ -61,12 +62,12 @@ LuceneSearcher searcher = new LuceneSearcher(); List<Document> docs = searcher.search( sQuery ); System.out.println( "\nquery='"+ sQuery +"' all="+ searcher.indexSize() +" hits="+ docs.size() ); - for( Document doc : docs ) - { -// String sDoc = doc.toString(); - float score = searcher.getScore( doc ); - System.out.println( "score="+ score +" doc="+ doc ); - } +// for( Document doc : docs ) +// { +//// String sDoc = doc.toString(); +// float score = searcher.getScore( doc ); +// System.out.println( "score="+ score +" doc="+ doc ); +// } } @SuppressWarnings("deprecation") @@ -77,6 +78,28 @@ m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); } + public LuceneSearcher(IndexReader indexReader) throws Exception { + m_reader = indexReader; + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + + public LuceneSearcher(Directory directory, String seachField) throws Exception { + this.FIELD = seachField; + m_reader = DirectoryReader.open(directory); + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + + public LuceneSearcher(String indexDirectory) throws Exception { + m_reader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); + m_searcher = new IndexSearcher( m_reader ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); + } + public void close() throws Exception { m_reader.close(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-08 13:49:40 UTC (rev 4014) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/NLPHeuristic.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -20,8 +20,13 @@ package org.dllearner.algorithms.isle; import java.util.Comparator; +import java.util.Map; import org.dllearner.algorithms.celoe.OENode; +import org.dllearner.core.Component; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.config.ConfigOption; +import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.ConceptComparator; /** @@ -31,7 +36,8 @@ * @author Jens Lehmann * */ -public class NLPHeuristic implements Comparator<OENode> { +public class NLPHeuristic implements Component, Comparator<OENode>{ + // strong penalty for long descriptions private double expansionPenaltyFactor = 0.1; // bonus for being better than parent node @@ -42,7 +48,23 @@ // syntactic comparison as final comparison criterion private ConceptComparator conceptComparator = new ConceptComparator(); + @ConfigOption(name = "startNodeBonus", defaultValue="0.1") + private double startNodeBonus = 0.1; + + private Map<Entity, Double> entityRelevance; + + public NLPHeuristic() {} + + public NLPHeuristic(Map<Entity,Double> entityRelevance) { + this.entityRelevance = entityRelevance; + } + @Override + public void init() throws ComponentInitException { + + } + + @Override public int compare(OENode node1, OENode node2) { // System.out.println("node1 " + node1); // System.out.println("score: " + getNodeScore(node1)); @@ -67,6 +89,9 @@ if(!node.isRoot()) { double parentAccuracy = node.getParent().getAccuracy(); score += (parentAccuracy - score) * gainBonusFactor; + // the root node also gets a bonus to possibly spawn useful disjunctions + } else { + score += startNodeBonus; } // penalty for horizontal expansion score -= node.getHorizontalExpansion() * expansionPenaltyFactor; @@ -77,6 +102,48 @@ public double 
getExpansionPenaltyFactor() { return expansionPenaltyFactor; + } + + public double getGainBonusFactor() { + return gainBonusFactor; + } + + public void setGainBonusFactor(double gainBonusFactor) { + this.gainBonusFactor = gainBonusFactor; + } + + public double getNodeRefinementPenalty() { + return nodeRefinementPenalty; + } + + public void setNodeRefinementPenalty(double nodeRefinementPenalty) { + this.nodeRefinementPenalty = nodeRefinementPenalty; + } + + public void setExpansionPenaltyFactor(double expansionPenaltyFactor) { + this.expansionPenaltyFactor = expansionPenaltyFactor; + } + + public double getStartNodeBonus() { + return startNodeBonus; + } + + public void setStartNodeBonus(double startNodeBonus) { + this.startNodeBonus = startNodeBonus; } + + /** + * @param entityRelevance the entityRelevance to set + */ + public void setEntityRelevance(Map<Entity, Double> entityRelevance) { + this.entityRelevance = entityRelevance; + } + + /** + * @return the entityRelevance + */ + public Map<Entity, Double> getEntityRelevance() { + return entityRelevance; + } } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/OWLOntologyLuceneIndex.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,141 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import 
org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotation; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneIndex { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneIndex(OWLOntology ontology, String searchField) throws IOException { + this.ontology = ontology; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + buildIndex(); + } + + public OWLOntologyLuceneIndex(OWLOntology ontology, OWLAnnotationProperty annotationProperty) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + 
schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + + buildIndex(); + } + + /** + * @return the ontology + */ + public OWLOntology getOntology() { + return ontology; + } + + /** + * @return the directory + */ + public Directory getDirectory() { + return directory; + } + + /** + * @param annotationProperty the annotationProperty to set + */ + public void setAnnotationProperty(OWLAnnotationProperty annotationProperty) { + this.annotationProperty = annotationProperty; + } + + /** + * @param annotationProperty the annotationProperty to set + */ + public void setAnnotationProperty(String annotationPropertyIRI) { + this.annotationProperty = df.getOWLAnnotationProperty(IRI.create(annotationPropertyIRI)); + } + + public void buildIndex() throws IOException{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." 
); + + Set<Document> luceneDocuments = new HashSet<Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + Document luceneDocument = new Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMILuceneBasedRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import org.semanticweb.owlapi.model.OWLOntology; + + +public class PMILuceneBasedRelevance extends LuceneBasedRelevance{ + + /** + * @param ontology + * @param searcher + * @param textRetriever + */ + public PMILuceneBasedRelevance(OWLOntology ontology, LuceneSearcher searcher, EntityTextRetriever textRetriever) { + super(ontology, searcher, textRetriever); + + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.LuceneBasedRelevance#computeScore(int, int, int, int) + */ + @Override + public double computeScore(int nrOfDocuments, int nrOfDocumentsA, int nrOfDocumentsB, int nrOfDocumentsAB) { + double dPClass = nrOfDocuments == 0 ? 0 : ((double) nrOfDocumentsA / (double) nrOfDocuments); + double dPClassEntity = nrOfDocumentsB == 0 ? 
0 : (double) nrOfDocumentsAB / (double) nrOfDocumentsB; + double pmi = Math.log(dPClassEntity / dPClass); + return pmi; + } +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevance.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,108 @@ +package org.dllearner.algorithms.isle; + + +import java.io.*; +import java.util.*; + +public class PMIRelevance { + + private LuceneSearcher m_searcher = null; + + private Set<String> m_classes; + private Set<String> m_individuals; + + + public static void main( String args[] ) throws Exception { + PMIRelevance relevance = new PMIRelevance( args[0], args[1] ); + relevance.printScores(); + } + + public void printScores() throws Exception { + for( String sInd: m_individuals ) + { + Map<String,Double> hmClass2Score = getClassRelevance( sInd ); + for( String sClass : hmClass2Score.keySet() ) + { + double dScore = hmClass2Score.get( sClass ); + if( dScore > 0 ){ + System.out.println( "PMI( "+ sInd +" , "+ sClass +" ) = "+ dScore ); + } + } + } + /* for( String sClass: m_classes ) + { + Map<String,Double> hmInd2Score = getIndividualRelevance( sClass ); + for( String sInd : hmInd2Score.keySet() ) + { + double dScore = hmInd2Score.get( sInd ); + if( dScore > 0 ){ + System.out.println( "P( "+ sClass +" | "+ sInd +" ) = "+ dScore ); + } + } + } */ + m_searcher.close(); + } + + public PMIRelevance( String sClasses, String sIndividuals ) throws Exception { + m_searcher = new LuceneSearcher(); + m_classes = read( sClasses ); + m_individuals = read( sIndividuals ); + } + + public Map<String,Double> getClassRelevance( String sIndividual ) throws Exception { + // computes relevance of classes for this individual + // conditional 
probability: P(I|C)=f(I,C)/f(C) + // PMI(I,C)=log( P(I|C) / P(I) ) + Map<String,Double> hmClass2Score = new HashMap<String,Double>(); + int iInd = m_searcher.count( sIndividual ); + int iAll = m_searcher.indexSize(); + double dPInd = (double) iInd / (double) iAll; + for( String sClass: m_classes ) + { + int iClass = m_searcher.count( sClass ); + int iIndClass = m_searcher.count( sIndividual +" AND "+ sClass ); + double dPIndClass = (double) iIndClass / (double)iClass; + double dPMI = Math.log( dPIndClass / dPInd ); + hmClass2Score.put( sClass, dPMI ); + } + return hmClass2Score; + } + + public Map<String,Double> getIndividualRelevance( String sClass ) throws Exception { + // computes relevance of individuals for this class + // conditional probability: P(C|I)=f(C,I)/f(I) + // PMI(C|I)=log( P(C|I) / P(C) ) + Map<String,Double> hmInd2Score = new HashMap<String,Double>(); + int iClass = m_searcher.count( sClass ); + int iAll = m_searcher.indexSize(); + double dPClass = (double) iClass / (double) iAll; + for( String sInd: m_individuals ) + { + int iInd = m_searcher.count( sInd ); + int iIndClass = m_searcher.count( sClass +" AND "+ sInd ); + double dPClassInd = (double) iIndClass / (double)iInd; + double dPMI = Math.log( dPClassInd / dPClass ); + hmInd2Score.put( sInd, dPMI ); + } + return hmInd2Score; + } + + private static Set<String> read( String sFile ) throws Exception { + File file = new File( sFile ); + Set<String> lines = new HashSet<String>(); + BufferedReader reader = null; + try { + reader = new BufferedReader( new FileReader( file ) ); + String sLine = null; + while( ( sLine = reader.readLine() ) != null ) { + lines.add( sLine.trim() ); + } + } + finally { + if( reader != null ) { + reader.close(); + } + } + return lines; + } +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/PMIRelevances.java 2013-07-08 13:51:31 UTC (rev 4015) @@ -0,0 +1,165 @@ +/** + * Copyright (C) 2007-2011, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLClass; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLNamedObject; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.O... [truncated message content] |
From: <lor...@us...> - 2013-07-08 13:49:43
|
Revision: 4014 http://sourceforge.net/p/dl-learner/code/4014 Author: lorenz_b Date: 2013-07-08 13:49:40 +0000 (Mon, 08 Jul 2013) Log Message: ----------- Added example for ISLE. Added Paths: ----------- trunk/examples/isle/ trunk/examples/isle/catalog-v001.xml trunk/examples/isle/father_labeled.owl Added: trunk/examples/isle/catalog-v001.xml =================================================================== --- trunk/examples/isle/catalog-v001.xml (rev 0) +++ trunk/examples/isle/catalog-v001.xml 2013-07-08 13:49:40 UTC (rev 4014) @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"> + <group id="Folder Repository, directory=, recursive=true, Auto-Update=true, version=2" prefer="public" xml:base=""> + <uri id="Automatically generated entry, Timestamp=1373287083350" name="http://example.com/father" uri="father_labeled.owl"/> + </group> +</catalog> Added: trunk/examples/isle/father_labeled.owl =================================================================== --- trunk/examples/isle/father_labeled.owl (rev 0) +++ trunk/examples/isle/father_labeled.owl 2013-07-08 13:49:40 UTC (rev 4014) @@ -0,0 +1,169 @@ +<?xml version="1.0"?> + + +<!DOCTYPE rdf:RDF [ + <!ENTITY father "http://example.com/father#" > + <!ENTITY owl "http://www.w3.org/2002/07/owl#" > + <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" > + <!ENTITY owl2xml "http://www.w3.org/2006/12/owl2-xml#" > + <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" > + <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" > +]> + + +<rdf:RDF xmlns="http://example.com/father#" + xml:base="http://example.com/father" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:owl2xml="http://www.w3.org/2006/12/owl2-xml#" + xmlns:owl="http://www.w3.org/2002/07/owl#" + xmlns:xsd="http://www.w3.org/2001/XMLSchema#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:father="http://example.com/father#"> + 
<owl:Ontology rdf:about="http://example.com/father"/> + + + + <!-- + /////////////////////////////////////////////////////////////////////////////////////// + // + // Object Properties + // + /////////////////////////////////////////////////////////////////////////////////////// + --> + + + + + <!-- http://example.com/father#hasChild --> + + <owl:ObjectProperty rdf:about="&father;hasChild"> + <rdfs:label xml:lang="en">has child</rdfs:label> + <rdfs:domain rdf:resource="&father;person"/> + <rdfs:range rdf:resource="&father;person"/> + </owl:ObjectProperty> + + + + <!-- + /////////////////////////////////////////////////////////////////////////////////////// + // + // Classes + // + /////////////////////////////////////////////////////////////////////////////////////// + --> + + + + + <!-- http://example.com/father#father --> + + <owl:Class rdf:about="&father;father"> + <rdfs:label xml:lang="en">person which has at least 1 child</rdfs:label> + <rdfs:subClassOf rdf:resource="&father;male"/> + </owl:Class> + + + + <!-- http://example.com/father#female --> + + <owl:Class rdf:about="&father;female"> + <rdfs:label xml:lang="en">female</rdfs:label> + <rdfs:subClassOf rdf:resource="&father;person"/> + <owl:disjointWith rdf:resource="&father;male"/> + </owl:Class> + + + + <!-- http://example.com/father#male --> + + <owl:Class rdf:about="&father;male"> + <rdfs:label xml:lang="en">male</rdfs:label> + <rdfs:subClassOf rdf:resource="&father;person"/> + </owl:Class> + + + + <!-- http://example.com/father#person --> + + <owl:Class rdf:about="&father;person"> + <rdfs:label xml:lang="en">Person</rdfs:label> + <rdfs:subClassOf rdf:resource="&owl;Thing"/> + </owl:Class> + + + + <!-- http://www.w3.org/2002/07/owl#Thing --> + + <owl:Class rdf:about="&owl;Thing"/> + + + + <!-- + /////////////////////////////////////////////////////////////////////////////////////// + // + // Individuals + // + /////////////////////////////////////////////////////////////////////////////////////// + --> + 
+ + + + <!-- http://example.com/father#anna --> + + <owl:NamedIndividual rdf:about="&father;anna"> + <rdf:type rdf:resource="&father;female"/> + <hasChild rdf:resource="&father;heinz"/> + </owl:NamedIndividual> + + + + <!-- http://example.com/father#heinz --> + + <owl:NamedIndividual rdf:about="&father;heinz"> + <rdf:type rdf:resource="&father;male"/> + </owl:NamedIndividual> + + + + <!-- http://example.com/father#markus --> + + <owl:NamedIndividual rdf:about="&father;markus"> + <rdf:type rdf:resource="&father;father"/> + <rdf:type rdf:resource="&father;male"/> + <hasChild rdf:resource="&father;anna"/> + </owl:NamedIndividual> + + + + <!-- http://example.com/father#martin --> + + <owl:NamedIndividual rdf:about="&father;martin"> + <rdf:type rdf:resource="&father;father"/> + <rdf:type rdf:resource="&father;male"/> + <hasChild rdf:resource="&father;heinz"/> + </owl:NamedIndividual> + + + + <!-- http://example.com/father#michelle --> + + <owl:NamedIndividual rdf:about="&father;michelle"> + <rdf:type rdf:resource="&father;female"/> + </owl:NamedIndividual> + + + + <!-- http://example.com/father#stefan --> + + <owl:NamedIndividual rdf:about="&father;stefan"> + <rdf:type rdf:resource="&father;father"/> + <rdf:type rdf:resource="&father;male"/> + <hasChild rdf:resource="&father;markus"/> + </owl:NamedIndividual> +</rdf:RDF> + + + +<!-- Generated by the OWL API (version 3.4.2) http://owlapi.sourceforge.net --> + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-01 19:05:53
|
Revision: 4013 http://sourceforge.net/p/dl-learner/code/4013 Author: lorenz_b Date: 2013-07-01 19:05:50 +0000 (Mon, 01 Jul 2013) Log Message: ----------- Minor changes in SPARQL reasonr. Updated LUCENE libs for ISLE: Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/pom.xml 2013-07-01 19:05:50 UTC (rev 4013) @@ -215,6 +215,14 @@ <artifactId>lucene-core</artifactId> </dependency> <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-queryparser</artifactId> + </dependency> + <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> </dependency> @@ -272,7 +280,7 @@ <dependency> <groupId>org.aksw.jena-sparql-api</groupId> <artifactId>jena-sparql-api-core</artifactId> - <version>2.10.0-4-SNAPSHOT</version> + <version>2.10.0-5-SNAPSHOT</version> </dependency> </dependencies> 
<dependencyManagement> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -43,6 +43,6 @@ * @param entity The entity to handle. * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. */ - public Map<String, Integer> getRelevantText(Entity entity); + public Map<String, Double> getRelevantText(Entity entity); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -25,8 +25,10 @@ import java.io.IOException; import java.util.Date; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -51,12 +53,12 @@ Date start = new Date(); try { - IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), - new StandardAnalyzer( Version.LUCENE_CURRENT ), true, IndexWriter.MaxFieldLength.LIMITED ); + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), indexWriterConfig); System.out.println( "Creating index ..." 
); index( writer, docDir ); System.out.println( "Optimizing index ..." ); - writer.optimize(); writer.close(); Date end = new Date(); System.out.println( end.getTime() - start.getTime() + " total milliseconds" ); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -32,13 +32,14 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Searcher; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -48,7 +49,7 @@ private String FIELD = "contents"; private IndexReader m_reader = null; - private Searcher m_searcher = null; + private IndexSearcher m_searcher = null; private Analyzer m_analyzer = null; private QueryParser m_parser = null; @@ -70,10 +71,10 @@ @SuppressWarnings("deprecation") public LuceneSearcher() throws Exception { - m_reader = IndexReader.open( FSDirectory.open( new File( INDEX ) ), true ); + m_reader = DirectoryReader.open( FSDirectory.open( new File( INDEX ) )); m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_CURRENT ); - m_parser = new QueryParser( 
Version.LUCENE_CURRENT, FIELD, m_analyzer ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); } public void close() throws Exception { @@ -139,13 +140,13 @@ return true; } @Override - public void setNextReader( IndexReader reader, int docBase ) throws IOException { - this.docBase = docBase; - } - @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + this.docBase = context.docBase; + } }; m_searcher.search( query, collector ); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -364,7 +364,7 @@ System.out.print(i++ + ": "); URI uri = entry.getPhysicalURI(); // if(uri.toString().startsWith("http://rest.bioontology.org/bioportal/ontologies/download/42764")){ - if (!ontologyProcessed(uri)) { + if (!ontologyProcessed(uri)) {//if(entry.getOntologyShortName().equals("00698"))continue; System.out.print("Loading \"" + entry.getOntologyShortName() + "\" from "+ uri); try { manager = OWLManager.createOWLOntologyManager(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2013-07-01 
19:05:50 UTC (rev 4013) @@ -116,7 +116,7 @@ //compute the overlap if exist Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); - System.out.println(query);ResultSet rs = executeSelectQuery(query); + ResultSet rs = executeSelectQuery(query); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); Modified: trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -90,7 +90,7 @@ private URI physicalURI; public RepositoryEntry(URI ontologyIRI) { - this.ontologyURI = ontologyIRI;System.out.println(ontologyIRI); + this.ontologyURI = ontologyIRI; OntologyIRIShortFormProvider sfp = new OntologyIRIShortFormProvider(); shortName = sfp.getShortForm(IRI.create(ontologyIRI)); physicalURI = ontologyIRI; Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeNTriplesAlt) ; + httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to 
select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, "N-TRIPLES") ; + model.read(in, null, "TURTLE") ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -11,7 +11,6 @@ import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; -import org.dllearner.core.KnowledgeSource; import org.dllearner.core.OntologyFormat; import org.dllearner.core.OntologyFormatUnsupportedException; import org.dllearner.core.config.ConfigOption; @@ -36,7 +35,6 @@ import com.jamonapi.MonitorFactory; @ComponentAnn(name = "efficient SPARQL fragment extractor", shortName = "sparqls", version = 0.1) - public class SparqlSimpleExtractor extends AbstractKnowledgeSource implements OWLOntologyKnowledgeSource{ @@ -334,5 +332,10 @@ JenaToOwlapiConverter converter = new JenaToOwlapiConverter(); return converter.convert(this.model,manager); } + + public static String getName(){ + return "efficient SPARQL fragment extractor"; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-07-01 18:51:19 UTC (rev 4012) +++ 
trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -22,10 +22,12 @@ import java.net.URL; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; @@ -73,6 +75,8 @@ import org.slf4j.LoggerFactory; import com.clarkparsia.owlapiv3.XSD; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.query.QueryExecution; @@ -80,6 +84,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.OWL2; @@ -134,7 +139,7 @@ e.printStackTrace(); } } - qef = new QueryExecutionFactoryPaginated(qef, 10000); +// qef = new QueryExecutionFactoryPaginated(qef, 10000); } else { qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel()); @@ -518,6 +523,100 @@ } return types; } + + public Set<Property> getProperties(boolean inferType, String namespace) { + Set<Property> properties = new HashSet<Property>(); + String query = "SELECT DISTINCT ?p ?type WHERE {?s ?p ?o." + + (namespace != null ? 
("FILTER(REGEX(?p,'^" + namespace + "'))") : "") + + "OPTIONAL{?p a ?type.}}"; + ResultSet rs = executeSelectQuery(query); + Multimap<String, String> uri2Types = HashMultimap.create(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String uri = qs.getResource("p").getURI(); + String type = ""; + if(qs.getResource("type") != null){ + type = qs.getResource("type").getURI(); + } + uri2Types.put(uri, type); + } + for (Entry<String, Collection<String>> entry : uri2Types.asMap().entrySet()) { + String uri = entry.getKey(); + Collection<String> types = entry.getValue(); + if(types.contains(OWL.ObjectProperty.getURI()) && !types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new ObjectProperty(uri)); + } else if(!types.contains(OWL.ObjectProperty.getURI()) && types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new DatatypeProperty(uri)); + } else { + //infer the type by values + query = "SELECT ?o WHERE {?s <" + uri + "> ?o. } LIMIT 100"; + rs = executeSelectQuery(query); + boolean op = true; + boolean dp = true; + RDFNode node; + while(rs.hasNext()){ + node = rs.next().get("o"); + op = node.isResource(); + dp = node.isLiteral(); + } + if(op && !dp){ + properties.add(new ObjectProperty(uri)); + } else if(!op && dp){ + properties.add(new DatatypeProperty(uri)); + } else { + //not possible to decide + } + } + } + return properties; + } + + public Set<Property> getProperties(boolean inferType) { + Set<Property> properties = new TreeSet<Property>(); + String query = "SELECT DISTINCT ?p ?type WHERE {?s ?p ?o. 
OPTIONAL{?p a ?type.}}"; + ResultSet rs = executeSelectQuery(query); + Multimap<String, String> uri2Types = HashMultimap.create(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String uri = qs.getResource("p").getURI(); + String type = ""; + if(qs.getResource("type") != null){ + type = qs.getResource("type").getURI(); + } + uri2Types.put(uri, type); + } + for (Entry<String, Collection<String>> entry : uri2Types.asMap().entrySet()) { + String uri = entry.getKey(); + Collection<String> types = entry.getValue(); + if(types.contains(OWL.ObjectProperty.getURI()) && !types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new ObjectProperty(uri)); + } else if(!types.contains(OWL.ObjectProperty.getURI()) && types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new DatatypeProperty(uri)); + } else { + //infer the type by values + query = "SELECT ?o WHERE {?s <" + uri + "> ?o. } LIMIT 100"; + rs = executeSelectQuery(query); + boolean op = true; + boolean dp = true; + RDFNode node; + while(rs.hasNext()){ + node = rs.next().get("o"); + op = node.isResource(); + dp = node.isLiteral(); + } + if(op && !dp){ + properties.add(new ObjectProperty(uri)); + } else if(!op && dp){ + properties.add(new DatatypeProperty(uri)); + } else { + //not possible to decide + } + } + } + return properties; + } public Set<NamedClass> getOWLClasses() { Set<NamedClass> types = new HashSet<NamedClass>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-07-01 18:51:22
|
Revision: 4012 http://sourceforge.net/p/dl-learner/code/4012 Author: lorenz_b Date: 2013-07-01 18:51:19 +0000 (Mon, 01 Jul 2013) Log Message: ----------- Updated LUCENE version Modified Paths: -------------- trunk/pom.xml Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2013-06-30 20:17:50 UTC (rev 4011) +++ trunk/pom.xml 2013-07-01 18:51:19 UTC (rev 4012) @@ -236,8 +236,18 @@ <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> - <version>3.5.0</version> + <version>4.3.1</version> </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers-common</artifactId> + <version>4.3.1</version> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-queryparser</artifactId> + <version>4.3.1</version> + </dependency> <dependency> <groupId>org.ini4j</groupId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-06-30 20:17:52
|
Revision: 4011 http://sourceforge.net/p/dl-learner/code/4011 Author: lorenz_b Date: 2013-06-30 20:17:50 +0000 (Sun, 30 Jun 2013) Log Message: ----------- Workaround for wrong query results. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java Modified: trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2013-06-28 11:54:18 UTC (rev 4010) +++ trunk/components-core/src/main/java/org/dllearner/core/AbstractAxiomLearningAlgorithm.java 2013-06-30 20:17:50 UTC (rev 4011) @@ -416,6 +416,10 @@ } protected Score computeScore(int total, int success){ + if(success > total){ + logger.warn("success value > total value"); + } + success = Math.min(total, success);//TODO this is a workaround as Virtuoso sometimes returns wrong counts double[] confidenceInterval = Heuristics.getConfidenceInterval95Wald(total, success); double accuracy = (confidenceInterval[0] + confidenceInterval[1]) / 2; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |