From: <lor...@us...> - 2013-09-04 13:18:24
Revision: 4056 http://sourceforge.net/p/dl-learner/code/4056 Author: lorenz_b Date: 2013-09-04 13:18:17 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added wordnet+stanford. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/resources/wordnet_properties.xml trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/pom.xml 2013-09-04 13:18:17 UTC (rev 4056) @@ -282,6 +282,32 @@ <artifactId>jena-sparql-api-core</artifactId> <version>2.10.0-5-SNAPSHOT</version> </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>3.1</version> + </dependency> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>1.3.4</version> + </dependency> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>1.3.4</version> + </dependency> + <dependency> + <groupId>edu.stanford.nlp</groupId> + <artifactId>stanford-corenlp</artifactId> + <version>1.3.4</version> + <classifier>models</classifier> + </dependency> + <dependency> + <groupId>net.didion.jwnl</groupId> + 
<artifactId>jwnl</artifactId> + <version>1.4.1.RC2</version> + </dependency> </dependencies> <dependencyManagement> <dependencies> Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/SimpleWordSenseDisambiguation.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -0,0 +1,79 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashSet; +import java.util.Set; + +import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.core.owl.Entity; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.model.IRI; +import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; +import org.semanticweb.owlapi.model.OWLAnnotationProperty; +import org.semanticweb.owlapi.model.OWLDataFactory; +import org.semanticweb.owlapi.model.OWLEntity; +import org.semanticweb.owlapi.model.OWLLiteral; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.util.IRIShortFormProvider; +import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; + +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +/** + * @author Lorenz Buehmann + * + */ +public class SimpleWordSenseDisambiguation extends WordSenseDisambiguation{ + + private IRIShortFormProvider sfp = new SimpleIRIShortFormProvider(); + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getRDFSLabel(); + + /** + * @param ontology + */ + public SimpleWordSenseDisambiguation(OWLOntology ontology) { + super(ontology); + } + + /* (non-Javadoc) + * @see 
org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) + */ + @Override + public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { + String token = annotation.getToken(); + //check if annotation token matches label of entity or the part behind #(resp. /) + for (Entity entity : candidateEntities) { + Set<String> labels = getLabels(entity); + for (String label : labels) { + if(label.equals(token)){ + return new SemanticAnnotation(annotation, entity); + } + } + String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); + if(annotation.equals(shortForm)){ + return new SemanticAnnotation(annotation, entity); + } + } + return null; + } + + private Set<String> getLabels(Entity entity){ + Set<String> labels = new HashSet<String>(); + OWLEntity owlEntity = OWLAPIConverter.getOWLAPIEntity(entity); + Set<OWLAnnotationAssertionAxiom> axioms = ontology.getAnnotationAssertionAxioms(owlEntity.getIRI()); + for (OWLAnnotationAssertionAxiom annotation : axioms) { + if(annotation.getProperty().equals(annotationProperty)){ + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + labels.add(val.getLiteral()); + } + } + } + return labels; + } + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/StopWordFilter.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -45,7 +45,7 @@ public void removeStopWordAnnotations(Set<Annotation> annotations) { for (Iterator<Annotation> iter = annotations.iterator(); iter.hasNext();) { Annotation annotation = iter.next(); - String content = 
annotation.getGetReferencedDocument().getContent(); + String content = annotation.getReferencedDocument().getContent(); String token = content.substring(annotation.getOffset(), annotation.getOffset()+annotation.getLength()); if(stopWords.contains(token)){ iter.remove(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -10,18 +10,18 @@ */ public class Annotation { - private Document getReferencedDocument; + private Document referencedDocument; private int offset; private int length; - public Annotation(Document getReferencedDocument, int offset, int length) { - this.getReferencedDocument = getReferencedDocument; + public Annotation(Document referencedDocument, int offset, int length) { + this.referencedDocument = referencedDocument; this.offset = offset; this.length = length; } - public Document getGetReferencedDocument() { - return getReferencedDocument; + public Document getReferencedDocument() { + return referencedDocument; } public int getOffset() { @@ -31,12 +31,16 @@ public int getLength() { return length; } + + public String getToken(){ + return referencedDocument.getContent().substring(offset, offset + length); + } @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((getReferencedDocument == null) ? 0 : getReferencedDocument.hashCode()); + result = prime * result + ((referencedDocument == null) ? 
0 : referencedDocument.hashCode()); result = prime * result + length; result = prime * result + offset; return result; @@ -51,10 +55,10 @@ if (getClass() != obj.getClass()) return false; Annotation other = (Annotation) obj; - if (getReferencedDocument == null) { - if (other.getReferencedDocument != null) + if (referencedDocument == null) { + if (other.referencedDocument != null) return false; - } else if (!getReferencedDocument.equals(other.getReferencedDocument)) + } else if (!referencedDocument.equals(other.referencedDocument)) return false; if (length != other.length) return false; @@ -68,6 +72,6 @@ */ @Override public String toString() { - return "\"" + getReferencedDocument.getContent().substring(offset, offset+length) + "\" at position " + offset; + return "\"" + referencedDocument.getContent().substring(offset, offset+length) + "\" at position " + offset; } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotation.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -14,7 +14,7 @@ private Entity entity; public SemanticAnnotation(Annotation annotation, Entity entity) { - super(annotation.getGetReferencedDocument(), annotation.getOffset(), annotation.getLength()); + super(annotation.getReferencedDocument(), annotation.getOffset(), annotation.getLength()); this.entity = entity; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-04 10:04:14 UTC (rev 4055) +++ 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -43,8 +43,9 @@ for (Annotation annotation : annotations) { Set<Entity> candidateEntities = entityCandidateGenerator.getCandidates(annotation); SemanticAnnotation semanticAnnotation = wordSenseDisambiguation.disambiguate(annotation, candidateEntities); - semanticAnnotations.add(semanticAnnotation); - + if(semanticAnnotation != null){ + semanticAnnotations.add(semanticAnnotation); + } } AnnotatedDocument annotatedDocument = new AnnotatedTextDocument(document, semanticAnnotations); return annotatedDocument; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle.index.semantic.simple; import org.dllearner.algorithms.isle.RandomWordSenseDisambiguation; +import org.dllearner.algorithms.isle.SimpleWordSenseDisambiguation; import org.dllearner.algorithms.isle.index.SimpleEntityCandidateGenerator; import org.dllearner.algorithms.isle.index.SimpleLinguisticAnnotator; import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; @@ -28,7 +29,7 @@ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { super(ontology, syntacticIndex, - new RandomWordSenseDisambiguation(ontology), + new SimpleWordSenseDisambiguation(ontology), new SimpleEntityCandidateGenerator(ontology), new SimpleLinguisticAnnotator()); } Modified: trunk/components-core/src/main/resources/wordnet_properties.xml 
=================================================================== --- trunk/components-core/src/main/resources/wordnet_properties.xml 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/main/resources/wordnet_properties.xml 2013-09-04 13:18:17 UTC (rev 4056) @@ -38,7 +38,7 @@ <param name="dictionary_element_factory" value="net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory"/> <param name="file_manager" value="net.didion.jwnl.dictionary.file_manager.FileManagerImpl"> <param name="file_type" value="net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile"/> - <param name="dictionary_path" value="/opt/wordnet/dict"/> + <param name="dictionary_path" value="/opt/wordnet"/> </param> </dictionary> <resource class="PrincetonResource"/> Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-04 10:04:14 UTC (rev 4055) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-09-04 13:18:17 UTC (rev 4056) @@ -118,8 +118,14 @@ public void testSemanticIndexAnnotationProperty(){ semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex); semanticIndex.buildIndex(df.getRDFSLabel(), null); - Set<AnnotatedDocument> documents = semanticIndex.getDocuments(new NamedClass("http://example.com/father#father")); - System.out.println(documents); + + NamedClass nc = new NamedClass("http://example.com/father#father"); + Set<AnnotatedDocument> documents = semanticIndex.getDocuments(nc); + System.out.println("Documents for " + nc + ":\n" + documents); + + nc = new NamedClass("http://example.com/father#person"); + documents = semanticIndex.getDocuments(nc); + System.out.println("Documents for " + nc + ":\n" + documents); } @Test This was sent by the SourceForge.net collaborative development platform, the world's largest 
Open Source development site.