From: <dfl...@us...> - 2013-11-21 13:16:16
|
Revision: 4163 http://sourceforge.net/p/dl-learner/code/4163 Author: dfleischhacker Date: 2013-11-21 13:16:13 +0000 (Thu, 21 Nov 2013) Log Message: ----------- Annotation refactoring Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Annotation.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -4,6 +4,9 @@ package org.dllearner.algorithms.isle.index; +import java.util.ArrayList; +import java.util.List; + /** * A (non-semantic) annotation which represents an entity in a document by its offset and length. * @author Lorenz Buehmann @@ -12,8 +15,7 @@ public class Annotation { private Document referencedDocument; - private int offset; - private int length; + private ArrayList<Token> tokens; private String matchedString; public String getMatchedString() { @@ -24,64 +26,64 @@ this.matchedString = matchedString; } - public Annotation(Document referencedDocument, int offset, int length) { + public Annotation(Document referencedDocument, List<Token> tokens) { this.referencedDocument = referencedDocument; - this.offset = offset; - this.length = length; - } + this.tokens = new ArrayList<Token>(tokens); + } public Document getReferencedDocument() { return referencedDocument; } - public int getOffset() { - return offset; - } + public String getString(){ + StringBuilder sb = new StringBuilder(); + for (Token t : tokens) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(t.getStemmedForm()); + } + return sb.toString(); + } - public int getLength() { - return length; - } - - public String getToken(){ - return referencedDocument.getContent().substring(offset, offset + length); - } + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((referencedDocument == null) ? 0 : referencedDocument.hashCode()); - result = prime * result + length; - result = prime * result + offset; - return result; - } + Annotation that = (Annotation) o; + if (matchedString != null ? !matchedString.equals(that.matchedString) : that.matchedString != null) { + return false; + } + if (referencedDocument != null ? !referencedDocument.equals(that.referencedDocument) : + that.referencedDocument != null) { + return false; + } + if (tokens != null ? !tokens.equals(that.tokens) : that.tokens != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + int result = referencedDocument != null ? referencedDocument.hashCode() : 0; + result = 31 * result + (tokens != null ? tokens.hashCode() : 0); + result = 31 * result + (matchedString != null ? matchedString.hashCode() : 0); + return result; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - Annotation other = (Annotation) obj; - if (referencedDocument == null) { - if (other.referencedDocument != null) - return false; - } else if (!referencedDocument.equals(other.referencedDocument)) - return false; - if (length != other.length) - return false; - if (offset != other.offset) - return false; - return true; - } - - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override public String toString() { - return "\"" + referencedDocument.getContent().substring(offset, offset+length) + "\" at position " + offset; - } + return getString(); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -54,7 +54,7 @@ Annotation annotation_i = sortedAnnotations.get(i); int begin_i = annotation_i.getOffset(); int end_i = begin_i + annotation_i.getLength()-1; - String token_i = annotation_i.getToken(); + String token_i = annotation_i.getString(); Set<Entity> candidates_i = getCandidates(annotation_i); Set<Entity> newCandidates_i = new HashSet<Entity>(); @@ -68,7 +68,7 @@ for (int j=windowStart; j<sortedAnnotations.size() && j<windowEnd; j++) { if (j!=i) { Annotation annotation_j = sortedAnnotations.get(j); - String token_j = annotation_j.getToken(); + String token_j = annotation_j.getString(); Set<Entity> candidates_j = getCandidates(annotation_j); Set<Entity> intersection = Sets.intersection(candidates_i, candidates_j); Set<Entity> newCandidates_ij = new HashSet<Entity>(); @@ -83,7 +83,7 @@ if (!newCandidates_ij.isEmpty()) { Annotation mergedAnnotation = mergeAnnotations(annotation_i,annotation_j); // If there's no punctuation in the merged annotation - if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getToken())) { + if (!Pattern.matches("\\p{Punct}", mergedAnnotation.getString())) { candidatesMap.put(mergedAnnotation, newCandidates_ij); candidatesMap.remove(annotation_i); candidatesMap.remove(annotation_j); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -48,6 +48,7 @@ public SemanticIndex(OWLOntology ontology) { this.ontology = ontology; + } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SentenceBasedContextExtractor.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -57,7 +57,7 @@ } index += s.length(); } - throw new RuntimeException("Token " + annotation.getToken() + " not found in text " + annotation.getReferencedDocument().getRawContent()); + throw new RuntimeException("Token " + annotation.getString() + " not found in text " + annotation.getReferencedDocument().getRawContent()); } private List<CoreMap> getSentences(String document) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-11-21 13:00:33 UTC (rev 4162) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-11-21 13:16:13 UTC (rev 4163) @@ -50,7 +50,7 @@ public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { logger.debug("Linguistic annotations:\n" + annotation); logger.debug("Candidate entities:" + candidateEntities); - String token = annotation.getToken().trim(); + String token = annotation.getString().trim(); //check if annotation token matches label of entity or the part behind #(resp. /) for (Entity entity : candidateEntities) { Set<String> labels = getLabels(entity); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |