From: <dfl...@us...> - 2013-12-09 15:37:42
|
Revision: 4199 http://sourceforge.net/p/dl-learner/code/4199 Author: dfleischhacker Date: 2013-12-09 15:37:39 +0000 (Mon, 09 Dec 2013) Log Message: ----------- Cleanup and show alternative names Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java 2013-12-09 15:36:38 UTC (rev 4198) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/FullTokenEntitySetPair.java 2013-12-09 15:37:39 UTC (rev 4199) @@ -1,31 +0,0 @@ -package org.dllearner.algorithms.isle.index; - -import org.dllearner.core.owl.Entity; - -import java.util.HashSet; -import java.util.Set; - -/** - * A pair consisting of a full string token and the corresponding entities - */ -public class FullTokenEntitySetPair { - private String fullToken; - private Set<Entity> entitySet; - - public FullTokenEntitySetPair(String fullToken) { - this.fullToken = fullToken; - this.entitySet = new HashSet<Entity>(); - } - - public String getFullToken() { - return fullToken; - } - - public Set<Entity> getEntitySet() { - return entitySet; - } - - public void addEntity(Entity entity) { - entitySet.add(entity); - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java 2013-12-09 15:36:38 UTC (rev 4198) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/NormalizedTextMapper.java 2013-12-09 15:37:39 UTC (rev 4199) @@ -1,141 +0,0 @@ -package org.dllearner.algorithms.isle.index; - -import java.util.ArrayList; - -/** - * Provides text normalization and mapping of normalized ranges to the original ones. - */ -public class NormalizedTextMapper { - private Document originalDocument; - private String originalText; - private String normalizedText; - - private ArrayList<OccurenceMappingPair> normalizedIndexToOriginalIndex; - - public NormalizedTextMapper(Document original) { - this.originalDocument = original; - this.originalText = original.getContent(); - this.normalizedIndexToOriginalIndex = new ArrayList<OccurenceMappingPair>(); - - StringBuilder sb = new StringBuilder(); - int currentOriginalIndex = 0; - for (String originalWord : originalText.split(" ")) { - String normalizedWord = getNormalizedWord(originalWord); - normalizedIndexToOriginalIndex - .add(new OccurenceMappingPair(currentOriginalIndex, originalWord.length(), sb.length(), - normalizedWord.length())); - currentOriginalIndex += originalWord.length() + 1; - sb.append(normalizedWord); - sb.append(" "); - } - normalizedText = sb.toString(); - } - - public String getOriginalText() { - return originalText; - } - - public String getNormalizedText() { - return normalizedText; - } - - /** - * Returns the annotation for the original text matching the given position and length in the normalized - * text. - * - * @param position position in the normalized text to get annotation for - * @param length length of the text to get annotation for - * @return - */ - public Annotation getOriginalAnnotationForPosition(int position, int length) { - int curNormalizedLength = 0; - int originalStart = -1; - int curOriginalLength = 0; - - for (OccurenceMappingPair p : normalizedIndexToOriginalIndex) { - if (p.getNormalizedIndex() == position) { - originalStart = p.getOriginalIndex(); - } - if (originalStart != -1) { - curNormalizedLength += p.getNormalizedLength(); - curOriginalLength += p.getOriginalLength(); - if (curNormalizedLength >= length) { - //TODO refactoring -// return new Annotation(originalDocument, originalStart, curOriginalLength); - } - - // include space - curNormalizedLength += 1; - curOriginalLength += 1; - } - } - - return null; - } - - /** - * Returns the normalized form of the given word. Word must not contain any spaces or the like. - * @param word - * @return - */ - private String getNormalizedWord(String word) { - return LinguisticUtil.getInstance().getNormalizedForm(word); - } - - public static void main(String[] args) { -// NormalizedTextMapper n = new NormalizedTextMapper(new TextDocument("This is a testing text using letters")); -// System.out.println(n.getOriginalText()); -// System.out.println(n.getNormalizedText()); -// for (OccurenceMappingPair p : n.normalizedIndexToOriginalIndex) { -// System.out.println(p); -// } -// System.out.println(n.getOriginalAnnotationForPosition(7,6)); -// System.out.println(n.getOriginalAnnotationForPosition(23,6)); -// System.out.println(n.getOriginalAnnotationForPosition(7,1)); -// System.out.println(n.getOriginalAnnotationForPosition(14,15)); - } - - /** - * Maps words identified by index and length in the normalized texts to the original word. - */ - private class OccurenceMappingPair { - private int originalIndex; - private int originalLength; - private int normalizedIndex; - private int normalizedLength; - - private OccurenceMappingPair(int originalIndex, int originalLength, int normalizedIndex, int normalizedLength) { - - this.originalIndex = originalIndex; - this.originalLength = originalLength; - this.normalizedIndex = normalizedIndex; - this.normalizedLength = normalizedLength; - } - - private int getNormalizedIndex() { - return normalizedIndex; - } - - private int getNormalizedLength() { - return normalizedLength; - } - - private int getOriginalLength() { - return originalLength; - } - - private int getOriginalIndex() { - return originalIndex; - } - - @Override - public String toString() { - return "OccurenceMappingPair{" + - "originalIndex=" + originalIndex + - ", originalLength=" + originalLength + - ", normalizedIndex=" + normalizedIndex + - ", normalizedLength=" + normalizedLength + - '}'; - } - } -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-09 15:36:38 UTC (rev 4198) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-09 15:37:39 UTC (rev 4199) @@ -91,6 +91,10 @@ String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); for (String synonym : synonyms) { + // ignore all multi word synonyms + if (synonym.contains("_")) { + continue; + } t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-09 15:36:38 UTC (rev 4198) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java 2013-12-09 15:37:39 UTC (rev 4199) @@ -138,7 +138,7 @@ */ @Override public String toString() { - return "[Word: " + rawForm + " | Stemmed word: " + stemmedForm + " | POS tag: " + posTag + "]"; + return "[Word: " + rawForm + " | Stemmed word: " + stemmedForm + " | POS tag: " + posTag + " | Alternatives: " + alternativeForms.toString() + "]"; } /** Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-09 15:36:38 UTC (rev 4198) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-09 15:37:39 UTC (rev 4199) @@ -1,15 +1,15 @@ package org.dllearner.algorithms.isle.index; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - import org.dllearner.algorithms.isle.EntityCandidateGenerator; import org.dllearner.algorithms.isle.StopWordFilter; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; -import java.util.*; -import java.util.regex.Pattern; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Set; /** * Generates candidates using a entity candidates prefix trie @@ -34,7 +34,6 @@ /** * Postprocess the annotations generated by annotate * The objective is to merge annotations which are likely to belong to the same entity - * @param annotations : set of annotations * @param window : maximum distance between the annotations * @return */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |