From: <lor...@us...> - 2013-12-02 15:43:51
|
Revision: 4188 http://sourceforge.net/p/dl-learner/code/4188 Author: lorenz_b Date: 2013-12-02 15:43:48 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Refactoring ISLE. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 15:22:04 UTC (rev 4187) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-02 15:43:48 UTC (rev 4188) @@ -160,18 +160,7 @@ } public String toString() { - StringBuilder output = new StringBuilder(); - Map<String,FullTokenEntitySetPair> trieMap = trie.toMap(); - - for (Entry<String, FullTokenEntitySetPair> entry : trieMap.entrySet()) { - String key = entry.getKey(); - FullTokenEntitySetPair pair = entry.getValue(); - output.append(key + " (" + pair.getFullToken() + ") :\n"); - for (Entity candidate: pair.getEntitySet()) { - output.append("\t"+candidate+"\n"); - } - } - return output.toString(); + return tree.toString(); } public static void main(String[] args) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 15:22:04 UTC (rev 4187) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 15:43:48 UTC (rev 4188) @@ -16,12 +16,21 @@ private HashMap<Token, TokenTree> children; private Set<Entity> entities; private List<Token> originalTokens; + private boolean ignoreStopWords = true; public TokenTree() { this.children = new HashMap<>(); this.entities = new HashSet<>(); this.originalTokens = new ArrayList<>(); } + + /** + * If set to TRUE, stopwords like 'of, on' are ignored during creation and retrieval operations. + * @param ignoreStopWords the ignoreStopWords to set + */ + public void setIgnoreStopWords(boolean ignoreStopWords) { + this.ignoreStopWords = ignoreStopWords; + } /** * Adds all given entities to the end of the path resulting from the given tokens. @@ -32,12 +41,16 @@ public void add(List<Token> tokens, Set<Entity> entities, List<Token> originalTokens) { TokenTree curNode = this; for (Token t : tokens) { - TokenTree nextNode = curNode.children.get(t); - if (nextNode == null) { - nextNode = new TokenTree(); - curNode.children.put(t, nextNode); - } - curNode = nextNode; + if(!ignoreStopWords || (ignoreStopWords && !t.isStopWord())){ + TokenTree nextNode = curNode.children.get(t); + if (nextNode == null) { + nextNode = new TokenTree(); + curNode.children.put(t, nextNode); + } + curNode = nextNode; + } else { + System.out.println("ignored " + t); + } } curNode.entities.addAll(entities); curNode.originalTokens = new ArrayList<>(originalTokens); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-12-02 15:22:04 UTC (rev 4187) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-12-02 15:43:48 UTC (rev 4188) @@ -27,14 +27,14 @@ @Override public Set<Annotation> annotate(TextDocument document) { Set<Annotation> annotations = new HashSet<Annotation>(); - NormalizedTextMapper mapper = new NormalizedTextMapper(document); - String content = mapper.getNormalizedText(); List<Token> matchedTokens; for (Token token : document) { matchedTokens = candidatesTrie.getLongestMatchingText(document.getTokensStartingAtToken(token, true)); - Annotation annotation = new Annotation(document, matchedTokens); - annotations.add(annotation); + if(matchedTokens != null && !matchedTokens.isEmpty()){ + Annotation annotation = new Annotation(document, matchedTokens); + annotations.add(annotation); + } } return annotations; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |