From: <dfl...@us...> - 2013-12-02 12:51:33
|
Revision: 4177 http://sourceforge.net/p/dl-learner/code/4177 Author: dfleischhacker Date: 2013-12-02 12:51:30 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Add TokenTree class Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 12:51:30 UTC (rev 4177) @@ -0,0 +1,111 @@ +package org.dllearner.algorithms.isle.index; + +import org.dllearner.core.owl.Entity; + +import java.util.*; + +/** + * Tree for finding longest matching Token sequence + * + * @author Daniel Fleischhacker + */ +public class TokenTree { + private HashMap<Token, TokenTree> children; + private Set<Entity> entities; + + public TokenTree() { + this.children = new HashMap<>(); + this.entities = new HashSet<>(); + } + + /** + * Adds all given entities to the end of the path resulting from the given tokens. + * + * @param tokens tokens to locate insertion point for entities + * @param entities entities to add + */ + public void add(List<Token> tokens, Set<Entity> entities) { + TokenTree curNode = this; + for (Token t : tokens) { + TokenTree nextNode = curNode.children.get(t); + if (nextNode == null) { + nextNode = new TokenTree(); + curNode.children.put(t, nextNode); + } + curNode = nextNode; + } + curNode.entities.addAll(entities); + } + + /** + * Adds the given entity to the tree. + * + * @param tokens tokens to locate insertion point for entities + * @param entity entity to add + */ + public void add(List<Token> tokens, Entity entity) { + add(tokens, Collections.singleton(entity)); + } + + /** + * Returns the set of entities located by the given list of tokens. + * + * @param tokens tokens to locate the information to get + * @return located set of entities or null if token sequence not contained in tree + */ + public Set<Entity> get(List<Token> tokens) { + TokenTree curNode = this; + for (Token t : tokens) { + TokenTree nextNode = curNode.children.get(t); + if (nextNode == null) { + return null; + } + curNode = nextNode; + } + return curNode.entities; + } + + /** + * Returns the list of tokens which are the longest match with entities assigned in this tree. + * + * @param tokens list of tokens to check for longest match + * @return list of tokens being the longest match, sublist of {@code tokens} anchored at the first token + */ + public List<Token> getLongestMatch(List<Token> tokens) { + List<Token> fallbackTokenList = new ArrayList<>(); + TokenTree curNode = this; + + for (Token t : tokens) { + TokenTree nextNode = curNode.children.get(t); + if (nextNode == null) { + return fallbackTokenList; + } + curNode = nextNode; + fallbackTokenList.add(t); + } + return fallbackTokenList; + } + + /** + * Returns the set of entities assigned to the longest matching token subsequence of the given token sequence. + * @param tokens token sequence to search for longest match + * @return set of entities assigned to the longest matching token subsequence of the given token sequence + */ + public Set<Entity> getEntitiesForLongestMatch(List<Token> tokens) { + TokenTree fallback = this.entities.isEmpty() ? null : this; + TokenTree curNode = this; + + for (Token t : tokens) { + TokenTree nextNode = curNode.children.get(t); + if (nextNode == null) { + return fallback == null ? null : fallback.entities; + } + curNode = nextNode; + if (!curNode.entities.isEmpty()) { + fallback = curNode; + } + } + + return fallback.entities; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-12-02 14:34:45
|
Revision: 4181 http://sourceforge.net/p/dl-learner/code/4181 Author: dfleischhacker Date: 2013-12-02 14:34:42 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Add toString to TokenTree Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:30:18 UTC (rev 4180) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:34:42 UTC (rev 4181) @@ -1,11 +1,10 @@ package org.dllearner.algorithms.isle.index; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; import org.dllearner.core.owl.Entity; import org.dllearner.core.owl.NamedClass; -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; - import java.util.*; /** @@ -112,7 +111,7 @@ return fallback.entities; } - + public static void main(String[] args) throws Exception { List<Token> tokens1 = Lists.newLinkedList(); for (String s : Splitter.on(" ").split("this is a token tree")) { @@ -127,5 +126,28 @@ TokenTree tree = new TokenTree(); tree.add(tokens1, new NamedClass("TokenTree")); tree.add(tokens2, new NamedClass("TokenizedTree")); - } + System.out.println(tree); + } + + + public String toString() { + return "TokenTree\n"+ toString(0); + } + + public String toString(int indent) { + StringBuilder indentStringBuilder = new StringBuilder(); + for (int i = 0; i < indent; i++) { + indentStringBuilder.append(" "); + } + String indentString = indentStringBuilder.toString(); + StringBuilder sb = new StringBuilder(); + for (Map.Entry<Token, TokenTree> e : children.entrySet()) { + sb.append(indentString).append(e.getKey().toString()); + sb.append("\n"); + sb.append(e.getValue().toString(indent + 1)); + } + return sb.toString(); + } + + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-12-02 14:41:24
|
Revision: 4182 http://sourceforge.net/p/dl-learner/code/4182 Author: dfleischhacker Date: 2013-12-02 14:41:21 +0000 (Mon, 02 Dec 2013) Log Message: ----------- Prevent possible NPE Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:34:42 UTC (rev 4181) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-02 14:41:21 UTC (rev 4182) @@ -109,7 +109,7 @@ } } - return fallback.entities; + return fallback == null ? Collections.<Entity>emptySet() : fallback.entities; } public static void main(String[] args) throws Exception { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dfl...@us...> - 2013-12-10 09:56:58
|
Revision: 4201 http://sourceforge.net/p/dl-learner/code/4201 Author: dfleischhacker Date: 2013-12-10 09:56:55 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Retrieve entities from all possible leaf nodes in the TokenTree Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-09 15:38:09 UTC (rev 4200) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java 2013-12-10 09:56:55 UTC (rev 4201) @@ -81,7 +81,7 @@ public Set<Entity> get(List<Token> tokens) { TokenTree curNode = this; for (Token t : tokens) { - TokenTree nextNode = curNode.children.get(t); + TokenTree nextNode = getNextTokenTree(curNode, t); if (nextNode == null) { return null; } @@ -90,6 +90,25 @@ return curNode.entities; } + public Set<Entity> getAllEntities(List<Token> tokens) { + HashSet<Entity> resEntities = new HashSet<>(); + getAllEntitiesRec(tokens, 0, this, resEntities); + return resEntities; + } + + public void getAllEntitiesRec(List<Token> tokens, int curPosition, TokenTree curTree, HashSet<Entity> resEntities) { + if (curPosition == tokens.size()) { + resEntities.addAll(curTree.entities); + return; + } + Token t = tokens.get(curPosition); + for (Map.Entry<Token, TokenTree> entry : curTree.children.entrySet()) { + if (t.equalsWithAlternativeForms(entry.getKey())) { + getAllEntitiesRec(tokens, curPosition + 1, entry.getValue(), resEntities); + } + } + } + /** * Returns the list of tokens which are the longest match with entities assigned in this tree. * @@ -148,7 +167,7 @@ } /** - * Returns the original token for the longest match + * Returns the original ontology tokens for the longest match */ public List<Token> getOriginalTokensForLongestMatch(List<Token> tokens) { TokenTree fallback = this.entities.isEmpty() ? null : this; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |