From: <lor...@us...> - 2013-09-04 13:58:46
|
Revision: 4058 http://sourceforge.net/p/dl-learner/code/4058 Author: lorenz_b Date: 2013-09-04 13:58:43 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Added prefix trie from google. Modified Paths: -------------- trunk/components-core/pom.xml Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixMap.java trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-09-04 13:21:22 UTC (rev 4057) +++ trunk/components-core/pom.xml 2013-09-04 13:58:43 UTC (rev 4058) @@ -308,6 +308,11 @@ <artifactId>jwnl</artifactId> <version>1.4.1.RC2</version> </dependency> + <dependency> + <groupId>com.google.collections</groupId> + <artifactId>google-collections</artifactId> + <version>1.0</version> + </dependency> </dependencies> <dependencyManagement> <dependencies> Added: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixMap.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixMap.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixMap.java 2013-09-04 13:58:43 UTC (rev 4058) @@ -0,0 +1,31 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +package org.dllearner.utilities.datastructures; + +/** + * Maps string prefixes to values. For example, if you {@code put("foo", 1)}, + * {@code get("foobar")} returns {@code 1}. Prohibits null values. + * + * <p>Use instead of iterating over a series of string prefixes calling + * {@code String.startsWith(prefix)}. + * + * @author cra...@go... (Bob Lee) + */ +public interface PrefixMap<T> { + /** + * Maps prefix to value. + * + * @return The previous value stored for this prefix, or null if none. + * @throws IllegalArgumentException if prefix is an empty string. + */ + T put(CharSequence prefix, T value); + + /** + * Finds a prefix that matches {@code s} and returns the mapped value. + * + * If multiple prefixes in the map match {@code s}, the longest match wins. + * + * @return value for prefix matching {@code s} or {@code null} if none match. + */ + T get(CharSequence s); +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/datastructures/PrefixTrie.java 2013-09-04 13:58:43 UTC (rev 4058) @@ -0,0 +1,154 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +package org.dllearner.utilities.datastructures; + +import java.util.Map; + +/** + * Trie implementation supporting CharSequences as prefixes. + * + * Prefixes are sequences of characters, and the set of allowed characters is + * specified as a range of sequential characters. By default, any seven-bit + * character may appear in a prefix, and so the trie is a 128-ary tree. + * + * @author cra...@go... (Bob Lee) + * @author mh...@go... (Matthew Harris) + */ +public class PrefixTrie<T> implements PrefixMap<T> { + // The set of allowed characters in prefixes is given by a range of + // consecutive characters. rangeOffset denotes the beginning of the range, + // and rangeSize gives the number of characters in the range, which is used as + // the number of children of each node. + private final char rangeOffset; + private final int rangeSize; + + private final Node<T> root; + + /** + * Constructs a trie for holding strings of seven-bit characters. + */ + public PrefixTrie() { + rangeOffset = '\0'; + rangeSize = 128; + root = new Node<T>(rangeSize); + } + + /** + * Constructs a trie for holding strings of characters. + * + * The set of characters allowed in prefixes is given by the range + * [rangeOffset, lastCharInRange], inclusive. + * + * @param firstCharInRange + * @param lastCharInRange + */ + public PrefixTrie(char firstCharInRange, char lastCharInRange) { + this.rangeOffset = firstCharInRange; + this.rangeSize = lastCharInRange - firstCharInRange + 1; + + if (rangeSize <= 0) { + throw new IllegalArgumentException("Char range must include some chars"); + } + + root = new Node<T>(rangeSize); + } + + /** + * {@inheritDoc} + * + * @throws IllegalArgumentException if prefix contains a character outside the + * range of legal prefix characters. + */ + public T put(CharSequence prefix, T value) { + if (value == null) { + throw new NullPointerException(); + } + + Node<T> current = root; + for (int i = 0; i < prefix.length(); i++) { + int nodeIndex = prefix.charAt(i) - rangeOffset; + try { + Node<T> next = current.next[nodeIndex]; + if (next == null) { + next = current.next[nodeIndex] = new Node<T>(rangeSize); + } + current = next; + } catch (ArrayIndexOutOfBoundsException e) { + throw new IllegalArgumentException( + "'" + prefix.charAt(i) + "' is not a legal prefix character."); + } + } + T oldValue = current.value; + current.value = value; + return oldValue; + } + + /** {@inheritDoc} */ + public T get(CharSequence s) { + Node<T> deepestWithValue = root; + Node<T> current = root; + for (int i = 0; i < s.length(); i++) { + int nodeIndex = s.charAt(i) - rangeOffset; + if (nodeIndex < 0 || rangeSize <= nodeIndex) { + return null; + } + current = current.next[nodeIndex]; + if (current == null) { + break; + } + if (current.value != null) { + deepestWithValue = current; + } + } + return deepestWithValue.value; + } + + /** + * Returns a Map containing the same data as this structure. + * + * This implementation constructs and populates an entirely new map rather + * than providing a map view on the trie, so this is mostly useful for + * debugging. + * + * @return A Map mapping each prefix to its corresponding value. + */ + public Map<String, T> toMap() { + Map<String, T> map = com.google.common.collect.Maps.newLinkedHashMap(); + addEntries(root, new StringBuilder(), map); + return map; + } + + /** + * Adds to the given map all entries at or below the given node. + * + * @param node + * @param builder A StringBuilder containing the prefix for the given node. + * @param map + */ + private void addEntries(Node<T> node, + StringBuilder builder, + Map<String, T> map) { + if (node.value != null) { + map.put(builder.toString(), node.value); + } + + for (int i = 0; i < node.next.length; i++) { + Node<T> next = node.next[i]; + if (next != null) { + builder.append((char) (i + rangeOffset)); + addEntries(next, builder, map); + builder.deleteCharAt(builder.length() - 1); + } + } + } + + private static class Node<T> { + T value; + final Node<T>[] next; + + @SuppressWarnings("unchecked") + Node(int numChildren) { + next = new Node[numChildren]; + } + } +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |