[FOray-commit] SF.net SVN: foray:[10687] trunk/foray
Modular XSL-FO Implementation for Java.
Status: Alpha
Brought to you by:
victormote
|
From: <vic...@us...> - 2009-03-15 18:22:16
|
Revision: 10687
http://foray.svn.sourceforge.net/foray/?rev=10687&view=rev
Author: victormote
Date: 2009-03-15 18:21:21 +0000 (Sun, 15 Mar 2009)
Log Message:
-----------
Move the TernaryTreeMap class from the hyphenation module to the common module, where it can be more widely used.
Modified Paths:
--------------
trunk/foray/foray-common/src/javatest/org/foray/common/TestFOrayCommon.java
trunk/foray/foray-hyphen/src/java/org/foray/hyphen/HyphenationTree.java
trunk/foray/foray-hyphen/src/javatest/org/foray/hyphen/TestFOrayHyphen.java
Added Paths:
-----------
trunk/foray/foray-common/src/java/org/foray/common/TernaryTreeMap.java
trunk/foray/foray-common/src/javatest/org/foray/common/TestTernaryTree.java
Removed Paths:
-------------
trunk/foray/foray-hyphen/src/java/org/foray/hyphen/TernaryTreeMap.java
trunk/foray/foray-hyphen/src/javatest/org/foray/hyphen/TestTernaryTree.java
Copied: trunk/foray/foray-common/src/java/org/foray/common/TernaryTreeMap.java (from rev 10686, trunk/foray/foray-hyphen/src/java/org/foray/hyphen/TernaryTreeMap.java)
===================================================================
--- trunk/foray/foray-common/src/java/org/foray/common/TernaryTreeMap.java (rev 0)
+++ trunk/foray/foray-common/src/java/org/foray/common/TernaryTreeMap.java 2009-03-15 18:21:21 UTC (rev 10687)
@@ -0,0 +1,797 @@
+/*
+ * Copyright 2004 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+/*
+ * Known Constributors:
+ * Carlos Villegas <ca...@un...> (Original author)
+ */
+
+package org.foray.common;
+
+
+import java.io.Serializable;
+import java.util.Enumeration;
+import java.util.Stack;
+
+/**
+ * <p>A map whose key is always a {@link CharSequence} (usually a {@link String}), and whose value
+ * is a char, usually used to store an index into some other data structure.
+ * This class is intended to serve as a base class or helper class for natural-language dictionaries
+ * and similar data structures.</p>
+ *
+ * <p>A ternary search tree is a hybrid between a binary tree and a digital search tree (trie).
+ * Ternary trees have some nice properties, including the following:
+ * <ul>
+ * <li>The tree can be traversed in sorted order.</li>
+ * <li>Partial matches (wildcards) can be implemented.</li>
+ * <li>Retrieval of all keys within a given distance from the target is possible.</li>
+ * <li>The storage requirements are higher than a binary tree but a lot less than a trie.</li>
+ * <li>Performance is comparable with a hash table.
+ * Sometimes it outperforms a hash function, and can usually determine a miss faster than a
+ * hash.</li>
+ * </ul>
+ * </p>
+ *
+ * <p>In this implementation the value is a char (think "unsigned short"), and is stored in the leaf
+ * nodes of the tree.
+ * This limits the number of nodes to 65,536.
+ * Branches that contain only one key are compressed to one node by storing a pointer to the trailer
+ * substring of the key.
+ * A compressed branch needs only one node per key plus the size of the string key.
+ * Compressed branches are decompressed as needed when another key with same prefix is inserted.
+ * This saves a lot of space, especially for long keys.</p>
+ *
+ * <p>This class could be extended to or embedded in a general map that would allow arbitrary
+ * objects to be values in the map.
+ * One scheme for doing so would be to create an array of the value Objects, and use this class to
+ * store the indexes to that array.</p>
+ */
+
+public class TernaryTreeMap implements Cloneable, Serializable {
+ /* TODO: After all other API-related to-do items are cleaned up, consider moving this class
+ * to Common module. */
+
+ /** Allocation size for arrays. */
+ private static final int BLOCK_SIZE = 2048;
+
+ /** Constant used for serialization. */
+ private static final long serialVersionUID = 240904407206584695L;
+
+ /*
+ * We use four arrays of equal size to represent the nodes in this tree.
+ * A given node /n/ is represented by the values in the arrays lo, hi, eq, and sc.
+ * It is tempting to group these four arrays into a private inner class, and the original author
+ * of this class left documentation kind of apologizing for not doing so.
+ * However, experimentation shows that the extra object references created when doing so more
+ * than doubles the size of the serialized instance, and therefore presumably does approximately
+ * the same to memory consumption.
+ * Since a light memory footprint is one of the main design considerations for this class, we
+ * will use the original parallel arrays instead.
+ */
+
+ /**
+ * If {@link #keyChar} is a valid Unicode character, this array contains the index to the node
+ * containing the "low" branch, that is the branch that should be traversed to find a char
+ * that is less than {@link #keyChar}.
+ * If {@link #keyChar} is equal to U+FFFF, the compressed branch indicator, then this array
+ * instead contains the index into {@link #compressedKeys} that contains the first char in the
+ * compressed portion of the key.
+ */
+ private char[] lo;
+
+ /**
+ * The index to the node containing the "hi" branch, that is the branch that should be traversed
+ * to find a char that is greater than {@link #keyChar}.
+ */
+ private char[] hi;
+
+ /**
+ * If {@link #keyChar} is a valid Unicode character, this array contains the index to the node
+ * containing the "equal" branch, that is the branch that should be traversed to find a char
+ * that is equal to {@link #keyChar}.
+ * If {@link #keyChar} is equal to U+0000, the string terminator, then this array instead
+ * contains value for this map element.
+ */
+ private char[] eq;
+
+ /**
+ * <P>The character, which is part of the key, stored in this node.
+ * Two special values are reserved:</P>
+ * <ul>
+ * <li>0x0000 is the string terminator. If the node contains this value, then the search is
+ * done and the value in {@link #eq} should be returned.</li>
+ * <li>0xFFFF to indicate that the branch starting at this node is compressed.
+ * This shouldn't be a problem if we give the usual semantics to strings since 0xFFFF is
+ * guaranteed not to be a valid Unicode character.</li>
+ * </ul>
+ */
+ private char[] keyChar;
+
+ /** This vector holds the compressed portion of a key when the branch is compressed.
+ * It is simply a series of word fragments, each followed by a
+ * {@link StringUtil#NULL_TERMINATOR}.
+ * The index to the start of the compressed portion of the key for a given node in the tree is
+ * found in the {@link #lo} value for the node. */
+ private CharVector compressedKeys;
+
+ /** The index of the root of the current branch?? */
+ private char root;
+
+ /** The index to the next available node. */
+ private char freenode;
+
+ /** The number of key-value pairs mapped in the tree.
+ * This is not necessarily the same as the number of nodes in the tree, because of branch
+ * compression, and because it frequently takes more than one node to store the chars in a
+ * key. */
+ private int length;
+
+ /**
+ * Constructor.
+ */
+ public TernaryTreeMap() {
+ init();
+ }
+
+ /**
+ * Initializes the basic data structures of the tree.
+ */
+ private void init() {
+ this.root = 0;
+ this.freenode = 1;
+ this.length = 0;
+ this.lo = new char[TernaryTreeMap.BLOCK_SIZE];
+ this.hi = new char[TernaryTreeMap.BLOCK_SIZE];
+ this.eq = new char[TernaryTreeMap.BLOCK_SIZE];
+ this.keyChar = new char[TernaryTreeMap.BLOCK_SIZE];
+ this.compressedKeys = new CharVector();
+ }
+
+ /**
+ * Put part or all of one char[] in the map.
+ * @param key The key to be added.
+ * @param value The value.
+ */
+ public void put(final char[] key, final char value) {
+ put(key, 0, value);
+ }
+
+ /**
+ * Put part or all of one char[] in the map.
+ * @param key The key to be added.
+ * @param start The index to the first char in <code>key</code> to be added.
+ * @param value The value.
+ */
+ public void put(final char[] key, final int start, final char value) {
+ put(key, start, key.length, value);
+ }
+
+ /**
+ * Put part or all of one char[] in the map.
+ * @param key The key to be added.
+ * @param start The index to the first char in <code>key</code> to be added.
+ * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
+ * @param value The value.
+ */
+ public void put(final char[] key, final int start, final int end, final char value) {
+ final String keyString = new String(key, start, end - start);
+ put(keyString, value);
+ }
+
+ /**
+ * Put one String or other CharSequence in the map.
+ * @param key The key to be added.
+ * @param value The value.
+ */
+ public void put(final CharSequence key, final char value) {
+ this.put(key, 0, value);
+ }
+
+ /**
+ * Put one String or other CharSequence in the map.
+ * @param key The key to be added.
+ * @param start The index to the first char in <code>key</code> to be added.
+ * @param value The value.
+ */
+ public void put(final CharSequence key, final int start, final char value) {
+ put(key, start, key.length(), value);
+ }
+
+ /**
+ * Put one String or other CharSequence in the map.
+ * @param key The key to be added.
+ * @param start The index to the first char in <code>key</code> to be added.
+ * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
+ * @param value The value.
+ */
+ public void put(final CharSequence key, final int start, final int end, final char value) {
+ ensureCapacity(this.getNodeCount() + end - start + 1);
+ this.root = put(this.root, key, start, end, value);
+ }
+
+ /**
+ * The actual insertion function, which is recursive.
+ * @param p The initial root value.
+ * @param key The key to be added.
+ * @param start The index to the first char in <code>key</code> to be added.
+ * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
+ * @param value The value.
+ * @return The updated root value.
+ */
+ private char put(final char p, final CharSequence key, final int start, final int end, final char value) {
+ char pToUse = p;
+ final int len = end - start;
+ if (pToUse == 0) {
+ // this means there is no branch, this node will start a new branch.
+ // Instead of doing that, we store the key somewhere else and create
+ // only one node with a pointer to the key
+ pToUse = this.freenode++;
+ /* Holds data. */
+ this.eq[pToUse] = value;
+ this.length++;
+ this.hi[pToUse] = 0;
+ if (len > 0) {
+ /* Indicates branch is compressed */
+ this.keyChar[pToUse] = Character.MAX_VALUE;
+ /* Use "lo" to hold pointer to key. */
+ this.lo[pToUse] = (char) this.compressedKeys.add(key, start, len);
+ this.compressedKeys.add(StringUtil.NULL_TERMINATOR);
+ } else {
+ this.keyChar[pToUse] = 0;
+ this.lo[pToUse] = 0;
+ }
+ return pToUse;
+ }
+
+ if (this.keyChar[pToUse] == Character.MAX_VALUE) {
+ final char pp = this.freenode++;
+ this.lo[pp] = this.lo[pToUse];
+ this.eq[pp] = this.eq[pToUse];
+ this.lo[pToUse] = 0;
+ if (len > 0) {
+ this.keyChar[pToUse] = this.compressedKeys.get(this.lo[pp]);
+ this.eq[pToUse] = pp;
+ this.lo[pp]++;
+ if (this.compressedKeys.get(this.lo[pp]) == 0) {
+ this.lo[pp] = 0;
+ this.keyChar[pp] = 0;
+ this.hi[pp] = 0;
+ } else {
+ this.keyChar[pp] = Character.MAX_VALUE;
+ }
+ } else {
+ this.keyChar[pp] = Character.MAX_VALUE;
+ this.hi[pToUse] = pp;
+ this.keyChar[pToUse] = 0;
+ this.eq[pToUse] = value;
+ this.length++;
+ return pToUse;
+ }
+ }
+ final char s = key.charAt(start);
+ if (s < this.keyChar[pToUse]) {
+ this.lo[pToUse] = put(this.lo[pToUse], key, start, end, value);
+ } else if (s == this.keyChar[pToUse]) {
+ if (s != 0) {
+ this.eq[pToUse] = put(this.eq[pToUse], key, start + 1, end, value);
+ } else {
+ // key already in tree, overwrite data
+ this.eq[pToUse] = value;
+ }
+
+ } else {
+ this.hi[pToUse] = put(this.hi[pToUse], key, start, end, value);
+ }
+ return pToUse;
+ }
+
+ /**
+ * Returns the mapped "value" for a given char array.
+ * @param key The key whose value is needed.
+ * @return The mapped value.
+ */
+ public int get(final char[] key) {
+ return get(key, 0);
+ }
+
+ /**
+ * Returns the mapped "value" for a given char array.
+ * @param key The key whose value is needed.
+ * @param start The index to the first element in <code>key</code> which should be considered part of the key.
+ * @return The mapped value.
+ */
+ public int get(final char[] key, final int start) {
+ return get(key, start, key.length);
+ }
+
+ /**
+ * Returns the mapped "value" for a given char array.
+ * @param key The key whose value is needed.
+ * @param start The index to the first element in <code>key</code> which should be considered part of the key.
+ * @param end The index to the first element in <code>key</code>, after <code>start</code>, which should NOT be
+ * considered part of the key.
+ * @return The mapped value, if <code>key</code> is in the map. Otherwise, returns -1;
+ */
+ public int get(final char[] key, final int start, final int end) {
+ final int length = end - start;
+ final String keyString = new String(key, start, length);
+ return get(keyString);
+ }
+
+ /**
+ * Returns the mapped "value" for a given String or other CharSequence key.
+ * @param key The key whose value is needed.
+ * @return The mapped value.
+ */
+ public int get(final CharSequence key) {
+ return get(key, 0);
+ }
+
+ /**
+ * Returns the mapped "value" for a given String or other CharSequence key.
+ * @param key The key whose value is needed.
+ * @param start The index to the first element in <code>key</code> which should be considered part of the key.
+ * @return The mapped value.
+ */
+ public int get(final CharSequence key, final int start) {
+ return get(key, start, key.length());
+ }
+
+ /**
+ * Returns the mapped "value" for a given String or other CharSequence key.
+ * @param key The key whose value is needed.
+ * @param start The index to the first element in <code>key</code> which should be considered part of the key.
+ * @param end The index to the first element in <code>key</code>, after <code>start</code>, which should NOT be
+ * considered part of the key.
+ * @return The mapped value, if <code>key</code> is in the map. Otherwise, returns -1;
+ */
+ public int get(final CharSequence key, final int start, final int end) {
+ int d;
+ char p = this.root;
+ int i = start;
+ char c;
+
+ while (p != 0) {
+ if (this.keyChar[p] == Character.MAX_VALUE) {
+ if (this.compressedKeys.compareNullTerminated(this.lo[p], key, i, end) == 0) {
+ return this.eq[p];
+ }
+ return -1;
+ }
+ if (i >= end) {
+ /* If we are past the end of the key, then pretend like we have a
+ * null-terminator. */
+ c = 0;
+ } else {
+ c = key.charAt(i);
+ }
+ d = c - this.keyChar[p];
+ if (d == 0) {
+ if (c == 0) {
+ return this.eq[p];
+ }
+ i++;
+ p = this.eq[p];
+ } else if (d < 0) {
+ p = this.lo[p];
+ } else {
+ p = this.hi[p];
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Ensures that the internal data structures have enough nodes to handle a given number of nodes.
+ * @param newCapacity The new number of nodes that needs to be ensured.
+ */
+ private void ensureCapacity(final int newCapacity) {
+ if (newCapacity > this.eq.length) {
+ redimNodeArrays(this.eq.length + TernaryTreeMap.BLOCK_SIZE);
+ }
+ }
+
+ /**
+ * Resizes the arrays.
+ * @param newsize The new number of nodes in the tree.
+ */
+ private void redimNodeArrays(final int newsize) {
+ int len = 0;
+ if (newsize < this.lo.length) {
+ len = newsize;
+ } else {
+ len = this.lo.length;
+ }
+ char[] na = new char[newsize];
+ System.arraycopy(this.lo, 0, na, 0, len);
+ this.lo = na;
+ na = new char[newsize];
+ System.arraycopy(this.hi, 0, na, 0, len);
+ this.hi = na;
+ na = new char[newsize];
+ System.arraycopy(this.eq, 0, na, 0, len);
+ this.eq = na;
+ na = new char[newsize];
+ System.arraycopy(this.keyChar, 0, na, 0, len);
+ this.keyChar = na;
+ }
+
+ /**
+ * Returns the number of key-value pairs in this map.
+ * This can be different from the number of nodes in the tree because of the compressed
+ * branches.
+ * @return The number of key-value pairs in this map.
+ */
+ public int size() {
+ return this.length;
+ }
+
+ /**
+ * Returns the number of nodes in the tree.
+ * @return The number of nodes in the tree.
+ */
+ int getNodeCount() {
+ return this.freenode;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public TernaryTreeMap clone() {
+ final TernaryTreeMap t = new TernaryTreeMap();
+ t.lo = this.lo.clone();
+ t.hi = this.hi.clone();
+ t.eq = this.eq.clone();
+ t.keyChar = this.keyChar.clone();
+ t.compressedKeys = this.compressedKeys.clone();
+ t.root = this.root;
+ t.freenode = this.freenode;
+ t.length = this.length;
+
+ return t;
+ }
+
+ /**
+ * Recursively insert the median first and then the median of the
+ * lower and upper halves, and so on in order to get a balanced
+ * tree. The array of keys is assumed to be sorted in ascending
+ * order.
+ * @param keys The array of keys to be processed.
+ * @param values The array of values to be processed.
+ * @param offset The offset from the computed median at which the item
+ * should be inserted.
+ * @param upperBound The highest index which should be used in the new
+ * median computation.
+ */
+ private void insertBalanced(final String[] keys, final char[] values,
+ final int offset, final int upperBound) {
+ if (upperBound < 1) {
+ return;
+ }
+
+ /* Compute the median index as 1/2 of the upperBound. */
+ final int median = upperBound >> 1;
+
+ /* Insert the median key into the tree. */
+ put(keys[median + offset], values[median + offset]);
+
+ /* Recursively insert into the lower half of the branch. */
+ insertBalanced(keys, values, offset, median);
+
+ /* Recursively insert into the upper half of the branch. */
+ insertBalanced(keys, values, offset + median + 1,
+ upperBound - median - 1);
+ }
+
+ /**
+ * Balance the tree for best search performance.
+ */
+ private void balance() {
+ int i = 0;
+ final int n = this.length;
+ final String[] k = new String[n];
+ final char[] v = new char[n];
+ final Iterator iter = new Iterator();
+ while (iter.hasMoreElements()) {
+ v[i] = iter.getValue();
+ k[i++] = iter.nextElement();
+ }
+ init();
+ insertBalanced(k, v, 0, n);
+ // With uniform letter distribution sc[root] should be around 'm'
+ }
+
+ /**
+ * Allows the map to internally optimize itself for faster performance and possible memory
+ * reduction.
+ * This method should be run only once, immediately after the tree is completely built.
+ */
+ public void optimize() {
+ /* Each node stores a character which is part of
+ * some key(s). In a compressed branch (one that only contain
+ * a single string key) the trailer of the key which is not
+ * already in nodes is stored externally in the kv array.
+ * As items are inserted, key substrings decrease.
+ * Some substrings may completely disappear when the whole
+ * branch is totally decompressed.
+ * The tree is traversed to find the key substrings actually
+ * used. In addition, duplicate substrings are removed using
+ * a map (implemented with a TernaryTree!).
+ */
+
+ /* First balance the tree for best performance. */
+ balance();
+
+ /* Redimension the node arrays. */
+ redimNodeArrays(this.freenode);
+
+ // ok, compact kv array
+ final CharVector kx = new CharVector();
+ final TernaryTreeMap map = new TernaryTreeMap();
+ compact(kx, map, this.root);
+ this.compressedKeys = kx;
+ this.compressedKeys.trimToSize();
+ }
+
+ /**
+ * Compacts the trailing key vector.
+ * @param kx The new trailing key vector.
+ * @param map The map being used to look for duplicates.
+ * @param p The index to the node being tested.
+ */
+ private void compact(final CharVector kx, final TernaryTreeMap map,
+ final char p) {
+ int k;
+ if (p == 0) {
+ return;
+ }
+ if (this.keyChar[p] == Character.MAX_VALUE) {
+ k = map.get(this.compressedKeys, this.lo[p]);
+ if (k < 0) {
+ final int start = this.lo[p];
+ k = kx.addNullTerminated(this.compressedKeys, start);
+ kx.add(StringUtil.NULL_TERMINATOR);
+ map.put(kx, k, (char) k);
+ }
+ this.lo[p] = (char) k;
+ } else {
+ compact(kx, map, this.lo[p]);
+ if (this.keyChar[p] != 0) {
+ compact(kx, map, this.eq[p]);
+ }
+ compact(kx, map, this.hi[p]);
+ }
+ }
+
+ /**
+ * Inner class Iterator implementation.
+ */
+ public class Iterator implements Enumeration<String> {
+
+ /** Current node index. */
+ private int cur;
+
+ /** Current key. */
+ private String curkey;
+
+ /**
+ * Inner class Cloneable implementation.
+ */
+ private class Item implements Cloneable {
+ /** The parent char of the Item. */
+ private char parent;
+
+ /** The child char of the Item. */
+ private char child;
+
+ /**
+ * No-argument constructor.
+ */
+ public Item() {
+ this.parent = 0;
+ this.child = 0;
+ }
+
+ /**
+ * Constructor.
+ * @param parent Index to the parent node.
+ * @param child Index to the child node.
+ */
+ public Item(final char parent, final char child) {
+ this.parent = parent;
+ this.child = child;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public Item clone() {
+ return new Item(this.parent, this.child);
+ }
+
+ }
+
+ /** Node stack. */
+ private Stack<TernaryTreeMap.Iterator.Item> ns;
+
+ /** Key stack implemented with a StringBuffer. */
+ private StringBuilder ks;
+
+ /**
+ * Constructor.
+ */
+ public Iterator() {
+ this.cur = -1;
+ this.ns = new Stack<TernaryTreeMap.Iterator.Item>();
+ this.ks = new StringBuilder();
+ rewind();
+ }
+
+ /**
+ * Rewinds the iterator to point to its first item.
+ */
+ public void rewind() {
+ this.ns.removeAllElements();
+ this.ks.setLength(0);
+ this.cur = TernaryTreeMap.this.root;
+ run();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public String nextElement() {
+ final String res = new String(this.curkey);
+ this.cur = up();
+ run();
+ return res;
+ }
+
+ /**
+ * Returns the value of the current key of the iterator.
+ * @return The index value of the current key of the iterator.
+ */
+ public char getValue() {
+ if (this.cur >= 0) {
+ return TernaryTreeMap.this.eq[this.cur];
+ }
+ return 0;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public boolean hasMoreElements() {
+ return this.cur != -1;
+ }
+
+ /**
+ * Move up one level in the tree??
+ * @return The index to the next level up in the tree??
+ */
+ private int up() {
+ Item i = new Item();
+ int res = 0;
+
+ if (this.ns.empty()) {
+ return -1;
+ }
+
+ if (this.cur != 0
+ && TernaryTreeMap.this.keyChar[this.cur] == 0) {
+ return TernaryTreeMap.this.lo[this.cur];
+ }
+
+ boolean climb = true;
+
+ while (climb) {
+ i = this.ns.pop();
+ i.child++;
+ switch (i.child) {
+ case 1:
+ if (TernaryTreeMap.this.keyChar[i.parent] != 0) {
+ res = TernaryTreeMap.this.eq[i.parent];
+ this.ns.push(i.clone());
+ this.ks.append(TernaryTreeMap.this.keyChar[i.parent]);
+ } else {
+ i.child++;
+ this.ns.push(i.clone());
+ res = TernaryTreeMap.this.hi[i.parent];
+ }
+ climb = false;
+ break;
+
+ case 2:
+ res = TernaryTreeMap.this.hi[i.parent];
+ this.ns.push(i.clone());
+ if (this.ks.length() > 0) {
+ this.ks.setLength(this.ks.length() - 1); // pop
+ }
+ climb = false;
+ break;
+
+ default:
+ if (this.ns.empty()) {
+ return -1;
+ }
+ climb = true;
+ break;
+ }
+ }
+ return res;
+ }
+
+ /**
+ * Traverse the tree to find next key.
+ * @return The index to the next item in the map??
+ */
+ private int run() {
+ if (this.cur == -1) {
+ return -1;
+ }
+
+ boolean leaf = false;
+ for (;;) {
+ // first go down on low branch until leaf or compressed branch
+ while (this.cur != 0) {
+ if (TernaryTreeMap.this.keyChar[this.cur] == Character.MAX_VALUE) {
+ leaf = true;
+ break;
+ }
+ this.ns.push(new Item((char) this.cur, '\u0000'));
+ if (TernaryTreeMap.this.keyChar[this.cur] == 0) {
+ leaf = true;
+ break;
+ }
+ this.cur = TernaryTreeMap.this.lo[this.cur];
+ }
+ if (leaf) {
+ break;
+ }
+ // nothing found, go up one node and try again
+ this.cur = up();
+ if (this.cur == -1) {
+ return -1;
+ }
+ }
+ // The current node should be a data node and
+ // the key should be in the key stack (at least partially)
+ final StringBuilder buf = new StringBuilder(this.ks.toString());
+ if (TernaryTreeMap.this.keyChar[this.cur] == Character.MAX_VALUE) {
+ int p = TernaryTreeMap.this.lo[this.cur];
+ while (TernaryTreeMap.this.compressedKeys.get(p) != 0) {
+ buf.append(TernaryTreeMap.this.compressedKeys.get(p++));
+ }
+ }
+ this.curkey = buf.toString();
+ return 0;
+ }
+
+ }
+
+}
Modified: trunk/foray/foray-common/src/javatest/org/foray/common/TestFOrayCommon.java
===================================================================
--- trunk/foray/foray-common/src/javatest/org/foray/common/TestFOrayCommon.java 2009-03-15 18:11:47 UTC (rev 10686)
+++ trunk/foray/foray-common/src/javatest/org/foray/common/TestFOrayCommon.java 2009-03-15 18:21:21 UTC (rev 10687)
@@ -75,6 +75,7 @@
testSuite.addTestSuite(TestByteVectorPacked.class);
testSuite.addTestSuite(TestCharVector.class);
testSuite.addTestSuite(TestStringUtil.class);
+ testSuite.addTestSuite(TestTernaryTree.class);
testSuite.addTestSuite(TestUnicodeChar.class);
return testSuite;
Copied: trunk/foray/foray-common/src/javatest/org/foray/common/TestTernaryTree.java (from rev 10684, trunk/foray/foray-hyphen/src/javatest/org/foray/hyphen/TestTernaryTree.java)
===================================================================
--- trunk/foray/foray-common/src/javatest/org/foray/common/TestTernaryTree.java (rev 0)
+++ trunk/foray/foray-common/src/javatest/org/foray/common/TestTernaryTree.java 2009-03-15 18:21:21 UTC (rev 10687)
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2007 The FOray Project.
+ * http://www.foray.org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This work is in part derived from the following work(s), used with the
+ * permission of the licensor:
+ * Apache FOP, licensed by the Apache Software Foundation
+ *
+ */
+
+/*
+ * $LastChangedRevision$
+ * $LastChangedDate$
+ * $LastChangedBy$
+ */
+
+package org.foray.common;
+
+import junit.framework.TestCase;
+
+/**
+ * JUnit test class for the class {@link TernaryTreeMap}.
+ */
+public class TestTernaryTree extends TestCase {
+
+ /**
+ * Functional test which adds some key-value pairs to a TernaryTree, then
+ * queries the tree, testing the output.
+ */
+ public void testOutput() {
+ /* Check the CharSequence putters for whole words. */
+ TernaryTreeMap map = new TernaryTreeMap();
+ map.put("Carlos", 'C');
+ map.put("Car", 'r');
+ map.put("palos", 'l');
+ map.put("pa", 'p');
+ map.optimize();
+ checkGetters(map);
+
+ /* Check the CharSequence putters for words starting in the middle of a string. */
+ map = new TernaryTreeMap();
+ map.put("123Carlos", 3, 'C');
+ map.put("123Car", 3, 'r');
+ map.put("123palos", 3, 'l');
+ map.put("123pa", 3, 'p');
+ map.optimize();
+ checkGetters(map);
+
+ /* Check the CharSequence putters for words starting and ending in the middle of a string. */
+ map = new TernaryTreeMap();
+ map.put("123CarlosPDQ", 3, 9, 'C');
+ map.put("123CarPDQ", 3, 6, 'r');
+ map.put("123palosPDQ", 3, 8, 'l');
+ map.put("123paPDQ", 3, 5, 'p');
+ map.optimize();
+ checkGetters(map);
+
+ /* Check the char[] putters for whole words. */
+ map = new TernaryTreeMap();
+ map.put("Carlos".toCharArray(), 'C');
+ map.put("Car".toCharArray(), 'r');
+ map.put("palos".toCharArray(), 'l');
+ map.put("pa".toCharArray(), 'p');
+ map.optimize();
+ checkGetters(map);
+
+ /* Check the char[] putters for words starting in the middle of a string. */
+ map = new TernaryTreeMap();
+ map.put("123Carlos".toCharArray(), 3, 'C');
+ map.put("123Car".toCharArray(), 3, 'r');
+ map.put("123palos".toCharArray(), 3, 'l');
+ map.put("123pa".toCharArray(), 3, 'p');
+ map.optimize();
+ checkGetters(map);
+
+ /* Check the char[] putters for words starting and ending in the middle of a string. */
+ map = new TernaryTreeMap();
+ map.put("123CarlosPDQ".toCharArray(), 3, 9, 'C');
+ map.put("123CarPDQ".toCharArray(), 3, 6, 'r');
+ map.put("123palosPDQ".toCharArray(), 3, 8, 'l');
+ map.put("123paPDQ".toCharArray(), 3, 5, 'p');
+ map.optimize();
+ checkGetters(map);
+ }
+
+ /**
+ * Makes assertions about the various CharSequence getters.
+ * @param map The map being tested.
+ */
+ private void checkGetters(final TernaryTreeMap map) {
+ /* Check the CharSequence getters for whole words. */
+ assertEquals('C', (char) map.get("Carlos"));
+ assertEquals('r', (char) map.get("Car"));
+ assertEquals('l', (char) map.get("palos"));
+ assertEquals('p', (char) map.get("pa"));
+ assertEquals(-1, map.get("alto"));
+ assertEquals(-1, map.get("C"));
+
+ /* Check the CharSequence getters for words starting in the middle of a string. */
+ assertEquals('C', (char) map.get("ABCCarlos", 3));
+ assertEquals('r', (char) map.get("ABCCar", 3));
+ assertEquals('l', (char) map.get("ABCpalos", 3));
+ assertEquals('p', (char) map.get("ABCpa", 3));
+ assertEquals(-1, map.get("ABCalto", 3));
+ assertEquals(-1, map.get("ABCC", 3));
+
+ /* Check the CharSequence getters for words starting and ending in the middle of a string. */
+ assertEquals('C', (char) map.get("ABCCarlosXYZ", 3, 9));
+ assertEquals('r', (char) map.get("ABCCarXYZ", 3, 6));
+ assertEquals('l', (char) map.get("ABCpalosXYZ", 3, 8));
+ assertEquals('p', (char) map.get("ABCpaABCXYZ", 3, 5));
+ assertEquals(-1, map.get("ABCaltoXYZ", 3, 7));
+ assertEquals(-1, map.get("ABCCXYZ", 3, 4));
+
+ /* Check the char[] getters for whole words. */
+ assertEquals('C', (char) map.get("Carlos".toCharArray()));
+ assertEquals('r', (char) map.get("Car".toCharArray()));
+ assertEquals('l', (char) map.get("palos".toCharArray()));
+ assertEquals('p', (char) map.get("pa".toCharArray()));
+ assertEquals(-1, map.get("alto".toCharArray()));
+ assertEquals(-1, map.get("C".toCharArray()));
+
+ /* Check the char[] getters for words starting in the middle of a string. */
+ assertEquals('C', (char) map.get("ABCCarlos".toCharArray(), 3));
+ assertEquals('r', (char) map.get("ABCCar".toCharArray(), 3));
+ assertEquals('l', (char) map.get("ABCpalos".toCharArray(), 3));
+ assertEquals('p', (char) map.get("ABCpa".toCharArray(), 3));
+ assertEquals(-1, map.get("ABCalto".toCharArray(), 3));
+ assertEquals(-1, map.get("ABCC".toCharArray(), 3));
+
+ /* Check the char[] getters for words starting and ending in the middle of a string. */
+ assertEquals('C', (char) map.get("ABCCarlosXYZ".toCharArray(), 3, 9));
+ assertEquals('r', (char) map.get("ABCCarXYZ".toCharArray(), 3, 6));
+ assertEquals('l', (char) map.get("ABCpalosXYZ".toCharArray(), 3, 8));
+ assertEquals('p', (char) map.get("ABCpaABCXYZ".toCharArray(), 3, 5));
+ assertEquals(-1, map.get("ABCaltoXYZ".toCharArray(), 3, 7));
+ assertEquals(-1, map.get("ABCCXYZ".toCharArray(), 3, 4));
+
+ assertEquals(4, map.size());
+ assertEquals(10, map.getNodeCount());
+ }
+
+}
Property changes on: trunk/foray/foray-common/src/javatest/org/foray/common/TestTernaryTree.java
___________________________________________________________________
Added: svn:keywords
+ "Author Id Rev Date URL"
Added: svn:mergeinfo
+
Added: svn:eol-style
+ native
Modified: trunk/foray/foray-hyphen/src/java/org/foray/hyphen/HyphenationTree.java
===================================================================
--- trunk/foray/foray-hyphen/src/java/org/foray/hyphen/HyphenationTree.java 2009-03-15 18:11:47 UTC (rev 10686)
+++ trunk/foray/foray-hyphen/src/java/org/foray/hyphen/HyphenationTree.java 2009-03-15 18:21:21 UTC (rev 10687)
@@ -37,6 +37,7 @@
import org.foray.common.ByteVectorPacked;
import org.foray.common.CharVector;
import org.foray.common.StringUtil;
+import org.foray.common.TernaryTreeMap;
import org.axsl.hyphen.HyphenationException;
Deleted: trunk/foray/foray-hyphen/src/java/org/foray/hyphen/TernaryTreeMap.java
===================================================================
--- trunk/foray/foray-hyphen/src/java/org/foray/hyphen/TernaryTreeMap.java 2009-03-15 18:11:47 UTC (rev 10686)
+++ trunk/foray/foray-hyphen/src/java/org/foray/hyphen/TernaryTreeMap.java 2009-03-15 18:21:21 UTC (rev 10687)
@@ -1,799 +0,0 @@
-/*
- * Copyright 2004 The FOray Project.
- * http://www.foray.org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * This work is in part derived from the following work(s), used with the
- * permission of the licensor:
- * Apache FOP, licensed by the Apache Software Foundation
- *
- */
-
-/*
- * $LastChangedRevision$
- * $LastChangedDate$
- * $LastChangedBy$
- */
-
-/*
- * Known Constributors:
- * Carlos Villegas <ca...@un...> (Original author)
- */
-
-package org.foray.hyphen;
-
-import org.foray.common.CharVector;
-import org.foray.common.StringUtil;
-
-import java.io.Serializable;
-import java.util.Enumeration;
-import java.util.Stack;
-
-/**
- * <p>A map whose key is always a {@link CharSequence} (usually a {@link String}), and whose value
- * is a char, usually used to store an index into some other data structure.
- * This class is intended to serve as a base class or helper class for natural-language dictionaries
- * and similar data structures.</p>
- *
- * <p>A ternary search tree is a hybrid between a binary tree and a digital search tree (trie).
- * Ternary trees have some nice properties, including the following:
- * <ul>
- * <li>The tree can be traversed in sorted order.</li>
- * <li>Partial matches (wildcards) can be implemented.</li>
- * <li>Retrieval of all keys within a given distance from the target is possible.</li>
- * <li>The storage requirements are higher than a binary tree but a lot less than a trie.</li>
- * <li>Performance is comparable with a hash table.
- * Sometimes it outperforms a hash function, and can usually determine a miss faster than a
- * hash.</li>
- * </ul>
- * </p>
- *
- * <p>In this implementation the value is a char (think "unsigned short"), and is stored in the leaf
- * nodes of the tree.
- * This limits the number of nodes to 65,536.
- * Branches that contain only one key are compressed to one node by storing a pointer to the trailer
- * substring of the key.
- * A compressed branch needs only one node per key plus the size of the string key.
- * Compressed branches are decompressed as needed when another key with same prefix is inserted.
- * This saves a lot of space, especially for long keys.</p>
- *
- * <p>This class could be extended to or embedded in a general map that would allow arbitrary
- * objects to be values in the map.
- * One scheme for doing so would be to create an array of the value Objects, and use this class to
- * store the indexes to that array.</p>
- */
-
-public class TernaryTreeMap implements Cloneable, Serializable {
- /* TODO: After all other API-related to-do items are cleaned up, consider moving this class
- * to Common module. */
-
- /** Allocation size for arrays. */
- private static final int BLOCK_SIZE = 2048;
-
- /** Constant used for serialization. */
- private static final long serialVersionUID = 240904407206584695L;
-
- /*
- * We use four arrays of equal size to represent the nodes in this tree.
- * A given node /n/ is represented by the values in the arrays lo, hi, eq, and sc.
- * It is tempting to group these four arrays into a private inner class, and the original author
- * of this class left documentation kind of apologizing for not doing so.
- * However, experimentation shows that the extra object references created when doing so more
- * than doubles the size of the serialized instance, and therefore presumably does approximately
- * the same to memory consumption.
- * Since a light memory footprint is one of the main design considerations for this class, we
- * will use the original parallel arrays instead.
- */
-
- /**
- * If {@link #keyChar} is a valid Unicode character, this array contains the index to the node
- * containing the "low" branch, that is the branch that should be traversed to find a char
- * that is less than {@link #keyChar}.
- * If {@link #keyChar} is equal to U+FFFF, the compressed branch indicator, then this array
- * instead contains the index into {@link #compressedKeys} that contains the first char in the
- * compressed portion of the key.
- */
- private char[] lo;
-
- /**
- * The index to the node containing the "hi" branch, that is the branch that should be traversed
- * to find a char that is greater than {@link #keyChar}.
- */
- private char[] hi;
-
- /**
- * If {@link #keyChar} is a valid Unicode character, this array contains the index to the node
- * containing the "equal" branch, that is the branch that should be traversed to find a char
- * that is equal to {@link #keyChar}.
- * If {@link #keyChar} is equal to U+0000, the string terminator, then this array instead
- * contains value for this map element.
- */
- private char[] eq;
-
- /**
- * <P>The character, which is part of the key, stored in this node.
- * Two special values are reserved:</P>
- * <ul>
- * <li>0x0000 is the string terminator. If the node contains this value, then the search is
- * done and the value in {@link #eq} should be returned.</li>
- * <li>0xFFFF to indicate that the branch starting at this node is compressed.
- * This shouldn't be a problem if we give the usual semantics to strings since 0xFFFF is
- * guaranteed not to be a valid Unicode character.</li>
- * </ul>
- */
- private char[] keyChar;
-
- /** This vector holds the compressed portion of a key when the branch is compressed.
- * It is simply a series of word fragments, each followed by a
- * {@link StringUtil#NULL_TERMINATOR}.
- * The index to the start of the compressed portion of the key for a given node in the tree is
- * found in the {@link #lo} value for the node. */
- private CharVector compressedKeys;
-
- /** The index of the root of the current branch?? */
- private char root;
-
- /** The index to the next available node. */
- private char freenode;
-
- /** The number of key-value pairs mapped in the tree.
- * This is not necessarily the same as the number of nodes in the tree, because of branch
- * compression, and because it frequently takes more than one node to store the chars in a
- * key. */
- private int length;
-
- /**
- * Constructor.
- */
- public TernaryTreeMap() {
- init();
- }
-
- /**
- * Initializes the basic data structures of the tree.
- */
- private void init() {
- this.root = 0;
- this.freenode = 1;
- this.length = 0;
- this.lo = new char[TernaryTreeMap.BLOCK_SIZE];
- this.hi = new char[TernaryTreeMap.BLOCK_SIZE];
- this.eq = new char[TernaryTreeMap.BLOCK_SIZE];
- this.keyChar = new char[TernaryTreeMap.BLOCK_SIZE];
- this.compressedKeys = new CharVector();
- }
-
- /**
- * Put part or all of one char[] in the map.
- * @param key The key to be added.
- * @param value The value.
- */
- public void put(final char[] key, final char value) {
- put(key, 0, value);
- }
-
- /**
- * Put part or all of one char[] in the map.
- * @param key The key to be added.
- * @param start The index to the first char in <code>key</code> to be added.
- * @param value The value.
- */
- public void put(final char[] key, final int start, final char value) {
- put(key, start, key.length, value);
- }
-
- /**
- * Put part or all of one char[] in the map.
- * @param key The key to be added.
- * @param start The index to the first char in <code>key</code> to be added.
- * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
- * @param value The value.
- */
- public void put(final char[] key, final int start, final int end, final char value) {
- final String keyString = new String(key, start, end - start);
- put(keyString, value);
- }
-
- /**
- * Put one String or other CharSequence in the map.
- * @param key The key to be added.
- * @param value The value.
- */
- public void put(final CharSequence key, final char value) {
- this.put(key, 0, value);
- }
-
- /**
- * Put one String or other CharSequence in the map.
- * @param key The key to be added.
- * @param start The index to the first char in <code>key</code> to be added.
- * @param value The value.
- */
- public void put(final CharSequence key, final int start, final char value) {
- put(key, start, key.length(), value);
- }
-
- /**
- * Put one String or other CharSequence in the map.
- * @param key The key to be added.
- * @param start The index to the first char in <code>key</code> to be added.
- * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
- * @param value The value.
- */
- public void put(final CharSequence key, final int start, final int end, final char value) {
- ensureCapacity(this.getNodeCount() + end - start + 1);
- this.root = put(this.root, key, start, end, value);
- }
-
- /**
- * The actual insertion function, which is recursive.
- * @param p The initial root value.
- * @param key The key to be added.
- * @param start The index to the first char in <code>key</code> to be added.
- * @param end The index to the first char in <code>key</code>, after <code>start</code> that should NOT be added.
- * @param value The value.
- * @return The updated root value.
- */
- private char put(final char p, final CharSequence key, final int start, final int end, final char value) {
- char pToUse = p;
- final int len = end - start;
- if (pToUse == 0) {
- // this means there is no branch, this node will start a new branch.
- // Instead of doing that, we store the key somewhere else and create
- // only one node with a pointer to the key
- pToUse = this.freenode++;
- /* Holds data. */
- this.eq[pToUse] = value;
- this.length++;
- this.hi[pToUse] = 0;
- if (len > 0) {
- /* Indicates branch is compressed */
- this.keyChar[pToUse] = Character.MAX_VALUE;
- /* Use "lo" to hold pointer to key. */
- this.lo[pToUse] = (char) this.compressedKeys.add(key, start, len);
- this.compressedKeys.add(StringUtil.NULL_TERMINATOR);
- } else {
- this.keyChar[pToUse] = 0;
- this.lo[pToUse] = 0;
- }
- return pToUse;
- }
-
- if (this.keyChar[pToUse] == Character.MAX_VALUE) {
- final char pp = this.freenode++;
- this.lo[pp] = this.lo[pToUse];
- this.eq[pp] = this.eq[pToUse];
- this.lo[pToUse] = 0;
- if (len > 0) {
- this.keyChar[pToUse] = this.compressedKeys.get(this.lo[pp]);
- this.eq[pToUse] = pp;
- this.lo[pp]++;
- if (this.compressedKeys.get(this.lo[pp]) == 0) {
- this.lo[pp] = 0;
- this.keyChar[pp] = 0;
- this.hi[pp] = 0;
- } else {
- this.keyChar[pp] = Character.MAX_VALUE;
- }
- } else {
- this.keyChar[pp] = Character.MAX_VALUE;
- this.hi[pToUse] = pp;
- this.keyChar[pToUse] = 0;
- this.eq[pToUse] = value;
- this.length++;
- return pToUse;
- }
- }
- final char s = key.charAt(start);
- if (s < this.keyChar[pToUse]) {
- this.lo[pToUse] = put(this.lo[pToUse], key, start, end, value);
- } else if (s == this.keyChar[pToUse]) {
- if (s != 0) {
- this.eq[pToUse] = put(this.eq[pToUse], key, start + 1, end, value);
- } else {
- // key already in tree, overwrite data
- this.eq[pToUse] = value;
- }
-
- } else {
- this.hi[pToUse] = put(this.hi[pToUse], key, start, end, value);
- }
- return pToUse;
- }
-
- /**
- * Returns the mapped "value" for a given char array.
- * @param key The key whose value is needed.
- * @return The mapped value.
- */
- public int get(final char[] key) {
- return get(key, 0);
- }
-
- /**
- * Returns the mapped "value" for a given char array.
- * @param key The key whose value is needed.
- * @param start The index to the first element in <code>key</code> which should be considered part of the key.
- * @return The mapped value.
- */
- public int get(final char[] key, final int start) {
- return get(key, start, key.length);
- }
-
- /**
- * Returns the mapped "value" for a given char array.
- * @param key The key whose value is needed.
- * @param start The index to the first element in <code>key</code> which should be considered part of the key.
- * @param end The index to the first element in <code>key</code>, after <code>start</code>, which should NOT be
- * considered part of the key.
- * @return The mapped value, if <code>key</code> is in the map. Otherwise, returns -1;
- */
- public int get(final char[] key, final int start, final int end) {
- final int length = end - start;
- final String keyString = new String(key, start, length);
- return get(keyString);
- }
-
- /**
- * Returns the mapped "value" for a given String or other CharSequence key.
- * @param key The key whose value is needed.
- * @return The mapped value.
- */
- public int get(final CharSequence key) {
- return get(key, 0);
- }
-
- /**
- * Returns the mapped "value" for a given String or other CharSequence key.
- * @param key The key whose value is needed.
- * @param start The index to the first element in <code>key</code> which should be considered part of the key.
- * @return The mapped value.
- */
- public int get(final CharSequence key, final int start) {
- return get(key, start, key.length());
- }
-
- /**
- * Returns the mapped "value" for a given String or other CharSequence key.
- * @param key The key whose value is needed.
- * @param start The index to the first element in <code>key</code> which should be considered part of the key.
- * @param end The index to the first element in <code>key</code>, after <code>start</code>, which should NOT be
- * considered part of the key.
- * @return The mapped value, if <code>key</code> is in the map. Otherwise, returns -1;
- */
- public int get(final CharSequence key, final int start, final int end) {
- int d;
- char p = this.root;
- int i = start;
- char c;
-
- while (p != 0) {
- if (this.keyChar[p] == Character.MAX_VALUE) {
- if (this.compressedKeys.compareNullTerminated(this.lo[p], key, i, end) == 0) {
- return this.eq[p];
- }
- return -1;
- }
- if (i >= end) {
- /* If we are past the end of the key, then pretend like we have a
- * null-terminator. */
- c = 0;
- } else {
- c = key.charAt(i);
- }
- d = c - this.keyChar[p];
- if (d == 0) {
- if (c == 0) {
- return this.eq[p];
- }
- i++;
- p = this.eq[p];
- } else if (d < 0) {
- p = this.lo[p];
- } else {
- p = this.hi[p];
- }
- }
- return -1;
- }
-
- /**
- * Ensures that the internal data structures have enough nodes to handle a given number of nodes.
- * @param newCapacity The new number of nodes that needs to be ensured.
- */
- private void ensureCapacity(final int newCapacity) {
- if (newCapacity > this.eq.length) {
- redimNodeArrays(this.eq.length + TernaryTreeMap.BLOCK_SIZE);
- }
- }
-
- /**
- * Resizes the arrays.
- * @param newsize The new number of nodes in the tree.
- */
- private void redimNodeArrays(final int newsize) {
- int len = 0;
- if (newsize < this.lo.length) {
- len = newsize;
- } else {
- len = this.lo.length;
- }
- char[] na = new char[newsize];
- System.arraycopy(this.lo, 0, na, 0, len);
- this.lo = na;
- na = new char[newsize];
- System.arraycopy(this.hi, 0, na, 0, len);
- this.hi = na;
- na = new char[newsize];
- System.arraycopy(this.eq, 0, na, 0, len);
- this.eq = na;
- na = new char[newsize];
- System.arraycopy(this.keyChar, 0, na, 0, len);
- this.keyChar = na;
- }
-
- /**
- * Returns the number of key-value pairs in this map.
- * This can be different from the number of nodes in the tree because of the compressed
- * branches.
- * @return The number of key-value pairs in this map.
- */
- public int size() {
- return this.length;
- }
-
- /**
- * Returns the number of nodes in the tree.
- * @return The number of nodes in the tree.
- */
- int getNodeCount() {
- return this.freenode;
- }
-
- /**
- * {@inheritDoc}
- */
- public TernaryTreeMap clone() {
- final TernaryTreeMap t = new TernaryTreeMap();
- t.lo = this.lo.clone();
- t.hi = this.hi.clone();
- t.eq = this.eq.clone();
- t.keyChar = this.keyChar.clone();
- t.compressedKeys = this.compressedKeys.clone();
- t.root = this.root;
- t.freenode = this.freenode;
- t.length = this.length;
-
- return t;
- }
-
- /**
- * Recursively insert the median first and then the median of the
- * lower and upper halves, and so on in order to get a balanced
- * tree. The array of keys is assumed to be sorted in ascending
- * order.
- * @param keys The array of keys to be processed.
- * @param values The array of values to be processed.
- * @param offset The offset from the computed median at which the item
- * should be inserted.
- * @param upperBound The highest index which should be used in the new
- * median computation.
- */
- private void insertBalanced(final String[] keys, final char[] values,
- final int offset, final int upperBound) {
- if (upperBound < 1) {
- return;
- }
-
- /* Compute the median index as 1/2 of the upperBound. */
- final int median = upperBound >> 1;
-
- /* Insert the median key into the tree. */
- put(keys[median + offset], values[median + offset]);
-
- /* Recursively insert into the lower half of the branch. */
- insertBalanced(keys, values, offset, median);
-
- /* Recursively insert into the upper half of the branch. */
- insertBalanced(keys, values, offset + median + 1,
- upperBound - median - 1);
- }
-
- /**
- * Balance the tree for best search performance.
- */
- private void balance() {
- int i = 0;
- final int n = this.length;
- final String[] k = new String[n];
- final char[] v = new char[n];
- final Iterator iter = new Iterator();
- while (iter.hasMoreElements()) {
- v[i] = iter.getValue();
- k[i++] = iter.nextElement();
- }
- init();
- insertBalanced(k, v, 0, n);
- // With uniform letter distribution sc[root] should be around 'm'
- }
-
- /**
- * Allows the map to internally optimize itself for faster performance and possible memory
- * reduction.
- * This method should be run only once, immediately after the tree is completely built.
- */
- public void optimize() {
- /* Each node stores a character which is part of
- * some key(s). In a compressed branch (one that only contain
- * a single string key) the trailer of the key which is not
- * already in nodes is stored externally in the kv array.
- * As items are inserted, key substrings decrease.
- * Some substrings may completely disappear when the whole
- * branch is totally decompressed.
- * The tree is traversed to find the key substrings actually
- * used. In addition, duplicate substrings are removed using
- * a map (implemented with a TernaryTree!).
- */
-
- /* First balance the tree for best perfo...
[truncated message content] |