From: Eric F. <er...@us...> - 2002-01-03 16:43:26
|
Update of /cvsroot/maxent/maxent/src/java/opennlp/maxent
In directory usw-pr-cvs1:/tmp/cvs-serv11068/src/java/opennlp/maxent

Modified Files:
    DataIndexer.java

Log Message:
bug fix: replace ComparableEvent[] array with an ArrayList so that we don't make assumptions about the size of the event index until we've filtered out events that have no active features. The native array approach was a problem inasmuch as it could contain null entries (for the dropped events) that would break the sorting routine. ArrayList avoids this pitfall by sorting just the parts of the underlying array that have entries.

Index: DataIndexer.java
===================================================================
RCS file: /cvsroot/maxent/maxent/src/java/opennlp/maxent/DataIndexer.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** DataIndexer.java 2002/01/03 14:34:29 1.7
--- DataIndexer.java 2002/01/03 16:43:23 1.8
*************** *** 59,63 **** TObjectIntHashMap predicateIndex; TLinkedList events; ! ComparableEvent[] eventsToCompare; predicateIndex = new TObjectIntHashMap(); --- 59,63 ---- TObjectIntHashMap predicateIndex; TLinkedList events; ! List eventsToCompare; predicateIndex = new TObjectIntHashMap(); *************** *** 90,114 **** * @since maxent 1.2.6 */ ! private void sortAndMerge(ComparableEvent[] eventsToCompare) { ! Arrays.sort(eventsToCompare); ! int numEvents = eventsToCompare.length; int numUniqueEvents = 1; // assertion: eventsToCompare.length >= 1 ! if (eventsToCompare.length <= 1) { return; // nothing to do; edge case (see assertion) } ! ComparableEvent ce = eventsToCompare[0]; for (int i=1; i<numEvents; i++) { ! if (ce.compareTo(eventsToCompare[i]) == 0) { ce.seen++; // increment the seen count ! eventsToCompare[i] = null; // kill the duplicate } else { ! ce = eventsToCompare[i]; // a new champion emerges... numUniqueEvents++; // increment the # of unique events } } ! System.out.println("done. 
Reduced " + eventsToCompare.length + " events to " + numUniqueEvents + "."); --- 90,116 ---- * @since maxent 1.2.6 */ ! private void sortAndMerge(List eventsToCompare) { ! Collections.sort(eventsToCompare); ! int numEvents = eventsToCompare.size(); int numUniqueEvents = 1; // assertion: eventsToCompare.length >= 1 ! if (numEvents <= 1) { return; // nothing to do; edge case (see assertion) } ! ComparableEvent ce = (ComparableEvent)eventsToCompare.get(0); for (int i=1; i<numEvents; i++) { ! ComparableEvent ce2 = (ComparableEvent)eventsToCompare.get(i); ! ! if (ce.compareTo(ce2) == 0) { ce.seen++; // increment the seen count ! eventsToCompare.set(i, null); // kill the duplicate } else { ! ce = ce2; // a new champion emerges... numUniqueEvents++; // increment the # of unique events } } ! System.out.println("done. Reduced " + numEvents + " events to " + numUniqueEvents + "."); *************** *** 118,122 **** for (int i = 0, j = 0; i<numEvents; i++) { ! ComparableEvent evt = eventsToCompare[i]; if (null == evt) { continue; // this was a dupe, skip over it. --- 120,124 ---- for (int i = 0, j = 0; i<numEvents; i++) { ! ComparableEvent evt = (ComparableEvent)eventsToCompare.get(i); if (null == evt) { continue; // this was a dupe, skip over it. *************** *** 168,173 **** } ! private ComparableEvent[] index(TLinkedList events, ! TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); --- 170,175 ---- } ! private List index(TLinkedList events, ! TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); *************** *** 175,179 **** int outcomeCount = 0; int predCount = 0; ! ComparableEvent[] eventsToCompare = new ComparableEvent[numEvents]; TIntArrayList indexedContext = new TIntArrayList(); --- 177,181 ---- int outcomeCount = 0; int predCount = 0; ! 
List eventsToCompare = new ArrayList(numEvents); TIntArrayList indexedContext = new TIntArrayList(); *************** *** 181,184 **** --- 183,187 ---- Event ev = (Event)events.removeFirst(); String[] econtext = ev.getContext(); + ComparableEvent ce; int predID, ocID; *************** *** 201,206 **** // drop events with no active features if (indexedContext.size() > 0) { ! eventsToCompare[eventIndex] = ! new ComparableEvent(ocID, indexedContext.toNativeArray()); } // recycle the TIntArrayList --- 204,209 ---- // drop events with no active features if (indexedContext.size() > 0) { ! ce = new ComparableEvent(ocID, indexedContext.toNativeArray()); ! eventsToCompare.add(ce); } // recycle the TIntArrayList |