From: Wolfgang M. M. <wol...@us...> - 2004-09-12 10:40:24
|
Update of /cvsroot/exist/eXist-1.0/src/org/exist/storage
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1248/src/org/exist/storage

Modified Files:
NativeTextEngine.java

Log Message:
Applied patch by Christian Mayrhuber concerning large attribute string values.

Index: NativeTextEngine.java
===================================================================
RCS file: /cvsroot/exist/eXist-1.0/src/org/exist/storage/NativeTextEngine.java,v
retrieving revision 1.70
retrieving revision 1.71
diff -C2 -d -r1.70 -r1.71
*** NativeTextEngine.java 12 Sep 2004 09:25:20 -0000 1.70
--- NativeTextEngine.java 12 Sep 2004 10:40:13 -0000 1.71
***************
*** 100,103 ****
--- 100,109 ----
  public final static byte ATTRIBUTE_SECTION = 1;
  public final static byte TEXT_SECTION = 0;
+
+ /**
+ * Limit the length of the words to be indexed.
+ * Default is 512 characters for words in attributes and elements.
+ */
+ public final static int MAX_WORD_LENGTH = Integer.MAX_VALUE;

  protected BFile dbWords;
***************
*** 627,631 ****
  is = dbWords.getAsStream(ref);
  if (is == null) {
- LOG.warn(word + " not found in the index. This should not happen!");
  continue;
  }
--- 633,636 ----
***************
*** 706,710 ****
  }
  word = token.getText().toLowerCase();
! if (stoplist.contains(word) || word.length() > 512) {
  continue;
  }
--- 711,715 ----
  }
  word = token.getText().toLowerCase();
! if (stoplist.contains(word) || word.length() > MAX_WORD_LENGTH) {
  continue;
  }
***************
*** 735,751 ****
  invIdx.addText(sal, gid);
  } else {
! while (null != (token = tokenizer.nextToken())) {
! if (idx != null && idx.getIncludeAlphaNum() == false
! && token.isAlpha() == false) {
! continue;
! }
! word = token.getCharSequence();
! // word = token.getText();
! if (stoplist.contains(word) || word.length() > 1024) {
! continue;
  }
- invIdx.setDocument(doc);
- invIdx.addText(word, gid);
- }
  }
  }
--- 740,756 ----
  invIdx.addText(sal, gid);
  } else {
! while (null != (token = tokenizer.nextToken())) {
! if (idx != null && idx.getIncludeAlphaNum() == false
! && token.isAlpha() == false) {
! continue;
! }
! word = token.getCharSequence();
! // word = token.getText();
! if (stoplist.contains(word) || word.length() > MAX_WORD_LENGTH) {
! continue;
! }
! invIdx.setDocument(doc);
! invIdx.addText(word, gid);
  }
  }
  }
|