Menu

#2334 java.lang.IllegalArgumentException: Document contains at least one immense term

v1.0_(example)
closed
nobody
None
1
2026-01-08
2023-07-04
Anonymous
No

program.name=DocFetcher
program.version=1.1.25
program.build=20210525-2212
program.portable=false
java.runtime.name=Java(TM) SE Runtime Environment
java.runtime.version=1.8.0_181-b13
java.version=1.8.0_181
sun.arch.data.model=64
os.arch=amd64
os.name=Windows 10
os.version=10.0
user.language=zh
java.lang.IllegalArgumentException: Document contains at least one immense term in field="content" (whose UTF8 encoding is longer than the max length 32766), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '[50, 46, 55, 49, 56, 50, 56, 49, 56, 50, 56, 52, 53, 57, 48, 52, 53, 50, 51, 53, 51, 54, 48, 50, 56, 55, 52, 55, 49, 51]...', original message: bytes can be at most 32766 in length; got 100002
at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:806)
at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:447)
at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:403)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:232)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:478)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1571)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1316)
at net.sourceforge.docfetcher.model.index.IndexWriterAdapter.add(IndexWriterAdapter.java:53)
at net.sourceforge.docfetcher.model.index.file.SimpleDocWriter.write(SimpleDocWriter.java:44)
at net.sourceforge.docfetcher.model.index.file.LuceneDocWriter.add(LuceneDocWriter.java:36)
at net.sourceforge.docfetcher.model.index.file.FileContext.index(FileContext.java:161)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleFile(FileIndex.java:295)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:126)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51)
at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147)
at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57)
at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57)
at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442)
at net.sourceforge.docfetcher.model.index.file.FileIndex.doUpdate(FileIndex.java:159)
at net.sourceforge.docfetcher.model.TreeIndex.update(TreeIndex.java:148)
at net.sourceforge.docfetcher.model.index.Task.update(Task.java:98)
at net.sourceforge.docfetcher.model.index.IndexingQueue.threadLoop(IndexingQueue.java:193)
at net.sourceforge.docfetcher.model.index.IndexingQueue.access$100(IndexingQueue.java:46)
at net.sourceforge.docfetcher.model.index.IndexingQueue$2.run(IndexingQueue.java:118)
Caused by: org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException: bytes can be at most 32766 in length; got 100002
at org.apache.lucene.util.BytesRefHash.add(BytesRefHash.java:263)
at org.apache.lucene.index.TermsHashPerField.add(TermsHashPerField.java:149)
at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:796)
... 93 more

Discussion

  • Nam-Quang Tran

    Nam-Quang Tran - 2026-01-08
    • summary: program.name=DocFetcher program.version=1.1.25 program.build=20210525-2212 program.portable=false java.runtime.name=Java(TM) SE Runtime Environment java.runtime.version=1.8.0_181-b13 java.version=1.8.0_181 sun.arch.data.model=64 os.arch=amd64 os.name=Windows 10 os.version=10.0 user.language=zh java.lang.IllegalArgumentException: Document contains at least one immense term in field="content" (whose UTF8 encoding is longer than the max length 32766), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '[50, 46, 55, 49, 56, 50, 56, 49, 56, 50, 56, 52, 53, 57, 48, 52, 53, 50, 51, 53, 51, 54, 48, 50, 56, 55, 52, 55, 49, 51]...', original message: bytes can be at most 32766 in length; got 100002 at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:806) at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:447) at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:403) at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:232) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:478) at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1571) at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1316) at net.sourceforge.docfetcher.model.index.IndexWriterAdapter.add(IndexWriterAdapter.java:53) at net.sourceforge.docfetcher.model.index.file.SimpleDocWriter.write(SimpleDocWriter.java:44) at net.sourceforge.docfetcher.model.index.file.LuceneDocWriter.add(LuceneDocWriter.java:36) at net.sourceforge.docfetcher.model.index.file.FileContext.index(FileContext.java:161) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleFile(FileIndex.java:295) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:126) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.access$200(FileIndex.java:51) at net.sourceforge.docfetcher.model.index.file.FileIndex$1.handleDir(FileIndex.java:393) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.runWithHtmlPairing(HtmlFileLister.java:147) at net.sourceforge.docfetcher.model.index.file.HtmlFileLister.doRun(HtmlFileLister.java:57) at net.sourceforge.docfetcher.util.Stoppable.run(Stoppable.java:57) at net.sourceforge.docfetcher.model.index.file.FileIndex.visitDirOrZip(FileIndex.java:442) at net.sourceforge.docfetcher.model.index.file.FileIndex.doUpdate(FileIndex.java:159) at net.sourceforge.docfetcher.model.TreeIndex.update(TreeIndex.java:148) at net.sourceforge.docfetcher.model.index.Task.update(Task.java:98) at net.sourceforge.docfetcher.model.index.IndexingQueue.threadLoop(IndexingQueue.java:193) at net.sourceforge.docfetcher.model.index.IndexingQueue.access$100(IndexingQueue.java:46) at net.sourceforge.docfetcher.model.index.IndexingQueue$2.run(IndexingQueue.java:118) Caused by: org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException: bytes can be at most 32766 in length; got 100002 at org.apache.lucene.util.BytesRefHash.add(BytesRefHash.java:263) at org.apache.lucene.index.TermsHashPerField.add(TermsHashPerField.java:149) at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:796) ... 93 more --> java.lang.IllegalArgumentException: Document contains at least one immense term
    • status: open --> closed
     
  • Nam-Quang Tran

    Nam-Quang Tran - 2026-01-08

    Will be fixed in DocFetcher 1.1.27.

     

Anonymous
Anonymous

Add attachments
Cancel





MongoDB Logo MongoDB