From: <wol...@us...> - 2010-04-03 18:24:05
|
Revision: 11606
          http://exist.svn.sourceforge.net/exist/?rev=11606&view=rev
Author:   wolfgang_m
Date:     2010-04-03 18:23:59 +0000 (Sat, 03 Apr 2010)

Log Message:
-----------
[bugfix] fixed LuceneIndexWorker.scanIndex to always return correct term frequencies. It is the responsibility of the calling application to set a limit if returning all terms is too slow.

Modified Paths:
--------------
    trunk/eXist/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java

Modified: trunk/eXist/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java
===================================================================
--- trunk/eXist/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java 2010-04-03 18:20:57 UTC (rev 11605)
+++ trunk/eXist/extensions/indexes/lucene/src/org/exist/indexing/lucene/LuceneIndexWorker.java 2010-04-03 18:23:59 UTC (rev 11606)
@@ -545,13 +545,13 @@
 IntegerValue vmax = (IntegerValue) hints.get(VALUE_COUNT);
 max = vmax == null ? Long.MAX_VALUE : vmax.getValue();
 }
- if (nodes == null)
- return scanIndexByQName(qnames, docs, start, end, max);
+ if (nodes == null || max < Long.MAX_VALUE)
+ return scanIndexByQName(qnames, docs, nodes, start, end, max);
 else
 return scanIndexByNodes(qnames, docs, nodes, start, end, max);
 }

- private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, String start, String end, long max) {
+ private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start, String end, long max) {
 TreeMap<String, Occurrences> map = new TreeMap<String, Occurrences>();
 IndexReader reader = null;
 try {
@@ -562,7 +562,7 @@
 if (start == null)
 terms = reader.terms(new Term(field, ""));
 else
- terms = reader.terms(new Term(field, start.toString()));
+ terms = reader.terms(new Term(field, start));
 if (terms == null)
 continue;
 Term term;
@@ -582,19 +582,25 @@
 if (reader.isDeleted(termDocs.doc()))
 continue;
 Document doc = reader.document(termDocs.doc());
- Field fDocId = doc.getField("docId");
- int docId = Integer.parseInt(fDocId.stringValue());
+ String fDocId = doc.get("docId");
+ int docId = Integer.parseInt(fDocId);
 DocumentImpl storedDocument = docs.getDoc(docId);
 if (storedDocument == null)
 continue;
-
- Occurrences oc = map.get(term.text());
- if (oc == null) {
- oc = new Occurrences(term.text());
- map.put(term.text(), oc);
+ NodeId nodeId = null;
+ if (nodes != null) {
+ // load document to check if the current node is in the passed context set, if any
+ nodeId = readNodeId(doc);
 }
- oc.addDocument(storedDocument);
- oc.addOccurrences(termDocs.freq());
+ if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
+ Occurrences oc = map.get(term.text());
+ if (oc == null) {
+ oc = new Occurrences(term.text());
+ map.put(term.text(), oc);
+ }
+ oc.addDocument(storedDocument);
+ oc.addOccurrences(termDocs.freq());
+ }
 }
 termDocs.close();
 }
@@ -664,9 +670,6 @@
 if (oc == null) {
 oc = new Occurrences(terms[j]);
 map.put(terms[j], oc);
-
- if (map.size() >= max)
- return occurrencesToArray(map);
 }
 oc.addDocument(doc);
 oc.addOccurrences(freq[j]);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|