From: <dat...@us...> - 2003-02-17 18:36:33
|
Update of /cvsroot/webmacro/wiki/src/org/tcdi/opensource/wiki/search In directory sc8-pr-cvs1:/tmp/cvs-serv5418/src/org/tcdi/opensource/wiki/search Modified Files: Tag: christian LuceneFinder.java LuceneIndexer.java Log Message: All changes since moving the wiki cvs from Eric's server to SF. Index: LuceneFinder.java =================================================================== RCS file: /cvsroot/webmacro/wiki/src/org/tcdi/opensource/wiki/search/LuceneFinder.java,v retrieving revision 1.2.2.1 retrieving revision 1.2.2.2 diff -C2 -d -r1.2.2.1 -r1.2.2.2 *** LuceneFinder.java 18 Sep 2002 18:48:33 -0000 1.2.2.1 --- LuceneFinder.java 17 Feb 2003 18:36:26 -0000 1.2.2.2 *************** *** 32,40 **** import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.document.Document; ! import org.apache.lucene.search.Searcher; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; ! import org.apache.lucene.search.Hits; ! import org.apache.lucene.queryParser.QueryParser; import org.tcdi.opensource.wiki.*; --- 32,40 ---- import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.document.Document; ! import org.apache.lucene.queryParser.QueryParser; ! import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; ! import org.apache.lucene.search.Searcher; import org.tcdi.opensource.wiki.*; *************** *** 49,143 **** * be searched. * ! *@author e_ridge ! *@created 8. September 2002 */ public class LuceneFinder implements WikiPageFinder { ! static Analyzer analyzer = new GermanAnalyzer(); ! /** ! * return an array of pages that match the <code>query</code>. The query is ! * implementation specific ! * ! *@param wiki Description of the Parameter ! *@param query Description of the Parameter ! *@return Description of the Return Value ! *@exception WikiPageFinder.FinderException Description of the Exception ! */ ! public WikiPageFinder.FindResult[] findPages(WikiSystem wiki, String query) throws WikiPageFinder.FinderException { ! try { ! Searcher searcher = null; ! //Analyzer analyzer = new StopAnalyzer(); ! String _query = "title:" + query + " keywords:" + query + "^2 " + query + "^3"; ! Query q = QueryParser.parse(_query, "text", analyzer); ! /* ! * QueryParser qp = new QueryParser("text", analyzer); ! * Query q = qp.parse("(title:" + query + ")^4 OR (keywords:" + query + ")^2 OR (text:" + query + ")"); ! * System.out.println("query: " + q); ! */ ! Hits hits = null; ! if (wiki.getBaseDir() != null) { ! searcher = new IndexSearcher(wiki.getBaseDir() + wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); ! } else { ! searcher = new IndexSearcher(wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); ! } ! hits = searcher.search(q); ! List pages = new ArrayList(); ! int size = hits.length(); ! for (int x = 0; x < size; x++) { ! String title = hits.doc(x).get("title"); ! if (title != null) { ! WikiPage page = wiki.getPage(title); ! if (page == null) { ! continue; ! } ! FindResult result = new FindResult(); ! result.page = page; ! result.score = Math.round(hits.score(x) * 100); ! result.preview = org.webmacro.util.HTMLEscaper.escape(hits.doc(x).get("text")); ! /* ! * boolean skip = false; ! * for (int y = 0; y < pages.size(); y++) { ! * FindResult fr = (FindResult) pages.get(y); ! * String tmp = fr.page.getTitle(); ! * if (tmp.equals(title)) { ! * fr.score += 1; ! * skip = true; ! * break; ! * } ! * } ! * if (skip) { ! * continue; ! * } ! */ ! pages.add(result); ! } ! } ! searcher.close(); ! WikiPageFinder.FindResult[] results = (FindResult[]) pages.toArray(new WikiPageFinder.FindResult[0]); ! /* ! * Arrays.sort(results, ! * new Comparator() { ! * public int compare(Object o1, Object o2) { ! * FindResult fr1 = (FindResult) o1; ! * FindResult fr2 = (FindResult) o2; ! * return (int) (fr2.score * 1000 - fr1.score * 1000); ! * } ! * }); ! */ ! return results; ! } catch (Exception e) { ! e.printStackTrace(); ! throw new WikiPageFinder.FinderException(e.toString()); ! } ! } } --- 49,146 ---- * be searched. * ! * @author e_ridge ! * @created 8. September 2002 */ public class LuceneFinder implements WikiPageFinder { ! static Analyzer analyzer = new GermanAnalyzer(); ! /** ! * return an array of pages that match the <code>query</code>. The query is ! * implementation specific ! * ! * @param wiki Description of the Parameter ! * @param query Description of the Parameter ! * @return Description of the Return ! * Value ! * @exception WikiPageFinder.FinderException Description of the Exception ! */ ! public WikiPageFinder.FindResult[] findPages(WikiSystem wiki, String query) throws WikiPageFinder.FinderException { ! try { ! Searcher searcher = null; ! //Analyzer analyzer = new StopAnalyzer(); ! String _query = null; ! if (query.indexOf(":") > 0) { ! _query = query; ! } else { ! _query = "title:" + query + " keywords:" + query + "^2 " + query + "^3"; ! } ! Query q = QueryParser.parse(_query, "text", analyzer); ! /* ! * QueryParser qp = new QueryParser("text", analyzer); ! * Query q = qp.parse("(title:" + query + ")^4 OR (keywords:" + query + ")^2 OR (text:" + query + ")"); ! * System.out.println("query: " + q); ! */ ! Hits hits = null; ! if (wiki.getBaseDir() != null) { ! searcher = new IndexSearcher(wiki.getBaseDir() + wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); ! } else { ! searcher = new IndexSearcher(wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); ! } ! hits = searcher.search(q); ! List pages = new ArrayList(); ! int size = hits.length(); ! for (int x = 0; x < size; x++) { ! String title = hits.doc(x).get("title"); ! if (title != null) { ! WikiPage page = wiki.getPage(title); ! if (page == null) { ! continue; ! } ! FindResult result = new FindResult(); ! result.page = page; ! result.score = Math.round(hits.score(x) * 100); ! result.preview = org.webmacro.util.HTMLEscaper.escape(hits.doc(x).get("text")); ! /* ! * boolean skip = false; ! * for (int y = 0; y < pages.size(); y++) { ! * FindResult fr = (FindResult) pages.get(y); ! * String tmp = fr.page.getTitle(); ! * if (tmp.equals(title)) { ! * fr.score += 1; ! * skip = true; ! * break; ! * } ! * } ! * if (skip) { ! * continue; ! * } ! */ ! pages.add(result); ! } ! } ! searcher.close(); ! WikiPageFinder.FindResult[] results = (FindResult[]) pages.toArray(new WikiPageFinder.FindResult[0]); ! Arrays.sort(results, ! new Comparator() { ! public int compare(Object o1, Object o2) { ! FindResult fr1 = (FindResult) o1; ! FindResult fr2 = (FindResult) o2; ! return (int) (fr2.score * 1000 - fr1.score * 1000); ! } ! }); ! return results; ! } catch (Exception e) { ! throw new WikiPageFinder.FinderException(e.toString()); ! } ! } } Index: LuceneIndexer.java =================================================================== RCS file: /cvsroot/webmacro/wiki/src/org/tcdi/opensource/wiki/search/LuceneIndexer.java,v retrieving revision 1.3.2.1 retrieving revision 1.3.2.2 diff -C2 -d -r1.3.2.1 -r1.3.2.2 *** LuceneIndexer.java 18 Sep 2002 18:48:33 -0000 1.3.2.1 --- LuceneIndexer.java 17 Feb 2003 18:36:27 -0000 1.3.2.2 *************** *** 27,39 **** import java.io.*; import java.util.Vector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; - import org.apache.lucene.document.Document; - import org.apache.lucene.document.Field; - import org.apache.lucene.document.DateField; - import org.apache.lucene.analysis.Analyzer; - import org.apache.lucene.analysis.de.GermanAnalyzer; import org.tcdi.opensource.wiki.*; --- 27,39 ---- import java.io.*; import java.util.Vector; + import org.apache.lucene.analysis.Analyzer; + import org.apache.lucene.analysis.de.GermanAnalyzer; + import org.apache.lucene.document.DateField; + import org.apache.lucene.document.Document; + import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.tcdi.opensource.wiki.*; *************** *** 50,222 **** * directory that is writeable by the owner of the running JVM. * ! *@author e_ridge ! *@created 8. September 2002 */ public class LuceneIndexer implements WikiPageIndexer { ! static Analyzer analyzer = new GermanAnalyzer(); ! private Vector _pageQueue = new Vector(); ! private WikiSystem _wiki = null; - /** - * Description of the Class - * - *@author christiana - *@created 18. September 2002 - */ - private class IndexThread extends Thread { - private LuceneIndexer _parent = null; - /** - * Constructor for the IndexThread object - * - *@param p Description of the Parameter - */ - public IndexThread(LuceneIndexer p) { - this._parent = p; - } - /** - * Main processing method for the IndexThread object - */ - public void run() { - boolean optimized = true; - File f = null; - while (true) { - if (_parent._pageQueue.size() != 0) { - if (f == null) { - if (_parent._wiki.getBaseDir() != null) { - f = new File(_parent._wiki.getBaseDir() + _parent._wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); - } else { - f = new File(_parent._wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); - } - } - WikiPage p = (WikiPage) _pageQueue.elementAt(0); - Document doc = createDocument(_parent._wiki, p); - if (doc != null) { - try { - IndexReader reader = IndexReader.open(f); - reader.delete(new Term("title", p.getTitle())); - reader.close(); ! IndexWriter writer = new IndexWriter(f, _parent.analyzer, ! !f.exists()); ! writer.addDocument(doc); ! writer.close(); ! } catch (IOException ioe) { ! ioe.printStackTrace(System.err); ! } finally { ! optimized = false; ! _pageQueue.removeElementAt(0); ! } - } - } else if (!optimized) { - //optimize the index - try { - IndexWriter writer = new IndexWriter(f, _parent.analyzer, - !f.exists()); - writer.optimize(); - writer.close(); - } catch (IOException ioe) { - ioe.printStackTrace(System.err); - } - optimized = true; - } - } - } - /** - * Description of the Method - * - *@param wiki Description of the Parameter - *@param page Description of the Parameter - *@return Description of the Return Value - */ - private Document createDocument(WikiSystem wiki, WikiPage page) { - Document doc = new Document(); - String tmp = null; ! // title ! tmp = page.getTitle(); ! if (tmp != null) { ! doc.add(Field.Keyword("title", tmp)); ! } ! // last modified date ! doc.add(Field.Keyword("modified", DateField.dateToString(page.getDateLastModified()))); ! // creation date ! doc.add(Field.Keyword("created", DateField.dateToString(page.getDateCreated()))); ! // author ! WikiUser user = wiki.getUser(page.getAuthor()); ! if (user != null) { ! doc.add(Field.Keyword("author", user.getName())); ! } ! // keywords (related titles) ! StringBuffer sb = new StringBuffer(128); ! String[] keywords = page.getRelatedTitles(); ! for (int x = 0; x < keywords.length; x++) { ! if (sb.length() > 0) { ! sb.append(" "); ! } ! sb.append(keywords[x]); ! } ! doc.add(Field.Text("keywords", sb.toString())); ! // the actual text of the page ! tmp = page.getUnparsedData(); ! if (tmp != null) { ! WikiPageRenderer renderer = new TextPageRenderer(new TextURLRenderer(), wiki); ! try { ! doc.add(Field.Text("text", renderer.render(page))); ! } catch (Exception e) { ! e.printStackTrace(); ! return null; ! } ! } ! return doc; ! } ! } - private IndexThread _indexer = new IndexThread(this); ! /** ! * Constructor for the LuceneIndexer object ! */ ! public LuceneIndexer() { ! this._indexer.start(); ! } ! /** ! * Description of the Method ! * ! *@param wiki Description of the Parameter ! *@param page Description of the Parameter ! *@exception WikiPageIndexer.IndexerException Description of the Exception ! */ ! public void index(WikiSystem wiki, WikiPage page) throws WikiPageIndexer.IndexerException { ! if (this._wiki == null) { ! this._wiki = wiki; ! } else { ! if (this._wiki != wiki) { ! throw new WikiPageIndexer.IndexerException("Using the same LuceneIndexer on multiple Wikis is not supported"); ! } ! } ! _pageQueue.add(page); ! } } --- 50,238 ---- * directory that is writeable by the owner of the running JVM. * ! * @author e_ridge ! * @created 8. September 2002 */ public class LuceneIndexer implements WikiPageIndexer { ! private IndexThread _indexer = new IndexThread(this); ! private Vector _pageQueue = new Vector(); ! private WikiSystem _wiki = null; + static Analyzer analyzer = new GermanAnalyzer(); + /** + * Constructor for the LuceneIndexer object + */ + public LuceneIndexer() { + this._indexer.start(); + } + /** + * Description of the Method + * + * @param wiki Description of the Parameter + * @param page Description of the Parameter + * @exception WikiPageIndexer.IndexerException Description of the Exception + */ + public void index(WikiSystem wiki, WikiPage page) throws WikiPageIndexer.IndexerException { + if (this._wiki == null) { + this._wiki = wiki; + } else { + if (this._wiki != wiki) { + throw new WikiPageIndexer.IndexerException("Using the same LuceneIndexer on multiple Wikis is not supported"); + } + } + _pageQueue.add(page); + } ! /** ! * Description of the Class ! * ! * @author christiana ! * @created 18. September 2002 ! */ ! private class IndexThread extends Thread { ! private LuceneIndexer _parent = null; + /** + * Constructor for the IndexThread object + * + * @param p Description of the Parameter + */ + public IndexThread(LuceneIndexer p) { + this._parent = p; + } ! /** ! * Description of the Method ! * ! * @param wiki Description of the Parameter ! * @param page Description of the Parameter ! * @return Description of the Return Value ! */ ! private Document createDocument(WikiSystem wiki, WikiPage page) { ! Document doc = new Document(); ! String tmp = null; ! // title ! tmp = page.getTitle(); ! if (tmp != null) { ! doc.add(Field.Keyword("title", tmp)); ! } ! // last modified date ! doc.add(Field.Keyword("modified", DateField.dateToString(page.getDateLastModified()))); ! // creation date ! doc.add(Field.Keyword("created", DateField.dateToString(page.getDateCreated()))); ! // author ! WikiUser user = wiki.getUser(page.getAuthor()); ! if (user != null) { ! doc.add(Field.Keyword("author", user.getName())); ! } ! // keywords (related titles) ! StringBuffer sb = new StringBuffer(128); ! String[] keywords = page.getRelatedTitles(); ! for (int x = 0; x < keywords.length; x++) { ! if (sb.length() > 0) { ! sb.append(" "); ! } ! sb.append(keywords[x]); ! } ! doc.add(Field.Text("keywords", sb.toString())); ! // the actual text of the page ! tmp = page.getUnparsedData(); ! if (tmp != null) { ! WikiPageRenderer renderer = new TextPageRenderer(new TextURLRenderer(), wiki); ! try { ! doc.add(Field.Text("text", renderer.render(page))); ! } catch (Exception e) { ! e.printStackTrace(); ! return null; ! } ! } + return doc; + } + /** + * Main processing method for the IndexThread object + */ + public void run() { + boolean optimized = true; + File f = null; + boolean running = true; + while (running) { + if (_parent._pageQueue.size() != 0) { + if (f == null) { + if (_parent._wiki.getBaseDir() != null) { + f = new File(_parent._wiki.getBaseDir() + _parent._wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); + } else { + f = new File(_parent._wiki.getProperties().getProperty("LuceneIndexer.IndexDirectory")); + } + } + WikiPage p = (WikiPage) _pageQueue.elementAt(0); + Document doc = createDocument(_parent._wiki, p); + if (doc != null) { + IndexWriter writer = null; + try { + IndexReader reader = IndexReader.open(f); + reader.delete(new Term("title", p.getTitle())); + reader.close(); ! writer = new IndexWriter(f, _parent.analyzer, ! !f.exists()); ! writer.addDocument(doc); ! writer.close(); ! } catch (IOException ioe) { ! ioe.printStackTrace(System.err); ! } finally { ! optimized = false; ! _pageQueue.removeElementAt(0); ! try { ! writer.close(); ! } catch (Exception e) {} + } ! } ! } else if (!optimized) { ! //optimize the index ! IndexWriter writer = null; ! try { ! writer = new IndexWriter(f, _parent.analyzer, ! !f.exists()); ! writer.optimize(); ! writer.close(); ! } catch (IOException ioe) { ! ioe.printStackTrace(System.err); ! } finally { ! try { ! writer.close(); ! } catch (Exception e) {} ! ! } ! optimized = true; ! } ! try { ! Thread.sleep(100); ! } catch (InterruptedException i) { ! running = false; ! } ! } ! } ! } } |