From: Stig T. <jw...@us...> - 2005-03-24 14:01:31
|
Update of /cvsroot/mailsomething/mailsomething/src/net/sf/mailsomething/mail/db
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28043/net/sf/mailsomething/mail/db

Added Files:
    HSQLConnectionManager.java CachedJDBCWordDataSource.java
Log Message:
temporarily placed here, maybe they should be in another package

--- NEW FILE: CachedJDBCWordDataSource.java ---
package net.sf.mailsomething.mail.db;

/*
 * ====================================================================
 *
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2003 Nick Lothian. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution, if
 *    any, must include the following acknowlegement:
 *       "This product includes software developed by the
 *        developers of Classifier4J (http://classifier4j.sf.net/)."
 *    Alternately, this acknowlegement may appear in the software itself,
 *    if and wherever such third-party acknowlegements normally appear.
 *
 * 4. The name "Classifier4J" must not be used to endorse or promote
 *    products derived from this software without prior written
 *    permission. For written permission, please contact
 *    http://sourceforge.net/users/nicklothian/.
 *
 * 5. Products derived from this software may not be called
 *    "Classifier4J", nor may "Classifier4J" appear in their names
 *    without prior written permission. For written permission, please
 *    contact http://sourceforge.net/users/nicklothian/.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 */

import java.io.Serializable;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import net.sf.classifier4J.ICategorisedClassifier;
import net.sf.classifier4J.bayesian.IJDBCConnectionManager;
import net.sf.classifier4J.bayesian.IWordsDataSource;
import net.sf.classifier4J.bayesian.WordProbability;
import net.sf.classifier4J.bayesian.WordsDataSourceException;

/**
 * Word data source that keeps {@link WordProbability} entries in an in-memory
 * map (keyed by word) and also writes count changes to the
 * <code>word_probability</code> table through an
 * {@link IJDBCConnectionManager}.
 *
 * <p>The map is filled once, in the constructor, from every row of the table.
 * The single-argument {@link #addMatch(String)} / {@link #addNonMatch(String)}
 * update both the map and the table (under
 * <code>ICategorisedClassifier.DEFAULT_CATEGORY</code>); the two-argument
 * variants only update the table.</p>
 *
 * @author Nick Lothian
 * @author Peter Leschev
 *
 */
public class CachedJDBCWordDataSource implements IWordsDataSource, Serializable {

    /** Longest word stored in the database; longer words are truncated. */
    private static final int MAX_WORD_LENGTH = 255;

    private Map words = new HashMap();

    IJDBCConnectionManager connectionManager;

    private Log log = LogFactory.getLog(this.getClass());

    /**
     * Creates the data source and preloads the in-memory map from the
     * database. A failure to preload is logged and leaves the map with
     * whatever had been loaded so far (possibly nothing).
     *
     * @param connectionManager supplies and takes back JDBC connections
     */
    public CachedJDBCWordDataSource(IJDBCConnectionManager connectionManager) {
        this.connectionManager = connectionManager;
        try {
            WordProbability[] loaded = loadWordProbabilities();
            for (int i = 0; i < loaded.length; i++) {
                setWordProbability(loaded[i]);
            }
        } catch (Exception e) {
            // Best effort: the instance stays usable with an empty/partial map.
            log.error("Could not preload word probabilities from the database", e);
        }
    }

    /**
     * Stores the given probability in the in-memory map under its word,
     * replacing any previous entry. The database is not touched.
     *
     * @param wp the entry to store
     */
    public void setWordProbability(WordProbability wp) {
        words.put(wp.getWord(), wp);
    }

    /**
     * Looks the word up in the in-memory map only.
     *
     * @return the stored entry, or <code>null</code> if the word is unknown
     * @see net.sf.classifier4J.bayesian.IWordsDataSource#getWordProbability(java.lang.String)
     */
    public WordProbability getWordProbability(String word) {
        return (WordProbability) words.get(word);
    }

    /**
     * @return a live view of all entries currently held in the in-memory map
     */
    public Collection getAll() {
        return words.values();
    }

    /**
     * Increments the matching count of the word in the in-memory map (creating
     * the entry if needed) and in the database under the default category.
     *
     * @see net.sf.classifier4J.bayesian.IWordsDataSource#addMatch(java.lang.String)
     */
    public void addMatch(String word) {
        WordProbability wp = (WordProbability) words.get(word);
        if (wp == null) {
            wp = new WordProbability(word, 1, 0);
        } else {
            wp.setMatchingCount(wp.getMatchingCount() + 1);
        }
        setWordProbability(wp);
        updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, true);
    }

    /**
     * Increments the non-matching count of the word in the in-memory map
     * (creating the entry if needed) and in the database under the default
     * category.
     *
     * @see net.sf.classifier4J.bayesian.IWordsDataSource#addNonMatch(java.lang.String)
     */
    public void addNonMatch(String word) {
        WordProbability wp = (WordProbability) words.get(word);
        if (wp == null) {
            wp = new WordProbability(word, 0, 1);
        } else {
            wp.setNonMatchingCount(wp.getNonMatchingCount() + 1);
        }
        setWordProbability(wp);
        updateWordProbability(ICategorisedClassifier.DEFAULT_CATEGORY, word, false);
    }

    /**
     * Increments <code>match_count</code> or <code>nonmatch_count</code> for
     * the (word, category) row, inserting the row first if it does not exist.
     * Database errors are logged and not propagated.
     *
     * @param category category column value of the row
     * @param word     the word; truncated to {@link #MAX_WORD_LENGTH} characters
     * @param isMatch  <code>true</code> to bump <code>match_count</code>,
     *                 <code>false</code> to bump <code>nonmatch_count</code>
     */
    private void updateWordProbability(String category, String word, boolean isMatch) {
        String fieldname = isMatch ? "match_count" : "nonmatch_count";

        // truncate word at 255 characters (the width of the word column)
        if (word.length() > MAX_WORD_LENGTH) {
            word = word.substring(0, MAX_WORD_LENGTH);
        }

        Connection conn = null;
        PreparedStatement selectStatement = null;
        PreparedStatement insertStatement = null;
        PreparedStatement updateStatement = null;
        ResultSet rs = null;
        try {
            conn = connectionManager.getConnection();

            selectStatement = conn.prepareStatement(
                    "SELECT 1 FROM word_probability WHERE word = ? AND category = ?");
            selectStatement.setString(1, word);
            selectStatement.setString(2, category);
            rs = selectStatement.executeQuery();

            if (!rs.next()) {
                // word is not in table: insert it with explicit zero counts so
                // the increment below never operates on NULL
                insertStatement = conn.prepareStatement(
                        "INSERT INTO word_probability (word, category, match_count, nonmatch_count)"
                                + " VALUES (?, ?, 0, 0)");
                insertStatement.setString(1, word);
                insertStatement.setString(2, category);
                insertStatement.execute();
            }

            // update the word count; fieldname is one of two fixed literals,
            // never caller-supplied text
            updateStatement = conn.prepareStatement(
                    "UPDATE word_probability SET " + fieldname + " = " + fieldname
                            + " + 1 WHERE word = ? AND category = ?");
            updateStatement.setString(1, word);
            updateStatement.setString(2, category);
            updateStatement.execute();
        } catch (SQLException e) {
            // The public signatures do not allow a checked exception here, so
            // report the failure instead of dropping it.
            log.error("Problem updating WordProbability for word [" + word
                    + "] in category [" + category + "]", e);
        } finally {
            closeQuietly(rs);
            closeQuietly(selectStatement);
            closeQuietly(insertStatement);
            closeQuietly(updateStatement);
            returnQuietly(conn);
        }
    }

    /**
     * Increments the matching count of the word in the database for the given
     * category. The in-memory map is not changed.
     *
     * @throws IllegalArgumentException if <code>category</code> is null
     */
    public void addMatch(String category, String word) throws WordsDataSourceException {
        if (category == null) {
            throw new IllegalArgumentException("category cannot be null");
        }
        updateWordProbability(category, word, true);
    }

    /**
     * Increments the non-matching count of the word in the database for the
     * given category. The in-memory map is not changed.
     *
     * @throws IllegalArgumentException if <code>category</code> is null
     */
    public void addNonMatch(String category, String word) throws WordsDataSourceException {
        if (category == null) {
            throw new IllegalArgumentException("category cannot be null");
        }
        updateWordProbability(category, word, false);
    }

    /**
     * Reads every row of <code>word_probability</code> (all categories) and
     * converts each into a {@link WordProbability}.
     *
     * @return one entry per row, in result-set order; empty if the table is empty
     * @throws WordsDataSourceException if the query fails
     */
    public WordProbability[] loadWordProbabilities() throws WordsDataSourceException {
        Vector props = new Vector();

        Connection conn = null;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            conn = connectionManager.getConnection();
            ps = conn.prepareStatement("SELECT * FROM word_probability");
            rs = ps.executeQuery();
            while (rs.next()) {
                int matchingCount = rs.getInt("match_count");
                int nonMatchingCount = rs.getInt("nonmatch_count");
                props.add(new WordProbability(rs.getString("word"), matchingCount, nonMatchingCount));
            }
        } catch (SQLException e) {
            throw new WordsDataSourceException("Problem obtaining WordProbability from database", e);
        } finally {
            closeQuietly(rs);
            closeQuietly(ps);
            returnQuietly(conn);
        }

        if (log.isDebugEnabled()) {
            log.debug("loadWordProbabilities() WordProbability loaded [" + props.size() + "]");
        }

        return (WordProbability[]) props.toArray(new WordProbability[props.size()]);
    }

    /** Closes the result set if non-null; a failure to close is ignored. */
    private static void closeQuietly(ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException ignored) {
                // nothing useful can be done about a failed close
            }
        }
    }

    /** Closes the statement if non-null; a failure to close is ignored. */
    private static void closeQuietly(PreparedStatement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException ignored) {
                // nothing useful can be done about a failed close
            }
        }
    }

    /** Hands the connection back to the manager if non-null; failures are ignored. */
    private void returnQuietly(Connection conn) {
        if (conn != null) {
            try {
                connectionManager.returnConnection(conn);
            } catch (SQLException ignored) {
                // ignore
            }
        }
    }
}

--- NEW FILE: HSQLConnectionManager.java ---
package net.sf.mailsomething.mail.db;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

import net.sf.classifier4J.bayesian.IJDBCConnectionManager;

/**
 * To be used with JDBCWordsDataSource. Maybe I will change that class so it
 * doesn't need an implementation like this.
 *
 * <p>Holds one HSQLDB connection that is opened on first use, reused by every
 * caller, and reopened if it is found closed. The
 * <code>word_probability</code> table is created the first time a connection
 * is opened.</p>
 *
 * @author Stig tanggaard
 * @since 2005-03-24
 *
 */
public class HSQLConnectionManager implements IJDBCConnectionManager {

    private static final String DRIVER_CLASS = "org.hsqldb.jdbcDriver";

    private static final String JDBC_URL = "jdbc:hsqldb:" + "imagecrawler_";

    Connection connection;

    boolean tableCreated = false;

    /**
     * Returns the shared connection, opening it (and creating the table on the
     * first open) when there is none or the current one is closed.
     *
     * @throws SQLException if the driver class cannot be loaded or the
     *                      connection cannot be opened
     * @see net.sf.classifier4J.bayesian.IJDBCConnectionManager#getConnection()
     */
    public Connection getConnection() throws SQLException {
        if (connection == null || connection.isClosed()) {
            try {
                Class.forName(DRIVER_CLASS);
            } catch (ClassNotFoundException e) {
                // Surface the problem through the declared exception type
                // rather than returning a null connection.
                SQLException wrapped = new SQLException("JDBC driver not found: " + DRIVER_CLASS);
                wrapped.initCause(e);
                throw wrapped;
            }
            connection = DriverManager.getConnection(JDBC_URL, "sa", "");
            if (!tableCreated) {
                createTable();
            }
        }
        return connection;
    }

    /**
     * Issues the CREATE TABLE for <code>word_probability</code> on the current
     * connection and marks the table as created whether or not the statement
     * succeeded.
     */
    protected void createTable() {
        try {
            // Counts default to 0 so that "count = count + 1" works on rows
            // inserted without explicit counts.
            update("CREATE TABLE word_probability "
                    + "( word VARCHAR(255),"
                    + " category VARCHAR(255),"
                    + " match_count INTEGER DEFAULT 0 NOT NULL,"
                    + " nonmatch_count INTEGER DEFAULT 0 NOT NULL)");
        } catch (SQLException ex2) {
            // Not fatal: the second time we run the program this is expected
            // to fail because the table is already there, and that has no
            // effect on the db. Still printed so real DDL errors are visible.
            ex2.printStackTrace();
        }
        tableCreated = true;
    }

    /**
     * Executes a single SQL update/DDL statement on the shared connection.
     *
     * @param expression the SQL text to execute
     * @throws SQLException if the statement cannot be created or executed
     */
    public synchronized void update(String expression) throws SQLException {
        Statement st = connection.createStatement();
        try {
            int i = st.executeUpdate(expression); // run the query
            if (i == -1) {
                System.out.println("db error : " + expression);
            }
        } finally {
            // release the statement even when executeUpdate throws
            st.close();
        }
    }

    /**
     * Does nothing: the connection handed out by {@link #getConnection()} is
     * the single shared instance and is kept open for reuse.
     *
     * @see net.sf.classifier4J.bayesian.IJDBCConnectionManager#returnConnection(java.sql.Connection)
     */
    public void returnConnection(Connection arg0) throws SQLException {
        // intentionally not closing arg0
    }
}
|