[Classifier4j-devel] Bug Report. / java.lang.NullPointerException at jdbm
Status: Beta
Brought to you by:
nicklothian
From: emen <em...@o2...> - 2004-01-14 02:19:43
|
There's something I had at first taken for a bug, but then realised is probably my own improper use. Nick asked me to report it here anyway, so here it is. I'll dump a lot of code here, so read patiently or not at all ;) Following the example included with Classifier4J, I wrote myself a small class that makes my life easier when it comes to classifying things or training the classifier. I have added some comments inside the code to mark three states: step 1 fails with the error, step 2 still fails, step 3 works fine. So here it is: package blackwidow.Classifiers.Text; import blackwidow.Conf.*; import java.io.*; import java.sql.SQLException; import net.sf.classifier4J.DefaultTokenizer; import net.sf.classifier4J.ITrainableClassifier; import net.sf.classifier4J.bayesian.*; import blackwidow.GUI.*; import net.sf.classifier4J.*; /** * <p>Title: BackWidow</p> * <p>Description: </p> * <p>Copyright: Copyright (c) emen 2003</p> * <p>Company: </p> * @author emen * @version 0.0.1 */ public class bwBayesianTextClassifier { //public final static String connectionString = bwConfig.propConf.getProperty("JdbmUrl"); public final static String relativeDBPath = bwConfig.propConf.getProperty("RelativeDBPath"); //public final static String username = bwConfig.propConf.getProperty("JdbmUserName"); //public final static String password = bwConfig.propConf.getProperty("JdbmUserPass"); JDBMWordsDataSource wds; BayesianClassifier classifier; public bwBayesianTextClassifier() { try { wds = new JDBMWordsDataSource(relativeDBPath); wds.open(); classifier = new BayesianClassifier(wds); } catch(Exception ex) { bwDebugOutput.display(ex); ex.printStackTrace(); } } //************ followning code was added in step 2 / remove it for stem 3 public void reInit() { wds.close(); try { wds = new JDBMWordsDataSource(relativeDBPath); wds.open(); //classifier = new BayesianClassifier(wds); } catch(Exception ex) { bwDebugOutput.display(ex); ex.printStackTrace(); } } //************ end of step 2 //************ followning code was added in step 3 public void reInit() { 
try { wds = new JDBMWordsDataSource(relativeDBPath); wds.open(); classifier = new BayesianClassifier(wds); } catch(Exception ex) { bwDebugOutput.display(ex); ex.printStackTrace(); } } //************ end of step 3 public void trainClassifierFromFile(boolean isMatch, String filename) throws FileNotFoundException, IOException, ClassifierException { reInit(); // <======================== This line added in step 2 InputStream input = new FileInputStream(filename); BufferedReader reader = new BufferedReader(new InputStreamReader(input)); //int c; String line = ""; StringBuffer fileContents = new StringBuffer(""); while((line = reader.readLine()) != null) { fileContents.append(line); } String contents = fileContents.toString(); int length = new DefaultTokenizer().tokenize(contents).length; long startTime = System.currentTimeMillis(); if(isMatch) { System.out.println("Training Classifier4J with " + length + " matching words. This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifierFromFile( ): Training Classifier4J with " + length + " matching words. This may take a while."); classifier.teachMatch(contents); } else { System.out.println("Training Classifier4J with " + length + " non-matching words. This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifierFromFile( ): Training Classifier4J with " + length + " non-matching words. This may take a while."); classifier.teachNonMatch(contents); } long endTime = System.currentTimeMillis(); long time = (endTime - startTime) / 1000; if(time == 0) { time = 1; } System.out.println("Done. Took " + time + " seconds, which is " + length / time + " words per second."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifierFromFile( ): Done. 
Took " + time + " seconds, which is " + length / time + " words per second."); wds.close(); } public void trainClassifier(boolean isMatch, String contents) throws FileNotFoundException, IOException, ClassifierException { reInit(); // <======================== This line added in step 2 if(contents == null) { bwDebugOutput.display("bwBayesianTextClassifier.trainClassifier(): null content passed as training data, skipping."); return; } int length = new DefaultTokenizer().tokenize(contents).length; long startTime = System.currentTimeMillis(); if(isMatch) { System.out.println("Training Classifier4J with " + length + " matching words. This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifier(): Training Classifier4J with " + length + " matching words. This may take a while."); classifier.teachMatch(contents); } else { System.out.println("Training Classifier4J with " + length + " non-matching words. This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifier(): Training Classifier4J with " + length + " non-matching words. This may take a while."); classifier.teachNonMatch(contents); } long endTime = System.currentTimeMillis(); long time = (endTime - startTime) / 1000; if(time == 0) { time = 1; } System.out.println("Done. Took " + time + " seconds, which is " + length / time + " words per second."); bwDebugOutput.display("bwBayesianTextClassifier.trainClassifier(): Done. 
Took " + time + " seconds, which is " + length / time + " words per second."); wds.close(); } public boolean classifyFromFile(String filename) { reInit(); // <======================== This line added in step 2 InputStream input = null; //int c; String line = ""; StringBuffer fileContents = new StringBuffer(""); try { input = new FileInputStream(filename); BufferedReader reader = new BufferedReader(new InputStreamReader(input)); while ( (line = reader.readLine()) != null) { fileContents.append( line ); } reader.close(); } catch(Exception ex) { bwDebugOutput.display(ex); ex.printStackTrace(); } String contents = fileContents.toString(); int length = new DefaultTokenizer().tokenize(contents).length; System.out.println("Analysing " + filename + " (contains " + length + " words). This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.classifyFromFile(): Analysing " + filename + " (contains " + length + " words). This may take a while."); long startTime = System.currentTimeMillis(); double matchProb = 0.0; try { matchProb = classifier.classify(contents); } catch(ClassifierException ex2) { bwDebugOutput.display(ex2); ex2.printStackTrace(); } long endTime = System.currentTimeMillis(); long time = (endTime - startTime)/1000; if(time == 0) { time = 1; } System.out.println("Done. Took " + time + " seconds, which is " + length/time + " words per second."); bwDebugOutput.display("bwBayesianTextClassifier.classifyFromFile(): Done. 
Took " + time + " seconds, which is " + length/time + " words per second."); System.out.println("Match Probability = " + matchProb); bwDebugOutput.display("bwBayesianTextClassifier.classifyFromFile(): Match Probability = " + matchProb); boolean retVal = classifier.isMatch(matchProb); System.out.println("Is considered a match: " + retVal); bwDebugOutput.display("bwBayesianTextClassifier.classifyFromFile(): Is considered a match: " + retVal); //wds.close(); return retVal; } public boolean classify(String contents) { reInit(); // <======================== This line added in step 2 int length = new DefaultTokenizer().tokenize(contents).length; System.out.println("Analysing user string input (contains " + length + " words). This may take a while."); bwDebugOutput.display("bwBayesianTextClassifier.classify(): Analysing user string input (contains " + length + " words). This may take a while."); long startTime = System.currentTimeMillis(); double matchProb = 0.0; try { matchProb = classifier.classify(contents); } catch(ClassifierException ex) { bwDebugOutput.display(ex); ex.printStackTrace(); } long endTime = System.currentTimeMillis(); long time = (endTime - startTime)/1000; if(time == 0) { time = 1; } System.out.println("Done. Took " + time + " seconds, which is " + length/time + " words per second."); bwDebugOutput.display("bwBayesianTextClassifier.classify(): Done. Took " + time + " seconds, which is " + length/time + " words per second."); System.out.println("Match Probability = " + matchProb); bwDebugOutput.display("bwBayesianTextClassifier.classify(): Match Probability = " + matchProb); boolean retVal = classifier.isMatch(matchProb); System.out.println("Is considered a match: " + retVal); bwDebugOutput.display("bwBayesianTextClassifier.classify(): Is considered a match: " + retVal); //wds.close(); return retVal; } } The exception below is thrown after the first successful training of the classifier. It occurs on every subsequent attempt to teach a match or a non-match. 
Step 3 above is the workaround; with it, everything seems to work fine. Stack trace for the error: java.lang.NullPointerException at jdbm.recman.PageManager.getFirst(PageManager.java:211) at jdbm.recman.PageCursor.next(PageCursor.java:90) at jdbm.recman.FreePhysicalRowIdPageManager.get(FreePhysicalRowIdPageManager.java:82) at jdbm.recman.PhysicalRowIdManager.alloc(PhysicalRowIdManager.java:162) at jdbm.recman.PhysicalRowIdManager.insert(PhysicalRowIdManager.java:77) at jdbm.recman.RecordManager.insert(RecordManager.java:143) at jdbm.recman.RecordManager.insert(RecordManager.java:156) at jdbm.btree.BPage.<init>(BPage.java:206) at jdbm.btree.BPage.insert(BPage.java:364) at jdbm.btree.BPage.insert(BPage.java:326) at jdbm.btree.BTree.insert(BTree.java:270) at net.sf.classifier4J.bayesian.JDBMWordsDataSource.addNonMatch(JDBMWordsDataSource.java:157) at net.sf.classifier4J.bayesian.BayesianClassifier.teachNonMatch(BayesianClassifier.java:267) at net.sf.classifier4J.bayesian.BayesianClassifier.teachNonMatch(BayesianClassifier.java:218) at net.sf.classifier4J.bayesian.BayesianClassifier.teachNonMatch(BayesianClassifier.java:190) at blackwidow.Classifiers.Text.bwBayesianTextClassifier.trainClassifier(bwBayesianTextClassifier.java:166) at blackwidow.Classifiers.Text.bwClassifierTrainer.train(bwClassifierTrainer.java:49) at blackwidow.GUI.bwLearnByExamplesTab$1.actionPerformed(bwLearnByExamplesTab.java:101) at javax.swing.AbstractButton.fireActionPerformed(AbstractButton.java:1764) at javax.swing.AbstractButton$ForwardActionEvents.actionPerformed(AbstractButton.java:1817) at javax.swing.DefaultButtonModel.fireActionPerformed(DefaultButtonModel.java:419) at javax.swing.DefaultButtonModel.setPressed(DefaultButtonModel.java:257) at javax.swing.plaf.basic.BasicButtonListener.mouseReleased(BasicButtonListener.java:245) at java.awt.Component.processMouseEvent(Component.java:5093) at java.awt.Component.processEvent(Component.java:4890) at 
java.awt.Container.processEvent(Container.java:1566) at java.awt.Component.dispatchEventImpl(Component.java:3598) at java.awt.Container.dispatchEventImpl(Container.java:1623) at java.awt.Component.dispatchEvent(Component.java:3439) at java.awt.LightweightDispatcher.retargetMouseEvent(Container.java:3450) at java.awt.LightweightDispatcher.processMouseEvent(Container.java:3165) at java.awt.LightweightDispatcher.dispatchEvent(Container.java:3095) at java.awt.Container.dispatchEventImpl(Container.java:1609) at java.awt.Window.dispatchEventImpl(Window.java:1585) at java.awt.Component.dispatchEvent(Component.java:3439) at java.awt.EventQueue.dispatchEvent(EventQueue.java:450) at java.awt.EventDispatchThread.pumpOneEventForHierarchy(EventDispatchThread.java:197) at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:150) at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:144) at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:136) at java.awt.EventDispatchThread.run(EventDispatchThread.java:99) I want to add that, when classifying, I only ever get probabilities of 0.01 or 0.99 and nothing in between. This seems very strange to me. I taught the classifier about the same number of positive and negative examples, so the probability should vary. Other than that, C4J seems to work quite well and accurately if taught well. Regards emen ------------------------------------------------------------------------ - FIGHT BACK AGAINST SPAM! Download Spam Inspector, the Award Winning Anti-Spam Filter http://mail.giantcompany.com |