From: <jgr...@us...> - 2003-03-03 23:14:46
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv22945/Classifier Modified Files: Bayes.pm Log Message: Got to the point where the absolute basics are working; i.e. if you have a POPFile setup already then you can download and get your mail classified; don't expect statistics, history or reclassification to behave yet, but we are getting closer; also MailParse and Bayes are still messing around inside each other's private variables Index: Bayes.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v retrieving revision 1.109 retrieving revision 1.110 diff -C2 -d -r1.109 -r1.110 *** Bayes.pm 3 Mar 2003 22:01:30 -0000 1.109 --- Bayes.pm 3 Mar 2003 23:14:37 -0000 1.110 *************** *** 110,113 **** --- 110,122 ---- my ( $self ) = @_; + # Subject modification (global setting is on) + $self->config_( 'subject', 1 ); + + # Adding the X-Text-Classification on + $self->config_( 'xtc', 1 ); + + # Adding the X-POPFile-Link is no + $self->config_( 'xpl', 1 ); + # No default unclassified probability $self->config_( 'unclassified_probability', 0 ); *************** *** 525,529 **** # If the user has not defined any buckets then we escape here return unclassified ! return "unclassified " if ( $#buckets == -1 ); # The score hash will contain the likelihood that the given message is in each --- 534,538 ---- # If the user has not defined any buckets then we escape here return unclassified ! return "unclassified" if ( $#buckets == -1 ); # The score hash will contain the likelihood that the given message is in each *************** *** 560,564 **** # Switching from using *= to += and using the log of every probability instead ! foreach my $word (keys %{$self->{parser__}->{words}}) { my $wmax = -10000; if ($self->{wordscores__}) { --- 569,573 ---- # Switching from using *= to += and using the log of every probability instead ! 
foreach my $word (keys %{$self->{parser__}->{words__}}) { my $wmax = -10000; if ($self->{wordscores__}) { *************** *** 576,580 **** # and we multiply by the number of times that the word occurs ! $score{$bucket} += ( $probability * $self->{parser__}{words}{$word} ); if ($self->{wordscores__}) { $wtprob{$word} += exp($probability); --- 585,589 ---- # and we multiply by the number of times that the word occurs ! $score{$bucket} += ( $probability * $self->{parser__}{words__}{$word} ); if ($self->{wordscores__}) { $wtprob{$word} += exp($probability); *************** *** 584,590 **** if ($wmax > $self->{not_likely__}) { ! $correction += ($wmax - $logbuck) * $self->{parser__}{words}{$word}; } else { ! $correction += $wmax * $self->{parser__}{words}{$word}; } $wordprob{$word} = exp($wmax); --- 593,599 ---- if ($wmax > $self->{not_likely__}) { ! $correction += ($wmax - $logbuck) * $self->{parser__}{words__}{$word}; } else { ! $correction += $wmax * $self->{parser__}{words__}{$word}; } $wordprob{$word} = exp($wmax); *************** *** 713,718 **** my $getting_headers = 1; ! my $temp_file = "$self->config_( 'msgdir' )popfile$dcount" . "=$mcount.msg"; ! my $class_file = "$self->config_( 'msgdir' )popfile$dcount" . "=$mcount.cls"; open TEMP, ">$temp_file"; --- 722,727 ---- my $getting_headers = 1; ! my $temp_file = $self->global_config_( 'msgdir' ) . "popfile$dcount" . "=$mcount.msg"; ! my $class_file = $self->global_config_( 'msgdir' ) . "popfile$dcount" . "=$mcount.cls"; open TEMP, ">$temp_file"; |