|
From: <jgr...@us...> - 2003-09-05 21:18:56
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv5019/Classifier
Modified Files:
Bayes.pm
Log Message:
Default values for the dummy unsure bucket
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.186
retrieving revision 1.187
diff -C2 -d -r1.186 -r1.187
*** Bayes.pm 4 Sep 2003 16:30:41 -0000 1.186
--- Bayes.pm 5 Sep 2003 21:18:51 -0000 1.187
***************
*** 407,419 ****
$self->{colors__}{unclassified} = 'black';
# SLM for unclassified "bucket" will always match the global setting
$self->{parameters__}{unclassified}{subject} = $self->global_config_('subject');
# Quarantine for unclassified will be off:
$self->{parameters__}{unclassified}{quarantine} = 0;
!
}
--- 407,421 ----
$self->{colors__}{unclassified} = 'black';
+ $self->{colors__}{unsure} = 'black';
# SLM for unclassified "bucket" will always match the global setting
$self->{parameters__}{unclassified}{subject} = $self->global_config_('subject');
+ $self->{parameters__}{unsure}{subject} = $self->global_config_('subject');
# Quarantine for unclassified will be off:
$self->{parameters__}{unclassified}{quarantine} = 0;
! $self->{parameters__}{unsure}{quarantine} = 0;
}
***************
*** 608,612 ****
my $term = $sum;
! for my $i (1..$free/2) {
$term *= $m / $i;
$sum += $term;
--- 610,614 ----
my $term = $sum;
! for my $i (1..($free/2-1)) {
$term *= $m / $i;
$sum += $term;
***************
*** 697,701 ****
--- 699,706 ----
# P(word|bucket) ^ word count and multiply to the score
+ my $word_count = 0;
+
foreach my $word (keys %{$self->{parser__}->{words__}}) {
+ $word_count += 2;
my $wmax = -10000;
***************
*** 719,723 ****
foreach my $bucket (@buckets) {
! $chi{$bucket} = chi2( $score{$bucket}, 2 * $matchcount{$bucket}, -int($score{$ranking[0]}/log(10)) * log(10) );
}
--- 724,728 ----
foreach my $bucket (@buckets) {
! $chi{$bucket} = chi2( $score{$bucket}, $word_count, -int($score{$ranking[0]}/log(10)) * log(10) );
}
***************
*** 785,789 ****
}
! $self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<b>Verdict: <font color=\"$self->{colors__}{$class}\">$class ($certainty)</font></b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
$self->{scores__} .= "<th scope=\"col\">$language{Count} </th><th scope=\"col\">$language{Probability}</th></tr>\n";
--- 790,794 ----
}
! $self->{scores__} .= "<hr><b>$language{Scores}</b><p>\n<b>Verdict: <font color=\"$self->{colors__}{$class}\">$class ($certainty $chi{$ranking[0]} $chi{$ranking[1]})</font></b><p>\n<table class=\"top20Words\">\n<tr>\n<th scope=\"col\">$language{Bucket}</th>\n<th> </th>\n";
$self->{scores__} .= "<th scope=\"col\">$language{Count} </th><th scope=\"col\">$language{Probability}</th></tr>\n";
***************
*** 1160,1164 ****
# Add the Subject line modification or the original line back again
! if ( $classification ne 'unclassified' ) {
if ( $self->global_config_( 'subject' ) ) {
# Don't add the classification unless it is not present
--- 1165,1169 ----
# Add the Subject line modification or the original line back again
! if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( $self->global_config_( 'subject' ) ) {
# Don't add the classification unless it is not present
***************
*** 1198,1202 ****
# information from POPFile and wrapping the original message in a MIME encoding
! if ( $classification ne 'unclassified' ) {
if ( $self->{parameters__}{$classification}{quarantine} == 1 ) {
print $client "From: " . $self->{parser__}->get_header( 'from' ) . "$eol";
--- 1203,1207 ----
# information from POPFile and wrapping the original message in a MIME encoding
! if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( $self->{parameters__}{$classification}{quarantine} == 1 ) {
print $client "From: " . $self->{parser__}->get_header( 'from' ) . "$eol";
***************
*** 1236,1240 ****
my $before_dot = '';
! if ( $classification ne 'unclassified' ) {
if ( ( $self->{parameters__}{$classification}{quarantine} == 1 ) && $echo ) {
$before_dot = "$eol--$nopath_temp_file--$eol";
--- 1241,1245 ----
my $before_dot = '';
! if ( ( $classification ne 'unclassified' ) && ( $classification ne 'unsure' ) ) {
if ( ( $self->{parameters__}{$classification}{quarantine} == 1 ) && $echo ) {
$before_dot = "$eol--$nopath_temp_file--$eol";
|