Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv4955/Classifier
Modified Files:
Bayes.pm MailParse.pm
Log Message:
Fix a bug where the HTML parser was NOT turning CR/LF into whitespace, and update the test suite
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.209
retrieving revision 1.210
diff -C2 -d -r1.209 -r1.210
*** Bayes.pm 13 Oct 2003 20:23:40 -0000 1.209
--- Bayes.pm 14 Oct 2003 16:15:26 -0000 1.210
***************
*** 1098,1105 ****
if ($prob >= .999999) {
$probstr = sprintf("%12.6f", 0.999999);
- } elsif ($prob >= 0.1 || $prob == 0.0) {
- $probstr = sprintf("%12.6f", $prob);
} else {
! $probstr = sprintf("%17.6e", $prob);
}
--- 1098,1107 ----
if ($prob >= .999999) {
$probstr = sprintf("%12.6f", 0.999999);
} else {
! if ($prob >= 0.1 || $prob == 0.0) {
! $probstr = sprintf("%12.6f", $prob);
! } else {
! $probstr = sprintf("%17.6e", $prob);
! }
}
***************
*** 1188,1195 ****
if ($self->{wmformat__} eq 'score') {
$wordprobstr = sprintf("%12.4f", ($probability - $self->{not_likely__})/$log10 );
- } elsif ($self->{wmformat__} eq 'prob') {
- $wordprobstr = sprintf("%12.4f", $wordprobs{$bucket,$word});
} else {
! $wordprobstr = sprintf("%13.5f", exp($probability) );
}
--- 1190,1199 ----
if ($self->{wmformat__} eq 'score') {
$wordprobstr = sprintf("%12.4f", ($probability - $self->{not_likely__})/$log10 );
} else {
! if ($self->{wmformat__} eq 'prob') {
! $wordprobstr = sprintf("%12.4f", $wordprobs{$bucket,$word});
! } else {
! $wordprobstr = sprintf("%13.5f", exp($probability) );
! }
}
***************
*** 2003,2008 ****
foreach my $word (keys %{$self->{parser__}->{words__}}) {
! $self->set_value_( $bucket, $word, $subtract * $self->{parser__}->{words__}{$word} +
! $self->get_base_value_( $bucket, $word ) );
}
}
--- 2007,2012 ----
foreach my $word (keys %{$self->{parser__}->{words__}}) {
! $self->set_value_( $bucket, $word, $subtract * $self->{parser__}->{words__}{$word} + # PROFILE BLOCK START
! $self->get_base_value_( $bucket, $word ) ); # PROFILE BLOCK STOP
}
}
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.168
retrieving revision 1.169
diff -C2 -d -r1.168 -r1.169
*** MailParse.pm 10 Oct 2003 14:59:32 -0000 1.168
--- MailParse.pm 14 Oct 2003 16:15:26 -0000 1.169
***************
*** 1012,1016 ****
my $found = 1;
! $line =~ s/[\r\n]+//gm;
print "parse_html: [$line] " . $self->{in_html_tag__} . "\n" if $self->{debug__};
--- 1012,1016 ----
my $found = 1;
! $line =~ s/[\r\n]+/ /gm;
print "parse_html: [$line] " . $self->{in_html_tag__} . "\n" if $self->{debug__};
|