|
From: <jgr...@us...> - 2003-06-09 18:33:13
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv21085/Classifier
Modified Files:
Bayes.pm
Log Message:
Add code to check for bad entries in the corpus and ignore them
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.148
retrieving revision 1.149
diff -C2 -d -r1.148 -r1.149
*** Bayes.pm 3 Jun 2003 08:27:10 -0000 1.148
--- Bayes.pm 9 Jun 2003 18:33:10 -0000 1.149
***************
*** 464,471 ****
}
! if ( /([^\s]+) (\d+)/ ) {
my $word = $1;
my $value = $2;
- $value =~ s/[\r\n]//g;
if ( $value > 0 ) {
$self->{total__}{$bucket} += $value;
--- 464,472 ----
}
! s/[\r\n]//g;
!
! if ( /^([^\s]+) (\d+)$/ ) {
my $word = $1;
my $value = $2;
if ( $value > 0 ) {
$self->{total__}{$bucket} += $value;
***************
*** 473,476 ****
--- 474,479 ----
set_value_( $self, $bucket, $word, $value );
}
+ } else {
+ $self->log_( "Found entry in corpus for $bucket that looks wrong: \"$_\" (ignoring)" );
}
}
***************
*** 1355,1362 ****
}
! if ( /([^\s]+) (\d+)/ ) {
my $word = $1;
my $value = $2;
- $value =~ s/[\r\n]//g;
if ( $value > 0 ) {
$words{$word} = $value;
--- 1358,1366 ----
}
! s/[\r\n]//g;
!
! if ( /^([^\s]+) (\d+)$/ ) {
my $word = $1;
my $value = $2;
if ( $value > 0 ) {
$words{$word} = $value;
***************
*** 1413,1420 ****
}
! if ( /([^\s]+) (\d+)/ ) {
my $word = $1;
my $value = $2;
- $value =~ s/[\r\n]//g;
if ( $value > 0 ) {
$words{$word} = $value;
--- 1417,1425 ----
}
! s/[\r\n]//g;
!
! if ( /^([^\s]+) (\d+)$/ ) {
my $word = $1;
my $value = $2;
if ( $value > 0 ) {
$words{$word} = $value;
|