Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv31512/Classifier
Modified Files:
Bayes.pm
Log Message:
Merge helphand patch that prevents various uninitialized variable warnings, greatly improves URL encoding, and makes loading of the corpus safer so that corrupted information in the corpus is discarded
Index: Bayes.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v
retrieving revision 1.99
retrieving revision 1.100
diff -C2 -d -r1.99 -r1.100
*** Bayes.pm 19 Feb 2003 19:51:42 -0000 1.99
--- Bayes.pm 20 Feb 2003 16:25:53 -0000 1.100
***************
*** 464,468 ****
}
! if ( /(.+) (.+)/ ) {
my $word = $self->{mangler}->mangle($1,1);
my $value = $2;
--- 464,468 ----
}
! if ( /([^\s]+) (\d+)/ ) {
my $word = $self->{mangler}->mangle($1,1);
my $value = $2;
***************
*** 538,555 ****
for my $type (sort keys %{$self->{magnets}{$bucket}}) {
! # You cannot use @ or $ inside a \Q\E regular expression and hence
! # we have to change the $magnet and the text we are comparing against
! # by changing the $ and @ signs to .
! my $noattype;
! $noattype = $self->{parser}->{$type};
! $noattype =~ s/[@\$]/\./g;
! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) {
! my $regex;
! $regex = $magnet;
! $regex =~ s/[@\$]/\./g;
if ( $noattype =~ m/\Q$regex\E/i ) {
--- 538,555 ----
for my $type (sort keys %{$self->{magnets}{$bucket}}) {
! # You cannot use @ or $ inside a \Q\E regular expression and hence
! # we have to change the $magnet and the text we are comparing against
! # by changing the $ and @ signs to .
! my $noattype;
! $noattype = $self->{parser}->{$type};
! $noattype =~ s/[@\$]/\./g;
! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) {
! my $regex;
! $regex = $magnet;
! $regex =~ s/[@\$]/\./g;
if ( $noattype =~ m/\Q$regex\E/i ) {
|