From: <jgr...@us...> - 2003-02-20 16:25:57
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv31512/Classifier Modified Files: Bayes.pm Log Message: Merge helphand patch that prevents various uninitialized variable warnings, greatly improves URL encoding and makes loading of the corpus safer so that corrupted information in the corpus is discarded Index: Bayes.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v retrieving revision 1.99 retrieving revision 1.100 diff -C2 -d -r1.99 -r1.100 *** Bayes.pm 19 Feb 2003 19:51:42 -0000 1.99 --- Bayes.pm 20 Feb 2003 16:25:53 -0000 1.100 *************** *** 464,468 **** } ! if ( /(.+) (.+)/ ) { my $word = $self->{mangler}->mangle($1,1); my $value = $2; --- 464,468 ---- } ! if ( /([^\s]+) (\d+)/ ) { my $word = $self->{mangler}->mangle($1,1); my $value = $2; *************** *** 538,555 **** for my $type (sort keys %{$self->{magnets}{$bucket}}) { ! # You cannot use @ or $ inside a \Q\E regular expression and hence ! # we have to change the $magnet and the text we are comparing against ! # by changing the $ and @ signs to . ! my $noattype; ! $noattype = $self->{parser}->{$type}; ! $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) { ! my $regex; ! $regex = $magnet; ! $regex =~ s/[@\$]/\./g; if ( $noattype =~ m/\Q$regex\E/i ) { --- 538,555 ---- for my $type (sort keys %{$self->{magnets}{$bucket}}) { ! # You cannot use @ or $ inside a \Q\E regular expression and hence ! # we have to change the $magnet and the text we are comparing against ! # by changing the $ and @ signs to . ! my $noattype; ! $noattype = $self->{parser}->{$type}; ! $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) { ! my $regex; ! $regex = $magnet; ! $regex =~ s/[@\$]/\./g; if ( $noattype =~ m/\Q$regex\E/i ) { |