From: <jgr...@us...> - 2003-02-27 00:40:19
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv24532/Classifier Modified Files: Bayes.pm Log Message: Walk through and make sure that every time we use a variable inside a regular expression we surround it with \Q and \E to prevent special characters inside the variable from being interpreted as regular expression metacharacters Index: Bayes.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/Bayes.pm,v retrieving revision 1.103 retrieving revision 1.104 diff -C2 -d -r1.103 -r1.104 *** Bayes.pm 24 Feb 2003 23:40:19 -0000 1.103 --- Bayes.pm 27 Feb 2003 00:40:12 -0000 1.104 *************** *** 290,299 **** if ( $word ne '' ) { ! $word =~ /^(.)/; ! my $i = ord($1); ! $self->{matrix}{$bucket}[$i] = '' if ( !defined($self->{matrix}{$bucket}[$i]) ); ! $self->{matrix}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix}{$bucket}[$i] =~ s/\|\Q$word\E (L?[\-\.\d]+)\|/\|$word $value\|/ ) == 0 ); ! } } --- 290,299 ---- if ( $word ne '' ) { ! $word =~ /^(.)/; ! my $i = ord($1); ! $self->{matrix}{$bucket}[$i] = '' if ( !defined($self->{matrix}{$bucket}[$i]) ); ! $self->{matrix}{$bucket}[$i] .= "|$word $value|" if ( ( $self->{matrix}{$bucket}[$i] =~ s/\|\Q$word\E (L?[\-\.\d]+)\|/\|$word $value\|/ ) == 0 ); ! } } *************** *** 373,381 **** update_constants($self); ! # unclassified will always have the color black, note that unclassified is not ! # actually a bucket ! ! $self->{colors}{unclassified} = 'black'; ! print "Corpus loaded with $self->{full_total} entries\n" if $self->{debug}; } --- 373,381 ---- update_constants($self); ! # unclassified will always have the color black, note that unclassified is not ! # actually a bucket ! ! $self->{colors}{unclassified} = 'black'; ! print "Corpus loaded with $self->{full_total} entries\n" if $self->{debug}; } *************** *** 429,448 **** if ( /^([^ ]+) (.+)$/ ) { ! my $type = $1; ! my $value = $2; ! 
$value =~ s/\\(\?|\*|\||\(|\)|\[|\]|\{|\}|\^|\$|\.)/$1/g; $self->{magnets}{$bucket}{$type}{$value} = 1; } else { ! # This branch is used to catch the original magnets in an ! # old version of POPFile that were just there for from ! # addresses only ! if ( /^(.+)$/ ) { ! my $value = $1; ! $value =~ s/\\(\?|\*|\||\(|\)|\[|\]|\{|\}|\^|\$|\.)/$1/g; ! $self->{magnets}{$bucket}{from}{$1} = 1; ! } } } --- 429,448 ---- if ( /^([^ ]+) (.+)$/ ) { ! my $type = $1; ! my $value = $2; ! $value =~ s/\\(\?|\*|\||\(|\)|\[|\]|\{|\}|\^|\$|\.)/$1/g; $self->{magnets}{$bucket}{$type}{$value} = 1; } else { ! # This branch is used to catch the original magnets in an ! # old version of POPFile that were just there for from ! # addresses only ! if ( /^(.+)$/ ) { ! my $value = $1; ! $value =~ s/\\(\?|\*|\||\(|\)|\[|\]|\{|\}|\^|\$|\.)/$1/g; ! $self->{magnets}{$bucket}{from}{$1} = 1; ! } } } *************** *** 538,544 **** for my $type (sort keys %{$self->{magnets}{$bucket}}) { ! # You cannot use @ or $ inside a \Q\E regular expression and hence ! # we have to change the $magnet and the text we are comparing against ! # by changing the $ and @ signs to . my $noattype; --- 538,544 ---- for my $type (sort keys %{$self->{magnets}{$bucket}}) { ! # You cannot use @ or $ inside a \Q\E regular expression and hence ! # we have to change the $magnet and the text we are comparing against ! # by changing the $ and @ signs to . my $noattype; *************** *** 547,551 **** $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) { my $regex; --- 547,551 ---- $noattype =~ s/[@\$]/\./g; ! for my $magnet (sort keys %{$self->{magnets}{$bucket}{$type}}) { my $regex; *************** *** 699,703 **** if ( ( $total != 0 ) && ( $score{$ranking[0]} > log($self->{unclassified} * $total) ) ) { ! $class = $ranking[0]; } --- 699,703 ---- if ( ( $total != 0 ) && ( $score{$ranking[0]} > log($self->{unclassified} * $total) ) ) { ! 
$class = $ranking[0]; } *************** *** 761,775 **** while ( <$mail> ) { my $line; ! my $fileline; $line = $_; ! # This is done so that we remove the network style end of line CR LF ! # and allow Perl to decide on the local system EOL which it will expand ! # out of \n when this gets written to the temp file ! ! $fileline = $line; ! $fileline =~ s/[\r\n]//g; ! $fileline .= "\n"; # Check for an abort --- 761,775 ---- while ( <$mail> ) { my $line; ! my $fileline; $line = $_; ! # This is done so that we remove the network style end of line CR LF ! # and allow Perl to decide on the local system EOL which it will expand ! # out of \n when this gets written to the temp file ! ! $fileline = $line; ! $fileline =~ s/[\r\n]//g; ! $fileline .= "\n"; # Check for an abort *************** *** 835,839 **** if ( $self->{configuration}->{configuration}{subject} ) { # Don't add the classification unless it is not present ! if ( !( $msg_subject =~ /\[$classification\]/ ) && ( $self->{parameters}{$classification}{subject} == 1 ) && ( $self->{parameters}{$classification}{quarantine} == 0 ) ) { --- 835,839 ---- if ( $self->{configuration}->{configuration}{subject} ) { # Don't add the classification unless it is not present ! if ( !( $msg_subject =~ /\[\Q$classification\E\]/ ) && ( $self->{parameters}{$classification}{subject} == 1 ) && ( $self->{parameters}{$classification}{quarantine} == 0 ) ) { *************** *** 852,856 **** # Add the XTC header $msg_head_after .= "X-Text-Classification: $classification$eol" if ( ( $self->{configuration}->{configuration}{xtc} ) && ! ( $self->{parameters}{$classification}{quarantine} == 0 ) ); # Add the XPL header --- 852,856 ---- # Add the XTC header $msg_head_after .= "X-Text-Classification: $classification$eol" if ( ( $self->{configuration}->{configuration}{xtc} ) && ! 
( $self->{parameters}{$classification}{quarantine} == 0 ) ); # Add the XPL header *************** *** 881,891 **** print $client "To: $self->{parser}->{to}$eol"; print $client "Date: $self->{parser}->{date}$eol"; ! if ( $self->{configuration}->{configuration}{subject} ) { ! # Don't add the classification unless it is not present ! if ( !( $msg_subject =~ /\[$classification\]/ ) && ! ( $self->{parameters}{$classification}{subject} == 1 ) ) { ! $msg_subject = " [$classification]$msg_subject"; ! } ! } print $client "Subject:$msg_subject$eol"; print $client "X-Text-Classification: $classification$eol" if ( $self->{configuration}->{configuration}{xtc} ); --- 881,891 ---- print $client "To: $self->{parser}->{to}$eol"; print $client "Date: $self->{parser}->{date}$eol"; ! if ( $self->{configuration}->{configuration}{subject} ) { ! # Don't add the classification unless it is not present ! if ( !( $msg_subject =~ /\[\Q$classification\E\]/ ) && ! ( $self->{parameters}{$classification}{subject} == 1 ) ) { ! $msg_subject = " [$classification]$msg_subject"; ! } ! } print $client "Subject:$msg_subject$eol"; print $client "X-Text-Classification: $classification$eol" if ( $self->{configuration}->{configuration}{xtc} ); |