From: <ssc...@us...> - 2003-02-27 09:49:41
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv3131/Classifier

Modified Files:
    MailParse.pm
Log Message:
fixed possible handle leak in archive code. made spaced word detector more aggressive at word edges and updated spaced tests

Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.90
retrieving revision 1.91
diff -C2 -d -r1.90 -r1.91
*** MailParse.pm 25 Feb 2003 02:04:12 -0000 1.90
--- MailParse.pm 27 Feb 2003 09:49:38 -0000 1.91
***************
*** 257,265 ****
  # without using the encoded content printer or modifying $self->{ut}

! foreach my $space (' ', '\'', '*', '^', '`' ){
!     while ( $line =~ s/ (([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?])/ /i ) {
!         my $word = $1;
          $word =~ s/\Q$space\E//g;
!         update_word( $self, $word, $encoded, ' ', ' ', '' );
          increment_word( $self, 'trick:spacedout' );
      }
--- 257,267 ----
  # without using the encoded content printer or modifying $self->{ut}

! foreach my $space (' ', '\'', '*', '^', '`', ' ', '\38' ){
!     while ( $line =~ s/( |^)(([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?])/ /i ) {
!         my $word = $2;
!         print "$word ->" if $self->{debug};
          $word =~ s/\Q$space\E//g;
!         print "$word\n" if $self->{debug};
!         update_word( $self, $word, $encoded, ' ', ' ', $prefix);
          increment_word( $self, 'trick:spacedout' );
      }
|