Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv3131/Classifier
Modified Files:
MailParse.pm
Log Message:
Fixed a possible handle leak in the archive code. Made the spaced word detector more aggressive at word edges and updated the spaced tests.
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.90
retrieving revision 1.91
diff -C2 -d -r1.90 -r1.91
*** MailParse.pm 25 Feb 2003 02:04:12 -0000 1.90
--- MailParse.pm 27 Feb 2003 09:49:38 -0000 1.91
***************
*** 257,265 ****
# without using the encoded content printer or modifying $self->{ut}
! foreach my $space (' ', '\'', '*', '^', '`' ){
! while ( $line =~ s/ (([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?])/ /i ) {
! my $word = $1;
$word =~ s/\Q$space\E//g;
! update_word( $self, $word, $encoded, ' ', ' ', '' );
increment_word( $self, 'trick:spacedout' );
}
--- 257,267 ----
# without using the encoded content printer or modifying $self->{ut}
! foreach my $space (' ', '\'', '*', '^', '`', ' ', '\38' ){
! while ( $line =~ s/( |^)(([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?])/ /i ) {
! my $word = $2;
! print "$word ->" if $self->{debug};
$word =~ s/\Q$space\E//g;
! print "$word\n" if $self->{debug};
! update_word( $self, $word, $encoded, ' ', ' ', $prefix);
increment_word( $self, 'trick:spacedout' );
}
|