Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30485/Classifier
Modified Files:
MailParse.pm
Log Message:
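Performance improvement for handling the spaced_out trick: the per-separator foreach loop is replaced by a single substitution that captures the separator character and matches it again via a backreference (\3).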
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.226
retrieving revision 1.227
diff -C2 -d -r1.226 -r1.227
*** MailParse.pm 2 Dec 2006 17:45:02 -0000 1.226
--- MailParse.pm 12 Aug 2007 13:27:34 -0000 1.227
***************
*** 640,653 ****
# Deal with runs of alternating spaces and letters
! foreach my $space (' ', '\'', '*', '^', '`', ' ', '\38', '.' ){
! while ( $line =~ s/( |^)(([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?,])/ /i ) {
! my $original = "$1$2$4";
! my $word = $2;
! print "$word ->" if $self->{debug__};
! $word =~ s/[^A-Z]//gi;
! print "$word\n" if $self->{debug__};
! $self->update_word( $word, $encoded, ' ', ' ', $prefix);
! $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
! }
}
--- 640,651 ----
# Deal with runs of alternating spaces and letters
! while ( $line =~ s/( |^)([A-Za-z]([\'\*^`&\. ]| )(?:[A-Za-z]\3){1,14}[A-Za-z])( |\3|[!\?,]|$)/ / ) {
! my $original = "$1$2$4";
! my $word = $2;
! print "$word ->" if $self->{debug__};
! $word =~ s/[^A-Z]//gi;
! print "$word\n" if $self->{debug__};
! $self->update_word( $word, $encoded, ' ', ' ', $prefix);
! $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
}
|