From: naoki i. <am...@us...> - 2007-08-12 13:27:33
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30485/Classifier

Modified Files:
    MailParse.pm
Log Message:
performance improvement for handling spaced_out trick

Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.226
retrieving revision 1.227
diff -C2 -d -r1.226 -r1.227
*** MailParse.pm 2 Dec 2006 17:45:02 -0000 1.226
--- MailParse.pm 12 Aug 2007 13:27:34 -0000 1.227
***************
*** 640,653 ****
          # Deal with runs of alternating spaces and letters
  
!         foreach my $space (' ', '\'', '*', '^', '`', ' ', '\38', '.' ){
!             while ( $line =~ s/( |^)(([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?,])/ /i ) {
!                 my $original = "$1$2$4";
!                 my $word = $2;
!                 print "$word ->" if $self->{debug__};
!                 $word =~ s/[^A-Z]//gi;
!                 print "$word\n" if $self->{debug__};
!                 $self->update_word( $word, $encoded, ' ', ' ', $prefix);
!                 $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
!             }
          }
  
--- 640,651 ----
          # Deal with runs of alternating spaces and letters
  
!         while ( $line =~ s/( |^)([A-Za-z]([\'\*^`&\. ]| )(?:[A-Za-z]\3){1,14}[A-Za-z])( |\3|[!\?,]|$)/ / ) {
!             my $original = "$1$2$4";
!             my $word = $2;
!             print "$word ->" if $self->{debug__};
!             $word =~ s/[^A-Z]//gi;
!             print "$word\n" if $self->{debug__};
!             $self->update_word( $word, $encoded, ' ', ' ', $prefix);
!             $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
          }
  
|