From: naoki i. <am...@us...> - 2007-08-12 13:24:36
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29384/Classifier
Modified Files:
    Tag: b0_22_2
    MailParse.pm
Log Message:
performance improvement for handling spaced_out trick
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.216.4.7
retrieving revision 1.216.4.8
diff -C2 -d -r1.216.4.7 -r1.216.4.8
*** MailParse.pm 23 Oct 2006 14:05:19 -0000 1.216.4.7
--- MailParse.pm 12 Aug 2007 13:24:33 -0000 1.216.4.8
***************
*** 622,635 ****
          # Deal with runs of alternating spaces and letters
!         foreach my $space (' ', '\'', '*', '^', '`', ' ', '\38', '.' ){
!             while ( $line =~ s/( |^)(([A-Z]\Q$space\E){2,15}[A-Z])( |\Q$space\E|[!\?,])/ /i ) {
!                 my $original = "$1$2$4";
!                 my $word = $2;
!                 print "$word ->" if $self->{debug__};
!                 $word =~ s/[^A-Z]//gi;
!                 print "$word\n" if $self->{debug__};
!                 $self->update_word( $word, $encoded, ' ', ' ', $prefix);
!                 $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
!             }
          }
--- 622,633 ----
          # Deal with runs of alternating spaces and letters
!         while ( $line =~ s/( |^)([A-Za-z]([\'\*^`&\. ]| )(?:[A-Za-z]\3){1,14}[A-Za-z])( |\3|[!\?,]|$)/ / ) {
!             my $original = "$1$2$4";
!             my $word = $2;
!             print "$word ->" if $self->{debug__};
!             $word =~ s/[^A-Z]//gi;
!             print "$word\n" if $self->{debug__};
!             $self->update_word( $word, $encoded, ' ', ' ', $prefix);
!             $self->update_pseudoword( 'trick', 'spacedout', $encoded, $original );
          }
|