Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv22967/Classifier
Modified Files:
MailParse.pm
Log Message:
Only compile the HTML tag removal expressions once, by adding the /o modifier to both substitutions
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.159
retrieving revision 1.160
diff -C2 -d -r1.159 -r1.160
*** MailParse.pm 5 Sep 2003 20:38:10 -0000 1.159
--- MailParse.pm 11 Sep 2003 08:03:20 -0000 1.160
***************
*** 944,948 ****
# Content-Type header and only process mails of type text/html.
! while ( $line =~ s/(<\/?(?!(?:$spacing_tags|$non_spacing_tags)\W)[a-z0-9]+(?:\s+.*?)?\/?>)//i ) {
$self->update_pseudoword( 'html', 'invalidtag', $encoded, $1 );
print "html:invalidtag: $1\n" if $self->{debug};
--- 944,948 ----
# Content-Type header and only process mails of type text/html.
! while ( $line =~ s/(<\/?(?!(?:$spacing_tags|$non_spacing_tags)\W)[a-z0-9]+(?:\s+.*?)?\/?>)//io ) {
$self->update_pseudoword( 'html', 'invalidtag', $encoded, $1 );
print "html:invalidtag: $1\n" if $self->{debug};
***************
*** 954,958 ****
# FIXME: What about combined open and close tags such as <b />?
! while ( $line =~s/(<($non_spacing_tags)(?:\s+[^>]*?)?><\/\2>)//i ) {
$self->update_pseudoword( 'html', 'emptypair', $encoded, $1 );
print "html:emptypair: $1\n" if $self->{debug};
--- 954,958 ----
# FIXME: What about combined open and close tags such as <b />?
! while ( $line =~s/(<($non_spacing_tags)(?:\s+[^>]*?)?><\/\2>)//io ) {
$self->update_pseudoword( 'html', 'emptypair', $encoded, $1 );
print "html:emptypair: $1\n" if $self->{debug};
|