From: <ssc...@us...> - 2003-04-10 21:37:31
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv11134 Modified Files: MailParse.pm Log Message: rework bug 696986 fix, prior fix caused UTF errors in Bayes.pm modify href parsing to detect protocol-less URL's Index: MailParse.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v retrieving revision 1.106 retrieving revision 1.107 diff -C2 -d -r1.106 -r1.107 *** MailParse.pm 8 Apr 2003 03:16:44 -0000 1.106 --- MailParse.pm 10 Apr 2003 21:37:25 -0000 1.107 *************** *** 15,19 **** use MIME::Base64; use MIME::QuotedPrint; ! require Encode::MIME::Header; # HTML entity mapping to character codes, this maps things like & to their corresponding --- 15,19 ---- use MIME::Base64; use MIME::QuotedPrint; ! #require Encode::MIME::Header; # HTML entity mapping to character codes, this maps things like & to their corresponding *************** *** 405,422 **** if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) { ! # ftp, http, https ! ! if ( $value =~ /^(ftp|http|https):\/\//i ) { ! add_url($self, $value, $encoded, $quote, $end_quote, ''); ! next; ! } ! ! # The less common mailto: goes second, and we only care if this is in an anchor ! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) { ! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' ); ! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' ); } ! next; } --- 405,422 ---- if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) { ! # Look for mailto:'s ! if ($value =~ /^mailto:/i) { ! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) { ! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' ); ! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' ); ! } ! } else { ! # Anything that isn't a mailto is probably an URL ! ! $self->add_url($value, $encoded, $quote, $end_quote, ''); } ! ! next; } *************** *** 657,660 **** --- 657,661 ---- # $protocol $authinfo $host $port $query $hash may be processed below if desired + return 1; } *************** *** 1053,1073 **** # the original string with it later. Thus, this subroutine returns the real decoded result. ! my ( $self, $mystring ) = @_; ! ! $mystring = Encode::MIME::Header::decode($Encode::Encoding{'MIME-Header'},$mystring); ! #my $decode_it = ''; ! #while ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/ig ) { ! # $decode_it = decode_base64( $1 ); ! # $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i; ! #} ! #while ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/ig ) { ! # $decode_it = $1; ! # $decode_it =~ s/\_/=20/g; ! # $decode_it = decode_qp( $decode_it ); ! # $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i; ! # } ! return $mystring; } --- 1054,1073 ---- # the original string with it later. Thus, this subroutine returns the real decoded result. ! my ( $self, $mystring ) = @_; ! my $decode_it = ''; ! while ( $mystring =~ /=\?[\w-]+\?(B|Q)\?(.*)\?=/ig ) { ! if ($1 eq "B") { ! $decode_it = decode_base64( $2 ); ! $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i; ! } elsif ($1 eq "Q") { ! $decode_it = $2; ! $decode_it =~ s/\_/=20/g; ! $decode_it = decode_qp( $decode_it ); ! $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i; ! } ! } ! return $mystring; } |