Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv11134
Modified Files:
MailParse.pm
Log Message:
rework bug 696986 fix, prior fix caused UTF errors in Bayes.pm
modify href parsing to detect protocol-less URL's
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.106
retrieving revision 1.107
diff -C2 -d -r1.106 -r1.107
*** MailParse.pm 8 Apr 2003 03:16:44 -0000 1.106
--- MailParse.pm 10 Apr 2003 21:37:25 -0000 1.107
***************
*** 15,19 ****
use MIME::Base64;
use MIME::QuotedPrint;
! require Encode::MIME::Header;
# HTML entity mapping to character codes, this maps things like & to their corresponding
--- 15,19 ----
use MIME::Base64;
use MIME::QuotedPrint;
! #require Encode::MIME::Header;
# HTML entity mapping to character codes, this maps things like & to their corresponding
***************
*** 405,422 ****
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # ftp, http, https
!
! if ( $value =~ /^(ftp|http|https):\/\//i ) {
! add_url($self, $value, $encoded, $quote, $end_quote, '');
! next;
! }
!
! # The less common mailto: goes second, and we only care if this is in an anchor
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
}
! next;
}
--- 405,422 ----
if ( $attribute =~ /^href$/i && $tag =~ /^(a|link|base|area)$/i ) {
! # Look for mailto:'s
! if ($value =~ /^mailto:/i) {
! if ( $tag =~ /^a$/ && $value =~ /^mailto:([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))([>\&\?\:\/]|$)/i ) {
! update_word( $self, $1, $encoded, 'mailto:', ($3?'[\\\>\&\?\:\/]':$end_quote), '' );
! add_url( $self, $2, $encoded, '@', ($3?'[\\\&\?\:\/]':$end_quote), '' );
! }
! } else {
! # Anything that isn't a mailto is probably an URL
!
! $self->add_url($value, $encoded, $quote, $end_quote, '');
}
!
! next;
}
***************
*** 657,660 ****
--- 657,661 ----
# $protocol $authinfo $host $port $query $hash may be processed below if desired
+ return 1;
}
***************
*** 1053,1073 ****
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
!
! $mystring = Encode::MIME::Header::decode($Encode::Encoding{'MIME-Header'},$mystring);
! #my $decode_it = '';
! #while ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/ig ) {
! # $decode_it = decode_base64( $1 );
! # $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! #}
! #while ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/ig ) {
! # $decode_it = $1;
! # $decode_it =~ s/\_/=20/g;
! # $decode_it = decode_qp( $decode_it );
! # $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! # }
!
return $mystring;
}
--- 1054,1073 ----
# the original string with it later. Thus, this subroutine returns the real decoded result.
! my ( $self, $mystring ) = @_;
! my $decode_it = '';
! while ( $mystring =~ /=\?[\w-]+\?(B|Q)\?(.*)\?=/ig ) {
! if ($1 eq "B") {
! $decode_it = decode_base64( $2 );
! $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! } elsif ($1 eq "Q") {
! $decode_it = $2;
! $decode_it =~ s/\_/=20/g;
! $decode_it = decode_qp( $decode_it );
! $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! }
! }
!
return $mystring;
}
|