|
From: <ssc...@us...> - 2003-04-08 03:16:47
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv14429
Modified Files:
MailParse.pm
Log Message:
Fixes [ 696986 ] 0.18.1: Underscores in QP encoded headers, misc other header optimizations
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.105
retrieving revision 1.106
diff -C2 -d -r1.105 -r1.106
*** MailParse.pm 7 Apr 2003 20:36:52 -0000 1.105
--- MailParse.pm 8 Apr 2003 03:16:44 -0000 1.106
***************
*** 15,18 ****
--- 15,19 ----
use MIME::Base64;
use MIME::QuotedPrint;
+ require Encode::MIME::Header;
# HTML entity mapping to character codes, this maps things like & to their corresponding
***************
*** 1053,1068 ****
my ( $self, $mystring ) = @_;
! my $decode_it = '';
! if ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/i ) {
! $decode_it = decode_base64( $1 );
! $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! } else {
! if ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/i ) {
! $decode_it = decode_qp( $1 );
! $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! }
! }
!
return $mystring;
}
--- 1054,1073 ----
my ( $self, $mystring ) = @_;
!
! $mystring = Encode::MIME::Header::decode($Encode::Encoding{'MIME-Header'},$mystring);
!
! #my $decode_it = '';
! #while ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/ig ) {
! # $decode_it = decode_base64( $1 );
! # $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i;
! #}
! #while ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/ig ) {
! # $decode_it = $1;
! # $decode_it =~ s/\_/=20/g;
! # $decode_it = decode_qp( $decode_it );
! # $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i;
! # }
!
return $mystring;
}
***************
*** 1112,1115 ****
--- 1117,1126 ----
$self->update_pseudoword( 'header', $header );
+ # Check the encoding type in all RFC 2047 encoded headers
+
+ if ( $argument =~ /=\?(.{1,40})\?(Q|B)/i ) {
+ update_word( $self, $1, 0, '', '', 'charset' );
+ }
+
# Handle the From, To and Cc headers and extract email addresses
# from them and treat them as words
***************
*** 1126,1130 ****
my $prefix = '';
! if ( $header =~ /^(From|To|Cc|Reply\-To)/i ) {
if ( $argument =~ /=\?(.{1,40})\?/ ) {
--- 1137,1145 ----
my $prefix = '';
! if ( $header =~ /^(From|To|Cc|Reply\-To)$/i ) {
!
! # These headers at least can be decoded
!
! $argument = $self->decode_string( $argument );
if ( $argument =~ /=\?(.{1,40})\?/ ) {
***************
*** 1132,1147 ****
}
! if ( $header =~ /^From/i ) {
$encoding = '';
! $self->{content_type__} = '';
! $self->{from__} = $self->decode_string( $argument ) if ( $self->{from__} eq '' ) ;
$prefix = 'from';
}
! $prefix = 'to' if ( $header =~ /^To/i );
! $self->{to__} = $self->decode_string( $argument ) if ( ( $header =~ /^To/i ) && ( $self->{to__} eq '' ) );
! $prefix = 'cc' if ( $header =~ /^Cc/i );
! $self->{cc__} = $self->decode_string( $argument ) if ( ( $header =~ /^Cc/i ) && ( $self->{cc__} eq '' ) );
while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
--- 1147,1166 ----
}
! if ( $header =~ /^From$/i ) {
$encoding = '';
! $self->{content_type__} = '';
! $self->{from__} = $argument if ( $self->{from__} eq '' ) ;
$prefix = 'from';
}
! if ( $header =~ /^To$/i ) {
! $prefix = 'to';
! $self->{to__} = $argument if ( $self->{to__} eq '' );
! }
! if ( $header =~ /^Cc$/i ) {
! $prefix = 'cc';
! $self->{cc__} = $argument if ( $self->{cc__} eq '' );
! }
while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
***************
*** 1159,1166 ****
}
! $self->{subject__} = $self->decode_string( $argument ) if ( ( $header =~ /^Subject/i ) && ( $self->{subject__} eq '' ) );
!
! if ( $header =~ /^Subject/i ) {
$prefix = 'subject';
}
--- 1178,1185 ----
}
! if ( $header =~ /^Subject$/i ) {
$prefix = 'subject';
+ $argument = $self->decode_string( $argument );
+ $self->{subject__} = $argument if ( ( $self->{subject__} eq '' ) );
}
***************
*** 1169,1173 ****
# Look for MIME
! if ( $header =~ /^Content-Type/i ) {
if ( $argument =~ /charset=\"?([^\"]{1,40})\"?/ ) {
--- 1188,1192 ----
# Look for MIME
! if ( $header =~ /^Content-Type$/i ) {
if ( $argument =~ /charset=\"?([^\"]{1,40})\"?/ ) {
***************
*** 1207,1211 ****
# do a special parse here since words might be broken across the boundaries
! if ( $header =~ /^Content-Transfer-Encoding/i ) {
$encoding = $argument;
print "Setting encoding to $encoding\n" if $self->{debug};
--- 1226,1230 ----
# do a special parse here since words might be broken across the boundaries
! if ( $header =~ /^Content-Transfer-Encoding$/i ) {
$encoding = $argument;
print "Setting encoding to $encoding\n" if $self->{debug};
***************
*** 1218,1223 ****
# Some headers to discard
! return ($mime, $encoding) if ( $header =~ /^(Thread-Index|X-UIDL|Message-ID|X-Text-Classification|X-Mime-Key)/i );
!
add_line( $self, $argument, 0, $prefix );
--- 1237,1246 ----
# Some headers to discard
! return ($mime, $encoding) if ( $header =~ /^(Thread-Index|X-UIDL|Message-ID|X-Text-Classification|X-Mime-Key)$/i );
!
! # Some headers should never be RFC 2047 decoded
!
! $argument = $self->decode_string($argument) unless ($header =~ /^(Revceived|Content\-Type|Content\-Disposition)$/i);
!
add_line( $self, $argument, 0, $prefix );
|