|
From: <jgr...@us...> - 2003-02-21 18:55:14
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1:/tmp/cvs-serv22571/Classifier
Modified Files:
MailParse.pm
Log Message:
Merged and modified a patch that gives us decoding of the ?charset?[BQ]?text? style of From and Subject lines
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.87
retrieving revision 1.88
diff -C2 -d -r1.87 -r1.88
*** MailParse.pm 19 Feb 2003 19:57:42 -0000 1.87
--- MailParse.pm 21 Feb 2003 18:55:10 -0000 1.88
***************
*** 834,843 ****
$encoding = '';
$self->{content_type} = '';
! $self->{from} = $argument if ( $self->{from} eq '' ) ;
$prefix = 'from';
}
$prefix = 'to' if ( $header =~ /^To/i );
! $self->{to} = $argument if ( ( $header =~ /^To/i ) && ( $self->{to} eq '' ) );
while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
--- 834,843 ----
$encoding = '';
$self->{content_type} = '';
! $self->{from} = $self->decode_string( $argument ) if ( $self->{from} eq '' ) ;
$prefix = 'from';
}
$prefix = 'to' if ( $header =~ /^To/i );
! $self->{to} = $self->decode_string( $argument ) if ( ( $header =~ /^To/i ) && ( $self->{to} eq '' ) );
while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) {
***************
*** 855,859 ****
}
! $self->{subject} = $argument if ( ( $header =~ /^Subject/i ) && ( $self->{subject} eq '' ) );
if ( $header =~ /^Subject/i ) {
--- 855,859 ----
}
! $self->{subject} = $self->decode_string( $argument ) if ( ( $header =~ /^Subject/i ) && ( $self->{subject} eq '' ) );
if ( $header =~ /^Subject/i ) {
***************
*** 1072,1075 ****
--- 1072,1111 ----
return $colorized;
}
+
+ # ---------------------------------------------------------------------------------------------
+ #
+ # decode_string - Decode MIME encoded strings used in the header lines in email messages
+ #
+ # $mystring - The string that needs decoding
+ #
+ # Return the decoded string, this routine recognizes lines of the form
+ #
+ # =?charset?[BQ]?text?=
+ #
+ # A B indicates base64 encoding, a Q indicates quoted printable encoding
+ # ---------------------------------------------------------------------------------------------
+ sub decode_string
+ {
+     # Decode every =?charset?[BQ]?text?= encoded word found in $mystring and return
+     # the resulting string; text outside the encoded words is left untouched.
+     #
+     # $mystring - the header text to decode (returned unchanged if undefined)
+     #
+     # The whole string is not simply handed to a decoder because some spam mails
+     # have subjects like: "Subject: adjpwpekm =?ISO-8859-1?Q?=B2=E1=A4=D1=AB=C7?= dopdalnfjpw".
+     # Each encoded word is therefore replaced in place by its decoded text.
+
+     my ( $self, $mystring ) = @_;
+
+     # Nothing to decode; also avoids "uninitialized value" warnings in the match below
+     return $mystring if ( !defined( $mystring ) );
+
+     # The payload is matched non-greedily so that a header holding several encoded
+     # words decodes each one separately instead of treating everything between the
+     # first "=?" and the last "?=" as a single payload.  The /g flag handles every
+     # encoded word on the line, including a mixture of B and Q words.
+     $mystring =~ s{=\?[\w-]+\?([BQ])\?(.*?)\?=}{
+         my ( $encoding, $text ) = ( uc( $1 ), $2 );
+         $text =~ tr/_/ / if ( $encoding eq 'Q' );
+         ( $encoding eq 'B' ) ? decode_base64( $text ) : decode_qp( $text );
+     }gei;
+
+     return $mystring;
+ }
+
1;
|