From: <jgr...@us...> - 2003-02-21 18:55:14
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1:/tmp/cvs-serv22571/Classifier Modified Files: MailParse.pm Log Message: Merged and modified a patch that gives us decoding of the =?charset?[BQ]?text?= style of From and Subject lines Index: MailParse.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v retrieving revision 1.87 retrieving revision 1.88 diff -C2 -d -r1.87 -r1.88 *** MailParse.pm 19 Feb 2003 19:57:42 -0000 1.87 --- MailParse.pm 21 Feb 2003 18:55:10 -0000 1.88 *************** *** 834,843 **** $encoding = ''; $self->{content_type} = ''; ! $self->{from} = $argument if ( $self->{from} eq '' ) ; $prefix = 'from'; } $prefix = 'to' if ( $header =~ /^To/i ); ! $self->{to} = $argument if ( ( $header =~ /^To/i ) && ( $self->{to} eq '' ) ); while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) { --- 834,843 ---- $encoding = ''; $self->{content_type} = ''; ! $self->{from} = $self->decode_string( $argument ) if ( $self->{from} eq '' ) ; $prefix = 'from'; } $prefix = 'to' if ( $header =~ /^To/i ); ! $self->{to} = $self->decode_string( $argument ) if ( ( $header =~ /^To/i ) && ( $self->{to} eq '' ) ); while ( $argument =~ s/<([[:alpha:]0-9\-_\.]+?@([[:alpha:]0-9\-_\.]+?))>// ) { *************** *** 855,859 **** } ! $self->{subject} = $argument if ( ( $header =~ /^Subject/i ) && ( $self->{subject} eq '' ) ); if ( $header =~ /^Subject/i ) { --- 855,859 ---- } ! 
$self->{subject} = $self->decode_string( $argument ) if ( ( $header =~ /^Subject/i ) && ( $self->{subject} eq '' ) ); if ( $header =~ /^Subject/i ) { *************** *** 1072,1075 **** --- 1072,1111 ---- return $colorized; } + + # --------------------------------------------------------------------------------------------- + # + # decode_string - Decode MIME encoded strings used in the header lines in email messages + # + # $mystring - The string that needs decoding + # + # Returns the decoded string. This routine recognizes lines of the form + # + # =?charset?[BQ]?text?= + # + # A B indicates base64 encoding, a Q indicates quoted printable encoding + # --------------------------------------------------------------------------------------------- + sub decode_string + { + # I choose not to use "$mystring = MIME::Base64::decode( $1 );" because some spam mails + # have subjects like: "Subject: adjpwpekm =?ISO-8859-1?Q?=B2=E1=A4=D1=AB=C7?= dopdalnfjpw". + # Therefore, it will be better to store the decoded text in a temporary variable and substitute + # the original string with it later. Thus, this subroutine returns the real decoded result. + + my ( $self, $mystring ) = @_; + my $decode_it = ''; + + if ( $mystring =~ /=\?[\w-]+\?B\?(.*)\?=/i ) { + $decode_it = decode_base64( $1 ); + $mystring =~ s/=\?[\w-]+\?B\?(.*)\?=/$decode_it/i; + } else { + if ( $mystring =~ /=\?[\w-]+\?Q\?(.*)\?=/i ) { + $decode_it = decode_qp( $1 ); + $mystring =~ s/=\?[\w-]+\?Q\?(.*)\?=/$decode_it/i; + } + } + + return $mystring; + } + 1; |