From: naoki i. <am...@us...> - 2006-12-02 17:45:03
|
Update of /cvsroot/popfile/engine/Classifier In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5357/Classifier Modified Files: MailParse.pm Log Message: The following changes improve Japanese support: - Performance update for convert_encoding. - Fix the problem with HTML entities. - Clear the character set for each message to avoid using the wrong charsets. - Fix the problem that 'uninitialized value' warnings occur in a Japanese environment. Index: MailParse.pm =================================================================== RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v retrieving revision 1.225 retrieving revision 1.226 diff -C2 -d -r1.225 -r1.226 *** MailParse.pm 20 Feb 2006 09:16:34 -0000 1.225 --- MailParse.pm 2 Dec 2006 17:45:02 -0000 1.226 *************** *** 584,591 **** if ( defined( $to ) ) { ! # HTML entities confilict with DBCS chars. Replace ! # entities with blanks. ! if ( $self->{lang__} eq 'Korean' ) { $to = ' '; } else { --- 584,591 ---- if ( defined( $to ) ) { ! # HTML entities confilict with DBCS and EUC-JP ! # chars. Replace entities with blanks. ! if ( $self->{lang__} eq 'Korean' || $self->{lang__} eq 'Nihongo' ) { $to = ' '; } else { *************** *** 1582,1585 **** --- 1582,1588 ---- $self->{colorized__} .= "<tt>" if ( $self->{color__} ne '' ); + # Clear the character set to avoid using the wrong charsets + $self->{charset__} = ''; + # Since Text::Kakasi is not thread-safe, we use it under the # control of a Mutex to avoid a crash if we are running on *************** *** 1689,1698 **** } - # Decode \x?? - if ( $self->{lang__} eq 'Nihongo' && !$self->{in_headers__} ) { - $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig; - } - if ( $self->{lang__} eq 'Nihongo' ) { $line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} ); $line = parse_line_with_kakasi( $self, $line ); --- 1692,1701 ---- } if ( $self->{lang__} eq 'Nihongo' ) { + # Decode \x?? 
+ if ( !$self->{in_headers__} ) { + $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig; + } + $line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} ); $line = parse_line_with_kakasi( $self, $line ); *************** *** 2550,2553 **** --- 2553,2559 ---- my ( $string, $from, $to, $default, @candidates ) = @_; + # If the string contains only ascii characters, do nothing. + return $string if ( $string =~ /^[\r\n\t\x20-\x7E]*$/ ); + require Encode; require Encode::Guess; |