From: naoki i. <am...@us...> - 2006-10-23 14:05:25
|
Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29448/Classifier

Modified Files:
      Tag: b0_22_2
    MailParse.pm
Log Message:
Following Japanese support:
- Performance update for convert_encoding.
- Fix the problem with HTML entity.
- Clear the character set per mail to avoid using the wrong charsets.
- Fix the problem that 'uninitialized value' warnings occur on Japanese environment.

Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.216.4.6
retrieving revision 1.216.4.7
diff -C2 -d -r1.216.4.6 -r1.216.4.7
*** MailParse.pm 9 Feb 2006 00:25:31 -0000 1.216.4.6
--- MailParse.pm 23 Oct 2006 14:05:19 -0000 1.216.4.7
***************
*** 572,578 ****
          if ( defined( $to ) ) {

!             # HTML entities confilict with DBCS chars. Replace entities with blanks.

!             if ( $self->{lang__} eq 'Korean' ) {
                  $to = ' ';
              } else {
--- 572,578 ----
          if ( defined( $to ) ) {

!             # HTML entities confilict with DBCS and EUC-JP chars. Replace entities with blanks.

!             if ( $self->{lang__} eq 'Korean' || $self->{lang__} eq 'Nihongo' ) {
                  $to = ' ';
              } else {
***************
*** 1539,1542 ****
--- 1539,1545 ----
      $self->{colorized__} .= "<tt>" if ( $self->{color__} ne '' );

+     # Clear the character set to avoid using the wrong charsets
+     $self->{charset__} = '';
+
      # Since Text::Kakasi is not thread-safe, we use it under the
      # control of a Mutex to avoid a crash if we are running on
***************
*** 1641,1650 ****
          }

-         # Decode \x??
-         if ( $self->{lang__} eq 'Nihongo' && !$self->{in_headers__} ) {
-             $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig;
-         }
-
          if ( $self->{lang__} eq 'Nihongo' ) {
              $line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} );
              $line = parse_line_with_kakasi( $self, $line );
--- 1644,1653 ----
          }

          if ( $self->{lang__} eq 'Nihongo' ) {
+             # Decode \x??
+             if ( !$self->{in_headers__} ) {
+                 $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig;
+             }
+
              $line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} );
              $line = parse_line_with_kakasi( $self, $line );
***************
*** 2480,2483 ****
--- 2483,2489 ----
      my ( $string, $from, $to, $default, @candidates ) = @_;

+     # If the string contains only ascii characters, do nothing
+     return $string if ( $string =~ /^[\r\n\t\x20-\x7E]*$/ );
+
      require Encode;
      require Encode::Guess;
|