Update of /cvsroot/popfile/engine/Classifier
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5357/Classifier
Modified Files:
MailParse.pm
Log Message:
The following changes are for Japanese support:
- Performance update for convert_encoding.
- Fix the problem with HTML entities.
- Clear the character set for each mail to avoid using the wrong charset.
- Fix the 'uninitialized value' warnings that occur in a Japanese environment.
Index: MailParse.pm
===================================================================
RCS file: /cvsroot/popfile/engine/Classifier/MailParse.pm,v
retrieving revision 1.225
retrieving revision 1.226
diff -C2 -d -r1.225 -r1.226
*** MailParse.pm 20 Feb 2006 09:16:34 -0000 1.225
--- MailParse.pm 2 Dec 2006 17:45:02 -0000 1.226
***************
*** 584,591 ****
if ( defined( $to ) ) {
! # HTML entities confilict with DBCS chars. Replace
! # entities with blanks.
! if ( $self->{lang__} eq 'Korean' ) {
$to = ' ';
} else {
--- 584,591 ----
if ( defined( $to ) ) {
! # HTML entities confilict with DBCS and EUC-JP
! # chars. Replace entities with blanks.
! if ( $self->{lang__} eq 'Korean' || $self->{lang__} eq 'Nihongo' ) {
$to = ' ';
} else {
***************
*** 1582,1585 ****
--- 1582,1588 ----
$self->{colorized__} .= "<tt>" if ( $self->{color__} ne '' );
+ # Clear the character set to avoid using the wrong charsets
+ $self->{charset__} = '';
+
# Since Text::Kakasi is not thread-safe, we use it under the
# control of a Mutex to avoid a crash if we are running on
***************
*** 1689,1698 ****
}
- # Decode \x??
- if ( $self->{lang__} eq 'Nihongo' && !$self->{in_headers__} ) {
- $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig;
- }
-
if ( $self->{lang__} eq 'Nihongo' ) {
$line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} );
$line = parse_line_with_kakasi( $self, $line );
--- 1692,1701 ----
}
if ( $self->{lang__} eq 'Nihongo' ) {
+ # Decode \x??
+ if ( !$self->{in_headers__} ) {
+ $line =~ s/\\x([8-9A-F][A-F0-9])/pack("C", hex($1))/eig;
+ }
+
$line = convert_encoding( $line, $self->{charset__}, 'euc-jp', '7bit-jis', @{$encoding_candidates{$self->{lang__}}} );
$line = parse_line_with_kakasi( $self, $line );
***************
*** 2550,2553 ****
--- 2553,2559 ----
my ( $string, $from, $to, $default, @candidates ) = @_;
+ # If the string contains only ascii characters, do nothing.
+ return $string if ( $string =~ /^[\r\n\t\x20-\x7E]*$/ );
+
require Encode;
require Encode::Guess;
|