From: <no...@kr...> - 2008-02-28 21:22:28
|
Revision: 4912 Author: noam Date: 2008-02-28 16:21:52 -0500 (Thu, 28 Feb 2008) Log Message: ----------- krang_charset_convert_data - fixed bug in 1252 definition Modified Paths: -------------- branches/krang_v3_02/bin/krang_charset_convert_data Modified: branches/krang_v3_02/bin/krang_charset_convert_data =================================================================== --- branches/krang_v3_02/bin/krang_charset_convert_data 2008-02-28 20:36:55 UTC (rev 4911) +++ branches/krang_v3_02/bin/krang_charset_convert_data 2008-02-28 21:21:52 UTC (rev 4912) @@ -365,68 +365,27 @@ } } -my %cp1252_string_to_char; -sub build_special_cp1252_conversions { - for (my $val = 0; $val < 256; $val++) { - my $char = chr($val); - from_to($char, 'windows-1252', $to); - my $string = '&#' . sprintf("%03d", $val) . ';'; - $cp1252_string_to_char{$string} = $char; - } -} - -my %cp1252_string_to_html_entity = -( - '128' => '€', - '130' => '‚', - '131' => 'ƒ', - '132' => '„', - '133' => '…', - '134' => '†', - '135' => '‡', - '136' => 'ˆ', - '137' => '‰', - '138' => 'Š', - '139' => '‹', - '140' => 'Œ', - '142' => 'Ž', - '145' => '‘', - '146' => '’', - '147' => '“', - '148' => '”', - '149' => '•', - '150' => '–', - '151' => '—', - '152' => '˜', - '153' => '™', - '154' => 'š', - '155' => '›', - '156' => 'œ', - '158' => 'ž', - '159' => 'Ÿ', - ); - sub cp1252_string_to_html_entity { + my %cp1252_to_html = (128=>'€', 130=>'‚', 131=>'ƒ', 132=>'„', 133=>'…', 134=>'†', 135=>'‡', 136=>'ˆ', 137=>'‰', 138=>'Š', 139=>'‹', 140=>'Œ', 142=>'Ž', 145=>'‘', 146=>'’', 147=>'“', 148=>'”', 149=>'•', 150=>'–', 151=>'—', 152=>'˜', 153=>'™', 154=>'š', 155=>'›', 156=>'œ', 158=>'ž', 159=>'Ÿ'); + my $orig_string = shift; my ($prefix, $value) = ($orig_string =~ /^\&(\#|x|X)(.*);$/); return $orig_string unless ($prefix && $value); - + $value = hex($value) if ($prefix eq 'x' || $prefix eq 'X'); - my $html_entity = $cp1252_string_to_html_entity{$value}; + my $html_entity = $cp1252_to_html{$value}; return $html_entity ? 
$html_entity : $orig_string; } - sub convert_scalar { my $data = shift; return unless defined $data; - - # convert manually-entered Windows-CP1252 strings (e.g. &#151;) to HTML-entity equivalents (e.g. &mdash;) - $data =~ s/((\&\#\d\d\d\;)|(\&[xX][0-9a-fA-F][0-9a-fA-F];))/@{[cp1252_string_to_html_entity($1)]}/sg - unless (pkg('Charset')->mysql_charset($from) ne 'latin1'); - # now perform actual charset conversion + # convert manually-entered Windows-CP1252 strings (e.g. &#151;) to HTML-entity equivalents (e.g. &mdash;) + $data =~ s/((\&\#\d\d\d\;)|(\&[xX][0-9a-fA-F][0-9a-fA-F];))/@{[cp1252_string_to_html_entity($1)]}/sg + unless (pkg('Charset')->mysql_charset($from) ne 'latin1'); + + # now perform actual charset conversion from_to($data, $from, $to); - return $data; -} +} |