From: <de...@de...> - 2008-05-26 08:32:27
|
Author: CrawfordCurrie Date: 2008-05-26 03:31:48 -0500 (Mon, 26 May 2008) New Revision: 16835 Trac url: http://develop.twiki.org/trac/changeset/16835 Modified: twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin.pm twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/Constants.pm twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/HTML2TML.pm twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/TML2HTML.pm Log: Item5528: the last change didn't correctly expand entities in verbatim text, and broke the tests Modified: twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/Constants.pm =================================================================== --- twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/Constants.pm 2008-05-26 08:15:35 UTC (rev 16834) +++ twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/Constants.pm 2008-05-26 08:31:48 UTC (rev 16835) @@ -217,78 +217,88 @@ ); # Reverse mapping -our %HighBit2Unicode = map { $unicode2HighBit{$_} => $_ } keys %unicode2HighBit; +our %highBit2Unicode = map { $unicode2HighBit{$_} => $_ } keys %unicode2HighBit; our $unicode2HighBitChars = join('', keys %unicode2HighBit); -our $HighBit2UnicodeChars = join('', keys %HighBit2Unicode); +our $highBit2UnicodeChars = join('', keys %highBit2Unicode); +our $encoding; -# Entities that we want to convert back to characters, rather -# than leaving them as HTML entities. 
-our @safeEntities = qw( - euro iexcl cent pound curren yen brvbar sect - uml copy ordf laquo not shy reg macr - deg plusmn sup2 sup3 acute micro para middot - cedil sup1 ordm raquo frac14 frac12 frac34 iquest - Agrave Aacute Acirc Atilde Auml Aring AElig Ccedil - Egrave Eacute Ecirc Euml Igrave Iacute Icirc Iuml - ETH Ntilde Ograve Oacute Ocirc Otilde Ouml times - Oslash Ugrave Uacute Ucirc Uuml Yacute THORN szlig - agrave aacute acirc atilde auml aring aelig ccedil - egrave eacute ecirc uml igrave iacute icirc iuml - eth ntilde ograve oacute ocirc otilde ouml divide - oslash ugrave uacute ucirc uuml yacute thorn yuml -); +sub encoding { + unless ($encoding) { + $encoding = Encode::resolve_alias( + $TWiki::cfg{Site}{CharSet} || 'iso-8859-1'); + } + return $encoding; +} # Map selected unicode characters back to high-bit chars if # iso-8859-1 is selected. This is required because the same characters # have different code points in unicode and iso-8859-1. For example, # € is 128 in iso-8859-1 and 8364 in unicode. sub mapUnicode2HighBit { - my $text = shift; - if (Encode::resolve_alias($TWiki::cfg{Site}{CharSet} || 'iso-8859-1') - eq 'iso-8859-1') { + if (encoding() eq 'iso-8859-1') { # Map unicode back to iso-8859 high-bit chars - $text =~ s/([$unicode2HighBitChars])/$unicode2HighBit{$1}/ge; + $_[0] =~ s/([$unicode2HighBitChars])/$unicode2HighBit{$1}/ge; } - return $text; } # Map selected high-bit chars to unicode if # iso-8859-1 is selected. sub mapHighBit2Unicode { - my $text = shift; - if (Encode::resolve_alias($TWiki::cfg{Site}{CharSet} || 'iso-8859-1') - eq 'iso-8859-1') { + if (encoding() eq 'iso-8859-1') { # Map unicode back to iso-8859 high-bit chars - $text =~ s/([$HighBit2UnicodeChars])/$HighBit2Unicode{$1}/ge; + $_[0] =~ s/([$highBit2UnicodeChars])/$highBit2Unicode{$1}/ge; } - return $text; } +# Named entities that we want to convert back to characters, rather +# than leaving them as HTML entities. 
+our @safeEntities = qw( + euro iexcl cent pound curren yen brvbar sect + uml copy ordf laquo not shy reg macr + deg plusmn sup2 sup3 acute micro para middot + cedil sup1 ordm raquo frac14 frac12 frac34 iquest + Agrave Aacute Acirc Atilde Auml Aring AElig Ccedil + Egrave Eacute Ecirc Euml Igrave Iacute Icirc Iuml + ETH Ntilde Ograve Oacute Ocirc Otilde Ouml times + Oslash Ugrave Uacute Ucirc Uuml Yacute THORN szlig + agrave aacute acirc atilde auml aring aelig ccedil + egrave eacute ecirc uml igrave iacute icirc iuml + eth ntilde ograve oacute ocirc otilde ouml divide + oslash ugrave uacute ucirc uuml yacute thorn yuml +); + # Mapping from entity names to characters our $safe_entities; -our $safe_entity_RE; -# Convert the safe entities values to characters in the site charset. -sub decodeSafeEntities { - my $text = shift; - +# Get a hash that maps the safe entities values to characters +# in the site charset. +sub safeEntities { unless ($safe_entities) { - my $encoding = Encode::resolve_alias( - $TWiki::cfg{Site}{CharSet} || 'iso-8859-1'); foreach my $entity (@safeEntities) { # Decode the entity name to unicode my $unicode = HTML::Entities::decode_entities("&$entity;"); - if ($encoding eq 'iso-8859-1') { - # Map unicode back to iso-8859 high-bit chars - $unicode = mapUnicode2HighBit($unicode); - } - $safe_entities->{$entity} = Encode::encode($encoding, $unicode); + # Map unicode back to iso-8859 high-bit chars if required + mapUnicode2HighBit($unicode); + $safe_entities->{$entity} = Encode::encode(encoding(), $unicode); } - $safe_entity_RE = join('|', @safeEntities); } - $text =~ s/&($safe_entity_RE);/$safe_entities->{$1}/g; - return $text; + return $safe_entities; } +# Debug +sub chCodes { + my $text = shift; + my $s = ""; + for (my $i = 0; $i < length($text); $i++) { + my $ch = substr($text, $i, 1); + if (ord($ch) < 32 || ord($ch) > 127) { + $s = $s . '#' . ord($ch) . 
';'; + } else { + $s .= $ch; + } + } + return $s; +} + 1; Modified: twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/HTML2TML.pm =================================================================== --- twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/HTML2TML.pm 2008-05-26 08:15:35 UTC (rev 16834) +++ twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/HTML2TML.pm 2008-05-26 08:31:48 UTC (rev 16835) @@ -129,9 +129,16 @@ $text = Encode::encode_utf8($text); } - # Decode safe entities back to characters - $text = WC::decodeSafeEntities($text); + # Convert (safe) named entities back to the + # site charset. Numeric entities are mapped straight to the + # corresponding code point unless their value overflow. + require HTML::Entities; + HTML::Entities::_decode_entities($text, WC::safeEntities()); + # After decoding entities, we have to map unicode characters + # back to high bit + WC::mapUnicode2HighBit($text); + return $text; } Modified: twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/TML2HTML.pm =================================================================== --- twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/TML2HTML.pm 2008-05-26 08:15:35 UTC (rev 16834) +++ twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin/TML2HTML.pm 2008-05-26 08:31:48 UTC (rev 16835) @@ -143,6 +143,8 @@ return $thing->{text} if $thing->{encoding} eq 'NONE'; my $method = 'CGI::'.$thing->{encoding}; my $text = $thing->{text}; + $text = _protectVerbatimChars($text) if + $thing->{type} =~ /^(PROTECTED|STICKY|VERBATIM)$/; no strict 'refs'; return &$method({class => 'WYSIWYG_'.$thing->{type} }, $text); use strict 'refs'; @@ -447,9 +449,9 @@ $this->_putBackBlocks( $text, 'literal', 'div' ); # replace verbatim with pre in the final output, with 
encoded entities - $this->_putBackBlocks( $text, 'verbatim', 'pre' ); + $this->_putBackBlocks($text, 'verbatim', 'pre', \&_protectVerbatimChars); - $this->_putBackBlocks( $text, 'sticky', 'div' ); + $this->_putBackBlocks($text, 'sticky', 'div', \&_protectVerbatimChars); $text =~ s/(<nop>)/$this->_liftOut($1, 'PROTECTED')/ge; @@ -464,6 +466,15 @@ } } +# Encode special chars in verbatim as entities to prevent misinterpretation +sub _protectVerbatimChars { + my $text = shift; + $text =~ s/([\000-\011\013-\037<&>'"])/'&#'.ord($1).';'/ges; + $text =~ s/ /&nbsp;/g; + $text =~ s/\n/<br \/>/gs; + return $text; +} + sub _takeOutIMGTag { my ($this, $text) = @_; # Expand selected TWiki variables in IMG tags so that images appear in the Modified: twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin.pm =================================================================== --- twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin.pm 2008-05-26 08:15:35 UTC (rev 16834) +++ twiki/branches/TWikiRelease04x02/twikiplugins/WysiwygPlugin/lib/TWiki/Plugins/WysiwygPlugin.pm 2008-05-26 08:31:48 UTC (rev 16835) @@ -605,15 +605,6 @@ # This function is used to decode such parameters to the currently selected # TWiki site character set. -sub chcodes { - my $text = shift; - my $s = ""; - for (my $i = 0; $i < length($text); $i++) { - $s = $s . " ". ord(substr($text, $i, 1)); - } - return $s; -} - # Note that this transform is not as simple as an Encode::from_to, as # a number of unicode code points must be remapped for certain encodings. 
sub RESTParameter2SiteCharSet { @@ -621,7 +612,7 @@ $text = Encode::decode_utf8($text, Encode::FB_PERLQQ); - $text = WC::mapUnicode2HighBit($text); + WC::mapUnicode2HighBit($text); $text = Encode::encode( $TWiki::cfg{Site}{CharSet}, $text, Encode::FB_PERLQQ); @@ -640,7 +631,7 @@ $text = Encode::decode( $TWiki::cfg{Site}{CharSet}, $text, Encode::FB_PERLQQ); - $text = WC::mapHighBit2Unicode($text); + WC::mapHighBit2Unicode($text); $text = Encode::encode_utf8($text); |