From: Richard D. <rd...@us...> - 2002-12-10 21:54:42
|
Update of /cvsroot/twiki/twiki/lib In directory sc8-pr-cvs1:/tmp/cvs-serv28466/lib Modified Files: TWiki.pm Log Message: Fix for non-encoding of 8-bit characters in RSS feeds - any skins named 'rss*' are rendered in $pageMode = 'rss', i.e. 8-bit characters are rendered as &#nnn;. Such characters are not encoded in HTML pages. Tested on donkin.org RSS feed since this needs to go into TWiki.org, e.g. http://donkin.org/bin/view/Main/WebRss?skin=rss&contenttype=text/xml has an example, while http://donkin.org/bin/view/Test/WebChanges doesn't encode 8-bit characters. Index: TWiki.pm =================================================================== RCS file: /cvsroot/twiki/twiki/lib/TWiki.pm,v retrieving revision 1.185 retrieving revision 1.186 diff -C2 -r1.185 -r1.186 *** TWiki.pm 9 Dec 2002 19:32:01 -0000 1.185 --- TWiki.pm 10 Dec 2002 21:54:37 -0000 1.186 *************** *** 85,88 **** --- 85,89 ---- $formatVersion $OS $readTopicPermissionFailed + $pageMode ); *************** *** 97,101 **** $singleUpperAlphaRegex $singleLowerAlphaRegex $singleUpperAlphaNumRegex $singleMixedAlphaNumRegex $singleMixedNonAlphaNumRegex ! $mixedAlphaNumRegex ); --- 98,102 ---- $singleUpperAlphaRegex $singleLowerAlphaRegex $singleUpperAlphaNumRegex $singleMixedAlphaNumRegex $singleMixedNonAlphaNumRegex ! $singleMixedNonAlphaRegex $mixedAlphaNumRegex ); *************** *** 189,192 **** --- 190,195 ---- $basicInitDone = 0; # basicInitialize not yet done + $pageMode = 'html'; # Default is to render as HTML + # ========================= *************** *** 471,475 **** $singleMixedAlphaNumRegex = qr/[${upperAlpha}${lowerAlpha}${numeric}]/; ! $singleMixedNonAlphaNumRegex = qr/[^${upperAlpha}${lowerAlpha}${numeric}]/; # Multi-character alpha-based regexes --- 474,479 ---- $singleMixedAlphaNumRegex = qr/[${upperAlpha}${lowerAlpha}${numeric}]/; ! $singleMixedNonAlphaRegex = qr/[^${upperAlpha}${lowerAlpha}${numeric}]/; ! $singleMixedNonAlphaNumRegex = qr/[^${upperAlpha}${lowerAlpha}]/; # Multi-character alpha-based regexes *************** *** 511,514 **** --- 515,519 ---- my( $pluginHeaders, $coreHeaders ); + $contentType .= "; charset=$siteCharset"; *************** *** 585,588 **** --- 590,602 ---- # ========================= + # Set page mode: + # - 'rss' - encode 8-bit characters as XML entities + # - 'html' - no encoding of 8-bit characters + sub pageMode + { + $pageMode = shift; + } + + # ========================= sub getCgiQuery { *************** *** 1118,1131 **** $htext =~ s/\s+[\+\-]*/ /g; # remove newlines and special chars ! # limit to 162 chars $htext =~ s/(.{162})($mixedAlphaNumRegex)(.*?)$/$1$2 \.\.\./g; ! # Commented out by RD - encoding breaks non-ISO-8859-1 character sets, and ! # the browsers and RSS readers work OK with unencoded 8 bit characters ! # within RSS feeds, as long as %CHARSET% is included in template. ! # ! # Encode special chars into HTML &#nnn; entities for international ! # character support ! # $htext =~ s/([\x7f-\xff])/"\&\#" . unpack( "C", $1 ) .";"/ge; # inline search renders text, so prevent linking of external and --- 1132,1145 ---- $htext =~ s/\s+[\+\-]*/ /g; # remove newlines and special chars ! # limit to 162 chars ! # FIXME I18N: Avoid splitting within multi-byte character sets $htext =~ s/(.{162})($mixedAlphaNumRegex)(.*?)$/$1$2 \.\.\./g; ! # Encode special chars into XML &#nnn; entities for use in RSS feeds ! # - no encoding for HTML pages, to avoid breaking international ! # characters. ! if( $pageMode eq 'rss' ) { ! $htext =~ s/([\x7f-\xff])/"\&\#" . unpack( "C", $1 ) .";"/ge; ! } # inline search renders text, so prevent linking of external and |