From: <pdo...@us...> - 2021-08-25 03:33:10
|
Revision: 14926 http://sourceforge.net/p/squirrelmail/code/14926 Author: pdontthink Date: 2021-08-25 03:33:09 +0000 (Wed, 25 Aug 2021) Log Message: ----------- Fix certain messages with headers in unknown charsets encoded as quoted printable showing up as blank sender/subject in message list. There are two ways to fix it, and the more accurate but costly one is left unfinished since the easy fix seems to have helped in every case I know of Modified Paths: -------------- branches/SM-1_4-STABLE/squirrelmail/functions/i18n.php branches/SM-1_4-STABLE/squirrelmail/functions/strings.php Modified: branches/SM-1_4-STABLE/squirrelmail/functions/i18n.php =================================================================== --- branches/SM-1_4-STABLE/squirrelmail/functions/i18n.php 2021-08-10 06:41:30 UTC (rev 14925) +++ branches/SM-1_4-STABLE/squirrelmail/functions/i18n.php 2021-08-25 03:33:09 UTC (rev 14926) @@ -159,15 +159,17 @@ } /** - * Converts string from given charset to charset, that can be displayed by user translation. + * Converts a string from the given $charset to a character set that + * can be displayed by the current user interface language (translation) * - * Function by default returns html encoded strings, if translation uses different encoding. + * Function by default returns html encoded strings if translation uses + * different encoding. * If Japanese translation is used - function returns string converted to euc-jp * If $charset is not supported - function returns unconverted string. * * sanitizing of html tags is also done by this function. * - * @param string $charset + * @param string $charset The charset of the incoming string * @param string $string Text to be decoded * @param boolean $force_decode converts string to html without $charset!=$default_charset check. * Argument is available since 1.4.5 and 1.5.1. @@ -184,7 +186,7 @@ } /* All HTML special characters are 7 bit and can be replaced first */ - if (! $save_html) $string = sm_encode_html_special_chars ($string); + if (! $save_html) $string = sm_encode_html_special_chars($string, ENT_COMPAT, $charset); $charset = strtolower($charset); set_my_charset(); Modified: branches/SM-1_4-STABLE/squirrelmail/functions/strings.php =================================================================== --- branches/SM-1_4-STABLE/squirrelmail/functions/strings.php 2021-08-10 06:41:30 UTC (rev 14925) +++ branches/SM-1_4-STABLE/squirrelmail/functions/strings.php 2021-08-25 03:33:09 UTC (rev 14926) @@ -1536,21 +1536,75 @@ * attempts to add the correct character encoding * * @param string $string The string to be converted - * @param int $flags A bitmask that controls the behavior of htmlspecialchars() + * @param int $flags A bitmask that controls the behavior of + * htmlspecialchars() -- NOTE that this parameter + * should only be used to dictate handling of + * quotes; handling invalid code sequences is done + * using the $invalid_sequence_flag parameter below * (See http://php.net/manual/function.htmlspecialchars.php ) - * (OPTIONAL; default ENT_COMPAT, ENT_COMPAT | ENT_SUBSTITUTE for PHP >=5.4) + * (OPTIONAL; default ENT_COMPAT) * @param string $encoding The character encoding to use in the conversion - * (OPTIONAL; default automatic detection) + * (if not one of the character sets supported + * by PHP's htmlspecialchars(), then $encoding + * will be ignored and iso-8859-1 will be used, + * unless a default has been specified in + * $default_htmlspecialchars_encoding in + * config_local.php) (OPTIONAL; default automatic + * detection) * @param boolean $double_encode Whether or not to convert entities that are * already in the string (only supported in * PHP 5.2.3+) (OPTIONAL; default TRUE) + * @param mixed $invalid_sequence_flag A bitmask that controls how invalid + * code sequences should be handled; + * When calling htmlspecialchars(), + * this value will be combined with + * the $flags parameter above + * (See http://php.net/manual/function.htmlspecialchars.php ) + * (OPTIONAL; defaults to the string + * "ent_substitute" that, for PHP 5.4+, + * is converted to the ENT_SUBSTITUTE + * constant, otherwise empty) * * @return string The converted text * */ function sm_encode_html_special_chars($string, $flags=ENT_COMPAT, - $encoding=NULL, $double_encode=TRUE) + $encoding=NULL, $double_encode=TRUE, + $invalid_sequence_flag='ent_substitute') { + if ($invalid_sequence_flag === 'ent_substitute') + { + if (check_php_version(5, 4, 0)) + $invalid_sequence_flag = ENT_SUBSTITUTE; + else + $invalid_sequence_flag = 0; + } + + + // charsets supported by PHP's htmlspecialchars + // (move this elsewhere if needed) + // + static $htmlspecialchars_charsets = array( + 'iso-8859-1', 'iso8859-1', + 'iso-8859-5', 'iso8859-5', + 'iso-8859-15', 'iso8859-15', + 'utf-8', + 'cp866', 'ibm866', '866', + 'cp1251', 'windows-1251', 'win-1251', '1251', + 'cp1252', 'windows-1252', '1252', + 'koi8-R', 'koi8-ru', 'koi8r', + 'big5', '950', + 'gb2312', '936', + 'big5-hkscs', + 'shift_jis', 'sjis', 'sjis-win', 'cp932', '932', + 'euc-jp', 'eucjp', 'eucjp-win', + 'macroman', + ); + + + // if not given, set encoding to the charset being + // used by the current user interface language + // if (!$encoding) { global $default_charset; @@ -1559,15 +1613,58 @@ $encoding = $default_charset; } - if (check_php_version(5, 2, 3)) { - // Replace invalid characters with a symbol instead of returning - // empty string for the entire to be encoded string. - if (check_php_version(5, 4, 0) && $flags == ENT_COMPAT) { - $flags = $flags | ENT_SUBSTITUTE; + + // two ways to handle encodings not supported by htmlspecialchars() - + // one takes less CPU cycles but can munge characters in certain + // translations, the other is more exact but requires more resources + // + global $html_special_chars_extended_fix; +//FIXME: need to document that the config switch above can be enabled in config_local... but first, we need to decide if we will implement the second option here -- currently there hasn't been a need for it (munged characters seem quite rare).... see tracker #2806 for some tips https://sourceforge.net/p/squirrelmail/bugs/2806 + if (!in_array(strtolower($encoding), $htmlspecialchars_charsets)) + { + if ($html_special_chars_extended_fix) + { + // convert to utf-8 first, run htmlspecialchars() and convert + // back to original encoding below + // +//FIXME: try conversion functions in this order: recode_string(), iconv(), mbstring (with various charset checks: sq_mb_list_encodings(), mb_check_encoding) -- oh, first check for internal charset_decode_CHARSET() function?? or just use (does this put everything into HTML entities already? shouldn't, but if it does, return right here): + $string = charset_decode($encoding, $string, TRUE, TRUE); + $string = charset_encode($string, $encoding, TRUE); } - return htmlspecialchars($string, $flags, $encoding, $double_encode); + else + { + // simply force use of an encoding that is supported (some + // characters may be munged) + // + // use default from configuration if provided or hard-coded fallback + // + global $default_htmlspecialchars_encoding; + if (!empty($default_htmlspecialchars_encoding)) + $encoding = $default_htmlspecialchars_encoding; + else + $encoding = 'iso-8859-1'; + } } - return htmlspecialchars($string, $flags, $encoding); + +// TODO: Is adding this check an unnecessary performance hit? + if (check_php_version(5, 2, 3)) + $ret = htmlspecialchars($string, $flags | $invalid_sequence_flag, + $encoding, $double_encode); + else + $ret = htmlspecialchars($string, $flags | $invalid_sequence_flag, + $encoding); + + + // convert back to original encoding if needed (see above) + // + if ($html_special_chars_extended_fix + && !in_array(strtolower($encoding), $htmlspecialchars_charsets)) + { +//FIXME: NOT FINISHED - here, we'd convert from utf-8 back to original charset (if we obey $lossy_encoding and end up returning in utf-8 instead of original charset, does that screw up the caller?) + } + + + return $ret; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |