From: <dg...@su...> - 2009-02-16 15:42:37
|
Author: bellmich Date: Mon Feb 16 16:40:20 2009 New Revision: 928 URL: http://libsyncml.opensync.org/changeset/928 Log: added support for broken UTF-16 handling of Nokia SMS sync Please note that this works only for downloading SMS. added a new test for the UTF-16 conversion Modified: trunk/libsyncml/parser/sml_xml_parse.c trunk/tests/check_xml_fix_broken_item_data.c Modified: trunk/libsyncml/parser/sml_xml_parse.c ============================================================================== --- trunk/libsyncml/parser/sml_xml_parse.c Mon Feb 16 16:37:56 2009 (r927) +++ trunk/libsyncml/parser/sml_xml_parse.c Mon Feb 16 16:40:20 2009 (r928) @@ -1604,9 +1604,103 @@ smlAssert(fixed_data); smlAssert(fixed_size); + /* ******************* + * fix wrong encoding + * ******************* + */ + + /* This fix was implemented for text/x-vMessage. + * Some mobiles encode SMS always as UTF-16 + * even if the XML document is encoded in UTF-8. + * Example: Nokia E71 + */ + *fixed_size = size; - *fixed_data = g_strdup(data); + *fixed_data = smlTryMalloc0(size + 1, error); + if (!*fixed_data) + { + smlTrace(TRACE_EXIT_ERROR, "%s - %s", __func__, smlErrorPrint(error)); + return FALSE; + } + memcpy(*fixed_data, data, size); const char *position = *fixed_data; + + while (position + 1 < *fixed_data + *fixed_size) + { + /* check if the next character is a NULL byte */ + const char *byte = position + 1; + if (*byte != 0) { + /* the next byte is not NULL */ + position++; + continue; + } + smlTrace(TRACE_INTERNAL, "%s: Found NULL byte in XML document at %p.", __func__, position); + + /* the next character is a NULL byte + * so let's check how long the UTF-16 string is + */ + const char *last_utf16 = position; + while (last_utf16 + 1 < *fixed_data + *fixed_size && + *((char *)(last_utf16 + 1)) == 0) + { + last_utf16 += 2; + } + + /* go to the last NULL byte */ + last_utf16--; + + /* convert the whole string to UTF-8 */ + smlTrace(TRACE_INTERNAL, "%s: Converting %d bytes ...", __func__, 
last_utf16 - position + 1); + GError *gerror = NULL; + size_t read = 0; + size_t written = 0; + gchar *conv_string = g_convert( + position, (last_utf16 - position + 1), + "UTF-8", "UTF-16", + &read, &written, + &gerror); + if (gerror != NULL) + { + smlErrorSet( + error, SML_ERROR_GENERIC, + "Character conversion from UTF-16 to UTF-8 failed. %s", + gerror->message); + g_error_free(gerror); + smlTrace(TRACE_EXIT_ERROR, "%s - %s", __func__, smlErrorPrint(error)); + return FALSE; + } + smlTrace(TRACE_INTERNAL, "%s: read %d --> written %d --> %d ::= %s", __func__, read, written, strlen(conv_string), conv_string); + + /* replace the embedded string */ + char *new_data = smlTryMalloc0(*fixed_size - (last_utf16 - position + 1) + strlen(conv_string) + 1, error); + if (!new_data) + { + smlTrace(TRACE_EXIT_ERROR, "%s - %s", __func__, smlErrorPrint(error)); + return FALSE; + } + memcpy(new_data, *fixed_data, (size_t) position - (size_t) *fixed_data); + memcpy(new_data + (size_t) position - (size_t) *fixed_data, conv_string, strlen(conv_string)); + memcpy(new_data + (size_t) position - (size_t) *fixed_data + strlen(conv_string), + last_utf16 + 1, *fixed_size - ( last_utf16 + 1 - *fixed_data )); + + /* fix pointers */ + *fixed_size = *fixed_size - (last_utf16 - position + 1) + strlen(conv_string); + position = new_data + (position - *fixed_data) + strlen(conv_string) + 1; + smlSafeCFree(fixed_data); + *fixed_data = new_data; + new_data = NULL; + smlSafeCFree(&conv_string); + + smlTrace(TRACE_INTERNAL, "%s: Converted UTF-16 string to UTF-8", __func__); + } + smlTrace(TRACE_INTERNAL, "%s: Correctly encoded: %s", __func__, *fixed_data); + + /* ***************** + * add missing CDATA + * ***************** + */ + + position = *fixed_data; while (position + 1 < *fixed_data + *fixed_size) { Modified: trunk/tests/check_xml_fix_broken_item_data.c ============================================================================== --- trunk/tests/check_xml_fix_broken_item_data.c Mon Feb 16 
16:37:56 2009 (r927) +++ trunk/tests/check_xml_fix_broken_item_data.c Mon Feb 16 16:40:20 2009 (r928) @@ -177,6 +177,42 @@ } END_TEST +START_TEST (check_utf16_conversion) +{ + SmlError *error = NULL; + char *input_data = smlTryMalloc0(20, &error); + sml_fail_unless(error == NULL, "%s", smlErrorPrint(&error)); + input_data[0] = 65; /* A */ + input_data[1] = 66; /* B */ + input_data[3] = 67; /* C */ + input_data[5] = 68; /* D */ + input_data[6] = 69; /* E */ + input_data[7] = 70; /* F*/ + input_data[8] = 71; /* G */ + input_data[10] = 72; /* H */ + input_data[12] = 73; /* I */ + input_data[13] = 74; /* J */ + size_t length = 14; + + char *fixed_data = NULL; + unsigned int fixed_size = 0; + + /* check UTF-16 conversion of _smlXmlParserFixBrokenItemData */ + + sml_fail_unless( + _smlXmlParserFixBrokenItemData( + input_data, length, + &fixed_data, &fixed_size, + &error), + "%s", smlErrorPrint(&error)); + sml_fail_unless(error == NULL, "%s", smlErrorPrint(&error)); + sml_fail_unless(fixed_size == strlen("ABCDEFGHIJ"), "%d != %d", fixed_size, strlen("ABCDEFGHIJ")); + sml_fail_unless(strcmp(fixed_data, "ABCDEFGHIJ") == 0, NULL); + g_free(input_data); + g_free(fixed_data); +} +END_TEST + Suite *parser_suite(void) { Suite *s = suite_create("Parser"); @@ -188,6 +224,7 @@ create_case(s, "check_one_broken_item_data", check_one_broken_item_data); create_case(s, "check_multi_broken_item_data", check_multi_broken_item_data); create_case(s, "check_mixed_broken_item_data", check_mixed_broken_item_data); + create_case(s, "check_utf16_conversion", check_utf16_conversion); return s; } |