From: <tp...@ke...> - 2007-06-27 22:30:43
|
CVS Root: /cvs/gstreamer Module: gst-plugins-base Changes by: tpm Date: Wed Jun 27 2007 22:30:34 UTC Log message: * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags to utf8-validate; fixes recognition of ID3v1 tags in UTF-8 encoding (#451707); also, output some debugging info when dealing with freeform strings. * tests/check/libs/tag.c: (GST_START_TEST), (tag_suite): Add unit test for the above. Modified files: . : ChangeLog gst-libs/gst/tag: tags.c tests/check/libs: tag.c Links: http://freedesktop.org/cgi-bin/viewcvs.cgi/gstreamer/gst-plugins-base/ChangeLog.diff?r1=1.3382&r2=1.3383 http://freedesktop.org/cgi-bin/viewcvs.cgi/gstreamer/gst-plugins-base/gst-libs/gst/tag/tags.c.diff?r1=1.9&r2=1.10 http://freedesktop.org/cgi-bin/viewcvs.cgi/gstreamer/gst-plugins-base/tests/check/libs/tag.c.diff?r1=1.11&r2=1.12 ====Begin Diffs==== Index: ChangeLog =================================================================== RCS file: /cvs/gstreamer/gst-plugins-base/ChangeLog,v retrieving revision 1.3382 retrieving revision 1.3383 diff -u -d -r1.3382 -r1.3383 --- ChangeLog 27 Jun 2007 12:55:19 -0000 1.3382 +++ ChangeLog 27 Jun 2007 22:30:19 -0000 1.3383 @@ -1,5 +1,16 @@ 2007-06-27 Tim-Philipp Müller <tim at centricular dot net> + * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): + Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags + to utf8-validate; fixes recognition of ID3v1 tags in UTF-8 encoding + (#451707); also, output some debugging info when dealing with + freeform strings. + + * tests/check/libs/tag.c: (GST_START_TEST), (tag_suite): + Add unit test for the above. +2007-06-27 Tim-Philipp Müller <tim at centricular dot net> * gst-libs/gst/pbutils/descriptions.c: (caps_are_rtp_caps): Add description for Windows Media RTP caps. 
Index: tags.c RCS file: /cvs/gstreamer/gst-plugins-base/gst-libs/gst/tag/tags.c,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- tags.c 12 Apr 2007 16:36:36 -0000 1.9 +++ tags.c 27 Jun 2007 22:30:19 -0000 1.10 @@ -255,10 +255,18 @@ if (size < 0) size = strlen (data); + /* chop off trailing string terminators to make sure utf8_validate doesn't + * get to see them (since that would make the utf8 check fail) */ + while (size > 0 && data[size - 1] == '\0') + --size; /* Should we try the charsets specified * via environment variables FIRST ? */ - if (g_utf8_validate (data, size, NULL)) - return g_strndup (data, size); + if (g_utf8_validate (data, size, NULL)) { + utf8 = g_strndup (data, size); + GST_LOG ("String '%s' is valid UTF-8 already", utf8); + goto beach; + } while (env_vars && *env_vars != NULL) { const gchar *env = NULL; @@ -271,6 +279,7 @@ csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1); for (c = csets; c && *c; ++c) { + GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c); if ((utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { if (bytes_read == size) { @@ -289,6 +298,7 @@ /* Try current locale (if not UTF-8) */ if (!g_get_charset (&cur_loc)) { + GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc); if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) { if (bytes_read == size) { goto beach; @@ -299,6 +309,7 @@ } /* Try ISO-8859-1 */ + GST_LOG ("Trying to convert freeform string using ISO-8859-1 fallback"); utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL); if (utf8 != NULL && bytes_read == size) { goto beach; @@ -310,8 +321,10 @@ beach: g_strchomp (utf8); - if (utf8 && utf8[0] != '\0') + if (utf8 && utf8[0] != '\0') { + GST_LOG ("Returning '%s'", utf8); return utf8; g_free (utf8); return NULL; Index: tag.c RCS file: /cvs/gstreamer/gst-plugins-base/tests/check/libs/tag.c,v retrieving revision 1.11 retrieving revision 1.12 
diff -u -d -r1.11 -r1.12 --- tag.c 27 Mar 2007 10:17:16 -0000 1.11 +++ tag.c 27 Jun 2007 22:30:19 -0000 1.12 @@ -600,6 +600,80 @@ GST_END_TEST; +GST_START_TEST (test_id3v1_utf8_tag) +{ + const guint8 id3v1[128] = { + /* marker */ + 'T', 'A', 'G', + /* title (30 bytes) */ + 'D', 0xc3, 0xad, 'v', 'k', 'a', ' ', 's', + ' ', 'p', 'e', 'r', 'l', 'a', 'm', 'i', + ' ', 'v', 'e', ' ', 'v', 'l', 'a', 's', + 'e', 'c', 'h', 0, 0, 0, + /* artist (30 bytes) */ + 'A', 'l', 'e', 0xc5, 0xa1, ' ', 'B', 'r', 'i', 'c', 'h', 't', 'a', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* album (30 bytes) */ + 'B', 'e', 's', 't', ' ', 'o', 'f', ' ', '(', 'P', 'r', 'o', 's', 't', + 0xc4, 0x9b, ' ', 0xc3, 0xba, 0xc5, 0xbe, 'a', 's', 'n', 0xc3, 0xbd, ')', + 0, 0, 0, + /* year (4 bytes) */ + '2', '0', '0', '0', + /* comment (28 bytes) */ + '-', '-', '-', ' ', 0xc4, 0x8d, 'e', 's', 'k', 0xc3, 0xa9, ' ', 'p', + 0xc3, 0xad, 's', 'n', 'i', 0xc4, 0x8d, 'k', 'y', ' ', '-', '-', '-', + 0, 0, + /* track number */ + /* genre */ + 0x11 + }; + GstTagList *tags; + GDate *d; + gchar *s; + /* set this, to make sure UTF-8 strings are really interpreted properly + * as UTF-8, regardless of the locale set */ + g_setenv ("GST_ID3V1_TAG_ENCODING", "WINDOWS-1250", TRUE); + tags = gst_tag_list_new_from_id3v1 (id3v1); + fail_unless (tags != NULL); + GST_LOG ("Got tags: %" GST_PTR_FORMAT, tags); + s = NULL; + fail_unless (gst_tag_list_get_string (tags, GST_TAG_TITLE, &s)); + fail_unless (s != NULL); + fail_unless_equals_string (s, "Dívka s perlami ve vlasech"); + g_free (s); + fail_unless (gst_tag_list_get_string (tags, GST_TAG_ARTIST, &s)); + fail_unless_equals_string (s, "Aleš Brichta"); + fail_unless (gst_tag_list_get_string (tags, GST_TAG_ALBUM, &s)); + fail_unless_equals_string (s, "Best of (Prostě úžasný)"); + d = NULL; + fail_unless (gst_tag_list_get_date (tags, GST_TAG_DATE, &d)); + fail_unless (d != NULL); + fail_unless_equals_int (g_date_get_year (d), 2000); + g_date_free (d); + 
gst_tag_list_free (tags); + g_unsetenv ("GST_ID3V1_TAG_ENCODING"); +} +GST_END_TEST; static Suite * tag_suite (void) { @@ -611,6 +685,7 @@ tcase_add_test (tc_chain, test_parse_extended_comment); tcase_add_test (tc_chain, test_vorbis_tags); tcase_add_test (tc_chain, test_id3_tags); + tcase_add_test (tc_chain, test_id3v1_utf8_tag); return s; } |