From: Cyril P. <cyr...@la...> - 2005-01-10 21:27:12
|
Hi I'm writing an XML file with the xmlpp interface. Everything is fine with xmlpp::Document::write_to_file_formatted(); But when I'm calling xmlpp::Document::write_to_string_formatted(), I receive a SIGABRT signal. Any idea around ? Thanks PS: please find hereafter my source code and the gdb 'info st' output * source code (very simple - the ConvertInput function comes from libxml2 code API samples). #include <libxml++/libxml++.h> #include <libxml/tree.h> #include <string> #include <iostream> /** * ConvertInput: * @in: string in a given encoding * @encoding: the encoding used * * Converts @in into UTF-8 for processing with libxml2 APIs * * Returns the converted UTF-8 string, or NULL in case of error. */ xmlChar * ConvertInput(const char *in, const char *encoding) { xmlChar *out; int ret; int size; int out_size; int temp; xmlCharEncodingHandlerPtr handler; if (in == 0) return 0; handler = xmlFindCharEncodingHandler(encoding); if (!handler) { printf("ConvertInput: no encoding handler found for '%s'\n", encoding ? 
encoding : ""); return 0; } size = (int) strlen(in) + 1; out_size = size * 2 - 1; out = (unsigned char *) xmlMalloc((size_t) out_size); if (out != 0) { temp = size - 1; ret = handler->input(out, &out_size, (const xmlChar *) in, &temp); if ((ret < 0) || (temp - size + 1)) { if (ret < 0) { printf("ConvertInput: conversion wasn't successful.\n"); } else { printf ("ConvertInput: conversion wasn't successful. converted: %i octets.\n", temp); } xmlFree(out); out = 0; } else { out = (unsigned char *) xmlRealloc(out, out_size + 1); out[out_size] = 0; /*null terminating out */ } } else { printf("ConvertInput: no mem\n"); } return out; } int main(int argc, char **argv) { xmlpp::Document doc; xmlpp::Element * elem = 0; std::string my_encoding = "ISO-8859-1"; doc.set_internal_subset("GEDCOM", "", "gedcom60.dtd"); doc.add_comment("Generated by libgedcomparser - part of GHOSTS project"); elem = doc.create_root_node("GEDCOM"); xmlChar * converted = ConvertInput("blah é", my_encoding.c_str()); std::cerr << converted << std::endl; if (converted != 0) { std::string converted_string((char *)converted); std::cerr << converted << std::endl; elem->set_child_text(converted_string); } doc.write_to_file_formatted("xmlencode.xml", my_encoding); std::cerr << "write_to_file_formatted OK" << std::endl; Glib::ustring result_xml; result_xml = doc.write_to_string_formatted(my_encoding); std::cerr << "write_to_string_formatted OK" << std::endl; std::cerr << result_xml << std::endl; return 0; } * gdb info st : #0 0x403aa621 in kill () from /lib/libc.so.6 #1 0x4017826b in raise (sig=6) at signals.c:65 #2 0x403aba53 in abort () from /lib/libc.so.6 #3 0x402f7895 in __cxxabiv1::__terminate(void (*)()) () from /usr/lib/libstdc++.so.3 #4 0x402f7880 in __cxxabiv1::__terminate(void (*)()) () from /usr/lib/libstdc++.so.3 #5 0x402f79e0 in __cxa_rethrow () from /usr/lib/libstdc++.so.3 #6 0x402ede29 in std::__throw_length_error(char const*) () from 
/usr/lib/libstdc++.so.3 #7 0x0804aef1 in std::string::_Rep::_S_create(unsigned, std::allocator<char> const&) (__capacity=4294967295, __alloc=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:371 #8 0x0804ab28 in char* std::string::_S_construct<char const*>(char const*, char const*, std::allocator<char> const&, std::forward_iterator_tag) ( __beg=0x80516e0 "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS project-->\n<GEDCOM>blah é</GEDCOM>\n", __end=0x80516df "", __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:143 #9 0x40034674 in std::string::string(char const*, unsigned, std::allocator<char> const&) (this=0xbffff560, __s=0x80516e0 "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS project-->\n<GEDCOM>blah é</GEDCOM>\n", __n=4294967295, __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.h:666 #10 0x401c40c4 in Glib::ustring::ustring(char const*, unsigned) (this=0xbffff560, src=0x80516e0 "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS project-->\n<GEDCOM>blah é</GEDCOM>\n", n=169) at /usr/include/g++-v3/bits/stl_alloc.h:571 #11 0x40025d27 in xmlpp::Document::do_write_to_string(Glib::ustring const&, bool) (this=0xbffff6a0, encoding=@0xbffff5d0, format=true) at document.cc:309 |
From: Jonathan W. <co...@co...> - 2005-01-11 09:43:40
|
On Mon, Jan 10, 2005 at 10:27:09PM +0100, Cyril PICARD wrote: > Hi > > I'm writing an XML file with the xmlpp interface. > Everything is fine with xmlpp::Document::write_to_file_formatted(); > But when I'm calling xmlpp::Document::write_to_string_formatted(), I > receive a SIGABRT signal. > > Any idea around ? > > Thanks I'll have a guess ... > * gdb info st : > > #0 0x403aa621 in kill () from /lib/libc.so.6 > #1 0x4017826b in raise (sig=6) at signals.c:65 > #2 0x403aba53 in abort () from /lib/libc.so.6 > #3 0x402f7895 in __cxxabiv1::__terminate(void (*)()) () from > /usr/lib/libstdc++.so.3 > #4 0x402f7880 in __cxxabiv1::__terminate(void (*)()) () from > /usr/lib/libstdc++.so.3 > #5 0x402f79e0 in __cxa_rethrow () from /usr/lib/libstdc++.so.3 > #6 0x402ede29 in std::__throw_length_error(char const*) () from > /usr/lib/libstdc++.so.3 > #7 0x0804aef1 in std::string::_Rep::_S_create(unsigned, > std::allocator<char> const&) (__capacity=4294967295, > __alloc=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:371 > #8 0x0804ab28 in char* std::string::_S_construct<char const*>(char > const*, char const*, std::allocator<char> const&, > std::forward_iterator_tag) ( > __beg=0x80516e0 "<?xml version=\"1.0\" > encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM > \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS > project-->\n<GEDCOM>blah ?</GEDCOM>\n", __end=0x80516df "", > __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:143 The frame above has __end < __beg, which is why an exception is thrown. 
> #9 0x40034674 in std::string::string(char const*, unsigned, > std::allocator<char> const&) (this=0xbffff560, > __s=0x80516e0 "<?xml version=\"1.0\" > encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM > \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS > project-->\n<GEDCOM>blah ?</GEDCOM>\n", __n=4294967295, > __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.h:666 That seems to be because __n = std::string::npos = size_t(-1) in frame #9, which must be passed by: > #10 0x401c40c4 in Glib::ustring::ustring(char const*, unsigned) > (this=0xbffff560, > src=0x80516e0 "<?xml version=\"1.0\" > encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM > \"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS > project-->\n<GEDCOM>blah ?</GEDCOM>\n", n=169) > at /usr/include/g++-v3/bits/stl_alloc.h:571 n = 169 in that frame, but Glib::ustring seems to pass -1 to the std::string ctor. That looks as though utf8_byte_offset(src,n) returns -1, which I think means there was an invalid UTF8 character in the string. Is the data you pass to the string correctly UTF8-encoded? The gdb output shows the 'é' wasn't converted from latin1 to utf-8. > #11 0x40025d27 in xmlpp::Document::do_write_to_string(Glib::ustring > const&, bool) (this=0xbffff6a0, > encoding=@0xbffff5d0, format=true) at document.cc:309 jon -- "A woman drove me to drink, I never had the courtesy to thank her." - W.C. Fields |
From: Cyril P. <cyr...@la...> - 2005-01-11 19:59:58
|
Jonathan Wakely wrote: > On Mon, Jan 10, 2005 at 10:27:09PM +0100, Cyril PICARD wrote: > > >>Hi >> >>I'm writing an XML file with the xmlpp interface. >>Everything is fine with xmlpp::Document::write_to_file_formatted(); >>But when I'm calling xmlpp::Document::write_to_string_formatted(), I >>receive a SIGABRT signal. >> >>Any idea around ? >> >>Thanks > > > I'll have a guess ... > > >>* gdb info st : >> >>#0 0x403aa621 in kill () from /lib/libc.so.6 >>#1 0x4017826b in raise (sig=6) at signals.c:65 >>#2 0x403aba53 in abort () from /lib/libc.so.6 >>#3 0x402f7895 in __cxxabiv1::__terminate(void (*)()) () from >>/usr/lib/libstdc++.so.3 >>#4 0x402f7880 in __cxxabiv1::__terminate(void (*)()) () from >>/usr/lib/libstdc++.so.3 >>#5 0x402f79e0 in __cxa_rethrow () from /usr/lib/libstdc++.so.3 >>#6 0x402ede29 in std::__throw_length_error(char const*) () from >>/usr/lib/libstdc++.so.3 >>#7 0x0804aef1 in std::string::_Rep::_S_create(unsigned, >>std::allocator<char> const&) (__capacity=4294967295, >> __alloc=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:371 >>#8 0x0804ab28 in char* std::string::_S_construct<char const*>(char >>const*, char const*, std::allocator<char> const&, >>std::forward_iterator_tag) ( >> __beg=0x80516e0 "<?xml version=\"1.0\" >>encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM >>\"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS >>project-->\n<GEDCOM>blah ?</GEDCOM>\n", __end=0x80516df "", >> __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.tcc:143 > > > The frame above has __end < __beg, which is why an exception is thrown. 
> > >>#9 0x40034674 in std::string::string(char const*, unsigned, >>std::allocator<char> const&) (this=0xbffff560, >> __s=0x80516e0 "<?xml version=\"1.0\" >>encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM >>\"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS >>project-->\n<GEDCOM>blah ?</GEDCOM>\n", __n=4294967295, >> __a=@0xbffff500) at /usr/include/g++-v3/bits/basic_string.h:666 > > > That seems to be because __n = std::string::npos = size_t(-1) in frame #9, > which must be passed by: > > >>#10 0x401c40c4 in Glib::ustring::ustring(char const*, unsigned) >>(this=0xbffff560, >> src=0x80516e0 "<?xml version=\"1.0\" >>encoding=\"ISO-8859-1\"?>\n<!DOCTYPE GEDCOM SYSTEM >>\"gedcom60.dtd\">\n<!--Generated by libgedcomparser - part of GHOSTS >>project-->\n<GEDCOM>blah ?</GEDCOM>\n", n=169) >> at /usr/include/g++-v3/bits/stl_alloc.h:571 > > > n = 169 in that frame, but Glib::ustring seems to pass -1 to the > std::string ctor. That looks as though utf8_byte_offset(src,n) returns > -1, which I think means there was an invalid UTF8 character in the > string. Is the data you pass to the string correctly UTF8-encoded? > The gdb output shows the 'é' wasn't converted from latin1 to utf-8. > > >>#11 0x40025d27 in xmlpp::Document::do_write_to_string(Glib::ustring >>const&, bool) (this=0xbffff6a0, >> encoding=@0xbffff5d0, format=true) at document.cc:309 > > > jon > Thanks jon I understand your comments, but I don't understand how to correctly UTF-8-encode my string. I thought my conversion was ok since - I use the ConvertInput function provided in the libxml2 code samples - I successfully write the xml data into a file using Document::write_to_file_formatted |