From: Eric W. <war...@us...> - 2001-10-18 20:57:02
|
Update of /cvsroot/gaim/gaim/src/protocols/gg In directory usw-pr-cvs1:/tmp/cvs-serv14705/src/protocols/gg Modified Files: .cvsignore Makefile.am gg.c Added Files: iconv_string.c iconv_string.h Log Message: fun stuff. --- NEW FILE: iconv_string.c --- /* Copyright (C) 1999-2001 Bruno Haible. This file is not part of the GNU LIBICONV Library. This file is put into the public domain. */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #ifdef HAVE_ICONV #include "iconv_string.h" #include <iconv.h> #include <errno.h> #include <stdlib.h> #include <string.h> #define tmpbufsize 4096 int iconv_string (const char* tocode, const char* fromcode, const char* start, const char* end, char** resultp, size_t* lengthp) { iconv_t cd = iconv_open(tocode,fromcode); size_t length; char* result; if (cd == (iconv_t)(-1)) { if (errno != EINVAL) return -1; /* Unsupported fromcode or tocode. Check whether the caller requested autodetection. */ if (!strcmp(fromcode,"autodetect_utf8")) { int ret; /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */ ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp); if (!(ret < 0 && errno == EILSEQ)) return ret; ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp); return ret; } if (!strcmp(fromcode,"autodetect_jp")) { int ret; /* Try 7-bit encoding first. If the input contains bytes >= 0x80, it will fail. */ ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp); if (!(ret < 0 && errno == EILSEQ)) return ret; /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This is unavoidable. People will condemn SHIFT_JIS. If we tried SHIFT_JIS first, then some short EUC-JP inputs would come out wrong, and people would condemn EUC-JP and Unix, which would not be good. */ ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp); if (!(ret < 0 && errno == EILSEQ)) return ret; /* Finally try SHIFT_JIS. */ ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp); return ret; } if (!strcmp(fromcode,"autodetect_kr")) { int ret; /* Try 7-bit encoding first. If the input contains bytes >= 0x80, it will fail. */ ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp); if (!(ret < 0 && errno == EILSEQ)) return ret; /* Finally try EUC-KR. */ ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp); return ret; } errno = EINVAL; return -1; } /* Determine the length we need. */ { size_t count = 0; char tmpbuf[tmpbufsize]; const char* inptr = start; size_t insize = end-start; while (insize > 0) { char* outptr = tmpbuf; size_t outsize = tmpbufsize; size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); if (res == (size_t)(-1)) { if (errno == EINVAL) break; else { int saved_errno = errno; iconv_close(cd); errno = saved_errno; return -1; } } count += outptr-tmpbuf; } { char* outptr = tmpbuf; size_t outsize = tmpbufsize; size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); if (res == (size_t)(-1)) { int saved_errno = errno; iconv_close(cd); errno = saved_errno; return -1; } count += outptr-tmpbuf; } length = count; } if (lengthp != NULL) *lengthp = length; if (resultp == NULL) { iconv_close(cd); return 0; } result = (*resultp == NULL ? malloc(length) : realloc(*resultp,length)); *resultp = result; if (length == 0) { iconv_close(cd); return 0; } if (result == NULL) { iconv_close(cd); errno = ENOMEM; return -1; } iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */ /* Do the conversion for real. */ { const char* inptr = start; size_t insize = end-start; char* outptr = result; size_t outsize = length; while (insize > 0) { size_t res = iconv(cd,&inptr,&insize,&outptr,&outsize); if (res == (size_t)(-1)) { if (errno == EINVAL) break; else { int saved_errno = errno; iconv_close(cd); errno = saved_errno; return -1; } } } { size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); if (res == (size_t)(-1)) { int saved_errno = errno; iconv_close(cd); errno = saved_errno; return -1; } } if (outsize != 0) abort(); } iconv_close(cd); return 0; } #endif --- NEW FILE: iconv_string.h --- /* Copyright (C) 1999-2001 Bruno Haible. This file is not part of the GNU LIBICONV Library. This file is put into the public domain. */ /* * This C function converts an entire string from one encoding to another, * using iconv. Easier to use than iconv() itself, and supports autodetect * encodings on input. * * int iconv_string (const char* tocode, const char* fromcode, * const char* start, const char* end, * char** resultp, size_t* lengthp) * * Converts a memory region given in encoding FROMCODE to a new memory * region in encoding TOCODE. FROMCODE and TOCODE are as for iconv_open(3), * except that FROMCODE may be one of the values * "autodetect_utf8" supports ISO-8859-1 and UTF-8 * "autodetect_jp" supports EUC-JP, ISO-2022-JP-2 and SHIFT_JIS * "autodetect_kr" supports EUC-KR and ISO-2022-KR * The input is in the memory region between start (inclusive) and end * (exclusive). If resultp is not NULL, the output string is stored in * *resultp; malloc/realloc is used to allocate the result. * * This function does not treat zero characters specially. * * Return value: 0 if successful, otherwise -1 and errno set. Particular * errno values: EILSEQ and ENOMEM. * * Example: * const char* s = ...; * char* result = NULL; * if (iconv_string("UCS-4-INTERNAL", "autodetect_utf8", * s, s+strlen(s)+1, &result, NULL) < 0) * perror("iconv_string"); * */ #include <stddef.h> #ifdef __cplusplus extern "C" { #endif extern int iconv_string (const char* tocode, const char* fromcode, const char* start, const char* end, char** resultp, size_t* lengthp); #ifdef __cplusplus } #endif Index: .cvsignore =================================================================== RCS file: /cvsroot/gaim/gaim/src/protocols/gg/.cvsignore,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- .cvsignore 2001/09/29 23:06:30 1.1 +++ .cvsignore 2001/10/18 20:56:59 1.2 @@ -5,3 +5,4 @@ gg.lo libgg.la libgg.lo +iconv_string.lo Index: Makefile.am =================================================================== RCS file: /cvsroot/gaim/gaim/src/protocols/gg/Makefile.am,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- Makefile.am 2001/09/29 23:06:30 1.1 +++ Makefile.am 2001/10/18 20:56:59 1.2 @@ -13,6 +13,8 @@ libgg_a_SOURCES = libgg.c \ libgg.h \ + iconv_string.c \ + iconv_string.h \ gg.c else @@ -23,6 +25,8 @@ libgg_la_SOURCES = libgg.c \ libgg.h \ + iconv_string.c \ + iconv_string.h \ gg.c endif Index: gg.c =================================================================== RCS file: /cvsroot/gaim/gaim/src/protocols/gg/gg.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- gg.c 2001/10/16 23:24:34 1.4 +++ gg.c 2001/10/18 20:56:59 1.5 @@ -20,7 +20,9 @@ * */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <netdb.h> #include <unistd.h> @@ -40,6 +42,7 @@ #endif #ifdef HAVE_ICONV #include <iconv.h> +#include "iconv_string.h" #endif /* Library from EKG (Eksperymentalny Klient Gadu-Gadu) */ #include "libgg.h" @@ -89,35 +92,12 @@ static gchar *charset_convert(const gchar *locstr, char *encsrc, char *encdst) { + gchar *result = NULL; #ifdef HAVE_ICONV - gchar *dststr; - size_t loclen, dstlen; - gchar *fsave, *tsave; - size_t count; - static iconv_t cd = (iconv_t)(-1); - - if (cd == (iconv_t)(-1)) { - cd = iconv_open(encdst, encsrc); - if (cd == (iconv_t)(-1)) { - return g_strdup(locstr); - } - } - - loclen = strlen(locstr); - /* we are ready for multibyte conversions */ - dstlen = MB_LEN_MAX * loclen; - dststr = g_new0(gchar, dstlen + 1); - fsave = (gchar *)locstr; - tsave = dststr; - count = iconv(cd, &fsave, &loclen, &tsave, &dstlen); - if (count == -1) { - g_free(dststr); - return g_strdup(locstr); - } - return dststr; -#else - return g_strdup(locstr); + if (iconv_string(encdst, encsrc, locstr, locstr+strlen(locstr)+1, &result, NULL) < 0) #endif + return g_strdup(locstr); + return result; } static gboolean invalid_uin(char *uin) |