00001
00002 #include "define.h"
00003
/* Non-zero once pst_unicode_init() has run; cleared by pst_unicode_close(). */
static int unicode_up = 0;
/* Descriptor converting utf-16le to utf-8; assigned in pst_unicode_init(). */
static iconv_t i16to8;
/* Heap copy of the charset name the two target descriptors were opened for. */
static const char *target_charset = NULL;
/* Non-zero while i8totarget (utf-8 -> target charset) is open. */
static int target_open_from = 0;
/* Non-zero while target2i8 (target charset -> utf-8) is open. */
static int target_open_to = 0;
static iconv_t i8totarget = (iconv_t)-1;
static iconv_t target2i8 = (iconv_t)-1;


/**
 * Invoke DIE() with the remaining arguments when the condition x is false.
 *
 * The expansion is wrapped in do { } while (0) so that "ASSERT(...);" is a
 * single statement: a bare { } block followed by ';' would terminate an
 * enclosing if and break a following else.
 */
#define ASSERT(x,...) do { if( !(x) ) DIE(( __VA_ARGS__)); } while (0)
00014
00015
00018 static void pst_vbresize(pst_vbuf *vb, size_t len);
00019 static void pst_vbresize(pst_vbuf *vb, size_t len)
00020 {
00021 vb->dlen = 0;
00022
00023 if (vb->blen >= len) {
00024 vb->b = vb->buf;
00025 return;
00026 }
00027
00028 vb->buf = realloc(vb->buf, len);
00029 vb->b = vb->buf;
00030 vb->blen = len;
00031 }
00032
00033
00034 static size_t pst_vbavail(pst_vbuf * vb);
00035 static size_t pst_vbavail(pst_vbuf * vb)
00036 {
00037 return vb->blen - vb->dlen - (size_t)(vb->b - vb->buf);
00038 }
00039
00040
00041 static void open_targets(const char* charset);
00042 static void open_targets(const char* charset)
00043 {
00044 if (!target_charset || strcasecmp(target_charset, charset)) {
00045 if (target_open_from) iconv_close(i8totarget);
00046 if (target_open_to) iconv_close(target2i8);
00047 if (target_charset) free((char *)target_charset);
00048 target_charset = strdup(charset);
00049 target_open_from = 1;
00050 target_open_to = 1;
00051 i8totarget = iconv_open(target_charset, "utf-8");
00052 if (i8totarget == (iconv_t)-1) {
00053 target_open_from = 0;
00054 DEBUG_WARN(("Couldn't open iconv descriptor for utf-8 to %s.\n", target_charset));
00055 }
00056 target2i8 = iconv_open("utf-8", target_charset);
00057 if (target2i8 == (iconv_t)-1) {
00058 target_open_to = 0;
00059 DEBUG_WARN(("Couldn't open iconv descriptor for %s to utf-8.\n", target_charset));
00060 }
00061 }
00062 }
00063
00064
00065 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion);
00066 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion)
00067 {
00068 size_t inbytesleft = iblen;
00069 size_t icresult = (size_t)-1;
00070 size_t outbytesleft = 0;
00071 char *outbuf = NULL;
00072 int myerrno;
00073
00074 DEBUG_ENT("sbcs_conversion");
00075 pst_vbresize(dest, 2*iblen);
00076
00077 do {
00078 outbytesleft = dest->blen - dest->dlen;
00079 outbuf = dest->b + dest->dlen;
00080 icresult = iconv(conversion, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00081 myerrno = errno;
00082 dest->dlen = outbuf - dest->b;
00083 if (inbytesleft) pst_vbgrow(dest, 2*inbytesleft);
00084 } while ((size_t)-1 == icresult && E2BIG == myerrno);
00085
00086 if (icresult == (size_t)-1) {
00087 DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00088 pst_unicode_init();
00089 DEBUG_RET();
00090 return (size_t)-1;
00091 }
00092 DEBUG_RET();
00093 return 0;
00094 }
00095
00096
00097 static void pst_unicode_close();
00098 static void pst_unicode_close()
00099 {
00100 iconv_close(i16to8);
00101 if (target_open_from) iconv_close(i8totarget);
00102 if (target_open_to) iconv_close(target2i8);
00103 if (target_charset) free((char *)target_charset);
00104 target_charset = NULL;
00105 target_open_from = 0;
00106 target_open_to = 0;
00107 unicode_up = 0;
00108 }
00109
00110
/**
 * Check whether a utf-16 byte string contains a terminating zero code unit.
 *
 * The bytes are examined as consecutive 2-byte units starting at offset 0;
 * a trailing odd byte is ignored because it cannot form a complete unit.
 * A warning is logged when no terminator is found.
 *
 * @param str     bytes to inspect
 * @param length  number of bytes available at str
 * @return 1 if a zero unit was found, 0 otherwise
 */
static int utf16_is_terminated(const char *str, int length);
static int utf16_is_terminated(const char *str, int length)
{
    int i;

    // "i + 1 < length" keeps the second byte of every unit inside the buffer,
    // including when length is odd.
    for (i = 0; i + 1 < length; i += 2) {
        if (str[i] == 0 && str[i + 1] == 0) {
            return 1;
        }
    }

    DEBUG_WARN(("utf16 string is not zero terminated\n"));
    return 0;
}
00128
00129
00130 pst_vbuf *pst_vballoc(size_t len)
00131 {
00132 pst_vbuf *result = pst_malloc(sizeof(pst_vbuf));
00133 if (result) {
00134 result->dlen = 0;
00135 result->blen = 0;
00136 result->buf = NULL;
00137 pst_vbresize(result, len);
00138 }
00139 else DIE(("malloc() failure"));
00140 return result;
00141 }
00142
00143
00146 void pst_vbgrow(pst_vbuf *vb, size_t len)
00147 {
00148 if (0 == len)
00149 return;
00150
00151 if (0 == vb->blen) {
00152 pst_vbresize(vb, len);
00153 return;
00154 }
00155
00156 if (vb->dlen + len > vb->blen) {
00157 if (vb->dlen + len < vb->blen * 1.5)
00158 len = vb->blen * 1.5;
00159 char *nb = pst_malloc(vb->blen + len);
00160 if (!nb) DIE(("malloc() failure"));
00161 vb->blen = vb->blen + len;
00162 memcpy(nb, vb->b, vb->dlen);
00163
00164 free(vb->buf);
00165 vb->buf = nb;
00166 vb->b = vb->buf;
00167 } else {
00168 if (vb->b != vb->buf)
00169 memcpy(vb->buf, vb->b, vb->dlen);
00170 }
00171
00172 vb->b = vb->buf;
00173
00174 ASSERT(pst_vbavail(vb) >= len, "vbgrow(): I have failed in my mission.");
00175 }
00176
00177
00180 void pst_vbset(pst_vbuf * vb, void *b, size_t len)
00181 {
00182 pst_vbresize(vb, len);
00183 memcpy(vb->b, b, len);
00184 vb->dlen = len;
00185 }
00186
00187
00190 void pst_vbappend(pst_vbuf *vb, void *b, size_t len)
00191 {
00192 if (0 == vb->dlen) {
00193 pst_vbset(vb, b, len);
00194 return;
00195 }
00196 pst_vbgrow(vb, len);
00197 memcpy(vb->b + vb->dlen, b, len);
00198 vb->dlen += len;
00199 }
00200
00201
00202 void pst_unicode_init()
00203 {
00204 if (unicode_up) pst_unicode_close();
00205 i16to8 = iconv_open("utf-8", "utf-16le");
00206 if (i16to8 == (iconv_t)-1) {
00207 DEBUG_WARN(("Couldn't open iconv descriptor for utf-16le to utf-8.\n"));
00208 }
00209 unicode_up = 1;
00210 }
00211
00212
00213 size_t pst_vb_utf16to8(pst_vbuf *dest, const char *inbuf, int iblen)
00214 {
00215 size_t inbytesleft = iblen;
00216 size_t icresult = (size_t)-1;
00217 size_t outbytesleft = 0;
00218 char *outbuf = NULL;
00219 int myerrno;
00220
00221 if (!unicode_up) return (size_t)-1;
00222 pst_vbresize(dest, iblen);
00223
00224
00225 if (!utf16_is_terminated(inbuf, iblen))
00226 return (size_t)-1;
00227
00228 do {
00229 outbytesleft = dest->blen - dest->dlen;
00230 outbuf = dest->b + dest->dlen;
00231 icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00232 myerrno = errno;
00233 dest->dlen = outbuf - dest->b;
00234 if (inbytesleft) pst_vbgrow(dest, inbytesleft);
00235 } while ((size_t)-1 == icresult && E2BIG == myerrno);
00236
00237 if (icresult == (size_t)-1) {
00238 DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00239 pst_unicode_init();
00240 return (size_t)-1;
00241 }
00242 return (icresult) ? (size_t)-1 : 0;
00243 }
00244
00245
00246 size_t pst_vb_utf8to8bit(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00247 {
00248 open_targets(charset);
00249 if (!target_open_from) return (size_t)-1;
00250 return sbcs_conversion(dest, inbuf, iblen, i8totarget);
00251 }
00252
00253
00254 size_t pst_vb_8bit2utf8(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00255 {
00256 open_targets(charset);
00257 if (!target_open_to) return (size_t)-1;
00258 return sbcs_conversion(dest, inbuf, iblen, target2i8);
00259 }
00260