rfc2047: detect partial multibyte sequences and decode them correctly

pull/10/head
Christian Neukirchen 8 years ago
parent 631b6c7c82
commit 27915af60d

@ -126,6 +126,7 @@ int
blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc) blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
{ {
iconv_t ic = (iconv_t)-1; iconv_t ic = (iconv_t)-1;
char *srcenc = 0;
char *b = src; char *b = src;
@ -134,11 +135,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
if (!s) if (!s)
goto nocodeok; goto nocodeok;
// keep track of partial multibyte sequences
char *partial = 0;
size_t partiallen = 0;
do { do {
char *t; char *t;
t = b; t = b;
while (t < s) // strip space-only inbetween encoded words while (t < s) // strip space-only inbetween encoded words
if (!isfws(*t++)) { if (!isfws(*t++)) {
if (partial) // mixed up encodings
goto nocode;
while (b < s && dlen) { while (b < s && dlen) {
*dst++ = *b++; *dst++ = *b++;
dlen--; dlen--;
@ -156,7 +163,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
goto nocode; goto nocode;
*e = 0; *e = 0;
ic = iconv_open(tgtenc, s); if (!srcenc || strcmp(srcenc, s) != 0) {
if (partial) // mixed up encodings
goto nocode;
free(srcenc);
srcenc = strdup(s);
if (!srcenc)
goto nocode;
if (ic != (iconv_t)-1)
iconv_close(ic);
ic = iconv_open(tgtenc, srcenc);
}
*e = '?'; *e = '?';
e++; e++;
@ -180,23 +197,38 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
else else
goto nocode; goto nocode;
if (partial) {
dec = realloc(dec, declen + partiallen);
if (!dec)
goto nocode;
memmove(dec + partiallen, dec, declen);
memcpy(dec, partial, partiallen);
declen += partiallen;
free(partial);
partial = 0;
partiallen = 0;
}
decchunk = dec; decchunk = dec;
int r = iconv(ic, &dec, &declen, &dst, &dlen); int r = iconv(ic, &dec, &declen, &dst, &dlen);
if (r < 0) { if (r < 0) {
if (errno == E2BIG) { if (errno == E2BIG) {
iconv_close(ic);
break; break;
} else if (errno == EILSEQ || errno == EINVAL) { } else if (errno == EILSEQ) {
goto nocode; goto nocode;
} else if (errno == EINVAL) {
partial = malloc(declen);
if (!partial)
goto nocode;
memcpy(partial, dec, declen);
partiallen = declen;
} else { } else {
perror("iconv"); perror("iconv");
goto nocode; goto nocode;
} }
} }
iconv_close(ic); while (!partial && declen && dlen) {
while (declen && dlen) {
*dst++ = *dec++; *dst++ = *dec++;
declen--; declen--;
dlen--; dlen--;
@ -214,13 +246,17 @@ blaze822_decode_rfc2047(char *dst, char *src, size_t dlen, char *tgtenc)
*dst = 0; *dst = 0;
if (ic != (iconv_t)-1)
iconv_close(ic);
free(srcenc);
return 1; return 1;
nocode: nocode:
fprintf(stderr, "error decoding rfc2047\n");
if (ic != (iconv_t)-1) if (ic != (iconv_t)-1)
iconv_close(ic); iconv_close(ic);
free(srcenc);
fprintf(stderr, "error decoding rfc2047\n");
nocodeok: nocodeok:
while (*src && dlen) { while (*src && dlen) {
*dst++ = *src++; *dst++ = *src++;
@ -257,6 +293,10 @@ main() {
char test4dec[255]; char test4dec[255];
blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec, "UTF-8"); blaze822_decode_rfc2047(test4dec, test4, sizeof test4dec, "UTF-8");
printf("%s\n", test4dec); printf("%s\n", test4dec);
char test5[] = "=?UTF-8?Q?z=E2=80?= =?UTF-8?Q?=99z?=";
char test5dec[255];
blaze822_decode_rfc2047(test5dec, test5, sizeof test5dec, "UTF-8");
printf("%s\n", test5dec);
} }
#endif #endif

Loading…
Cancel
Save