degesch: fix crash on invalid cp1252 characters

We don't even really need iconv here.
This commit is contained in:
Přemysl Eric Janouch 2016-04-03 04:05:04 +02:00
parent 695d615225
commit 3a8d70de66
1 changed file with 36 additions and 14 deletions

View File

@ -1973,7 +1973,6 @@ struct app_context
iconv_t term_to_utf8; ///< Terminal encoding to UTF-8 iconv_t term_to_utf8; ///< Terminal encoding to UTF-8
iconv_t term_from_utf8; ///< UTF-8 to terminal encoding iconv_t term_from_utf8; ///< UTF-8 to terminal encoding
iconv_t latin1_to_utf8; ///< ISO Latin 1 to UTF-8
struct input *input; ///< User interface struct input *input; ///< User interface
@ -2054,12 +2053,9 @@ app_context_init (struct app_context *self)
self->backlog_limit = 1000; self->backlog_limit = 1000;
self->last_displayed_msg_time = time (NULL); self->last_displayed_msg_time = time (NULL);
// Windows 1252 redefines several silly control characters as glyphs
char *native = nl_langinfo (CODESET); char *native = nl_langinfo (CODESET);
if (!app_iconv_open (&self->term_from_utf8, native, "UTF-8") if (!app_iconv_open (&self->term_from_utf8, native, "UTF-8")
|| !app_iconv_open (&self->term_to_utf8, "UTF-8", native) || !app_iconv_open (&self->term_to_utf8, "UTF-8", native))
|| (!app_iconv_open (&self->latin1_to_utf8, "UTF-8", "WINDOWS-1252")
&& !app_iconv_open (&self->latin1_to_utf8, "UTF-8", "ISO-8859-1")))
exit_fatal ("creating the UTF-8 conversion object failed: %s", exit_fatal ("creating the UTF-8 conversion object failed: %s",
strerror (errno)); strerror (errno));
@ -2100,7 +2096,6 @@ app_context_free (struct app_context *self)
str_map_free (&self->servers); str_map_free (&self->servers);
poller_free (&self->poller); poller_free (&self->poller);
iconv_close (self->latin1_to_utf8);
iconv_close (self->term_from_utf8); iconv_close (self->term_from_utf8);
iconv_close (self->term_to_utf8); iconv_close (self->term_to_utf8);
@ -2915,14 +2910,41 @@ irc_skip_statusmsg (struct server *s, const char *target)
// As of 2015, everything should be in UTF-8. And if it's not, we'll decode it // As of 2015, everything should be in UTF-8. And if it's not, we'll decode it
// as ISO Latin 1. This function should not be called on the whole message. // as ISO Latin 1. This function should not be called on the whole message.
static char * static char *
irc_to_utf8 (struct app_context *ctx, const char *text) irc_to_utf8 (const char *text)
{ {
if (!text) if (!text)
return NULL; return NULL;
size_t len = strlen (text) + 1; size_t len = strlen (text) + 1;
if (utf8_validate (text, len)) if (utf8_validate (text, len))
return xstrdup (text); return xstrdup (text);
return iconv_xstrdup (ctx->latin1_to_utf8, (char *) text, len, NULL);
// Windows 1252 redefines several silly C1 control characters as glyphs
static const char *c1[32] =
{
"\xe2\x82\xac", "\xc2\x81", "\xe2\x80\x9a", "\xc6\x92",
"\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
"\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9",
"\xc5\x92", "\xc2\x8d", "\xc5\xbd", "\xc2\x8f",
"\xc2\x90", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c",
"\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
"\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba",
"\xc5\x93", "\xc2\x9d", "\xc5\xbe", "\xc5\xb8",
};
struct str s;
str_init (&s);
for (const char *p = text; *p; p++)
{
int c = *(unsigned char *) p;
if (c < 0x80)
str_append_c (&s, c);
else if (c < 0xA0)
str_append (&s, c1[c & 0x1f]);
else
str_append_data (&s,
(char[]) {0xc0 | (c >> 6), 0x80 | (c & 0x3f)}, 2);
}
return str_steal (&s);
} }
// This function is used to output debugging IRC traffic to the terminal. // This function is used to output debugging IRC traffic to the terminal.
@ -2931,7 +2953,7 @@ irc_to_utf8 (struct app_context *ctx, const char *text)
static char * static char *
irc_to_term (struct app_context *ctx, const char *text) irc_to_term (struct app_context *ctx, const char *text)
{ {
char *utf8 = irc_to_utf8 (ctx, text); char *utf8 = irc_to_utf8 (text);
char *term = iconv_xstrdup (ctx->term_from_utf8, utf8, -1, NULL); char *term = iconv_xstrdup (ctx->term_from_utf8, utf8, -1, NULL);
free (utf8); free (utf8);
return term; return term;
@ -3096,7 +3118,7 @@ formatter_parse_nick (struct formatter *self, char *s)
// which would also make us not cut off the userhost part, ever // which would also make us not cut off the userhost part, ever
if (irc_is_channel (self->s, irc_skip_statusmsg (self->s, s))) if (irc_is_channel (self->s, irc_skip_statusmsg (self->s, s)))
{ {
char *tmp = irc_to_utf8 (self->ctx, s); char *tmp = irc_to_utf8 (s);
FORMATTER_ADD_TEXT (self, tmp); FORMATTER_ADD_TEXT (self, tmp);
free (tmp); free (tmp);
return; return;
@ -3120,7 +3142,7 @@ formatter_parse_nick (struct formatter *self, char *s)
FORMATTER_ADD_ITEM (self, FG_COLOR, .color = color); FORMATTER_ADD_ITEM (self, FG_COLOR, .color = color);
char *x = irc_to_utf8 (self->ctx, nick); char *x = irc_to_utf8 (nick);
free (nick); free (nick);
FORMATTER_ADD_TEXT (self, x); FORMATTER_ADD_TEXT (self, x);
free (x); free (x);
@ -3141,7 +3163,7 @@ formatter_parse_nick_full (struct formatter *self, char *s)
FORMATTER_ADD_TEXT (self, " ("); FORMATTER_ADD_TEXT (self, " (");
FORMATTER_ADD_ITEM (self, ATTR, .attribute = ATTR_USERHOST); FORMATTER_ADD_ITEM (self, ATTR, .attribute = ATTR_USERHOST);
char *x = irc_to_utf8 (self->ctx, userhost); char *x = irc_to_utf8 (userhost);
FORMATTER_ADD_TEXT (self, x); FORMATTER_ADD_TEXT (self, x);
free (x); free (x);
@ -3181,12 +3203,12 @@ restart:
break; break;
case 'S': case 'S':
tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *))); tmp = irc_to_utf8 ((s = va_arg (*ap, char *)));
str_append (buf, tmp); str_append (buf, tmp);
free (tmp); free (tmp);
break; break;
case 'm': case 'm':
tmp = irc_to_utf8 (self->ctx, (s = va_arg (*ap, char *))); tmp = irc_to_utf8 ((s = va_arg (*ap, char *)));
formatter_parse_mirc (self, tmp); formatter_parse_mirc (self, tmp);
free (tmp); free (tmp);
break; break;