diff --git a/degesch.c b/degesch.c index 76721e5..10f96af 100644 --- a/degesch.c +++ b/degesch.c @@ -3022,7 +3022,7 @@ irc_skip_statusmsg (struct server *s, const char *target) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// As of 2015, everything should be in UTF-8. And if it's not, we'll decode it +// As of 2020, everything should be in UTF-8. And if it's not, we'll decode it // as ISO Latin 1. This function should not be called on the whole message. static char * irc_to_utf8 (const char *text) @@ -7811,9 +7811,30 @@ irc_process_numeric (struct server *s, strv_free (&copy); } +static void +irc_fix_cut_off_utf8 (char **line) +{ + // A variation on utf8_validate(), we need to detect the -2 return + const char *p = *line, *end = strchr (p, 0); + int32_t codepoint; + while ((codepoint = utf8_decode (&p, end - p)) >= 0 + && codepoint <= 0x10FFFF /* TODO: move this check into a function */) + ; + if (codepoint != -2) + return; + + struct str fixed_up = str_make (); + str_append_data (&fixed_up, *line, p - *line); + str_append (&fixed_up, "\xEF\xBF\xBD" /* U+FFFD */); + cstr_set (line, str_steal (&fixed_up)); +} + static void irc_process_message (const struct irc_message *msg, struct server *s) { + if (msg->params.len) + irc_fix_cut_off_utf8 (&msg->params.vector[msg->params.len - 1]); + // TODO: make use of IRCv3.2 server-time (with fallback to unixtime_msec()) // -> change all calls to log_{server,nick,outcoming,ctcp}*() to take // an extra argument specifying time