From d4413627e68325e954406e35c76b51325bb0e5d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Fri, 24 Apr 2015 22:34:44 +0200 Subject: [PATCH] degesch: better & working text wrapping Now we respect word boundaries. --- degesch.c | 63 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/degesch.c b/degesch.c index 962b587..1808de2 100644 --- a/degesch.c +++ b/degesch.c @@ -2398,15 +2398,15 @@ irc_process_message (const struct irc_message *msg, // --- Message autosplitting magic --------------------------------------------- +// This is the most basic acceptable algorithm; something like ICU with proper +// locale specification would be needed to make it work better. + static bool wrap_text (const char *message, int line_max, struct str_vector *output, struct error **e) { - // Attempt to split the message if it doesn't completely fit into a single - // IRC protocol message while trying not to break UTF-8. Unicode can still - // end up being wrong, though. As well as any mIRC formatting. - // - // TODO: at least try to word-wrap if nothing else + // Initialize to the first word, even if it's empty + const char *word_end = message + strcspn (message, " "); for (int message_left = strlen (message); message_left; ) { @@ -2415,12 +2415,46 @@ wrap_text (const char *message, int part_left = MIN (line_max, message_left); bool empty = true; + + // First try going word by word + const char *word_start; + int word_len = word_end - message; + while (part_left && word_len <= part_left) + { + if (word_len) + { + str_append_data (&m, message, word_len); + message += word_len; + message_left -= word_len; + part_left -= word_len; + empty = false; + } + + // Find the next word's end + word_start = message + strspn (message, " "); + word_end = word_start + strcspn (word_start, " "); + word_len = word_end - message; + } + + if (!empty) + { + // Discard whitespace between words if split + message_left -= word_start - message; + message = word_start; + + str_vector_add (output, m.str); + str_free (&m); + continue; + } + + // And if that doesn't help, cut the longest valid block of characters. + // Note that we never get to the end of the word, so "word_end" stays. while (true) { const char *next = utf8_next (message, message_left); hard_assert (next); - int char_len = message - next; + int char_len = next - message; if (char_len > part_left) break; @@ -2428,6 +2462,7 @@ wrap_text (const char *message, message += char_len; message_left -= char_len; + part_left -= char_len; empty = false; } @@ -2436,14 +2471,14 @@ wrap_text (const char *message, str_free (&m); - if (empty) - { - // Well, that's just weird - error_set (e, - "Message splitting was unsuccessful as there was " - "too little room for UTF-8 characters"); - return false; - } + if (!empty) + continue; + + // Well, that's just weird + error_set (e, + "Message splitting was unsuccessful as there was " + "too little room for UTF-8 characters"); + return false; } return true; }