From 410bcdcd78a5b060610cdaed5d6815de7681ae01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Mon, 21 Mar 2016 00:30:59 +0100 Subject: [PATCH] degesch: phase 1 of word wrapping implementation --- NEWS | 2 + degesch.c | 336 ++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 293 insertions(+), 45 deletions(-) diff --git a/NEWS b/NEWS index 431d85a..2c88bf2 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,8 @@ * degesch: added --format for previewing things like MOTD files + * degesch: implemented word wrapping in buffers + * kike: add support for IRCv3.2 server-time * ZyklonB: plugins now run in a dedicated data directory diff --git a/degesch.c b/degesch.c index dcad0b2..3e25caf 100644 --- a/degesch.c +++ b/degesch.c @@ -1937,6 +1937,7 @@ struct app_context bool beep_on_highlight; ///< Beep on highlight bool logging; ///< Logging to file enabled bool show_all_prefixes; ///< Show all prefixes before nicks + bool word_wrapping; ///< Enable simple word wrapping struct str_map servers; ///< Our servers @@ -2138,6 +2139,7 @@ static void on_config_logging_change (struct config_item *item); TRIVIAL_BOOLEAN_ON_CHANGE (isolate_buffers) TRIVIAL_BOOLEAN_ON_CHANGE (beep_on_highlight) +TRIVIAL_BOOLEAN_ON_CHANGE (word_wrapping) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2313,6 +2315,11 @@ static struct config_schema g_config_behaviour[] = .type = CONFIG_ITEM_BOOLEAN, .default_ = "off", .on_change = on_config_show_all_prefixes_change }, + { .name = "word_wrapping", + .comment = "Enable simple word wrapping in buffers", + .type = CONFIG_ITEM_BOOLEAN, + .default_ = "on", + .on_change = on_config_word_wrapping_change }, { .name = "logging", .comment = "Log buffer contents to file", .type = CONFIG_ITEM_BOOLEAN, @@ -3253,29 +3260,224 @@ formatter_add (struct formatter *self, const char *format, ...) va_end (ap); } +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct line_char_attrs +{ + int named; ///< Named attribute or -1 + int text; ///< Text attributes + int fg; ///< Foreground color (-1 for default) + int bg; ///< Background color (-1 for default) +}; + +struct line_char +{ + LIST_HEADER (struct line_char) + + char bytes[MB_LEN_MAX]; ///< The character + size_t len; ///< Length of the character in bytes + wchar_t wide; ///< The character as a wchar_t + int width; ///< Width of the character in cells + struct line_char_attrs attrs; ///< Attributes +}; + +static struct line_char * +line_char_new (const char *mb, size_t mb_len, wchar_t wc) +{ + struct line_char *self = xcalloc (1, sizeof *self); + memcpy (self->bytes, mb, (self->len = MIN (mb_len, sizeof self->bytes))); + self->width = wcwidth ((self->wide = wc)); + + self->attrs.bg = self->attrs.fg = -1; + self->attrs.named = ATTR_RESET; + return self; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct line_wrap_mark +{ + struct line_char *start; ///< First character + int used; ///< Display cells used +}; + +static void +line_wrap_mark_push (struct line_wrap_mark *mark, struct line_char *c) +{ + if (!mark->start) + mark->start = c; + mark->used += c->width; +} + +struct line_wrap_state +{ + struct line_char *result; ///< Head of result + struct line_char *result_tail; ///< Tail of result + + int line_used; ///< Line length before marks + int line_max; ///< Maximum line length + struct line_wrap_mark chunk; ///< All buffered text + struct line_wrap_mark overflow; ///< Overflowing text +}; + +static void +line_wrap_flush_split (struct line_wrap_state *s, struct line_wrap_mark *before) +{ + struct line_char *nl = line_char_new ("\n", 1, L'\n'); + LIST_INSERT_WITH_TAIL (s->result, s->result_tail, nl, before->start); + s->line_used = before->used; +} + +static void +line_wrap_flush (struct line_wrap_state *s, bool force_split) +{ + if (!s->overflow.start) + s->line_used += s->chunk.used; + else if (force_split || s->chunk.used > s->line_max) + { +#ifdef WRAP_UNNECESSARILY + // Use the entire line and split the chunk in the middle + line_wrap_flush_split (s, &s->overflow); +#else + // We don't actually _need_ to split here, and doing so will break + // link searching mechanisms in some terminals + s->line_used = s->overflow.used; +#endif + } + else + // Print the chunk in its entirety on a new line + line_wrap_flush_split (s, &s->chunk); + + memset (&s->chunk, 0, sizeof s->chunk); + memset (&s->overflow, 0, sizeof s->overflow); +} + +static void +line_wrap_nl (struct line_wrap_state *s) +{ + line_wrap_flush (s, true); + struct line_char *nl = line_char_new ("\n", 1, L'\n'); + LIST_APPEND_WITH_TAIL (s->result, s->result_tail, nl); + s->line_used = 0; +} + +static void +line_wrap_tab (struct line_wrap_state *s, struct line_char *c) +{ + line_wrap_flush (s, true); + if (s->line_used >= s->line_max) + line_wrap_nl (s); + + // Compute the number of characters needed to get to the next tab stop + int tab_width = ((s->line_used + 8) & ~7) - s->line_used; + // On overflow just fill the rest of the line with spaces + if (s->line_used + tab_width > s->line_max) + tab_width = s->line_max - s->line_used; + + s->line_used += tab_width; + while (tab_width--) + { + struct line_char *space = line_char_new (" ", 1, L' '); + space->attrs = c->attrs; + LIST_APPEND_WITH_TAIL (s->result, s->result_tail, space); + } +} + +static void +line_wrap_push_char (struct line_wrap_state *s, struct line_char *c) +{ + // Note that when processing whitespace here, any non-WS chunk has already + // been flushed, and thus it matters little if we flush with force split + if (wcschr (L"\r\f\v", c->wide)) + /* Skip problematic characters */; + else if (c->wide == L'\n') + line_wrap_nl (s); + else if (c->wide == L'\t') + line_wrap_tab (s, c); + else + goto use_as_is; + free (c); + return; + +use_as_is: + if (s->overflow.start + || s->line_used + s->chunk.used + c->width > s->line_max) + { + if (s->overflow.used + c->width > s->line_max) + { +#ifdef WRAP_UNNECESSARILY + // If the overflow overflows, restart on a new line + line_wrap_nl (s); +#else + // See line_wrap_flush(), we would end up on a new line anyway + line_wrap_flush (s, true); + s->line_used = 0; +#endif + } + else + line_wrap_mark_push (&s->overflow, c); + } + line_wrap_mark_push (&s->chunk, c); + LIST_APPEND_WITH_TAIL (s->result, s->result_tail, c); +} + +/// Basic word wrapping that respects wcwidth(3) and expands tabs. +/// Besides making text easier to read, it also fixes the problem with +/// formatting spilling over the entire new line on line wrap. +static struct line_char * +line_wrap (struct line_char *line, int max_width) +{ + struct line_wrap_state s = { .line_max = max_width }; + bool last_was_word_char = false; + LIST_FOR_EACH (struct line_char, c, line) + { + // Act on the right boundary of (\s*\S+) chunks + bool this_is_word_char = !wcschr (L" \t\r\n\f\v", c->wide); + if (last_was_word_char && !this_is_word_char) + line_wrap_flush (&s, false); + last_was_word_char = this_is_word_char; + + LIST_UNLINK (line, c); + line_wrap_push_char (&s, c); + } + + // Make sure to process the last word and return the modified list + line_wrap_flush (&s, false); + return s.result; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct exploder +{ + struct app_context *ctx; ///< Application context + struct line_char *result; ///< Result + struct line_char *result_tail; ///< Tail of result + struct line_char_attrs attrs; ///< Current attributes +}; + static bool -formatter_flush_attr - (struct attribute_printer *state, struct formatter_item *item) +explode_formatter_attr (struct exploder *self, struct formatter_item *item) { switch (item->type) { case FORMATTER_ITEM_ATTR: - attribute_printer_apply (state, item->attribute); - state->want = 0; - state->want_foreground = -1; - state->want_background = -1; + self->attrs.named = item->attribute; + self->attrs.text = 0; + self->attrs.fg = -1; + self->attrs.bg = -1; return true; case FORMATTER_ITEM_SIMPLE: - state->want ^= item->attribute; - attribute_printer_update (state); + self->attrs.named = -1; + self->attrs.text ^= item->attribute; return true; case FORMATTER_ITEM_FG_COLOR: - state->want_foreground = item->color; - attribute_printer_update (state); + self->attrs.named = -1; + self->attrs.fg = item->color; return true; case FORMATTER_ITEM_BG_COLOR: - state->want_background = item->color; - attribute_printer_update (state); + self->attrs.named = -1; + self->attrs.bg = item->color; return true; default: return false; @@ -3283,51 +3485,103 @@ formatter_flush_attr } static void -formatter_flush_text (struct app_context *ctx, const char *text, FILE *stream) +explode_text (struct exploder *self, const char *text) { - struct str sanitized; - str_init (&sanitized); + size_t term_len = 0; + char *term = iconv_xstrdup (self->ctx->term_from_utf8, + (char *) text, -1, &term_len); - // Throw away any potentially harmful control characters - char *term = iconv_xstrdup (ctx->term_from_utf8, (char *) text, -1, NULL); - for (char *p = term; *p; p++) - if (!strchr ("\a\b\x1b", *p)) - str_append_c (&sanitized, *p); + mbstate_t ps; + memset (&ps, 0, sizeof ps); + + wchar_t wch; + size_t len, processed = 0; + while ((len = mbrtowc (&wch, term + processed, term_len - processed, &ps))) + { + hard_assert (len != (size_t) -2 && len != (size_t) -1); + processed += len; + + // Throw away any potentially harmful control characters + // XXX: this is likely to break shift state encodings + if (wcschr (L"\a\b\x1b", wch)) + continue; + + struct line_char *c = line_char_new (term + processed - len, len, wch); + c->attrs = self->attrs; + LIST_APPEND_WITH_TAIL (self->result, self->result_tail, c); + } free (term); +} - fputs (sanitized.str, stream); - str_free (&sanitized); +static struct line_char * +formatter_to_chars (struct formatter *formatter) +{ + struct exploder self = { .ctx = formatter->ctx }; + self.attrs.fg = self.attrs.bg = self.attrs.named = -1; + + int attribute_ignore = 0; + for (size_t i = 0; i < formatter->items_len; i++) + { + struct formatter_item *iter = &formatter->items[i]; + if (iter->type == FORMATTER_ITEM_TEXT) + explode_text (&self, iter->text); + else if (iter->type == FORMATTER_ITEM_IGNORE_ATTR) + attribute_ignore += iter->attribute; + else if (attribute_ignore <= 0 + && !explode_formatter_attr (&self, iter)) + hard_assert (!"unhandled formatter item type"); + } + return self.result; } static void formatter_flush (struct formatter *self, FILE *stream, bool raw_attributes) { - if (!raw_attributes && !get_attribute_printer (stream)) + struct line_char *line = formatter_to_chars (self); + + if (!get_attribute_printer (stream) && !raw_attributes) { - for (size_t i = 0; i < self->items_len; i++) + LIST_FOR_EACH (struct line_char, c, line) { - struct formatter_item *iter = &self->items[i]; - if (iter->type == FORMATTER_ITEM_TEXT) - fputs (iter->text, stream); + fwrite (c->bytes, c->len, 1, stream); + free (c); } return; } + if (self->ctx->word_wrapping) + line = line_wrap (line, g_terminal.columns); + + // TODO: rewrite the sloppily hacked mess around attribute_printer; + // so far I just didn't want to break everything at once struct attribute_printer state; attribute_printer_init (&state, self->ctx, stream); attribute_printer_reset (&state); - int attribute_ignore = 0; - for (size_t i = 0; i < self->items_len; i++) + struct line_char_attrs attrs = + { .fg = -1, .bg = -1, .named = ATTR_RESET, .text = 0 }; + LIST_FOR_EACH (struct line_char, c, line) { - struct formatter_item *iter = &self->items[i]; - if (iter->type == FORMATTER_ITEM_TEXT) - formatter_flush_text (self->ctx, iter->text, stream); - else if (iter->type == FORMATTER_ITEM_IGNORE_ATTR) - attribute_ignore += iter->attribute; - else if (attribute_ignore <= 0 - && !formatter_flush_attr (&state, iter)) - hard_assert (!"unhandled formatter item type"); + if (attrs.fg != c->attrs.fg + || attrs.bg != c->attrs.bg + || attrs.named != c->attrs.named + || attrs.text != c->attrs.text) + { + if (c->attrs.named != -1) + attribute_printer_apply (&state, c->attrs.named); + else + { + state.want = c->attrs.text; + state.want_foreground = c->attrs.fg; + state.want_background = c->attrs.bg; + attribute_printer_reset (&state); + attribute_printer_update (&state); + } + attrs = c->attrs; + } + + fwrite (c->bytes, c->len, 1, stream); + free (c); } attribute_printer_reset (&state); } @@ -3393,14 +3647,6 @@ buffer_line_flush (struct buffer_line *line, struct formatter *f, FILE *output, for (struct formatter_item *iter = line->items; iter->type; iter++) formatter_add_item (f, *iter); - // XXX: we could reset attributes _before_ the newline. That, however, - // doesn't really work, because when the line wraps at the end of the - // screen and a background colour is set, the terminal paints the entire - // new line with that colour. "clr_to_eol" is not a solution in that - // case either, because it may delete the last character on a non-wrapped - // line, and while we can append an extra space as a workaround, that can - // cause an extra wrap for which I've found no way of avoiding. - // TODO: think about manual line wrapping; that way we can also word wrap formatter_add (f, "\n"); formatter_flush (f, output, raw_attributes); formatter_free (f);