From 4ecf8d90dae5ce512aca6787289d5b4a19565c22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Janouch?=
Date: Thu, 7 May 2015 05:19:13 +0200
Subject: [PATCH] degesch: halfplement word completion

This was a bit painful.
---
 degesch.c | 314 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 310 insertions(+), 4 deletions(-)

diff --git a/degesch.c b/degesch.c
index 98e4713..1e82ee3 100644
--- a/degesch.c
+++ b/degesch.c
@@ -53,6 +53,7 @@ enum
 #include
 #include
 #include
+#include
 #include
 
 #ifndef TIOCGWINSZ
@@ -5022,6 +5023,268 @@ irc_connect (struct server *s, bool *should_retry, struct error **e)
 	return true;
 }
 
+// --- Word completion ---------------------------------------------------------
+
+// The amount of crap that goes into this is truly insane.
+// It's mostly because of Editline's total ignorance of this task.
+
+struct completion_word
+{
+	size_t start;                       ///< Offset to start of word
+	size_t end;                         ///< Offset to end of word
+};
+
+struct completion
+{
+	char *line;                         ///< The line which is being completed
+
+	struct completion_word *words;      ///< Word locations
+	size_t words_len;                   ///< Number of words
+	size_t words_alloc;                 ///< Number of words allocated
+
+	size_t location;                    ///< Which word is being completed
+};
+
+/// Puts the object into an empty state: no line, no words, location 0.
+static void
+completion_init (struct completion *self)
+{
+	memset (self, 0, sizeof *self);
+}
+
+/// Releases the copied line and the word array; "self" itself is not freed.
+static void
+completion_free (struct completion *self)
+{
+	free (self->line);
+	free (self->words);
+}
+
+/// Appends the byte range [start, end) of one word to the word list,
+/// doubling the capacity of the array whenever it is full.
+static void
+completion_add_word (struct completion *self, size_t start, size_t end)
+{
+	if (!self->words)
+		self->words = xcalloc ((self->words_alloc = 4), sizeof *self->words);
+
+	// Assumes xrealloc() mirrors realloc() and takes a size in bytes,
+	// hence the element count has to be scaled by the element size
+	if (self->words_len == self->words_alloc)
+		self->words = xrealloc (self->words,
+			(self->words_alloc <<= 1) * sizeof *self->words);
+
+	// The counter must advance, or every word would land in slot 0
+	// and the list would always look empty to completion_locate()
+	self->words[self->words_len++] = (struct completion_word) { start, end };
+}
+
+/// Stores a private copy of the first "len" bytes of "line" and records
+/// each run of characters outside WORD_BREAKING_CHARS as a word.
+/// Offsets in the word list are relative to that copy.
+static void
+completion_parse (struct completion *self, const char *line, size_t len)
+{
+	self->line = xstrndup (line, len);
+
+	// The first and the last word may be empty
+	const char *s = self->line;
+	while (true)
+	{
+		const char *start = s;
+		size_t word_len = strcspn (s, WORD_BREAKING_CHARS);
+		const char *end = start + word_len;
+		s = end + strspn (end, WORD_BREAKING_CHARS);
+
+		completion_add_word (self, start - self->line, end - self->line);
+
+		// No separators followed the word, so we have hit the terminator
+		if (s == end)
+			break;
+	}
+}
+
+/// Sets "location" to the index of the last word that starts at or before
+/// "offset".  Falls back to 0 when no word qualifies (e.g. nothing has been
+/// parsed yet) rather than letting the unsigned subtraction wrap around.
+static void
+completion_locate (struct completion *self, size_t offset)
+{
+	size_t i = 0;
+	for (; i < self->words_len; i++)
+		if (self->words[i].start > offset)
+			break;
+	self->location = i ? i - 1 : 0;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+struct utf8_iter
+{
+	const char *s;                      ///< String iterator
+	size_t len;                         ///< How many bytes remain
+};
+
+/// Starts iterating over the NUL-terminated string "s", which is borrowed.
+static void
+utf8_iter_init (struct utf8_iter *self, const char *s)
+{
+	self->len = strlen ((self->s = s));
+}
+
+/// Returns a pointer to the next character and stores its length in bytes
+/// in "len".  Returns NULL once the string is exhausted, or when utf8_next()
+/// fails, in which case the iterator is put to the end for good.
+// TODO: return the value of the codepoint, that will simplify things
+static const char *
+utf8_iter_next (struct utf8_iter *self, size_t *len)
+{
+	if (!self->len)
+		return NULL;
+
+	const char *old = self->s;
+	if (!soft_assert ((self->s = utf8_next (old, self->len))))
+	{
+		// Invalid UTF-8
+		self->len = 0;
+		return NULL;
+	}
+
+	self->len -= (*len = self->s - old);
+	return old;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// XXX: this isn't completely right because Unicode, but let's keep it simple.
+//   At worst it will stop before a combining mark, or fail to compare
+//   non-ASCII identifiers case-insensitively.
+
+/// Returns the length in bytes of the longest prefix shared by all "len"
+/// strings in "vector".  Single-byte characters are compared with ASCII
+/// case folding, longer sequences have to match byte for byte.
+/// An empty or missing vector yields 0.
+static size_t
+utf8_common_prefix (const char **vector, size_t len)
+{
+	size_t prefix = 0;
+
+	// "len" has to be checked before anything else: a zero-length VLA
+	// is undefined, and a[0] as well as vector[0] would be out of bounds
+	if (!vector || !len || !vector[0])
+		return 0;
+
+	struct utf8_iter a[len];
+	for (size_t i = 0; i < len; i++)
+		utf8_iter_init (&a[i], vector[i]);
+
+	// Advance all iterators in lockstep, the first string drives the loop
+	size_t ch_len;
+	const char *ch;
+	while ((ch = utf8_iter_next (&a[0], &ch_len)))
+	{
+		for (size_t i = 1; i < len; i++)
+		{
+			size_t other_len;
+			const char *other = utf8_iter_next (&a[i], &other_len);
+
+			// Not bothering with different length or lowercasing non-ASCII
+			if (!other || ch_len != other_len
+			 || (ch_len == 1 && tolower_ascii (*ch) != tolower_ascii (*other))
+			 || (ch_len != 1 && memcmp (ch, other, ch_len)))
+				return prefix;
+		}
+		prefix += ch_len;
+	}
+	return prefix;
+}
+
+/// Placeholder completer: returns a NULL-terminated, heap-allocated vector
+/// whose only entry is "word" with "hue" appended.  The caller frees
+/// the strings as well as the vector.
+static char **
+complete_word (struct app_context *ctx, struct completion *data,
+	const char *word)
+{
+	// Not needed until real matching is implemented
+	(void) ctx;
+	(void) data;
+
+	// TODO: return a list of matches with the longest common part
+	//   (or a copy of "word" if none) as the first entry
+	// TODO: if there's only one match, don't bother computing the common part
+	char **result = xcalloc (2, sizeof *result);
+	result[0] = xstrdup_printf ("%shue", word);
+	return result;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// A special wrapper for iconv_xstrdup() that also fixes indexes into the
+/// original string to point to the right location in the output.
+/// Thanks, Readline! Without you I would have never needed to deal with this.
+static char *
+locale_to_utf8 (struct app_context *ctx, const char *locale,
+	int *indexes[], size_t n_indexes)
+{
+	// Walks "locale" one multibyte character at a time and appends each
+	// character, converted through ctx->term_to_utf8, to "utf8".
+	// Returns NULL when mbrlen() rejects the input; otherwise whatever
+	// str_steal() hands out -- presumably the accumulated buffer, which
+	// make_completions() later passes to free().
+	struct str utf8; str_init (&utf8);
+	mbstate_t state; memset (&state, 0, sizeof state);
+
+	// The terminator is counted in so that mbrlen() can return 0 for it
+	size_t remaining = strlen (locale) + 1;
+	const char *p = locale;
+
+	// Reset the shift state, FWIW
+	(void) iconv (ctx->term_to_utf8, NULL, NULL, NULL, NULL);
+
+	// Each index is translated exactly once, when "p" first reaches it
+	bool fixed[n_indexes];
+	memset (fixed, 0, sizeof fixed);
+
+	while (true)
+	{
+		size_t len = mbrlen (p, remaining, &state);
+
+		// Incomplete multibyte character or illegal sequence (probably)
+		if (len == (size_t) -2
+		 || len == (size_t) -1)
+		{
+			str_free (&utf8);
+			return NULL;
+		}
+
+		// Convert indexes into the multibyte string to UTF-8
+		// (an index pointing inside a character snaps to the next boundary;
+		// this runs before the end-of-string test on purpose, so that
+		// indexes equal to the length of the string get converted as well)
+		for (size_t i = 0; i < n_indexes; i++)
+			if (!fixed[i] && *indexes[i] <= p - locale)
+			{
+				*indexes[i] = utf8.len;
+				fixed[i] = true;
+			}
+
+		// End of string
+		if (!len)
+			break;
+
+		// EINVAL (incomplete sequence) should never happen and
+		// EILSEQ neither because we've already checked for that with mbrlen().
+		// E2BIG is what iconv_xstrdup solves. This must succeed.
+		size_t ch_len;
+		char *ch = iconv_xstrdup (ctx->term_to_utf8, (char *) p, len, &ch_len);
+		hard_assert (ch != NULL);
+		str_append_data (&utf8, ch, ch_len);
+		free (ch);
+
+		p += len;
+		remaining -= len;
+	}
+	return str_steal (&utf8);
+}
+
+// Converts every string of the NULL-terminated "vector" in place through
+// ctx->term_from_utf8.  The original strings are freed; a string that fails
+// to convert is replaced with an empty one so that the vector stays intact.
+static void
+utf8_vector_to_locale (struct app_context *ctx, char **vector)
+{
+	for (; *vector; vector++)
+	{
+		char *converted = iconv_xstrdup
+			(ctx->term_from_utf8, *vector, -1, NULL);
+		if (!soft_assert (converted))
+			converted = xstrdup ("");
+
+		free (*vector);
+		*vector = converted;
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// Takes a line in locale-specific encoding and position of a word to complete,
+/// returns a vector of matches in locale-specific encoding.
+static char **
+make_completions (struct app_context *ctx, char *line, int start, int end)
+{
+	// "start" and "end" come in as byte offsets into "line" and are
+	// rewritten by locale_to_utf8() to byte offsets into "line_utf8"
+	int *fixes[] = { &start, &end };
+	char *line_utf8 = locale_to_utf8 (ctx, line, fixes, N_ELEMENTS (fixes));
+	if (!line_utf8)
+		return NULL;
+
+	hard_assert (start >= 0 && end >= 0 && start <= end);
+
+	// From here on everything has to work with the UTF-8 copy: the offsets
+	// no longer match "line", and the completer expects UTF-8 input,
+	// seeing as its output is converted back to the locale encoding below
+	struct completion c;
+	completion_init (&c);
+	completion_parse (&c, line_utf8, strlen (line_utf8));
+	completion_locate (&c, start);
+	char *word = xstrndup (line_utf8 + start, end - start);
+	char **completions = complete_word (ctx, &c, word);
+	free (word);
+	completion_free (&c);
+
+	if (completions)
+		utf8_vector_to_locale (ctx, completions);
+
+	free (line_utf8);
+	return completions;
+}
+
 // --- GNU Readline user actions -----------------------------------------------
 
 #ifdef HAVE_READLINE
@@ -5127,13 +5390,13 @@ app_readline_bind_meta (char key, rl_command_func_t cb)
 static char **
 app_readline_completion (const char *text, int start, int end)
 {
+	// We will reconstruct that ourselves
 	(void) text;
-	(void) start;
-	(void) end;
 
 	// Don't iterate over filenames and stuff
 	rl_attempted_completion_over = true;
-	return NULL;
+
+	return make_completions (g_ctx, rl_line_buffer, start, end);
 }
 
 static int
@@ -5222,7 +5485,50 @@ on_editline_complete (EditLine *editline, int key)
 	(void) key;
 	(void) editline;
 
-	return CC_ERROR;
+	struct app_context *ctx = g_ctx;
+	unsigned char result = CC_REFRESH_BEEP;
+
+	// First prepare what Readline would have normally done for us...
+	const LineInfo *info = el_line (editline);
+	int len = info->lastchar - info->buffer;
+	int point = info->cursor - info->buffer;
+	char *copy = xstrndup (info->buffer, len);
+
+	// XXX: possibly incorrect wrt. shift state encodings
+	int el_start = point, el_end = point;
+	while (el_start && !strchr (WORD_BREAKING_CHARS, copy[el_start - 1]))
+		el_start--;
+
+	char **completions = make_completions (ctx, copy, el_start, el_end);
+	if (!completions)
+		goto out;
+
+	// The most basic autocompletion. I'm not sure if Readline's
+	// menu-complete can at all be implemented with Editline.
+
+	// Remove the original word. Editline needs it in wide characters...
+	// XXX: possibly incorrect wrt. shift state encodings
+	copy[el_end] = '\0';
+	el_wdeletestr (editline, mbstowcs (NULL, copy + el_start, 0));
+
+	// Insert the best match instead
+	el_insertstr (editline, completions[0]);
+	if (!completions[1])
+	{
+		// If it is the only match, don't beep at the user
+		// but finish the word instead
+		el_insertstr (editline, " ");
+		result = CC_REFRESH;
+	}
+
+	// Free the vector of matches
+	for (char **p = completions; *p; p++)
+		free (*p);
+	free (completions);
+
+out:
+	free (copy);
+	return result;
 }
 
 static unsigned char