degesch: halfplement word completion

This was a bit painful.
2015-05-07 05:19:13 +02:00
parent 4f5d171799
commit 4ecf8d90da
1 changed files with 310 additions and 4 deletions
--- a/degesch.c
+++ b/degesch.c
@@ -53,6 +53,7 @@ enum
 #include <locale.h>
 #include <pwd.h>
 #include <sys/utsname.h>
+#include <wchar.h>

 #include <termios.h>
 #ifndef TIOCGWINSZ
@@ -5022,6 +5023,268 @@ irc_connect (struct server *s, bool *should_retry, struct error **e)
 	return true;
 }

+// --- Word completion ---------------------------------------------------------
+
+// The amount of crap that goes into this is truly insane.
+// It's mostly because of Editline's total ignorance of this task.
+
+struct completion_word                 // Location of one word within the line
+{
+	size_t start;                       ///< Byte offset to start of word
+	size_t end;                         ///< Byte offset just past the end of word
+};
+
+struct completion                      // State for completing one input line
+{
+	char *line;                         ///< Owned copy of the line being completed
+
+	struct completion_word *words;      ///< Word locations, in order of appearance
+	size_t words_len;                   ///< Number of words
+	size_t words_alloc;                 ///< Number of words allocated
+
+	size_t location;                    ///< Index of the word being completed
+};
+
+/// Resets every field of a completion object to zero: no line, no words.
+static void
+completion_init (struct completion *self)
+{
+	memset (self, 0, sizeof (struct completion));
+}
+
+/// Releases the memory owned by a completion object.  The object itself is
+/// not freed and its pointers are left dangling, so it needs to be
+/// reinitialized before any further use.
+static void
+completion_free (struct completion *self)
+{
+	free (self->words);
+	free (self->line);
+}
+
+/// Appends a word spanning the byte offsets [start, end) of the line,
+/// doubling the capacity of the array of words whenever it gets full.
+static void
+completion_add_word (struct completion *self, size_t start, size_t end)
+{
+	if (!self->words)
+		self->words = xcalloc ((self->words_alloc = 4), sizeof *self->words);
+	if (self->words_len == self->words_alloc)
+	{
+		// Assuming xrealloc() mirrors realloc(), the size is given in bytes,
+		// not in the number of elements
+		self->words_alloc <<= 1;
+		self->words = xrealloc (self->words,
+			self->words_alloc * sizeof *self->words);
+	}
+
+	// The counter has to advance, or each word would overwrite the first slot
+	// and the array would appear empty to everyone else
+	self->words[self->words_len++] = (struct completion_word) { start, end };
+}
+
+/// Stores a copy of the line, as made by xstrndup(), and records where each
+/// of its words starts and ends.  Words are separated by runs of characters
+/// from WORD_BREAKING_CHARS.
+static void
+completion_parse (struct completion *self, const char *line, size_t len)
+{
+	self->line = xstrndup (line, len);
+
+	// An empty word gets recorded at the very beginning when the line starts
+	// with separators, and at the very end when it finishes with them
+	size_t pos = 0, separators = 0;
+	do
+	{
+		size_t word_start = pos;
+		size_t word_end = word_start
+			+ strcspn (self->line + word_start, WORD_BREAKING_CHARS);
+		separators = strspn (self->line + word_end, WORD_BREAKING_CHARS);
+		pos = word_end + separators;
+
+		completion_add_word (self, word_start, word_end);
+	}
+	while (separators);
+}
+
+/// Sets "location" to the index of the word preceding the first word that
+/// starts beyond the given byte offset into the line.  With words stored in
+/// order of appearance, that is the last word starting at or before "offset".
+static void
+completion_locate (struct completion *self, size_t offset)
+{
+	size_t following = 0;
+	while (following < self->words_len
+		&& self->words[following].start <= offset)
+		following++;
+
+	// This wraps around when not even the first word qualifies,
+	// such as when there are no words at all
+	self->location = following - 1;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+struct utf8_iter                       // Cursor over the characters of a string
+{
+	const char *s;                      ///< String iterator (next character)
+	size_t len;                         ///< How many bytes remain from "s" on
+};
+
+/// Positions the iterator at the beginning of a NUL-terminated string.
+static void
+utf8_iter_init (struct utf8_iter *self, const char *s)
+{
+	self->s = s;
+	self->len = strlen (s);
+}
+
+// TODO: return the value of the codepoint, that will simplify things
+static const char *
+utf8_iter_next (struct utf8_iter *self, size_t *len)
+{
+	if (!self->len)                     // nothing left; "*len" stays untouched
+		return NULL;
+
+	const char *old = self->s;          // start of the character to be returned
+	if (!soft_assert ((self->s = utf8_next (old, self->len))))
+	{
+		// Invalid UTF-8; empty the iterator so that further calls return NULL
+		self->len = 0;
+		return NULL;
+	}
+
+	self->len -= (*len = self->s - old);  // "*len" is the character's byte length
+	return old;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// XXX: this isn't completely right because Unicode, but let's keep it simple.
+//   At worst it will stop before a combining mark, or fail to compare
+//   non-ASCII identifiers case-insensitively.
+
+/// Returns the length in bytes of the longest prefix shared by all "len"
+/// strings in "vector".  Single-byte characters are compared through
+/// tolower_ascii(), multibyte ones bytewise.  The result is zero when there
+/// are no strings or when the first one is NULL.
+static size_t
+utf8_common_prefix (const char **vector, size_t len)
+{
+	// An empty vector has no first item to look at, and a variable-length
+	// array must not be declared with zero elements
+	if (!vector || !len || !vector[0])
+		return 0;
+
+	struct utf8_iter a[len];
+	for (size_t i = 0; i < len; i++)
+		utf8_iter_init (&a[i], vector[i]);
+
+	// Walk the first string character by character, advancing all the other
+	// iterators in lockstep and stopping at the first disagreement
+	size_t prefix = 0;
+	size_t ch_len;
+	const char *ch;
+	while ((ch = utf8_iter_next (&a[0], &ch_len)))
+	{
+		for (size_t i = 1; i < len; i++)
+		{
+			size_t other_len;
+			const char *other = utf8_iter_next (&a[i], &other_len);
+
+			// Not bothering with different length or lowercasing non-ASCII
+			if (!other || ch_len != other_len
+			 || (ch_len == 1 && tolower_ascii (*ch) != tolower_ascii (*other))
+			 || (ch_len != 1 && memcmp (ch, other, ch_len)))
+				return prefix;
+		}
+		prefix += ch_len;
+	}
+	return prefix;
+}
+
+/// Placeholder: the only match produced is the word with "hue" appended.
+/// The result is a vector with room for two pointers, the first of which
+/// is the match.
+static char **
+complete_word (struct app_context *ctx, struct completion *data,
+	const char *word)
+{
+	// Neither of these is needed by the placeholder below
+	(void) ctx;
+	(void) data;
+
+	// TODO: return a list of matches with the longest common part
+	//   (or a copy of "word" if none) as the first entry
+	// TODO: if there's only one match, don't bother computing the common part
+	char **matches = xcalloc (2, sizeof *matches);
+	matches[0] = xstrdup_printf ("%shue", word);
+	return matches;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// A special wrapper for iconv_xstrdup() that also fixes byte indexes into the
+/// original string to point to the right location in the UTF-8 output.
+/// Thanks, Readline!  Without you I would have never needed to deal with this.
+static char *
+locale_to_utf8 (struct app_context *ctx, const char *locale,
+	int *indexes[], size_t n_indexes)
+{
+	struct str utf8;  str_init (&utf8);
+	mbstate_t state;  memset (&state, 0, sizeof state);
+
+	size_t remaining = strlen (locale) + 1;  // includes the terminating NUL
+	const char *p = locale;
+
+	// Reset the shift state, FWIW
+	(void) iconv (ctx->term_to_utf8, NULL, NULL, NULL, NULL);
+
+	bool fixed[n_indexes];              // which indexes are already converted
+	memset (fixed, 0, sizeof fixed);
+
+	while (true)
+	{
+		size_t len = mbrlen (p, remaining, &state);
+
+		// Incomplete multibyte character or illegal sequence (probably)
+		if (len == (size_t) -2
+		 || len == (size_t) -1)
+		{
+			str_free (&utf8);
+			return NULL;                // indexes fixed so far stay modified
+		}
+
+		// Convert indexes into the multibyte string to UTF-8, each one at the
+		for (size_t i = 0; i < n_indexes; i++)  // first boundary at or past it
+			if (!fixed[i] && *indexes[i] <= p - locale)
+			{
+				*indexes[i] = utf8.len;
+				fixed[i] = true;
+			}
+
+		// End of string; checked last so that indexes pointing at it get fixed
+		if (!len)
+			break;
+
+		// EINVAL (incomplete sequence) should never happen and
+		// EILSEQ neither because we've already checked for that with mbrlen().
+		// E2BIG is what iconv_xstrdup solves.  This must succeed.
+		size_t ch_len;
+		char *ch = iconv_xstrdup (ctx->term_to_utf8, (char *) p, len, &ch_len);
+		hard_assert (ch != NULL);
+		str_append_data (&utf8, ch, ch_len);
+		free (ch);
+
+		p += len;                       // move on to the next character
+		remaining -= len;
+	}
+	return str_steal (&utf8);
+}
+
+static void
+utf8_vector_to_locale (struct app_context *ctx, char **vector)  // in place
+{
+	for (; *vector; vector++)           // the vector is NULL-terminated
+	{
+		char *converted = iconv_xstrdup
+			(ctx->term_from_utf8, *vector, -1, NULL);
+		if (!soft_assert (converted))   // failures become empty strings
+			converted = xstrdup ("");
+
+		free (*vector);                 // the converted copy takes its place
+		*vector = converted;
+	}
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// Takes a line in locale-specific encoding and position of a word to complete,
+/// returns a vector of matches in locale-specific encoding.
+///
+/// "start" and "end" are byte offsets into "line".  NULL is returned when
+/// the line cannot be converted to UTF-8 or when complete_word() returns none.
+/// The vector is iterated up to a NULL item, and the caller is expected to
+/// free the individual strings as well as the vector itself.
+static char **
+make_completions (struct app_context *ctx, char *line, int start, int end)
+{
+	// This also translates "start" and "end" into offsets within "line_utf8"
+	int *fixes[] = { &start, &end };
+	char *line_utf8 = locale_to_utf8 (ctx, line, fixes, N_ELEMENTS (fixes));
+	if (!line_utf8)
+		return NULL;
+
+	hard_assert (start >= 0 && end >= 0 && start <= end);
+
+	// From here on everything has to work with the UTF-8 copy: that is what
+	// the offsets now refer to, and the matches are converted from UTF-8 below.
+	// Using the original line would break in any locale that isn't UTF-8.
+	struct completion c;
+	completion_init (&c);
+	completion_parse (&c, line_utf8, strlen (line_utf8));
+	completion_locate (&c, start);
+	char *word = xstrndup (line_utf8 + start, end - start);
+	char **completions = complete_word (ctx, &c, word);
+	free (word);
+	completion_free (&c);
+
+	if (completions)
+		utf8_vector_to_locale (ctx, completions);
+
+	free (line_utf8);
+	return completions;
+}
+
 // --- GNU Readline user actions -----------------------------------------------

 #ifdef HAVE_READLINE
@@ -5127,13 +5390,13 @@ app_readline_bind_meta (char key, rl_command_func_t cb)
 static char **
 app_readline_completion (const char *text, int start, int end)
 {
+	// We will reconstruct the word from the line buffer and offsets ourselves
 	(void) text;
-	(void) start;
-	(void) end;

 	// Don't iterate over filenames and stuff
 	rl_attempted_completion_over = true;
-	return NULL;
+
+	return make_completions (g_ctx, rl_line_buffer, start, end);
 }

 static int
@@ -5222,7 +5485,50 @@ on_editline_complete (EditLine *editline, int key)
 	(void) key;
 	(void) editline;

-	return CC_ERROR;
+	struct app_context *ctx = g_ctx;
+	unsigned char result = CC_REFRESH_BEEP;
+
+	// First prepare what Readline would have normally done for us...
+	const LineInfo *info = el_line (editline);
+	int len = info->lastchar - info->buffer;
+	int point = info->cursor - info->buffer;
+	char *copy = xstrndup (info->buffer, len);
+
+	// XXX: possibly incorrect wrt. shift state encodings
+	int el_start = point, el_end = point;
+	while (el_start && !strchr (WORD_BREAKING_CHARS, copy[el_start - 1]))
+		el_start--;
+
+	char **completions = make_completions (ctx, copy, el_start, el_end);
+	if (!completions)
+		goto out;
+
+	// The most basic autocompletion.  I'm not sure if Readline's
+	// menu-complete can at all be implemented with Editline.
+
+	// Remove the original word.  Editline needs it in wide characters...
+	// XXX: possibly incorrect wrt. shift state encodings
+	copy[el_end] = '\0';
+	el_wdeletestr (editline, mbstowcs (NULL, copy + el_start, 0));
+
+	// Insert the best match instead
+	el_insertstr (editline, completions[0]);
+	if (!completions[1])
+	{
+		// If it is the only match, don't beep at the user
+		// but finish the word instead
+		el_insertstr (editline, " ");
+		result = CC_REFRESH;
+	}
+
+	// Free the vector of matches
+	for (char **p = completions; *p; p++)
+		free (*p);
+	free (completions);
+
+out:
+	free (copy);
+	return result;
 }

 static unsigned char