degesch: halfplement word completion

This was a bit painful.
This commit is contained in:
Přemysl Eric Janouch 2015-05-07 05:19:13 +02:00
parent 4f5d171799
commit 4ecf8d90da
1 changed file with 310 additions and 4 deletions

314
degesch.c
View File

@ -53,6 +53,7 @@ enum
#include <locale.h>
#include <pwd.h>
#include <sys/utsname.h>
#include <wchar.h>
#include <termios.h>
#ifndef TIOCGWINSZ
@ -5022,6 +5023,268 @@ irc_connect (struct server *s, bool *should_retry, struct error **e)
return true;
}
// --- Word completion ---------------------------------------------------------
// The amount of crap that goes into this is truly insane.
// It's mostly because of Editline's total ignorance of this task.
/// Byte range occupied by a single word within the line being completed
struct completion_word
{
	/// Offset of the first byte of the word
	size_t start;
	/// Offset just past the last byte of the word
	size_t end;
};
/// State of one completion request: a copy of the line and its word layout
struct completion
{
	/// Copy of the line being completed, released by completion_free()
	char *line;

	/// Locations of the individual words within "line"
	struct completion_word *words;
	/// How many entries of "words" are in use
	size_t words_len;
	/// How many entries "words" has been allocated for
	size_t words_alloc;

	/// Index of the word being completed, set by completion_locate()
	size_t location;
};
static void
completion_init (struct completion *self)
{
memset (self, 0, sizeof *self);
}
/// Releases the memory owned by the completion object, that is the word array
/// and the copy of the line.  The object itself is left for the caller.
static void
completion_free (struct completion *self)
{
	free (self->words);
	free (self->line);
}
/// Appends a word spanning the byte offsets [start, end) to the word array,
/// growing the array as needed.
///
/// The array starts out with room for four words and doubles in capacity
/// every time it fills up.
static void
completion_add_word (struct completion *self, size_t start, size_t end)
{
	if (!self->words)
		self->words = xcalloc ((self->words_alloc = 4), sizeof *self->words);

	// xrealloc() takes a size in bytes, so the new capacity needs to be
	// scaled by the size of an element
	if (self->words_len == self->words_alloc)
		self->words = xrealloc (self->words,
			(self->words_alloc <<= 1) * sizeof *self->words);

	// The counter has to advance, otherwise each word would overwrite
	// the first slot and the array would appear empty to everyone else
	self->words[self->words_len++] = (struct completion_word) { start, end };
}
/// Stores a copy of the first "len" bytes of "line" and records the location
/// of every word in it, using WORD_BREAKING_CHARS as separators.
///
/// At least one word is always recorded.  When the line begins or ends with
/// separators, the first or the last word, respectively, is empty.
static void
completion_parse (struct completion *self, const char *line, size_t len)
{
	self->line = xstrndup (line, len);

	const char *cursor = self->line;
	bool more;
	do
	{
		// A word extends up to the nearest separator or the end of the line
		const char *word = cursor;
		const char *word_end = word + strcspn (word, WORD_BREAKING_CHARS);

		// Skip over the whole run of separators following the word
		cursor = word_end + strspn (word_end, WORD_BREAKING_CHARS);
		completion_add_word (self, word - self->line, word_end - self->line);

		// No separators after the word means that the line has ended
		more = cursor != word_end;
	}
	while (more);
}
/// Figures out which of the parsed words the byte "offset" belongs to and
/// stores its index in "location": it is the last word in order that does
/// not start after the offset.
static void
completion_locate (struct completion *self, size_t offset)
{
	// Count the leading words that begin at or before the offset
	size_t following = 0;
	while (following < self->words_len
		&& self->words[following].start <= offset)
		following++;

	self->location = following - 1;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// Cursor for walking over a string one UTF-8 sequence at a time
struct utf8_iter
{
	/// Current position within the string
	const char *s;
	/// Number of bytes left from the current position
	size_t len;
};
/// Points the iterator at the beginning of the NUL-terminated string "s",
/// with the whole length of the string remaining.
static void
utf8_iter_init (struct utf8_iter *self, const char *s)
{
	self->s = s;
	self->len = strlen (s);
}
// TODO: return the value of the codepoint, that will simplify things
/// Advances the iterator by one sequence, as delimited by utf8_next().
///
/// Returns a pointer to the beginning of the sequence that has been skipped
/// over and stores its length in bytes in "len".  Returns NULL once nothing
/// remains, or when utf8_next() fails, in which case the iterator is also
/// marked as exhausted and "len" is left untouched.
static const char *
utf8_iter_next (struct utf8_iter *self, size_t *len)
{
	if (self->len == 0)
		return NULL;

	const char *old = self->s;
	if (!soft_assert ((self->s = utf8_next (old, self->len))))
	{
		// Invalid UTF-8
		self->len = 0;
		return NULL;
	}

	const size_t consumed = self->s - old;
	*len = consumed;
	self->len -= consumed;
	return old;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// XXX: this isn't completely right because Unicode, but let's keep it simple.
//   At worst it will stop before a combining mark, or fail to compare
//   non-ASCII identifiers case-insensitively.
/// Computes the length in bytes of the longest prefix that is shared by all
/// of the "len" UTF-8 strings in "vector".  Single-byte characters are
/// compared through tolower_ascii(), multibyte sequences have to match
/// exactly.
///
/// The prefix is measured on the first string.  Zero is returned for an empty
/// or missing vector.
static size_t
utf8_common_prefix (const char **vector, size_t len)
{
	size_t prefix = 0;

	// The length goes first: without any strings there is nothing to look at,
	// and a variable-length array must not be declared with zero elements
	if (!len || !vector || !vector[0])
		return 0;

	struct utf8_iter a[len];
	for (size_t i = 0; i < len; i++)
		utf8_iter_init (&a[i], vector[i]);

	// Walk the first string and keep all the other iterators in lockstep
	size_t ch_len;
	const char *ch;
	while ((ch = utf8_iter_next (&a[0], &ch_len)))
	{
		for (size_t i = 1; i < len; i++)
		{
			size_t other_len;
			const char *other = utf8_iter_next (&a[i], &other_len);

			// Not bothering with different length or lowercasing non-ASCII
			if (!other || ch_len != other_len
			 || (ch_len == 1 && tolower_ascii (*ch) != tolower_ascii (*other))
			 || (ch_len != 1 && memcmp (ch, other, ch_len)))
				return prefix;
		}

		// Every string agrees on this character
		prefix += ch_len;
	}
	return prefix;
}
/// Produces a NULL-terminated vector of newly allocated strings for "word".
///
/// So far this is a placeholder that only returns a single entry, which is
/// the word with a fixed suffix attached.  Neither "ctx" nor "data" are
/// looked at yet.
static char **
complete_word (struct app_context *ctx, struct completion *data,
	const char *word)
{
	// TODO: return a list of matches with the longest common part
	//   (or a copy of "word" if none) as the first entry
	// TODO: if there's only one match, don't bother computing the common part

	// One slot for the only entry and another one for the terminator,
	// which xcalloc() has already cleared for us
	char **matches = xcalloc (2, sizeof *matches);
	char *only = xstrdup_printf ("%shue", word);
	matches[0] = only;
	return matches;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// A special wrapper for iconv_xstrdup() that also fixes indexes into the
/// original string to point to the right location in the output.
///
/// The input is walked one multibyte character at a time, as delimited by
/// mbrlen().  Whenever an index that hasn't been dealt with yet is found to
/// lie at or before the current input position, it gets replaced with
/// the current length of the output.
///
/// Returns NULL when mbrlen() reports an incomplete or invalid sequence.
/// Thanks, Readline!  Without you I would have never needed to deal with this.
static char *
locale_to_utf8 (struct app_context *ctx, const char *locale,
	int *indexes[], size_t n_indexes)
{
	struct str utf8; str_init (&utf8);
	mbstate_t state; memset (&state, 0, sizeof state);

	// Remembers which of the indexes have already been rewritten
	bool fixed[n_indexes];
	memset (fixed, 0, sizeof fixed);

	// Reset the shift state, FWIW
	(void) iconv (ctx->term_to_utf8, NULL, NULL, NULL, NULL);

	// The terminating NUL is counted in, we want mbrlen() to get to see it
	const char *cursor = locale;
	size_t remaining = strlen (locale) + 1;
	for (;;)
	{
		const size_t mb_len = mbrlen (cursor, remaining, &state);

		// Incomplete multibyte character or illegal sequence (probably)
		const bool incomplete = mb_len == (size_t) -2;
		const bool illegal    = mb_len == (size_t) -1;
		if (incomplete || illegal)
		{
			str_free (&utf8);
			return NULL;
		}

		// Convert indexes into the multibyte string to UTF-8
		for (size_t i = 0; i < n_indexes; i++)
		{
			if (fixed[i] || *indexes[i] > cursor - locale)
				continue;

			*indexes[i] = utf8.len;
			fixed[i] = true;
		}

		// End of string
		if (mb_len == 0)
			break;

		// EINVAL (incomplete sequence) should never happen and
		// EILSEQ neither because we've already checked for that with mbrlen().
		// E2BIG is what iconv_xstrdup solves.  This must succeed.
		size_t ch_len;
		char *ch = iconv_xstrdup (ctx->term_to_utf8,
			(char *) cursor, mb_len, &ch_len);
		hard_assert (ch != NULL);
		str_append_data (&utf8, ch, ch_len);
		free (ch);

		cursor += mb_len;
		remaining -= mb_len;
	}
	return str_steal (&utf8);
}
/// Replaces each string of a NULL-terminated vector with what iconv_xstrdup()
/// makes of it using the "term_from_utf8" conversion, freeing the originals.
/// Strings that fail to convert are replaced with empty strings.
static void
utf8_vector_to_locale (struct app_context *ctx, char **vector)
{
	char **item = vector;
	while (*item)
	{
		char *converted = iconv_xstrdup
			(ctx->term_from_utf8, *item, -1, NULL);
		if (!soft_assert (converted))
			converted = xstrdup ("");

		// Swap the converted string in place of the original one
		free (*item);
		*item++ = converted;
	}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// Takes a line in locale-specific encoding and position of a word to complete,
/// returns a vector of matches in locale-specific encoding.
///
/// "start" and "end" are byte offsets into "line" delimiting the word.
/// The result is whatever complete_word() returns, converted back to
/// the locale's encoding, or NULL when the line can't be converted to UTF-8.
static char **
make_completions (struct app_context *ctx, char *line, int start, int end)
{
	// This rewrites "start" and "end" to be offsets into "line_utf8"
	int *fixes[] = { &start, &end };
	char *line_utf8 = locale_to_utf8 (ctx, line, fixes, N_ELEMENTS (fixes));
	if (!line_utf8)
		return NULL;

	hard_assert (start >= 0 && end >= 0 && start <= end);

	// From now on everything has to work with the UTF-8 copy, since that is
	// what the fixed up offsets refer to; mixing them with the original line
	// would slice it in the wrong places whenever it contains non-ASCII
	struct completion c;
	completion_init (&c);
	completion_parse (&c, line_utf8, strlen (line_utf8));
	completion_locate (&c, start);
	char *word = xstrndup (line_utf8 + start, end - start);
	char **completions = complete_word (ctx, &c, word);
	free (word);
	completion_free (&c);

	// The matches come out in UTF-8, the caller wants the locale's encoding
	if (completions)
		utf8_vector_to_locale (ctx, completions);

	free (line_utf8);
	return completions;
}
// --- GNU Readline user actions -----------------------------------------------
#ifdef HAVE_READLINE
@ -5127,13 +5390,13 @@ app_readline_bind_meta (char key, rl_command_func_t cb)
/// Completion callback with the signature of Readline's attempted completion
/// hook (presumably installed as rl_attempted_completion_function elsewhere).
///
/// "start" and "end" are passed on to make_completions() together with
/// the contents of rl_line_buffer, and its result is returned as is.
static char **
app_readline_completion (const char *text, int start, int end)
{
	// We will reconstruct that ourselves
	(void) text;

	// Don't iterate over filenames and stuff
	rl_attempted_completion_over = true;

	return make_completions (g_ctx, rl_line_buffer, start, end);
}
static int
@ -5222,7 +5485,50 @@ on_editline_complete (EditLine *editline, int key)
(void) key;
(void) editline;
return CC_ERROR;
struct app_context *ctx = g_ctx;
unsigned char result = CC_REFRESH_BEEP;
// First prepare what Readline would have normally done for us...
const LineInfo *info = el_line (editline);
int len = info->lastchar - info->buffer;
int point = info->cursor - info->buffer;
char *copy = xstrndup (info->buffer, len);
// XXX: possibly incorrect wrt. shift state encodings
int el_start = point, el_end = point;
while (el_start && !strchr (WORD_BREAKING_CHARS, copy[el_start - 1]))
el_start--;
char **completions = make_completions (ctx, copy, el_start, el_end);
if (!completions)
goto out;
// The most basic autocompletion. I'm not sure if Readline's
// menu-complete can at all be implemented with Editline.
// Remove the original word. Editline needs it in wide characters...
// XXX: possibly incorrect wrt. shift state encodings
copy[el_end] = '\0';
el_wdeletestr (editline, mbstowcs (NULL, copy + el_start, 0));
// Insert the best match instead
el_insertstr (editline, completions[0]);
if (!completions[1])
{
// If it is the only match, don't beep at the user
// but finish the word instead
el_insertstr (editline, " ");
result = CC_REFRESH;
}
// Free the vector of matches
for (char **p = completions; *p; p++)
free (*p);
free (completions);
out:
free (copy);
return result;
}
static unsigned char