Move to iconv

That is the major change in this commit. Ctrl-modified keys should now also work in URxvt, which was surprisingly trivial to achieve. Coming up next: making sure the tests still work, and introducing CMake.
2014-09-28 03:51:45 +02:00 · 2014-09-28 03:51:45 +02:00 · 36bc6cd095
commit 36bc6cd095
parent cac1f8373b
10 changed files with 312 additions and 341 deletions
--- a/demo-async.c
+++ b/demo-async.c
@ -3,6 +3,8 @@

 #include <poll.h>
 #include <stdio.h>
+#include <unistd.h>
+#include <locale.h>

 #include "termkey.h"

@ -21,8 +23,9 @@ main (int argc, char *argv[])
 	(void) argv;

 	TERMKEY_CHECK_VERSION;
+	setlocale (LC_CTYPE, "");

-	termkey_t *tk = termkey_new (0, 0);
+	termkey_t *tk = termkey_new (STDIN_FILENO, NULL, 0);

 	if (!tk)
 	{
@ -31,7 +34,7 @@ main (int argc, char *argv[])
 	}

 	struct pollfd fd;
-	fd.fd = 0; /* the file descriptor we passed to termkey_new() */
+	fd.fd = STDIN_FILENO; /* the file descriptor we passed to termkey_new() */
 	fd.events = POLLIN;

 	termkey_result_t ret;
@ -54,8 +57,8 @@ main (int argc, char *argv[])
 		{
 			on_key (tk, &key);

-			if (key.type == TERMKEY_TYPE_UNICODE
-			 && key.modifiers & TERMKEY_KEYMOD_CTRL
+			if (key.type == TERMKEY_TYPE_KEY
+			 && (key.modifiers & TERMKEY_KEYMOD_CTRL)
 			 && (key.code.codepoint == 'C' || key.code.codepoint == 'c'))
 				running = 0;
 		}
--- a/demo-glib.c
+++ b/demo-glib.c
@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <glib.h>
+#include <unistd.h>
+#include <locale.h>

 #include "termkey.h"

@ -49,9 +51,13 @@ stdin_io (GIOChannel *source, GIOCondition condition, gpointer data)
 int
 main (int argc, char *argv[])
 {
-	TERMKEY_CHECK_VERSION;
+	(void) argc;
+	(void) argv;

-	tk = termkey_new (0, 0);
+	TERMKEY_CHECK_VERSION;
+	setlocale (LC_CTYPE, "");
+
+	tk = termkey_new (STDIN_FILENO, NULL, 0);
 	if (!tk)
 	{
 		fprintf (stderr, "Cannot allocate termkey instance\n");
@ -59,7 +65,8 @@ main (int argc, char *argv[])
 	}

 	GMainLoop *loop = g_main_loop_new (NULL, FALSE);
-	g_io_add_watch (g_io_channel_unix_new (0), G_IO_IN, stdin_io, NULL);
+	g_io_add_watch (g_io_channel_unix_new (STDIN_FILENO),
+		G_IO_IN, stdin_io, NULL);
 	g_main_loop_run (loop);
 	termkey_destroy (tk);
 }
--- a/demo.c
+++ b/demo.c
@ -1,9 +1,10 @@
-// we want optarg
+// We want optarg
 #define _XOPEN_SOURCE 600

 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
+#include <locale.h>

 #include "termkey.h"

@ -11,6 +12,7 @@ int
 main(int argc, char *argv[])
 {
 	TERMKEY_CHECK_VERSION;
+	setlocale (LC_CTYPE, "");

 	int mouse = 0;
 	int mouse_proto = 0;
@ -41,18 +43,18 @@ main(int argc, char *argv[])
 		}
 	}

-	tk = termkey_new (0, TERMKEY_FLAG_SPACESYMBOL | TERMKEY_FLAG_CTRLC);
-
+	tk = termkey_new (STDIN_FILENO, NULL,
+		TERMKEY_FLAG_SPACESYMBOL | TERMKEY_FLAG_CTRLC);
 	if (!tk)
 	{
 		fprintf (stderr, "Cannot allocate termkey instance\n");
 		exit (1);
 	}

-	if (termkey_get_flags (tk) & TERMKEY_FLAG_UTF8)
-		printf ("Termkey in UTF-8 mode\n");
-	else if (termkey_get_flags (tk) & TERMKEY_FLAG_RAW)
+	if (termkey_get_flags (tk) & TERMKEY_FLAG_RAW)
 		printf ("Termkey in RAW mode\n");
+	else
+		printf ("Termkey in multibyte mode\n");

 	termkey_result_t ret;
 	termkey_key_t key;
@ -103,12 +105,12 @@ main(int argc, char *argv[])
 			else
 				printf ("Key %s\n", buffer);

-			if (key.type == TERMKEY_TYPE_UNICODE
+			if (key.type == TERMKEY_TYPE_KEY
 			 && key.modifiers & TERMKEY_KEYMOD_CTRL
 			 && (key.code.codepoint == 'C' || key.code.codepoint == 'c'))
 				break;

-			if (key.type == TERMKEY_TYPE_UNICODE
+			if (key.type == TERMKEY_TYPE_KEY
 			 && key.modifiers == 0
 			 && key.code.codepoint == '?')
 			{
@ -121,7 +123,7 @@ main(int argc, char *argv[])
 		{
 			if (errno != EINTR)
 			{
-				perror("termkey_waitkey");
+				perror ("termkey_waitkey");
 				break;
 			}
 			printf ("Interrupted by signal\n");
--- a/driver-csi.c
+++ b/driver-csi.c
@ -146,6 +146,28 @@ register_csifunc (termkey_type_t type, termkey_sym_t sym, int number)
 	csi_handlers['~' - 0x40] = &handle_csifunc;
 }

+/*
+ * URxvt seems to emit this instead of ~ when holding Ctrl
+ */
+
+static termkey_result_t
+handle_csi_caret (termkey_t *tk,
+	termkey_key_t *key, int cmd, long *arg, int args)
+{
+	switch (cmd)
+	{
+	case '^':
+	{
+		termkey_result_t res = handle_csifunc (tk, key, cmd, arg, args);
+		if (res == TERMKEY_RES_KEY)
+			key->modifiers |= TERMKEY_KEYMOD_CTRL;
+		return res;
+	}
+	default:
+		return TERMKEY_RES_NONE;
+	}
+}
+
 /*
 * Handler for CSI u extended Unicode keys
 */
@ -531,6 +553,18 @@ register_keys (void)

 	csi_handlers['y' - 0x40] = &handle_csi_y;

+	// URxvt
+	register_csi_ss3_full (TERMKEY_TYPE_KEYSYM, TERMKEY_SYM_UP,
+		TERMKEY_KEYMOD_CTRL, TERMKEY_KEYMOD_CTRL, 'a');
+	register_csi_ss3_full (TERMKEY_TYPE_KEYSYM, TERMKEY_SYM_DOWN,
+		TERMKEY_KEYMOD_CTRL, TERMKEY_KEYMOD_CTRL, 'b');
+	register_csi_ss3_full (TERMKEY_TYPE_KEYSYM, TERMKEY_SYM_RIGHT,
+		TERMKEY_KEYMOD_CTRL, TERMKEY_KEYMOD_CTRL, 'c');
+	register_csi_ss3_full (TERMKEY_TYPE_KEYSYM, TERMKEY_SYM_LEFT,
+		TERMKEY_KEYMOD_CTRL, TERMKEY_KEYMOD_CTRL, 'd');
+
+	csi_handlers['^' - 0x40] = &handle_csi_caret;
+
 	keyinfo_initialised = 1;
 	return 1;
 }
@ -610,16 +644,20 @@ peekkey_csi (termkey_t *tk, termkey_csi_t *csi,
 		switch (args)
 		{
 		case 1:
-			fprintf (stderr, "CSI: Unknown arg1=%ld cmd=%c\n", arg[0], (char)cmd);
+			fprintf (stderr, "CSI: Unknown arg1=%ld cmd=%c\n",
+				arg[0], (char) cmd);
 			break;
 		case 2:
-			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld cmd=%c\n", arg[0], arg[1], (char) cmd);
+			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld cmd=%c\n",
+				arg[0], arg[1], (char) cmd);
 			break;
 		case 3:
-			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld arg3=%ld cmd=%c\n", arg[0], arg[1], arg[2], (char) cmd);
+			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld arg3=%ld cmd=%c\n",
+				arg[0], arg[1], arg[2], (char) cmd);
 			break;
 		default:
-			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld arg3=%ld ... args=%d cmd=%c\n", arg[0], arg[1], arg[2], args, (char) cmd);
+			fprintf (stderr, "CSI: Unknown arg1=%ld arg2=%ld arg3=%ld ... "
+				"args=%zu cmd=%c\n", arg[0], arg[1], arg[2], args, (char) cmd);
 			break;
 		}
 #endif
@ -665,12 +703,12 @@ peekkey_ss3 (termkey_t *tk, termkey_csi_t *csi, size_t introlen,
 	{
 		if (tk->flags & TERMKEY_FLAG_CONVERTKP && ss3_kpalts[cmd - 0x40])
 		{
-			key->type = TERMKEY_TYPE_UNICODE;
+			key->type = TERMKEY_TYPE_KEY;
 			key->code.codepoint = ss3_kpalts[cmd - 0x40];
 			key->modifiers = 0;

-			key->utf8[0] = key->code.codepoint;
-			key->utf8[1] = 0;
+			key->multibyte[0] = key->code.codepoint;
+			key->multibyte[1] = 0;
 		}
 		else
 		{
@ -712,16 +750,13 @@ peekkey (termkey_t *tk, void *info,
 		return peekkey_ss3 (tk, csi, 1, key, force, nbytep);
 	if (b0 == 0x9b)
 		return peekkey_csi (tk, csi, 1, key, force, nbytep);
-	else
-		return TERMKEY_RES_NONE;
+	return TERMKEY_RES_NONE;
 }

 termkey_driver_t termkey_driver_csi =
 {
 	.name        = "CSI",
-
 	.new_driver  = new_driver,
 	.free_driver = free_driver,
-
 	.peekkey     = peekkey,
 };
--- a/driver-ti.c
+++ b/driver-ti.c
@ -106,8 +106,7 @@ lookup_next (trie_node_t *n, unsigned char b)
 	{
 	case TYPE_KEY:
 	case TYPE_MOUSE:
-		// FIXME
-		fprintf (stderr, "ABORT: lookup_next within a TYPE_KEY node\n");
+		fprintf (stderr, "fatal: lookup_next within a TYPE_KEY node\n");
 		abort ();
 	case TYPE_ARRAY:
 	{
@ -117,8 +116,7 @@ lookup_next (trie_node_t *n, unsigned char b)
 		return nar->arr[b - nar->min];
 	}
 	}
-
-	return NULL; // Never reached but keeps compiler happy
+	return NULL;  // Never reached but keeps compiler happy
 }

 static void
@ -186,8 +184,8 @@ load_terminfo (termkey_ti_t *ti, const char *term)
 #else
 	int err;

-	/* Have to cast away the const. But it's OK - we know terminfo won't really
-	 * modify term */
+	/* Have to cast away the const. But it's OK - we know terminfo won't
+	 * really modify term */
 	if (setupterm ((char *) term, 1, &err) != OK)
 		return 0;
 #endif
@ -295,11 +293,10 @@ new_driver (termkey_t *tk, const char *term)
 	return ti;

 abort_free_trie:
-	free_trie(ti->root);
+	free_trie (ti->root);

 abort_free_ti:
-	free(ti);
-
+	free (ti);
 	return NULL;
 }

@ -487,7 +484,6 @@ funcname2keysym (const char *funcname,
 	if (funcname[0] == 'f' && isdigit (funcname[1]))
 	{
 		*typep = TERMKEY_TYPE_FUNCTION;
-		// FIXME
 		*symp  = atoi (funcname + 1);
 		return 1;
 	}
@ -502,7 +498,8 @@ funcname2keysym (const char *funcname,
 	}

 #ifdef DEBUG
-	fprintf (stderr, "TODO: Need to convert funcname %s to a type/sym\n", funcname);
+	fprintf (stderr, "TODO: Need to convert funcname"
+		" %s to a type/sym\n", funcname);
 #endif

 	return 0;
@ -546,8 +543,8 @@ insert_seq (termkey_ti_t *ti, const char *seq, trie_node_t *node)
 			trie_node_array_t *nar = (trie_node_array_t *) p;
 			if (b < nar->min || b > nar->max)
 			{
-				// FIXME
-				fprintf (stderr, "ASSERT FAIL: Trie insert at 0x%02x is outside of extent bounds (0x%02x..0x%02x)\n",
+				fprintf (stderr, "fatal: trie insert at 0x%02x is outside of"
+					" extent bounds (0x%02x..0x%02x)\n",
 					b, nar->min, nar->max);
 				abort ();
 			}
@ -557,8 +554,7 @@ insert_seq (termkey_ti_t *ti, const char *seq, trie_node_t *node)
 		}
 		case TYPE_KEY:
 		case TYPE_MOUSE:
-			// FIXME
-			fprintf (stderr, "ASSERT FAIL: Tried to insert child node in TYPE_KEY\n");
+			fprintf (stderr, "fatal: tried to insert child node in TYPE_KEY\n");
 			abort ();
 		}

@ -570,12 +566,9 @@ insert_seq (termkey_ti_t *ti, const char *seq, trie_node_t *node)
 termkey_driver_t termkey_driver_ti =
 {
 	.name         = "terminfo",
-
 	.new_driver   = new_driver,
 	.free_driver  = free_driver,
-
 	.start_driver = start_driver,
 	.stop_driver  = stop_driver,
-
 	.peekkey      = peekkey,
 };
--- a/t/02getkey.c
+++ b/t/02getkey.c
@ -20,10 +20,10 @@ int main(int argc, char *argv[])

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY after h");

-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type after h");
-  is_int(key.code.codepoint, 'h',                  "key.code.codepoint after h");
-  is_int(key.modifiers,      0,                    "key.modifiers after h");
-  is_str(key.utf8,           "h",                  "key.utf8 after h");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type after h");
+  is_int(key.code.codepoint, 'h',              "key.code.codepoint after h");
+  is_int(key.modifiers,      0,                "key.modifiers after h");
+  is_str(key.multibyte,      "h",              "key.multibyte after h");

  is_int(termkey_get_buffer_remaining(tk), 256, "buffer free 256 after getkey");

--- a/t/03utf8.c
+++ b/t/03utf8.c
@ -8,13 +8,13 @@ int main(int argc, char *argv[])

  plan_tests(57);

-  tk = termkey_new_abstract("vt100", TERMKEY_FLAG_UTF8);
+  tk = termkey_new_abstract("vt100", "UTF-8", 0);

  termkey_push_bytes(tk, "a", 1);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY low ASCII");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type low ASCII");
-  is_int(key.code.codepoint, 'a',                  "key.code.codepoint low ASCII");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type low ASCII");
+  is_int(key.code.codepoint, 'a',              "key.code.codepoint low ASCII");

  /* 2-byte UTF-8 range is U+0080 to U+07FF (0xDF 0xBF) */
  /* However, we'd best avoid the C1 range, so we'll start at U+00A0 (0xC2 0xA0) */
@ -22,42 +22,42 @@ int main(int argc, char *argv[])
  termkey_push_bytes(tk, "\xC2\xA0", 2);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 2 low");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 2 low");
-  is_int(key.code.codepoint, 0x00A0,               "key.code.codepoint UTF-8 2 low");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 2 low");
+  is_int(key.code.codepoint, 0x00A0,           "key.code.codepoint UTF-8 2 low");

  termkey_push_bytes(tk, "\xDF\xBF", 2);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 2 high");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 2 high");
-  is_int(key.code.codepoint, 0x07FF,               "key.code.codepoint UTF-8 2 high");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 2 high");
+  is_int(key.code.codepoint, 0x07FF,           "key.code.codepoint UTF-8 2 high");

  /* 3-byte UTF-8 range is U+0800 (0xE0 0xA0 0x80) to U+FFFD (0xEF 0xBF 0xBD) */

  termkey_push_bytes(tk, "\xE0\xA0\x80", 3);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 3 low");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 3 low");
-  is_int(key.code.codepoint, 0x0800,               "key.code.codepoint UTF-8 3 low");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 3 low");
+  is_int(key.code.codepoint, 0x0800,           "key.code.codepoint UTF-8 3 low");

  termkey_push_bytes(tk, "\xEF\xBF\xBD", 3);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 3 high");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 3 high");
-  is_int(key.code.codepoint, 0xFFFD,               "key.code.codepoint UTF-8 3 high");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 3 high");
+  is_int(key.code.codepoint, 0xFFFD,           "key.code.codepoint UTF-8 3 high");

  /* 4-byte UTF-8 range is U+10000 (0xF0 0x90 0x80 0x80) to U+10FFFF (0xF4 0x8F 0xBF 0xBF) */

  termkey_push_bytes(tk, "\xF0\x90\x80\x80", 4);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 4 low");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 4 low");
-  is_int(key.code.codepoint, 0x10000,              "key.code.codepoint UTF-8 4 low");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 4 low");
+  is_int(key.code.codepoint, 0x10000,          "key.code.codepoint UTF-8 4 low");

  termkey_push_bytes(tk, "\xF4\x8F\xBF\xBF", 4);

  is_int(termkey_getkey(tk, &key), TERMKEY_RES_KEY, "getkey yields RES_KEY UTF-8 4 high");
-  is_int(key.type,           TERMKEY_TYPE_UNICODE, "key.type UTF-8 4 high");
-  is_int(key.code.codepoint, 0x10FFFF,             "key.code.codepoint UTF-8 4 high");
+  is_int(key.type,           TERMKEY_TYPE_KEY, "key.type UTF-8 4 high");
+  is_int(key.code.codepoint, 0x10FFFF,         "key.code.codepoint UTF-8 4 high");

  /* Invalid continuations */

--- a/termkey-internal.h
+++ b/termkey-internal.h
@ -5,6 +5,8 @@

 #include <stdint.h>
 #include <termios.h>
+#include <stdbool.h>
+#include <iconv.h>

 typedef struct termkey_driver termkey_driver_t;
 struct termkey_driver
@ -40,29 +42,30 @@ struct termkey
 	int fd;
 	int flags;
 	int canonflags;
+
 	unsigned char *buffer;
 	size_t buffstart; // First offset in buffer
-	size_t buffcount; // NUMBER of entires valid in buffer
+	size_t buffcount; // Number of entries valid in buffer
 	size_t buffsize; // Total malloc'ed size

-	// Position beyond buffstart at which peekkey() should next start
-	// normally 0, but see also termkey_interpret_csi().
+	// Position beyond buffstart at which peekkey() should next start.
+	// Normally 0, but see also termkey_interpret_csi().
 	size_t hightide;

 	struct termios restore_termios;
-	char restore_termios_valid;
+	bool restore_termios_valid;

-	int waittime; // msec
+	int waittime; // In milliseconds

-	char is_closed;
-	char is_started;
+	bool is_closed; // We've received EOF
+	bool is_started;

 	int nkeynames;
 	const char **keynames;

-	// There are 32 C0 codes
-	keyinfo_t c0[32];
-
+	keyinfo_t c0[32]; // There are 32 C0 codes
+	iconv_t to_utf32_conv;
+	iconv_t from_utf32_conv;
 	termkey_driver_node_t *drivers;

 	// Now some "protected" methods for the driver to call but which we don't
@ -70,7 +73,7 @@ struct termkey
 	struct
 	{
 		void (*emit_codepoint) (termkey_t *tk,
-			long codepoint, termkey_key_t *key);
+			uint32_t codepoint, termkey_key_t *key);
 		termkey_result_t (*peekkey_simple) (termkey_t *tk,
 			termkey_key_t *key, int force, size_t *nbytes);
 		termkey_result_t (*peekkey_mouse) (termkey_t *tk,
--- a/termkey.c
+++ b/termkey.c
@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <strings.h>
+#include <langinfo.h>

 #include <stdio.h>

@ -14,20 +15,16 @@ void
 termkey_check_version (int major, int minor)
 {
 	if (major != TERMKEY_VERSION_MAJOR)
-	{
-		fprintf (stderr, "libtermkey major version mismatch; %d (wants) != %d (library)\n",
+		fprintf (stderr, "libtermkey major version mismatch;"
+			" %d (wants) != %d (library)\n",
 			major, TERMKEY_VERSION_MAJOR);
-		exit (1);
-	}
-
-	if (minor > TERMKEY_VERSION_MINOR)
-	{
-		fprintf (stderr, "libtermkey minor version mismatch; %d (wants) > %d (library)\n",
+	else if (minor > TERMKEY_VERSION_MINOR)
+		fprintf (stderr, "libtermkey minor version mismatch;"
+			" %d (wants) > %d (library)\n",
 			minor, TERMKEY_VERSION_MINOR);
-		exit (1);
-	}
-
-	// Happy
+	else
+		return;
+	exit (1);
 }

 static termkey_driver_t *drivers[] =
@ -38,7 +35,7 @@ static termkey_driver_t *drivers[] =
 };

 // Forwards for the "protected" methods
-static void emit_codepoint (termkey_t *tk, long codepoint, termkey_key_t *key);
+static void emit_codepoint (termkey_t *tk, uint32_t codepoint, termkey_key_t *key);
 static termkey_result_t peekkey_simple (termkey_t *tk,
 	termkey_key_t *key, int force, size_t *nbytes);
 static termkey_result_t peekkey_mouse (termkey_t *tk,
@ -127,7 +124,7 @@ keynames[] =
 static void
 print_buffer (termkey_t *tk)
 {
-	int i;
+	size_t i;
 	for (i = 0; i < tk->buffcount && i < 20; i++)
 		fprintf (stderr, "%02x ", CHARAT (i));
 	if (tk->buffcount > 20)
@ -139,9 +136,9 @@ print_key (termkey_t *tk, termkey_key_t *key)
 {
 	switch (key->type)
 	{
-	case TERMKEY_TYPE_UNICODE:
-		fprintf (stderr, "Unicode codepoint=U+%04lx utf8='%s'",
-			key->code.codepoint, key->utf8);
+	case TERMKEY_TYPE_KEY:
+		fprintf (stderr, "Unicode codepoint=U+%04lx multibyte='%s'",
+			(long) key->code.codepoint, key->multibyte);
 		break;
 	case TERMKEY_TYPE_FUNCTION:
 		fprintf (stderr, "Function F%d", key->code.number);
@ -300,12 +297,12 @@ termkey_alloc (void)
 	tk->buffsize  = 256; /* bytes */
 	tk->hightide  = 0;

-	tk->restore_termios_valid = 0;
+	tk->restore_termios_valid = false;

 	tk->waittime = 50; /* msec */

-	tk->is_closed  = 0;
-	tk->is_started = 0;
+	tk->is_closed  = false;
+	tk->is_started = false;

 	tk->nkeynames = 64;
 	tk->keynames  = NULL;
@ -322,11 +319,23 @@ termkey_alloc (void)
 }

 static int
-termkey_init (termkey_t *tk, const char *term)
+termkey_init (termkey_t *tk, const char *term, const char *encoding)
 {
+	if (!encoding)
+		encoding = nl_langinfo (CODESET);
+
+	static const uint16_t endianity = 0x0102;
+	const char *utf32 = (*(uint8_t *) &endianity == 0x01)
+		? "UTF-32BE" : "UTF-32LE";
+
+	if ((tk->to_utf32_conv = iconv_open (utf32, encoding)) == (iconv_t) -1)
+		return 0;
+	if ((tk->from_utf32_conv = iconv_open (encoding, utf32)) == (iconv_t) -1)
+		goto abort_free_to_utf32;
+
 	tk->buffer = malloc (tk->buffsize);
 	if (!tk->buffer)
-		return 0;
+		goto abort_free_from_utf32;

 	tk->keynames = malloc (sizeof tk->keynames[0] * tk->nkeynames);
 	if (!tk->keynames)
@ -345,7 +354,7 @@ termkey_init (termkey_t *tk, const char *term)
 	register_c0 (tk, TERMKEY_SYM_ENTER,     0x0d, NULL);
 	register_c0 (tk, TERMKEY_SYM_ESCAPE,    0x1b, NULL);

-	termkey_driver_node_t *tail = NULL;
+	termkey_driver_node_t **tail = &tk->drivers;
 	for (i = 0; drivers[i]; i++)
 	{
 		void *info = (*drivers[i]->new_driver) (tk, term);
@ -364,12 +373,8 @@ termkey_init (termkey_t *tk, const char *term)
 		thisdrv->info = info;
 		thisdrv->next = NULL;

-		if (!tail)
-			tk->drivers = thisdrv;
-		else
-			tail->next = thisdrv;
-
-		tail = thisdrv;
+		*tail = thisdrv;
+		tail = &thisdrv->next;

 #ifdef DEBUG
 		fprintf (stderr, "Loaded %s driver\n", drivers[i]->name);
@ -394,55 +399,36 @@ abort_free_drivers:

 abort_free_keynames:
 	free (tk->keynames);
-
 abort_free_buffer:
 	free (tk->buffer);
+abort_free_from_utf32:
+	iconv_close (tk->from_utf32_conv);
+abort_free_to_utf32:
+	iconv_close (tk->to_utf32_conv);
 	return 0;
 }

 termkey_t *
-termkey_new (int fd, int flags)
+termkey_new (int fd, const char *encoding, int flags)
 {
 	termkey_t *tk = termkey_alloc ();
 	if (!tk)
 		return NULL;

 	tk->fd = fd;
-
-	if (!(flags & (TERMKEY_FLAG_RAW | TERMKEY_FLAG_UTF8)))
-	{
-		char *e;
-
-		/* Most OSes will set .UTF-8. Some will set .utf8. Try to be fairly
-		 * generous in parsing these
-		 */
-		if (((e = getenv("LANG")) || (e = getenv("LC_CTYPE"))
-			|| (e = getenv("LC_ALL"))) && (e = strchr(e, '.')) && e++ &&
-			(!strcasecmp(e, "UTF-8") || !strcasecmp(e, "UTF8")))
-			flags |= TERMKEY_FLAG_UTF8;
-		else
-			flags |= TERMKEY_FLAG_RAW;
-	}
-
 	termkey_set_flags (tk, flags);

 	const char *term = getenv ("TERM");
+	if (termkey_init (tk, term, encoding)
+	 && termkey_start (tk))
+		return tk;

-	if (!termkey_init (tk, term))
-		goto abort;
-
-	if (!termkey_start (tk))
-		goto abort;
-
-	return tk;
-
-abort:
 	free (tk);
 	return NULL;
 }

 termkey_t *
-termkey_new_abstract (const char *term, int flags)
+termkey_new_abstract (const char *term, const char *encoding, int flags)
 {
 	termkey_t *tk = termkey_alloc ();
 	if (!tk)
@ -450,7 +436,8 @@ termkey_new_abstract (const char *term, int flags)

 	tk->fd = -1;
 	termkey_set_flags (tk, flags);
-	if (!termkey_init (tk, term))
+
+	if (!termkey_init (tk, term, encoding))
 	{
 		free (tk);
 		return NULL;
@ -466,13 +453,17 @@ termkey_free (termkey_t *tk)
 	free (tk->buffer);   tk->buffer   = NULL;
 	free (tk->keynames); tk->keynames = NULL;

-	termkey_driver_node_t *p;
-	for (p = tk->drivers; p; )
+	iconv_close (tk->to_utf32_conv);
+	tk->to_utf32_conv = (iconv_t) -1;
+	iconv_close (tk->from_utf32_conv);
+	tk->from_utf32_conv = (iconv_t) -1;
+
+	termkey_driver_node_t *p, *next;
+	for (p = tk->drivers; p; p = next)
 	{
 		(*p->driver->free_driver) (p->info);
-		termkey_driver_node_t *next = p->next;
+		next = p->next;
 		free (p);
-		p = next;
 	}
 	free (tk);
 }
@ -498,7 +489,7 @@ termkey_start (termkey_t *tk)
 		if (tcgetattr (tk->fd, &termios) == 0)
 		{
 			tk->restore_termios = termios;
-			tk->restore_termios_valid = 1;
+			tk->restore_termios_valid = true;

 			termios.c_iflag &= ~(IXON|INLCR|ICRNL);
 			termios.c_lflag &= ~(ICANON|ECHO);
@ -554,7 +545,7 @@ termkey_stop (termkey_t *tk)
 	if (tk->restore_termios_valid)
 		tcsetattr (tk->fd, TCSANOW, &tk->restore_termios);

-	tk->is_started = 0;
+	tk->is_started = false;
 	return 1;
 }

@ -608,7 +599,7 @@ void
 termkey_set_canonflags (termkey_t *tk, int flags)
 {
 	tk->canonflags = flags;
-	if(tk->canonflags & TERMKEY_CANON_SPACESYMBOL)
+	if (tk->canonflags & TERMKEY_CANON_SPACESYMBOL)
 		tk->flags |= TERMKEY_FLAG_SPACESYMBOL;
 	else
 		tk->flags &= ~TERMKEY_FLAG_SPACESYMBOL;
@ -635,8 +626,8 @@ termkey_set_buffer_size (termkey_t *tk, size_t size)
 size_t
 termkey_get_buffer_remaining (termkey_t *tk)
 {
-	/* Return the total number of free bytes in the buffer, because that's what
- 	* is available to the user. */
+	/* Return the total number of free bytes in the buffer,
+	 * because that's what is available to the user. */
 	return tk->buffsize - tk->buffcount;
 }

@ -654,129 +645,76 @@ eat_bytes (termkey_t *tk, size_t count)
 	tk->buffcount -= count;
 }

-static inline unsigned int
-utf8_seqlen (long codepoint)
-{
-	if (codepoint < 0x0000080) return 1;
-	if (codepoint < 0x0000800) return 2;
-	if (codepoint < 0x0010000) return 3;
-	if (codepoint < 0x0200000) return 4;
-	if (codepoint < 0x4000000) return 5;
-	return 6;
-}
+#define MULTIBYTE_INVALID '?'

 static void
-fill_utf8 (termkey_key_t *key)
+fill_multibyte (termkey_t *tk, termkey_key_t *key)
 {
-	long codepoint = key->code.codepoint;
-	int nbytes = utf8_seqlen (codepoint);
+	size_t codepoint_len = sizeof key->code.codepoint;
+	char *codepoint_ptr = (char *) &key->code.codepoint;
+	size_t multibyte_len = sizeof key->multibyte;
+	char *multibyte_ptr = (char *) key->multibyte;

-	key->utf8[nbytes] = 0;
+	size_t result = iconv (tk->from_utf32_conv,
+		&codepoint_ptr, &codepoint_len, &multibyte_ptr, &multibyte_len);
+	size_t output = sizeof key->multibyte - multibyte_len;

-	// This is easier done backwards
-	int b = nbytes;
-	while (b > 1)
+	// Something broke
+	if (result == (size_t) -1 || output == 0)
 	{
-		b--;
-		key->utf8[b] = 0x80 | (codepoint & 0x3f);
-		codepoint >>= 6;
+		key->multibyte[0] = MULTIBYTE_INVALID;
+		key->multibyte[1] = 0;
+		return;
 	}

-	switch (nbytes)
-	{
-		case 1: key->utf8[0] =        (codepoint & 0x7f); break;
-		case 2: key->utf8[0] = 0xc0 | (codepoint & 0x1f); break;
-		case 3: key->utf8[0] = 0xe0 | (codepoint & 0x0f); break;
-		case 4: key->utf8[0] = 0xf0 | (codepoint & 0x07); break;
-		case 5: key->utf8[0] = 0xf8 | (codepoint & 0x03); break;
-		case 6: key->utf8[0] = 0xfc | (codepoint & 0x01); break;
-	}
+	// Append a null character, as it wasn't part of the input
+	key->multibyte[output] = 0;
 }

-#define UTF8_INVALID 0xFFFD
 static termkey_result_t
-parse_utf8 (const unsigned char *bytes, size_t len, long *cp, size_t *nbytep)
+parse_multibyte (termkey_t *tk, const unsigned char *bytes, size_t len,
+	uint32_t *cp, size_t *nbytep)
 {
-	unsigned int nbytes;
-	unsigned char b0 = bytes[0];
+	size_t multibyte_len = len;
+	char *multibyte_ptr = (char *) bytes;
+	size_t codepoint_len = sizeof *cp;
+	char *codepoint_ptr = (char *) cp;

-	if (b0 < 0x80)
-	{
-		// Single byte ASCII
-		*cp = b0;
-		*nbytep = 1;
-		return TERMKEY_RES_KEY;
-	}
-	else if (b0 < 0xc0)
-	{
-		// Starts with a continuation byte - that's not right
-		*cp = UTF8_INVALID;
-		*nbytep = 1;
-		return TERMKEY_RES_KEY;
-	}
-	else if (b0 < 0xe0)
-	{
-		nbytes = 2;
-		*cp = b0 & 0x1f;
-	}
-	else if (b0 < 0xf0)
-	{
-		nbytes = 3;
-		*cp = b0 & 0x0f;
-	}
-	else if (b0 < 0xf8)
-	{
-		nbytes = 4;
-		*cp = b0 & 0x07;
-	}
-	else if (b0 < 0xfc)
-	{
-		nbytes = 5;
-		*cp = b0 & 0x03;
-	}
-	else if (b0 < 0xfe)
-	{
-		nbytes = 6;
-		*cp = b0 & 0x01;
-	}
-	else
-	{
-		*cp = UTF8_INVALID;
-		*nbytep = 1;
-		return TERMKEY_RES_KEY;
-	}
+	// Fingers crossed...
+	errno = 0;
+	iconv (tk->to_utf32_conv,
+		&multibyte_ptr, &multibyte_len, &codepoint_ptr, &codepoint_len);

-	for (unsigned int b = 1; b < nbytes; b++)
+	// At most one Unicode character can have been processed,
+	// so the number of consumed bytes is simply the difference
+	*nbytep = len - multibyte_len;
+
+	// Nothing has been converted, let's examine what happened
+	if (codepoint_ptr == (char *) cp)
 	{
-		if (b >= len)
+		if (errno == 0)
+			// The input was probably a shift sequence
 			return TERMKEY_RES_AGAIN;
-
-		unsigned char cb = bytes[b];
-		if (cb < 0x80 || cb >= 0xc0)
+		if (errno == EINVAL)
+			// Incomplete character or shift sequence
+			return TERMKEY_RES_AGAIN;
+		if (errno == EILSEQ)
 		{
-			*cp = UTF8_INVALID;
-			*nbytep = b;
+			// Invalid multibyte sequence in the input, let's try going
+			// byte after byte in hope we skip it completely
+			*cp = MULTIBYTE_INVALID;
+			*nbytep = 1;
 			return TERMKEY_RES_KEY;
 		}

-		*cp <<= 6;
-		*cp |= cb & 0x3f;
+		// E2BIG shouldn't be possible with room for a whole codepoint; bail out
+		abort ();
 	}
-
-	// Check for overlong sequences
-	if (nbytes > utf8_seqlen (*cp))
-		*cp = UTF8_INVALID;
-
-	// Check for UTF-16 surrogates or invalid *cps
-	if ((*cp >= 0xD800 && *cp <= 0xDFFF) || *cp == 0xFFFE || *cp == 0xFFFF)
-		*cp = UTF8_INVALID;
-
-	*nbytep = nbytes;
 	return TERMKEY_RES_KEY;
 }

 static void
-emit_codepoint (termkey_t *tk, long codepoint, termkey_key_t *key)
+emit_codepoint (termkey_t *tk, uint32_t codepoint, termkey_key_t *key)
 {
 	if (codepoint < 0x20)
 	{
@ -793,14 +731,15 @@ emit_codepoint (termkey_t *tk, long codepoint, termkey_key_t *key)

 		if (!key->code.sym)
 		{
-			key->type = TERMKEY_TYPE_UNICODE;
-			/* Generically modified Unicode ought not report the SHIFT state, or else
-			 * we get into complicationg trying to report Shift-; vs : and so on...
-			 * In order to be able to represent Ctrl-Shift-A as CTRL modified
-			 * unicode A, we need to call Ctrl-A simply 'a', lowercase
+			key->type = TERMKEY_TYPE_KEY;
+			/* Generically modified Unicode ought not report the SHIFT state,
+			 * or else we get into complications trying to report Shift-; vs :
+			 * and so on...  In order to be able to represent Ctrl-Shift-A as
+			 * CTRL modified unicode A, we need to call Ctrl-A simply 'a',
+			 * lowercase
 			 */
 			if (codepoint + 0x40 >= 'A' && codepoint + 0x40 <= 'Z')
-				// it's a letter - use lowecase instead
+				// It's a letter - use lowercase instead
 				key->code.codepoint = codepoint + 0x60;
 			else
 				key->code.codepoint = codepoint + 0x40;
@ -816,32 +755,17 @@ emit_codepoint (termkey_t *tk, long codepoint, termkey_key_t *key)
 		key->code.sym = TERMKEY_SYM_DEL;
 		key->modifiers = 0;
 	}
-	else if (codepoint >= 0x20 && codepoint < 0x80)
-	{
-		// ASCII lowbyte range
-		key->type = TERMKEY_TYPE_UNICODE;
-		key->code.codepoint = codepoint;
-		key->modifiers = 0;
-	}
-	else if (codepoint >= 0x80 && codepoint < 0xa0)
-	{
-		// UTF-8 never starts with a C1 byte. So we can be sure of these
-		key->type = TERMKEY_TYPE_UNICODE;
-		key->code.codepoint = codepoint - 0x40;
-		key->modifiers = TERMKEY_KEYMOD_CTRL | TERMKEY_KEYMOD_ALT;
-	}
 	else
 	{
-		// UTF-8 codepoint
-		key->type = TERMKEY_TYPE_UNICODE;
+		key->type = TERMKEY_TYPE_KEY;
 		key->code.codepoint = codepoint;
 		key->modifiers = 0;
 	}

 	termkey_canonicalise (tk, key);

-	if (key->type == TERMKEY_TYPE_UNICODE)
-		fill_utf8 (key);
+	if (key->type == TERMKEY_TYPE_KEY)
+		fill_multibyte (tk, key);
 }

 void
@ -851,7 +775,7 @@ termkey_canonicalise (termkey_t *tk, termkey_key_t *key)

 	if (flags & TERMKEY_CANON_SPACESYMBOL)
 	{
-		if (key->type == TERMKEY_TYPE_UNICODE && key->code.number == 0x20)
+		if (key->type == TERMKEY_TYPE_KEY && key->code.codepoint == 0x20)
 		{
 			key->type = TERMKEY_TYPE_KEYSYM;
 			key->code.sym = TERMKEY_SYM_SPACE;
@ -862,9 +786,9 @@ termkey_canonicalise (termkey_t *tk, termkey_key_t *key)
 		if (key->type == TERMKEY_TYPE_KEYSYM
 		 && key->code.sym == TERMKEY_SYM_SPACE)
 		{
-			key->type = TERMKEY_TYPE_UNICODE;
-			key->code.number = 0x20;
-			fill_utf8 (key);
+			key->type = TERMKEY_TYPE_KEY;
+			key->code.codepoint = 0x20;
+			fill_multibyte (tk, key);
 		}
 	}

@ -962,14 +886,14 @@ peekkey_simple (termkey_t *tk, termkey_key_t *key, int force, size_t *nbytep)
 	if (tk->buffcount == 0)
 		return tk->is_closed ? TERMKEY_RES_EOF : TERMKEY_RES_NONE;

-	unsigned char b0 = CHARAT(0);
+	unsigned char b0 = CHARAT (0);
 	if (b0 == 0x1b)
 	{
 		// Escape-prefixed value? Might therefore be Alt+key
 		if (tk->buffcount == 1)
 		{
-			// This might be an <Esc> press, or it may want to be part of a longer
-			// sequence
+			// This might be an <Esc> press, or it may want to be part
+			// of a longer sequence
 			if (!force)
 				return TERMKEY_RES_AGAIN;

@ -1004,46 +928,38 @@ peekkey_simple (termkey_t *tk, termkey_key_t *key, int force, size_t *nbytep)

 		return metakey_result;
 	}
-	else if (b0 < 0xa0)
+	else if (!(tk->flags & TERMKEY_FLAG_RAW))
 	{
-		// Single byte C0, G0 or C1 - C1 is never UTF-8 initial byte
-		(*tk->method.emit_codepoint) (tk, b0, key);
-		*nbytep = 1;
-		return TERMKEY_RES_KEY;
-	}
-	else if (tk->flags & TERMKEY_FLAG_UTF8)
-	{
-		// Some UTF-8
-		long codepoint;
-		termkey_result_t res = parse_utf8
-			(tk->buffer + tk->buffstart, tk->buffcount, &codepoint, nbytep);
+		uint32_t codepoint;
+		termkey_result_t res = parse_multibyte
+			(tk, tk->buffer + tk->buffstart, tk->buffcount, &codepoint, nbytep);

 		if (res == TERMKEY_RES_AGAIN && force)
 		{
-			/* There weren't enough bytes for a complete UTF-8 sequence but caller
- 			* demands an answer. About the best thing we can do here is eat as many
- 			* bytes as we have, and emit a UTF8_INVALID. If the remaining bytes
- 			* arrive later, they'll be invalid too.
- 			*/
-			codepoint = UTF8_INVALID;
+			/* There weren't enough bytes for a complete character but
+			 * caller demands an answer.  About the best thing we can do here
+			 * is eat as many bytes as we have, and emit a MULTIBYTE_INVALID.
+			 * If the remaining bytes arrive later, they'll be invalid too.
+			 */
+			codepoint = MULTIBYTE_INVALID;
 			*nbytep = tk->buffcount;
 			res = TERMKEY_RES_KEY;
 		}

-		key->type = TERMKEY_TYPE_UNICODE;
+		key->type = TERMKEY_TYPE_KEY;
 		key->modifiers = 0;
 		(*tk->method.emit_codepoint) (tk, codepoint, key);
 		return res;
 	}
 	else
 	{
-		// Non UTF-8 case - just report the raw byte
-		key->type = TERMKEY_TYPE_UNICODE;
+		// Non multibyte case - just report the raw byte
+		key->type = TERMKEY_TYPE_KEY;
 		key->code.codepoint = b0;
 		key->modifiers = 0;

-		key->utf8[0] = key->code.codepoint;
-		key->utf8[1] = 0;
+		key->multibyte[0] = b0;
+		key->multibyte[1] = 0;

 		*nbytep = 1;
 		return TERMKEY_RES_KEY;
@ -1078,7 +994,7 @@ termkey_getkey (termkey_t *tk, termkey_key_t *key)
 	if (ret == TERMKEY_RES_KEY)
 		eat_bytes (tk, nbytes);

-	if(ret == TERMKEY_RES_AGAIN)
+	if (ret == TERMKEY_RES_AGAIN)
 		/* Call peekkey() again in force mode to obtain whatever it can */
 		(void) peekkey (tk, key, 1, &nbytes);
 		/* Don't eat it yet though */
@ -1145,7 +1061,7 @@ retry:
 				return TERMKEY_RES_ERROR;
 			}

-			if (fd.revents & (POLLIN|POLLHUP|POLLERR))
+			if (fd.revents & (POLLIN | POLLHUP | POLLERR))
 				ret = termkey_advisereadable (tk);
 			else
 				ret = TERMKEY_RES_NONE;
@ -1198,7 +1114,7 @@ retry:
 	}
 	if (len < 1)
 	{
-		tk->is_closed = 1;
+		tk->is_closed = true;
 		return TERMKEY_RES_NONE;
 	}
 	tk->buffcount += len;
@ -1363,15 +1279,15 @@ termkey_strfkey (termkey_t *tk, char *buffer, size_t len,
 		!!(format & TERMKEY_FORMAT_LOWERMOD) * 4];

 	int wrapbracket = (format & TERMKEY_FORMAT_WRAPBRACKET) &&
-		(key->type != TERMKEY_TYPE_UNICODE || key->modifiers != 0);
+		(key->type != TERMKEY_TYPE_KEY || key->modifiers != 0);

 	char sep = (format & TERMKEY_FORMAT_SPACEMOD) ? ' ' : '-';

 	if (format & TERMKEY_FORMAT_CARETCTRL &&
- 		key->type == TERMKEY_TYPE_UNICODE &&
+		key->type == TERMKEY_TYPE_KEY &&
 		key->modifiers == TERMKEY_KEYMOD_CTRL)
 	{
-		long codepoint = key->code.codepoint;
+		uint32_t codepoint = key->code.codepoint;

 		// Handle some of the special casesfirst
 		if (codepoint >= 'a' && codepoint <= 'z')
@ -1406,7 +1322,7 @@ termkey_strfkey (termkey_t *tk, char *buffer, size_t len,
 	if (key->modifiers & TERMKEY_KEYMOD_ALT)
 	{
 		l = snprintf (buffer + pos, len - pos, "%s%c", mods->alt, sep);
-		if(l <= 0)
+		if (l <= 0)
 			return pos;
 		pos += l;
 	}
@ -1427,10 +1343,10 @@ termkey_strfkey (termkey_t *tk, char *buffer, size_t len,

 	switch (key->type)
 	{
-	case TERMKEY_TYPE_UNICODE:
-		if (!key->utf8[0]) // In case of user-supplied key structures
-			fill_utf8 (key);
-		l = snprintf (buffer + pos, len - pos, "%s", key->utf8);
+	case TERMKEY_TYPE_KEY:
+		if (!key->multibyte[0]) // In case of user-supplied key structures
+			fill_multibyte (tk, key);
+		l = snprintf (buffer + pos, len - pos, "%s", key->multibyte);
 		break;
 	case TERMKEY_TYPE_KEYSYM:
 	{
@ -1454,7 +1370,7 @@ termkey_strfkey (termkey_t *tk, char *buffer, size_t len,

 		static const char *evnames[] =
 			{ "Unknown", "Press", "Drag", "Release" };
-		l = snprintf(buffer + pos, len - pos,
+		l = snprintf (buffer + pos, len - pos,
 			"Mouse%s(%d)", evnames[ev], button);
 		if (format & TERMKEY_FORMAT_MOUSE_POS)
 		{
@ -1478,7 +1394,7 @@ termkey_strfkey (termkey_t *tk, char *buffer, size_t len,
 		else
 			l = snprintf (buffer + pos, len - pos,
 				"Mode(%d=%d)", mode, value);
-		// XXX: should this fall through?
+		break;
 	}
 	case TERMKEY_TYPE_UNKNOWN_CSI:
 		l = snprintf (buffer + pos, len - pos,
@ -1514,19 +1430,20 @@ termkey_strpkey (termkey_t *tk,
 	if ((format & TERMKEY_FORMAT_CARETCTRL) && str[0] == '^' && str[1])
 	{
 		str = termkey_strpkey (tk,
-			str+1, key, format & ~TERMKEY_FORMAT_CARETCTRL);
+			str + 1, key, format & ~TERMKEY_FORMAT_CARETCTRL);

 		if (!str
- 		 || key->type != TERMKEY_TYPE_UNICODE
+		 || key->type != TERMKEY_TYPE_KEY
 		 || key->code.codepoint < '@'
 		 || key->code.codepoint > '_'
 		 || key->modifiers != 0)
 			return NULL;

-		if (key->code.codepoint >= 'A' && key->code.codepoint <= 'Z')
+		if (key->code.codepoint >= 'A'
+		 && key->code.codepoint <= 'Z')
 			key->code.codepoint += 0x20;
 		key->modifiers = TERMKEY_KEYMOD_CTRL;
-		fill_utf8 (key);
+		fill_multibyte (tk, key);
 		return (char *) str;
 	}

@ -1535,7 +1452,6 @@ termkey_strpkey (termkey_t *tk,
 		(format & TERMKEY_FORMAT_SPACEMOD) ? ' ' : '-')))
 	{
 		size_t n = sep_at - str;
-
 		if (n == strlen (mods->alt) && !strncmp (mods->alt, str, n))
 			key->modifiers |= TERMKEY_KEYMOD_ALT;
 		else if (n == strlen (mods->ctrl) && !strncmp (mods->ctrl, str, n))
@ -1558,18 +1474,17 @@ termkey_strpkey (termkey_t *tk,
 		key->type = TERMKEY_TYPE_KEYSYM;
 		str = endstr;
 	}
-	// FIXME: sscanf
 	else if (sscanf(str, "F%d%zn", &key->code.number, &snbytes) == 1)
 	{
 		key->type = TERMKEY_TYPE_FUNCTION;
 		str += snbytes;
 	}
-	// Unicode must be last
-	else if (parse_utf8 ((unsigned const char *) str, strlen (str),
+	// Multibyte must be last
+	else if (parse_multibyte (tk, (unsigned const char *) str, strlen (str),
 		&key->code.codepoint, &nbytes) == TERMKEY_RES_KEY)
 	{
-		key->type = TERMKEY_TYPE_UNICODE;
-		fill_utf8 (key);
+		key->type = TERMKEY_TYPE_KEY;
+		fill_multibyte (tk, key);
 		str += nbytes;
 	}
 	// TODO: Consider mouse events?
@ -1595,7 +1510,7 @@ termkey_keycmp (termkey_t *tk,

 	switch (key1.type)
 	{
-	case TERMKEY_TYPE_UNICODE:
+	case TERMKEY_TYPE_KEY:
 		if (key1.code.codepoint != key2.code.codepoint)
 			return key1.code.codepoint - key2.code.codepoint;
 		break;
@ -1636,7 +1551,6 @@ termkey_keycmp (termkey_t *tk,
 		return value1 - value2;
 	}
 	}
-
 	return key1.modifiers - key2.modifiers;
 }

--- a/termkey.h.in
+++ b/termkey.h.in
@ -92,7 +92,7 @@ enum termkey_sym
 typedef enum termkey_type termkey_type_t;
 enum termkey_type
 {
-	TERMKEY_TYPE_UNICODE,
+	TERMKEY_TYPE_KEY,
 	TERMKEY_TYPE_FUNCTION,
 	TERMKEY_TYPE_KEYSYM,
 	TERMKEY_TYPE_MOUSE,
@ -135,7 +135,7 @@ struct termkey_key
 	termkey_type_t type;
 	union
 	{
-		long          codepoint; /* TERMKEY_TYPE_UNICODE */
+		uint32_t      codepoint; /* TERMKEY_TYPE_KEY */
 		int           number;    /* TERMKEY_TYPE_FUNCTION */
 		termkey_sym_t sym;       /* TERMKEY_TYPE_KEYSYM */
 		char          mouse[4];  /* TERMKEY_TYPE_MOUSE */
@ -145,21 +145,27 @@ struct termkey_key
 	int modifiers;

 	/* The raw multibyte sequence for the key */
-	char utf8[MB_LEN_MAX + 1];
+	char multibyte[MB_LEN_MAX + 1];
 };

 typedef struct termkey termkey_t;

 enum
 {
-	TERMKEY_FLAG_NOINTERPRET = 1 << 0, /* Do not interpret C0//DEL codes if possible */
-	TERMKEY_FLAG_CONVERTKP   = 1 << 1, /* Convert KP codes to regular keypresses */
-	TERMKEY_FLAG_RAW         = 1 << 2, /* Input is raw bytes, not UTF-8 */
-	TERMKEY_FLAG_UTF8        = 1 << 3, /* Input is definitely UTF-8 */
-	TERMKEY_FLAG_NOTERMIOS   = 1 << 4, /* Do not make initial termios calls on construction */
-	TERMKEY_FLAG_SPACESYMBOL = 1 << 5, /* Sets TERMKEY_CANON_SPACESYMBOL */
-	TERMKEY_FLAG_CTRLC       = 1 << 6, /* Allow Ctrl-C to be read as normal, disabling SIGINT */
-	TERMKEY_FLAG_EINTR       = 1 << 7  /* Return ERROR on signal (EINTR) rather than retry */
+	/* Do not interpret C0/DEL codes if possible */
+	TERMKEY_FLAG_NOINTERPRET = 1 << 0,
+	/* Convert KP codes to regular keypresses */
+	TERMKEY_FLAG_CONVERTKP   = 1 << 1,
+	/* Don't try to decode the input characters */
+	TERMKEY_FLAG_RAW         = 1 << 2,
+	/* Do not make initial termios calls on construction */
+	TERMKEY_FLAG_NOTERMIOS   = 1 << 4,
+	/* Sets TERMKEY_CANON_SPACESYMBOL */
+	TERMKEY_FLAG_SPACESYMBOL = 1 << 5,
+	/* Allow Ctrl-C to be read as normal, disabling SIGINT */
+	TERMKEY_FLAG_CTRLC       = 1 << 6,
+	/* Return ERROR on signal (EINTR) rather than retry */
+	TERMKEY_FLAG_EINTR       = 1 << 7
 };

 enum
@ -170,8 +176,9 @@ enum

 void termkey_check_version (int major, int minor);

-termkey_t *termkey_new (int fd, int flags);
-termkey_t *termkey_new_abstract (const char *term, int flags);
+termkey_t *termkey_new (int fd, const char *encoding, int flags);
+termkey_t *termkey_new_abstract (const char *term,
+	const char *encoding, int flags);
 void termkey_free (termkey_t *tk);
 void termkey_destroy (termkey_t *tk);

@ -226,15 +233,22 @@ termkey_result_t termkey_interpret_csi (termkey_t *tk,
 typedef enum termkey_format termkey_format_t;
 enum termkey_format
 {
-	TERMKEY_FORMAT_LONGMOD     = 1 << 0, /* Shift-... instead of S-... */
-	TERMKEY_FORMAT_CARETCTRL   = 1 << 1, /* ^X instead of C-X */
-	TERMKEY_FORMAT_ALTISMETA   = 1 << 2, /* Meta- or M- instead of Alt- or A- */
-	TERMKEY_FORMAT_WRAPBRACKET = 1 << 3, /* Wrap special keys in brackets like <Escape> */
-	TERMKEY_FORMAT_SPACEMOD    = 1 << 4, /* M Foo instead of M-Foo */
-	TERMKEY_FORMAT_LOWERMOD    = 1 << 5, /* meta or m instead of Meta or M */
-	TERMKEY_FORMAT_LOWERSPACE  = 1 << 6, /* page down instead of PageDown */
-
-	TERMKEY_FORMAT_MOUSE_POS   = 1 << 8  /* Include mouse position if relevant; @ col,line */
+	/* Shift-... instead of S-... */
+	TERMKEY_FORMAT_LONGMOD     = 1 << 0,
+	/* ^X instead of C-X */
+	TERMKEY_FORMAT_CARETCTRL   = 1 << 1,
+	/* Meta- or M- instead of Alt- or A- */
+	TERMKEY_FORMAT_ALTISMETA   = 1 << 2,
+	/* Wrap special keys in brackets like <Escape> */
+	TERMKEY_FORMAT_WRAPBRACKET = 1 << 3,
+	/* M Foo instead of M-Foo */
+	TERMKEY_FORMAT_SPACEMOD    = 1 << 4,
+	/* meta or m instead of Meta or M */
+	TERMKEY_FORMAT_LOWERMOD    = 1 << 5,
+	/* page down instead of PageDown */
+	TERMKEY_FORMAT_LOWERSPACE  = 1 << 6,
+	/* Include mouse position if relevant; @ col,line */
+	TERMKEY_FORMAT_MOUSE_POS   = 1 << 8
 };

 /* Some useful combinations */