Highlight the longest matching prefix of entries

This commit is contained in:
Přemysl Eric Janouch 2016-09-26 15:59:26 +02:00
parent a87aca9c76
commit a591041912
Signed by: p
GPG Key ID: B715679E3A361BE6
3 changed files with 97 additions and 3 deletions

View File

@ -606,6 +606,9 @@ app_redraw_view (Application *self)
move (TOP_BAR_CUTOFF, 0); move (TOP_BAR_CUTOFF, 0);
gchar *input_utf8 = g_ucs4_to_utf8
((gunichar *) self->input->data, -1, NULL, NULL, NULL);
guint i, k = self->top_offset, shown = 0; guint i, k = self->top_offset, shown = 0;
for (i = 0; i < self->entries->len; i++) for (i = 0; i < self->entries->len; i++)
{ {
@ -617,9 +620,27 @@ app_redraw_view (Application *self)
if (k + 1 == ve->definitions_length) attrs |= A_UNDERLINE; if (k + 1 == ve->definitions_length) attrs |= A_UNDERLINE;
attrset (attrs); attrset (attrs);
guint left_width = app_get_left_column_width (self); RowBuffer buf;
app_add_utf8_string (self, ve->word, 0, left_width); row_buffer_init (&buf, self);
addstr (" ");
size_t common = stardict_longest_common_collation_prefix
(self->dict, ve->word, input_utf8);
gchar *prefix = g_strndup (ve->word, common);
row_buffer_append (&buf, prefix, A_BOLD);
g_free (prefix);
row_buffer_append (&buf, ve->word + common, 0);
gint left_width = app_get_left_column_width (self);
if (buf.total_width > left_width)
row_buffer_ellipsis (&buf, left_width, attrs);
row_buffer_flush (&buf);
for (int i = buf.total_width; i < left_width + 1; i++)
addch (' ');
row_buffer_free (&buf);
app_add_utf8_string (self, app_add_utf8_string (self,
ve->definitions[k], 0, COLS - left_width - 1); ve->definitions[k], 0, COLS - left_width - 1);
@ -631,6 +652,8 @@ app_redraw_view (Application *self)
} }
done: done:
free (input_utf8);
attrset (0); attrset (0);
clrtobot (); clrtobot ();
refresh (); refresh ();

View File

@ -29,6 +29,7 @@
#include <unicode/ucol.h> #include <unicode/ucol.h>
#include <unicode/ustring.h> #include <unicode/ustring.h>
#include <unicode/ubrk.h>
#include "stardict.h" #include "stardict.h"
#include "stardict-private.h" #include "stardict-private.h"
@ -934,6 +935,73 @@ stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success)
return stardict_iterator_new (sd, imin); return stardict_iterator_new (sd, imin);
} }
/// Return the longest sequence of bytes from @a s1 that form a common prefix
/// with @a s2 wrt. collation rules for this dictionary.
///
/// The result is a byte count into the UTF-8 string @a s1.  It is 0 when
/// there is no common prefix, when either string is empty, or when any
/// of the conversions or the break iterators fail.
size_t
stardict_longest_common_collation_prefix (StardictDict *sd,
	const gchar *s1, const gchar *s2)
{
	UErrorCode error;
	int32_t uc1_len = 0;
	int32_t uc2_len = 0;

	// It sets the error to overflow each time, even during pre-flight,
	// which is why only the computed lengths are looked at here
	error = U_ZERO_ERROR;
	u_strFromUTF8 (NULL, 0, &uc1_len, s1, -1, &error);
	error = U_ZERO_ERROR;
	u_strFromUTF8 (NULL, 0, &uc2_len, s2, -1, &error);

	// An empty string cannot share a non-empty prefix with anything,
	// and variable-length arrays must never have zero elements
	if (uc1_len <= 0 || uc2_len <= 0)
		return 0;

	UChar uc1[uc1_len];
	UChar uc2[uc2_len];
	error = U_ZERO_ERROR;
	u_strFromUTF8 (uc1, uc1_len, NULL, s1, -1, &error);
	u_strFromUTF8 (uc2, uc2_len, NULL, s2, -1, &error);

	// Both inputs need to be valid UTF-8 because of all the iteration mess
	if (U_FAILURE (error))
		return 0;

	// ucol_getSortKey() can't be used for these purposes, so the only
	// reasonable thing remaining is iterating by full graphemes.  It doesn't
	// work entirely correctly (e.g. Czech "ch" should be regarded as a single
	// unit, and punctuation could be ignored).  It's just good enough.
	//
	// In theory we could set the strength to UCOL_PRIMARY and ignore accents
	// but that's likely not what the user wants most of the time.
	//
	// Locale shouldn't matter much with graphemes, let's use the default.
	UBreakIterator *it1 =
		ubrk_open (UBRK_CHARACTER, NULL, uc1, uc1_len, &error);
	UBreakIterator *it2 =
		ubrk_open (UBRK_CHARACTER, NULL, uc2, uc2_len, &error);

	int32_t longest = 0;
	if (!U_FAILURE (error) && it1 && it2)
	{
		// Advance both strings one grapheme at a time and remember the last
		// boundary in s1 at which the two prefixes still collate as equal
		int32_t pos1, pos2;
		while ((pos1 = ubrk_next (it1)) != UBRK_DONE
			&& (pos2 = ubrk_next (it2)) != UBRK_DONE)
		{
			if (!ucol_strcoll (sd->priv->collator, uc1, pos1, uc2, pos2))
				longest = pos1;
		}
	}

	// Either iterator may be missing when opening it has failed
	if (it1)
		ubrk_close (it1);
	if (it2)
		ubrk_close (it2);

	if (!longest)
		return 0;

	// Translate the UTF-16 offset back to a byte offset within s1
	int32_t common_len = 0;
	error = U_ZERO_ERROR;
	u_strToUTF8 (NULL, 0, &common_len, uc1, longest, &error);
	if (common_len <= 0)
		return 0;

	// Since this heavily depends on UTF-16 <-> UTF-8 not modifying the chars
	// (surrogate pairs interference?), let's add some paranoia here
	char common[common_len];
	error = U_ZERO_ERROR;
	u_strToUTF8 (common, common_len, NULL, uc1, longest, &error);
	if (U_FAILURE (error))
		return 0;

	g_return_val_if_fail (!memcmp (s1, common, (size_t) common_len), 0);
	return (size_t) common_len;
}
static void static void
stardict_entry_field_free (StardictEntryField *sef) stardict_entry_field_free (StardictEntryField *sef)
{ {

View File

@ -138,6 +138,9 @@ gchar **stardict_dict_get_synonyms (StardictDict *sd, const gchar *word);
StardictIterator *stardict_dict_search StardictIterator *stardict_dict_search
(StardictDict *sd, const gchar *word, gboolean *success); (StardictDict *sd, const gchar *word, gboolean *success);
/// Return the number of leading bytes of @a w1 that form the longest common
/// prefix with @a w2 with respect to the collation rules of dictionary @a sd.
/// The result is 0 when there is no common prefix or when the strings
/// cannot be converted from UTF-8.
size_t stardict_longest_common_collation_prefix
(StardictDict *sd, const gchar *w1, const gchar *w2);
// --- Dictionary iterators ---------------------------------------------------- // --- Dictionary iterators ----------------------------------------------------
struct stardict_iterator struct stardict_iterator