From 627c296057a91e73d3cd1631caa1e61ad4f2d124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?= Date: Tue, 12 Oct 2021 01:26:06 +0200 Subject: [PATCH] query-tool: support more field types Add options to format the output for the terminal, or IRC messages. Changed the output format to separate dictionary name with a tab, so it's now rather similar to tabfiles. --- src/query-tool.c | 227 ++++++++++++++++++++++++++++++++++++++--------- src/sdtui.c | 20 +---- src/stardict.c | 2 +- src/utils.c | 21 +++++ src/utils.h | 2 + 5 files changed, 210 insertions(+), 62 deletions(-) diff --git a/src/query-tool.c b/src/query-tool.c index 63817ba..825bada 100644 --- a/src/query-tool.c +++ b/src/query-tool.c @@ -1,14 +1,14 @@ /* * A tool to query multiple dictionaries for the specified word * - * Intended for use in IRC bots and similar silly things---words go in, one - * on a line, and entries come out, one dictionary at a time, finalised with - * an empty line. Newlines are escaped with `\n', backslashes with `\\'. + * Intended for use in IRC bots and similar silly things---words go in, + * one per each line, and entries come out, one dictionary at a time, + * finalised with an empty line. Newlines are escaped with `\n', + * backslashes with `\\'. * - * So far only the `m' field is supported. Feel free to extend the program - * according to your needs, it's not very complicated. + * So far only the `m', `g`, and `x` fields are supported, as in sdtui. * - * Copyright (c) 2013, Přemysl Eric Janouch + * Copyright (c) 2013 - 2021, Přemysl Eric Janouch * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. 
@@ -30,10 +30,124 @@ #include #include +#include #include "stardict.h" #include "stardict-private.h" #include "generator.h" +#include "utils.h" + + +// --- Output formatting ------------------------------------------------------- + +/// Transform Pango attributes to in-line formatting sequences (non-reentrant) +typedef const gchar *(*FormatterFunc) (PangoAttrIterator *); + +static const gchar * +pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator) +{ + return ""; +} + +static const gchar * +pango_attrs_to_irc (PangoAttrIterator *iterator) +{ + static gchar buf[5]; + gchar *p = buf; + *p++ = 0x0f; + + if (!iterator) + goto reset_formatting; + + PangoAttrInt *attr = NULL; + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD) + *p++ = 0x02; + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE) + *p++ = 0x1f; + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC) + *p++ = 0x1d; + +reset_formatting: + *p++ = 0; + return buf; +} + +static const gchar * +pango_attrs_to_ansi (PangoAttrIterator *iterator) +{ + static gchar buf[16]; + g_strlcpy (buf, "\x1b[0", sizeof buf); + if (!iterator) + goto reset_formatting; + + PangoAttrInt *attr = NULL; + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_WEIGHT)) && attr->value >= PANGO_WEIGHT_BOLD) + g_strlcat (buf, ";1", sizeof buf); + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_UNDERLINE)) && attr->value == PANGO_UNDERLINE_SINGLE) + g_strlcat (buf, ";4", sizeof buf); + if ((attr = (PangoAttrInt *) pango_attr_iterator_get (iterator, + PANGO_ATTR_STYLE)) && attr->value == PANGO_STYLE_ITALIC) + g_strlcat (buf, ";3", sizeof buf); + +reset_formatting: + g_strlcat (buf, "m", sizeof buf); + return buf; +} + +static gchar * +pango_to_output_text (const 
gchar *markup, FormatterFunc formatter) +{ + // This function skips leading whitespace, but it's the canonical one + gchar *text = NULL; + PangoAttrList *attrs = NULL; + if (!pango_parse_markup (markup, -1, 0, &attrs, &text, NULL, NULL)) + return g_strdup_printf ("<%s>", ("error in entry")); + + PangoAttrIterator *iterator = pango_attr_list_get_iterator (attrs); + GString *result = g_string_new (""); + do + { + gint start = 0, end = 0; + pango_attr_iterator_range (iterator, &start, &end); + if (end == G_MAXINT) + end = strlen (text); + + g_string_append (result, formatter (iterator)); + g_string_append_len (result, text + start, end - start); + } + while (pango_attr_iterator_next (iterator)); + g_string_append (result, formatter (NULL)); + + g_free (text); + pango_attr_iterator_destroy (iterator); + pango_attr_list_unref (attrs); + return g_string_free (result, FALSE); +} + +static gchar * +field_to_output_text (const StardictEntryField *field, FormatterFunc formatter) +{ + const gchar *definition = field->data; + if (field->type == STARDICT_FIELD_MEANING) + return g_strdup (definition); + if (field->type == STARDICT_FIELD_PANGO) + return pango_to_output_text (definition, formatter); + if (field->type == STARDICT_FIELD_XDXF) + { + gchar *markup = xdxf_to_pango_markup_with_reduced_effort (definition); + gchar *result = pango_to_output_text (markup, formatter); + g_free (markup); + return result; + } + return NULL; +} + +// --- Main -------------------------------------------------------------------- static guint count_equal_chars (const gchar *a, const gchar *b) @@ -46,15 +160,16 @@ count_equal_chars (const gchar *a, const gchar *b) } static void -do_dictionary (StardictDict *dict, const gchar *word) +do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter) { gboolean found; StardictIterator *iter = stardict_dict_search (dict, word, &found); if (!found) goto out; - // Default Stardict ordering is ASCII case-insensitive. 
- // Try to find a better matching entry based on letter case: + // Default Stardict ordering is ASCII case-insensitive, + // which may be further exacerbated by our own collation feature. + // Try to find a better matching entry: gint64 best_offset = stardict_iterator_get_offset (iter); guint best_score = count_equal_chars @@ -86,27 +201,67 @@ do_dictionary (StardictDict *dict, const gchar *word) for (; list; list = list->next) { StardictEntryField *field = list->data; - if (field->type == STARDICT_FIELD_MEANING) + gchar *definitions = field_to_output_text (field, formatter); + if (!definitions) + continue; + + printf ("%s\t", info->book_name); + for (const gchar *p = definitions; *p; p++) { - const gchar *desc = field->data; - printf ("%s:", info->book_name); - for (; *desc; desc++) - { - if (*desc == '\\') - printf ("\\\\"); - else if (*desc == '\n') - printf ("\\n"); - else - putchar (*desc); - } - putchar ('\n'); + if (*p == '\\') + printf ("\\\\"); + else if (*p == '\n') + printf ("\\n"); + else + putchar (*p); } + putchar ('\n'); + g_free (definitions); } g_object_unref (entry); out: g_object_unref (iter); } +static FormatterFunc +parse_options (int *argc, char ***argv) +{ + GError *error = NULL; + GOptionContext *ctx = g_option_context_new + ("DICTIONARY.ifo... 
- query multiple dictionaries"); + + gboolean format_with_ansi = FALSE; + gboolean format_with_irc = FALSE; + GOptionEntry entries[] = + { + { "ansi", 'a', 0, G_OPTION_ARG_NONE, &format_with_ansi, + "Format with ANSI sequences", NULL }, + { "irc", 'i', 0, G_OPTION_ARG_NONE, &format_with_irc, + "Format with IRC codes", NULL }, + { } + }; + + g_option_context_add_main_entries (ctx, entries, NULL); + if (!g_option_context_parse (ctx, argc, argv, &error)) + { + g_printerr ("Error: option parsing failed: %s\n", error->message); + exit (EXIT_FAILURE); + } + if (*argc < 2) + { + g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL)); + exit (EXIT_FAILURE); + } + g_option_context_free (ctx); + + if (format_with_ansi) + return pango_attrs_to_ansi; + if (format_with_irc) + return pango_attrs_to_irc; + + return pango_attrs_ignore; +} + int main (int argc, char *argv[]) { @@ -115,21 +270,7 @@ G_GNUC_BEGIN_IGNORE_DEPRECATIONS g_type_init (); G_GNUC_END_IGNORE_DEPRECATIONS - GError *error = NULL; - GOptionContext *ctx = g_option_context_new - ("DICTIONARY.ifo... 
- query multiple dictionaries"); - if (!g_option_context_parse (ctx, &argc, &argv, &error)) - { - g_printerr ("Error: option parsing failed: %s\n", error->message); - exit (EXIT_FAILURE); - } - g_option_context_free (ctx); - - if (argc < 2) - { - g_printerr ("Error: no dictionaries given\n"); - exit (EXIT_FAILURE); - } + FormatterFunc formatter = parse_options (&argc, &argv); guint n_dicts = argc - 1; StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts); @@ -137,6 +278,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS guint i; for (i = 1; i <= n_dicts; i++) { + GError *error = NULL; dicts[i - 1] = stardict_dict_new (argv[i], &error); if (error) { @@ -146,26 +288,23 @@ G_GNUC_END_IGNORE_DEPRECATIONS } } - while (TRUE) + gint c; + do { GString *s = g_string_new (NULL); - - gint c; while ((c = getchar ()) != EOF && c != '\n') if (c != '\r') g_string_append_c (s, c); if (s->len) for (i = 0; i < n_dicts; i++) - do_dictionary (dicts[i], s->str); + do_dictionary (dicts[i], s->str, formatter); printf ("\n"); fflush (NULL); g_string_free (s, TRUE); - - if (c == EOF) - break; } + while (c != EOF); for (i = 0; i < n_dicts; i++) g_object_unref (dicts[i]); diff --git a/src/sdtui.c b/src/sdtui.c index 5e00d7c..d64f1d1 100644 --- a/src/sdtui.c +++ b/src/sdtui.c @@ -348,23 +348,9 @@ view_entry_split_add_pango (ViewEntry *ve, const gchar *markup) static void view_entry_split_add_xdxf (ViewEntry *ve, const gchar *xml) { - // Trivially filter out all tags we can't quite handle, - // then parse the reduced XML as Pango markup--this seems to work well. - // Given the nature of our display, also skip keyword elements. - GString *filtered = g_string_new (""); - while (*xml) - { - // GMarkup can read some of the wilder XML constructs, Pango skips them - const gchar *p = NULL; - if (*xml != '<' || xml[1] == '!' || xml[1] == '?' 
- || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/')) - || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>'))) - g_string_append_c (filtered, *xml++); - else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>"))) - xml = ++p; - } - view_entry_split_add_pango (ve, filtered->str); - g_string_free (filtered, TRUE); + gchar *markup = xdxf_to_pango_markup_with_reduced_effort (xml); + view_entry_split_add_pango (ve, markup); + g_free (markup); } /// Decomposes a dictionary entry into the format we want. diff --git a/src/stardict.c b/src/stardict.c index d371eb1..8b55f99 100644 --- a/src/stardict.c +++ b/src/stardict.c @@ -1354,7 +1354,7 @@ stardict_iterator_get_entry (StardictIterator *sdi) { g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); if (!stardict_iterator_is_valid (sdi)) - return FALSE; + return NULL; return stardict_dict_get_entry (sdi->owner, sdi->offset); } diff --git a/src/utils.c b/src/utils.c index 8c63548..275e4e1 100644 --- a/src/utils.c +++ b/src/utils.c @@ -33,6 +33,27 @@ #include "utils.h" +/// Trivially filter out all tags that aren't part of the Pango markup language, +/// or no frontend can quite handle--this seems to work well. +/// Given the nature of our display, also skip whole keyword elements. +gchar * +xdxf_to_pango_markup_with_reduced_effort (const gchar *xml) +{ + GString *filtered = g_string_new (""); + while (*xml) + { + // GMarkup can read some of the wilder XML constructs, Pango skips them + const gchar *p = NULL; + if (*xml != '<' || xml[1] == '!' || xml[1] == '?' + || g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/')) + || (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>'))) + g_string_append_c (filtered, *xml++); + else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>"))) + xml = ++p; + } + return g_string_free (filtered, FALSE); +} + /// Read the whole stream into a byte array. 
gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error) diff --git a/src/utils.h b/src/utils.h index 178a1d9..99ad19a 100644 --- a/src/utils.h +++ b/src/utils.h @@ -36,6 +36,8 @@ } \ } +gchar *xdxf_to_pango_markup_with_reduced_effort (const gchar *xml); + gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error); gchar *stream_read_string (GDataInputStream *dis, GError **error); gboolean xstrtoul (unsigned long *out, const char *s, int base);