query-tool: support more field types

Add options to format the output for the terminal, or IRC messages.

Changed the output format to separate dictionary name with a tab,
so it's now rather similar to tabfiles.
This commit is contained in:
Přemysl Eric Janouch 2021-10-12 01:26:06 +02:00
parent 13a16d1eb5
commit 627c296057
Signed by: p
GPG Key ID: A0420B94F92B9493
5 changed files with 210 additions and 62 deletions

View File

@ -1,14 +1,14 @@
/*
* A tool to query multiple dictionaries for the specified word
*
* Intended for use in IRC bots and similar silly things---words go in, one
* on a line, and entries come out, one dictionary at a time, finalised with
* an empty line. Newlines are escaped with `\n', backslashes with `\\'.
* Intended for use in IRC bots and similar silly things---words go in,
* one per line, and entries come out, one dictionary at a time,
* finalised with an empty line. Newlines are escaped with `\n',
* backslashes with `\\'.
*
* So far only the `m' field is supported. Feel free to extend the program
* according to your needs, it's not very complicated.
* So far only the `m', `g', and `x' fields are supported, as in sdtui.
*
* Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2013 - 2021, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -30,10 +30,124 @@
#include <glib.h>
#include <gio/gio.h>
#include <pango/pango.h>
#include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
#include "utils.h"
// --- Output formatting -------------------------------------------------------
/// Transform Pango attributes to in-line formatting sequences (non-reentrant)
typedef const gchar *(*FormatterFunc) (PangoAttrIterator *);
/// Formatter for plain output: never emits any in-line sequence,
/// regardless of what attributes the iterator carries
static const gchar *
pango_attrs_ignore (G_GNUC_UNUSED PangoAttrIterator *iterator)
{
	static const gchar nothing[] = "";
	return nothing;
}
/// Formatter producing IRC formatting codes for the iterator's current range.
/// A NULL iterator yields just the reset code.  The returned string lives
/// in a static buffer, which gets overwritten by the next call.
static const gchar *
pango_attrs_to_irc (PangoAttrIterator *iterator)
{
	// Worst case: reset, bold, underline, italic, and the terminator
	static gchar codes[5];
	gsize len = 0;

	// Always start by cancelling whatever formatting was in effect
	codes[len++] = 0x0f;
	if (iterator)
	{
		const PangoAttrInt *weight = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_WEIGHT);
		if (weight && weight->value >= PANGO_WEIGHT_BOLD)
			codes[len++] = 0x02;

		const PangoAttrInt *underline = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_UNDERLINE);
		if (underline && underline->value == PANGO_UNDERLINE_SINGLE)
			codes[len++] = 0x1f;

		const PangoAttrInt *style = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_STYLE);
		if (style && style->value == PANGO_STYLE_ITALIC)
			codes[len++] = 0x1d;
	}

	codes[len] = 0;
	return codes;
}
/// Formatter producing an ANSI SGR escape sequence for the iterator's current
/// range.  The sequence always begins with a reset (parameter 0), so a NULL
/// iterator yields a plain reset.  The returned string lives in a static
/// buffer, which gets overwritten by the next call.
static const gchar *
pango_attrs_to_ansi (PangoAttrIterator *iterator)
{
	static gchar sequence[16];
	gboolean bold = FALSE, underlined = FALSE, italic = FALSE;

	if (iterator)
	{
		const PangoAttrInt *attr = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_WEIGHT);
		bold = attr && attr->value >= PANGO_WEIGHT_BOLD;

		attr = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_UNDERLINE);
		underlined = attr && attr->value == PANGO_UNDERLINE_SINGLE;

		attr = (PangoAttrInt *)
			pango_attr_iterator_get (iterator, PANGO_ATTR_STYLE);
		italic = attr && attr->value == PANGO_STYLE_ITALIC;
	}

	// The longest possible result is ten characters, well within the buffer
	g_snprintf (sequence, sizeof sequence, "\x1b[0%s%s%sm",
		bold ? ";1" : "", underlined ? ";4" : "", italic ? ";3" : "");
	return sequence;
}
/// Convert Pango markup to text with in-line formatting sequences.
/// The formatter is invoked once per attribute range, and once more with NULL
/// at the very end.  Returns a newly allocated string; when the markup
/// cannot be parsed, that string is an error placeholder.
static gchar *
pango_to_output_text (const gchar *markup, FormatterFunc formatter)
{
	gchar *plain = NULL;
	PangoAttrList *attr_list = NULL;

	// Leading whitespace gets skipped by this parser, but it's the canonical one
	if (!pango_parse_markup (markup, -1, 0, &attr_list, &plain, NULL, NULL))
		return g_strdup_printf ("<%s>", ("error in entry"));

	GString *output = g_string_new ("");
	PangoAttrIterator *it = pango_attr_list_get_iterator (attr_list);
	gboolean more = TRUE;
	while (more)
	{
		gint from = 0, to = 0;
		pango_attr_iterator_range (it, &from, &to);

		// An end of G_MAXINT is substituted with the actual text length
		if (to == G_MAXINT)
			to = strlen (plain);

		g_string_append (output, formatter (it));
		g_string_append_len (output, plain + from, to - from);
		more = pango_attr_iterator_next (it);
	}

	// Let the formatter close off whatever it has left open
	g_string_append (output, formatter (NULL));

	pango_attr_iterator_destroy (it);
	pango_attr_list_unref (attr_list);
	g_free (plain);
	return g_string_free (output, FALSE);
}
/// Render a dictionary entry field as output text.
/// Meaning fields are copied as they are, Pango and XDXF fields are run
/// through the formatter.  Returns a newly allocated string,
/// or NULL for any other field type.
static gchar *
field_to_output_text (const StardictEntryField *field, FormatterFunc formatter)
{
	const gchar *data = field->data;
	switch (field->type)
	{
	case STARDICT_FIELD_MEANING:
		return g_strdup (data);
	case STARDICT_FIELD_PANGO:
		return pango_to_output_text (data, formatter);
	case STARDICT_FIELD_XDXF:
	{
		// XDXF is first reduced to Pango markup, then treated as such
		gchar *pango = xdxf_to_pango_markup_with_reduced_effort (data);
		gchar *text = pango_to_output_text (pango, formatter);
		g_free (pango);
		return text;
	}
	default:
		return NULL;
	}
}
// --- Main --------------------------------------------------------------------
static guint
count_equal_chars (const gchar *a, const gchar *b)
@ -46,15 +160,16 @@ count_equal_chars (const gchar *a, const gchar *b)
}
static void
do_dictionary (StardictDict *dict, const gchar *word)
do_dictionary (StardictDict *dict, const gchar *word, FormatterFunc formatter)
{
gboolean found;
StardictIterator *iter = stardict_dict_search (dict, word, &found);
if (!found)
goto out;
// Default Stardict ordering is ASCII case-insensitive.
// Try to find a better matching entry based on letter case:
// Default Stardict ordering is ASCII case-insensitive,
// which may be further exacerbated by our own collation feature.
// Try to find a better matching entry:
gint64 best_offset = stardict_iterator_get_offset (iter);
guint best_score = count_equal_chars
@ -86,27 +201,67 @@ do_dictionary (StardictDict *dict, const gchar *word)
for (; list; list = list->next)
{
StardictEntryField *field = list->data;
if (field->type == STARDICT_FIELD_MEANING)
gchar *definitions = field_to_output_text (field, formatter);
if (!definitions)
continue;
printf ("%s\t", info->book_name);
for (const gchar *p = definitions; *p; p++)
{
const gchar *desc = field->data;
printf ("%s:", info->book_name);
for (; *desc; desc++)
{
if (*desc == '\\')
if (*p == '\\')
printf ("\\\\");
else if (*desc == '\n')
else if (*p == '\n')
printf ("\\n");
else
putchar (*desc);
putchar (*p);
}
putchar ('\n');
}
g_free (definitions);
}
g_object_unref (entry);
out:
g_object_unref (iter);
}
static FormatterFunc
parse_options (int *argc, char ***argv)
{
GError *error = NULL;
GOptionContext *ctx = g_option_context_new
("DICTIONARY.ifo... - query multiple dictionaries");
gboolean format_with_ansi = FALSE;
gboolean format_with_irc = FALSE;
GOptionEntry entries[] =
{
{ "ansi", 'a', 0, G_OPTION_ARG_NONE, &format_with_ansi,
"Format with ANSI sequences", NULL },
{ "irc", 'i', 0, G_OPTION_ARG_NONE, &format_with_irc,
"Format with IRC codes", NULL },
{ }
};
g_option_context_add_main_entries (ctx, entries, NULL);
if (!g_option_context_parse (ctx, argc, argv, &error))
{
g_printerr ("Error: option parsing failed: %s\n", error->message);
exit (EXIT_FAILURE);
}
if (*argc < 2)
{
g_printerr ("%s\n", g_option_context_get_help (ctx, TRUE, NULL));
exit (EXIT_FAILURE);
}
g_option_context_free (ctx);
if (format_with_ansi)
return pango_attrs_to_ansi;
if (format_with_irc)
return pango_attrs_to_irc;
return pango_attrs_ignore;
}
int
main (int argc, char *argv[])
{
@ -115,21 +270,7 @@ G_GNUC_BEGIN_IGNORE_DEPRECATIONS
g_type_init ();
G_GNUC_END_IGNORE_DEPRECATIONS
GError *error = NULL;
GOptionContext *ctx = g_option_context_new
("DICTIONARY.ifo... - query multiple dictionaries");
if (!g_option_context_parse (ctx, &argc, &argv, &error))
{
g_printerr ("Error: option parsing failed: %s\n", error->message);
exit (EXIT_FAILURE);
}
g_option_context_free (ctx);
if (argc < 2)
{
g_printerr ("Error: no dictionaries given\n");
exit (EXIT_FAILURE);
}
FormatterFunc formatter = parse_options (&argc, &argv);
guint n_dicts = argc - 1;
StardictDict **dicts = g_alloca (sizeof *dicts * n_dicts);
@ -137,6 +278,7 @@ G_GNUC_END_IGNORE_DEPRECATIONS
guint i;
for (i = 1; i <= n_dicts; i++)
{
GError *error = NULL;
dicts[i - 1] = stardict_dict_new (argv[i], &error);
if (error)
{
@ -146,26 +288,23 @@ G_GNUC_END_IGNORE_DEPRECATIONS
}
}
while (TRUE)
gint c;
do
{
GString *s = g_string_new (NULL);
gint c;
while ((c = getchar ()) != EOF && c != '\n')
if (c != '\r')
g_string_append_c (s, c);
if (s->len)
for (i = 0; i < n_dicts; i++)
do_dictionary (dicts[i], s->str);
do_dictionary (dicts[i], s->str, formatter);
printf ("\n");
fflush (NULL);
g_string_free (s, TRUE);
if (c == EOF)
break;
}
while (c != EOF);
for (i = 0; i < n_dicts; i++)
g_object_unref (dicts[i]);

View File

@ -348,23 +348,9 @@ view_entry_split_add_pango (ViewEntry *ve, const gchar *markup)
static void
view_entry_split_add_xdxf (ViewEntry *ve, const gchar *xml)
{
// Trivially filter out all tags we can't quite handle,
// then parse the reduced XML as Pango markup--this seems to work well.
// Given the nature of our display, also skip keyword elements.
GString *filtered = g_string_new ("");
while (*xml)
{
// GMarkup can read some of the wilder XML constructs, Pango skips them
const gchar *p = NULL;
if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
|| g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
|| (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
g_string_append_c (filtered, *xml++);
else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
xml = ++p;
}
view_entry_split_add_pango (ve, filtered->str);
g_string_free (filtered, TRUE);
gchar *markup = xdxf_to_pango_markup_with_reduced_effort (xml);
view_entry_split_add_pango (ve, markup);
g_free (markup);
}
/// Decomposes a dictionary entry into the format we want.

View File

@ -1354,7 +1354,7 @@ stardict_iterator_get_entry (StardictIterator *sdi)
{
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL);
if (!stardict_iterator_is_valid (sdi))
return FALSE;
return NULL;
return stardict_dict_get_entry (sdi->owner, sdi->offset);
}

View File

@ -33,6 +33,27 @@
#include "utils.h"
/// Trivially filter out all tags that aren't part of the Pango markup language,
/// or no frontend can quite handle--this seems to work well.
/// Given the nature of our display, also skip whole keyword elements.
///
/// Only the <b>, <i>, <u> tags and their closing counterparts are let through.
/// Any other tag is dropped while the text around it is kept, except for <k>,
/// which is dropped together with its contents.
///
/// Returns a newly allocated string, which the caller is to free with g_free().
gchar *
xdxf_to_pango_markup_with_reduced_effort (const gchar *xml)
{
GString *filtered = g_string_new ("");
while (*xml)
{
// GMarkup can read some of the wilder XML constructs, Pango skips them
const gchar *p = NULL;
// Copy the current character verbatim when it is not a '<', when it
// starts a "<!" or "<?" construct, when the '<' is followed by whitespace
// or by the end of the string (also after an optional '/'), when it opens
// one of the <b>, <i>, <u> tags or their closing forms, or when there is
// no '>' to finish the tag.  The tests are evaluated left to right,
// leaving p pointing at the tag's closing '>' if none of them holds.
if (*xml != '<' || xml[1] == '!' || xml[1] == '?'
|| g_ascii_isspace (xml[1]) || !*(p = xml + 1 + (xml[1] == '/'))
|| (strchr ("biu", *p) && p[1] == '>') || !(p = strchr (p, '>')))
g_string_append_c (filtered, *xml++);
// For an exact "<k>", jump straight to the matching "</k>", so that
// the contents are skipped; the closing tag is then dropped by the next
// iteration.  Any other tag, as well as a <k> that is never closed
// (in which case the NULL stored in xml is immediately overwritten),
// is dropped by continuing right after its '>'.
else if (xml[1] != 'k' || xml[2] != '>' || !(xml = strstr (p, "</k>")))
xml = ++p;
}
// Hand the accumulated character data over to the caller
return g_string_free (filtered, FALSE);
}
/// Read the whole stream into a byte array.
gboolean
stream_read_all (GByteArray *ba, GInputStream *is, GError **error)

View File

@ -36,6 +36,8 @@
} \
}
gchar *xdxf_to_pango_markup_with_reduced_effort (const gchar *xml);
gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error);
gchar *stream_read_string (GDataInputStream *dis, GError **error);
gboolean xstrtoul (unsigned long *out, const char *s, int base);