Compare commits

..

No commits in common. "695f71d9462ce3fe0bdcbe7fae9015ce254512fd" and "bbe22712fe551e8ee79726002725cfc211086024" have entirely different histories.

9 changed files with 89 additions and 329 deletions

View File

@ -180,14 +180,15 @@ add_executable (${PROJECT_NAME}
target_link_libraries (${PROJECT_NAME} ${project_common_libraries})
# Tools
set (tools add-pronunciation query-tool transform)
foreach (tool ${tools})
add_executable (${tool} EXCLUDE_FROM_ALL
src/${tool}.c ${project_common_sources})
target_link_libraries (${tool} ${project_common_libraries})
endforeach (tool)
add_executable (query-tool EXCLUDE_FROM_ALL
src/query-tool.c ${project_common_sources})
target_link_libraries (query-tool ${project_common_libraries})
add_custom_target (tools DEPENDS ${tools})
add_executable (add-pronunciation EXCLUDE_FROM_ALL
src/add-pronunciation.c ${project_common_sources})
target_link_libraries (add-pronunciation ${project_common_libraries})
add_custom_target (tools DEPENDS add-pronunciation query-tool)
# The files to be installed
include (GNUInstallDirs)

View File

@ -100,11 +100,6 @@ Dictionaries
Unfortunately this application only really works with specific dictionaries.
Word definitions have to be in plain text, separated by newlines.
You may use the included transform tool to transform existing dictionaries that
are almost useful as they are, e.g. after stripping XML tags. You might want to
fix up the `sametypesequence` of the resulting '.ifo' file afterwards, and run
dictzip on the resulting '.dict' file.
https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[
CZ <--> { EN, DE, PL, RU } dictionaries]

View File

@ -30,7 +30,6 @@
#include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
#include "utils.h"
// --- Pronunciation generator -------------------------------------------------
@ -150,7 +149,7 @@ worker_writer (WorkerData *data)
stardict_iterator_next (data->iterator);
if (fprintf (data->child_stdin, "%s\n", x) < 0)
fatal ("write to eSpeak failed: %s\n", strerror (errno));
g_error ("write to eSpeak failed: %s", strerror (errno));
g_free (x);
}
@ -170,10 +169,16 @@ get_void_entry (gchar *cmdline[])
if (!g_spawn_sync (NULL, cmdline, NULL,
G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, NULL, NULL,
&output, NULL, &exit_status, &error))
fatal ("Error: couldn't spawn espeak: %s\n", error->message);
{
g_printerr ("Error: couldn't spawn espeak: %s", error->message);
exit (EXIT_FAILURE);
}
if (exit_status)
fatal ("Error: espeak returned %d\n", exit_status);
{
g_printerr ("Error: espeak returned %d\n", exit_status);
exit (EXIT_FAILURE);
}
return output;
}
@ -188,7 +193,7 @@ worker (WorkerData *data)
if (!g_spawn_async_with_pipes (NULL, data->cmdline, NULL,
G_SPAWN_SEARCH_PATH, NULL, NULL,
NULL, &child_in, &child_out, NULL, &error))
fatal ("g_spawn: %s\n", error->message);
g_error ("g_spawn() failed: %s", error->message);
data->child_stdin = fdopen (child_in, "wb");
if (!data->child_stdin)
@ -223,7 +228,7 @@ worker (WorkerData *data)
while ((c = fgetc (child_stdout)) != EOF && c != '\n')
g_string_append_c (s, c);
if (c == EOF)
fatal ("eSpeak process died too soon\n");
g_error ("eSpeak process died too soon");
gchar *translation = g_string_free (s, FALSE);
*output_end = translation;
@ -241,8 +246,11 @@ worker (WorkerData *data)
}
if (fgetc (child_stdout) != EOF)
fatal ("Error: eSpeak has written more lines than it should. "
{
g_printerr ("Error: eSpeak has written more lines than it should. "
"The output would be corrupt, aborting.\n");
exit (EXIT_FAILURE);
}
fclose (child_stdout);
return g_thread_join (writer);
@ -274,6 +282,32 @@ stardict_info_copy (StardictInfo *dest, const StardictInfo *src)
}
}
/// Write a list of entry fields to the dictionary being generated.
///
/// When `sts` is FALSE, each field's type is written first and every field is
/// written with mark_end set.  When `sts` is TRUE, no types are written and
/// the final field of the list is written with mark_end unset.
/// Fields with a lowercase ASCII type go through generator_write_string(),
/// all others through generator_write_raw() with their `data_size`.
///
/// Returns FALSE as soon as a generator call fails (with `error` as that call
/// left it), TRUE once the whole list has been written.
static gboolean
write_fields (Generator *generator, GList *fields, gboolean sts, GError **error)
{
	for (GList *link = fields; link != NULL; link = link->next)
	{
		StardictEntryField *current = link->data;
		if (!sts && !generator_write_type (generator, current->type, error))
			return FALSE;

		// Only the last field of a same-type-sequence goes without an end mark
		gboolean is_last = link->next == NULL;
		gboolean terminate = !(sts && is_last);

		gboolean ok = g_ascii_islower (current->type)
			? generator_write_string (generator,
				current->data, terminate, error)
			: generator_write_raw (generator,
				current->data, current->data_size, terminate, error);
		if (!ok)
			return FALSE;
	}
	return TRUE;
}
int
main (int argc, char *argv[])
{
@ -305,10 +339,18 @@ G_GNUC_END_IGNORE_DEPRECATIONS
("input.ifo output-basename - add pronunciation to dictionaries");
g_option_context_add_main_entries (ctx, entries, NULL);
if (!g_option_context_parse (ctx, &argc, &argv, &error))
fatal ("Error: option parsing failed: %s\n", error->message);
{
g_printerr ("Error: option parsing failed: %s\n", error->message);
exit (EXIT_FAILURE);
}
if (argc != 3)
fatal ("%s", g_option_context_get_help (ctx, TRUE, FALSE));
{
gchar *help = g_option_context_get_help (ctx, TRUE, FALSE);
g_printerr ("%s", help);
g_free (help);
exit (EXIT_FAILURE);
}
g_option_context_free (ctx);
@ -327,13 +369,20 @@ G_GNUC_END_IGNORE_DEPRECATIONS
printf ("Loading the original dictionary...\n");
StardictDict *dict = stardict_dict_new (argv[1], &error);
if (!dict)
fatal ("Error: opening the dictionary failed: %s\n", error->message);
{
g_printerr ("Error: opening the dictionary failed: %s\n",
error->message);
exit (EXIT_FAILURE);
}
gsize n_words = stardict_info_get_word_count
(stardict_dict_get_info (dict));
if (n_processes <= 0)
fatal ("Error: there must be at least one process\n");
{
g_printerr ("Error: there must be at least one process\n");
exit (EXIT_FAILURE);
}
if ((gsize) n_processes > n_words * 1024)
{
@ -412,8 +461,11 @@ G_GNUC_END_IGNORE_DEPRECATIONS
// Put extended entries into a new dictionary
Generator *generator = generator_new (argv[2], &error);
if (!generator)
fatal ("Error: failed to create the output dictionary: %s\n",
{
g_printerr ("Error: failed to create the output dictionary: %s\n",
error->message);
exit (EXIT_FAILURE);
}
StardictInfo *info = generator->info;
stardict_info_copy (info, stardict_dict_get_info (dict));
@ -464,10 +516,14 @@ G_GNUC_END_IGNORE_DEPRECATIONS
start_link.next = entry->fields;
start_link.data = &field;
if (!generator_write_fields (generator, &start_link, &error)
if (!write_fields (generator, &start_link,
info->same_type_sequence != NULL, &error)
|| !generator_finish_entry (generator,
stardict_iterator_get_word (iterator), &error))
fatal ("Error: write failed: %s\n", error->message);
{
g_printerr ("Error: write failed: %s\n", error->message);
exit (EXIT_FAILURE);
}
g_object_unref (entry);
@ -484,7 +540,11 @@ G_GNUC_END_IGNORE_DEPRECATIONS
putchar ('\n');
if (!generator_finish (generator, &error))
fatal ("Error: failed to write the dictionary: %s\n", error->message);
{
g_printerr ("Error: failed to write the dictionary: %s\n",
error->message);
exit (EXIT_FAILURE);
}
generator_free (generator);
g_object_unref (dict);

View File

@ -1,7 +1,7 @@
/*
* generator.c: dictionary generator
*
* Copyright (c) 2013 - 2020, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -170,34 +170,6 @@ generator_write_string (Generator *self,
return TRUE;
}
/// Write a list of data fields back to a dictionary.  The list has to be
/// acceptable for the generated dictionary's sametypesequence (or lack of).
///
/// Whether a same-type-sequence is in effect is decided by
/// `self->info->same_type_sequence` being non-NULL: in that case no field
/// types are written and the last field is written with mark_end unset.
/// Returns FALSE on the first failing write, TRUE otherwise.
gboolean
generator_write_fields (Generator *self, const GList *fields, GError **error)
{
	const gboolean typeless = self->info->same_type_sequence != NULL;

	for (const GList *iter = fields; iter; iter = iter->next)
	{
		StardictEntryField *item = iter->data;
		if (!typeless && !generator_write_type (self, item->type, error))
			return FALSE;

		gboolean needs_end = iter->next != NULL || !typeless;
		gboolean written;
		if (g_ascii_islower (item->type))
			// Lowercase types are written as strings
			written = generator_write_string (self,
				item->data, needs_end, error);
		else
			// Everything else is written as raw data of a known size
			written = generator_write_raw (self,
				item->data, item->data_size, needs_end, error);

		if (!written)
			return FALSE;
	}
	return TRUE;
}
/// Finishes the current entry and writes it into the index.
gboolean
generator_finish_entry (Generator *self, const gchar *word, GError **error)

View File

@ -4,7 +4,7 @@
* Nothing fancy. Just something moved out of the `stardict' test to be
* conveniently reused by the included tools.
*
* Copyright (c) 2013 - 2020, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2013, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -42,15 +42,12 @@ Generator *generator_new (const gchar *base, GError **error);
gboolean generator_finish (Generator *self, GError **error);
void generator_free (Generator *self);
void generator_begin_entry (Generator *self);
gboolean generator_write_type (Generator *self, gchar type, GError **error);
gboolean generator_write_raw (Generator *self,
gpointer data, gsize data_size, gboolean mark_end, GError **error);
gboolean generator_write_string (Generator *self,
const gchar *s, gboolean mark_end, GError **error);
void generator_begin_entry (Generator *self);
gboolean generator_write_fields (Generator *self,
const GList *fields, GError **error);
gboolean generator_finish_entry (Generator *self,
const gchar *word, GError **error);

View File

@ -192,7 +192,7 @@ struct stardict_entry_field
{
gchar type; ///< Type of entry (EntryFieldType)
gpointer data; ///< Raw data or null-terminated string
gsize data_size; ///< Size of data, including any \0
gsize data_size; ///< Size of data, including any \0
};
struct stardict_entry

View File

@ -1,252 +0,0 @@
/*
* A tool to transform dictionaries by an external filter
*
* The external filter needs to process NUL-separated textual entries.
*
* Example: transform input.ifo output -- perl -p0e s/bullshit/soykaf/g
*
* Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <locale.h>
#include <glib.h>
#include <glib/gstdio.h>
#include <glib-unix.h>
#include <gio/gio.h>
#include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
#include "utils.h"
enum { PIPE_READ, PIPE_WRITE };
// --- Main --------------------------------------------------------------------
/// Redraw the progress indicator when the percentage has changed.
///
/// The percentage is the iterator's current offset relative to `total`;
/// `*last_percent` remembers the last value printed so that the line is only
/// rewritten on change.  Callers in this file seed it with -1 so that the
/// first call always prints.
/// NOTE(review): `total` is used as a divisor and is assumed to be non-zero
/// whenever the iterator is valid -- confirm against the iterator contract.
static inline void
print_progress (gulong *last_percent, StardictIterator *iterator, gsize total)
{
	gulong percent =
		(gulong) stardict_iterator_get_offset (iterator) * 100 / total;
	if (percent == *last_percent)
		return;

	printf ("\r Writing entries... %3lu%%", percent);
	// The line has no trailing newline, so a line-buffered stdout would
	// otherwise hold it back until something else flushes the stream
	fflush (stdout);
	*last_percent = percent;
}
/// Send all textual fields of the dictionary to the filter's input.
///
/// Walks every entry of `dict` and writes each field whose type is a
/// lowercase ASCII letter to `fd`, `data_size` bytes at a time; other fields
/// are skipped.  Progress is printed to standard output.
///
/// Returns TRUE on success.  On a write failure, sets `error` and returns
/// FALSE.
/// NOTE(review): the iterator is never released here; if StardictIterator
/// is reference-counted it should be unreffed before returning -- confirm.
static gboolean
write_to_filter (StardictDict *dict, gint fd, GError **error)
{
	StardictInfo *info = stardict_dict_get_info (dict);
	gsize n_words = stardict_info_get_word_count (info);

	StardictIterator *iterator = stardict_iterator_new (dict, 0);
	gulong last_percent = -1;
	while (stardict_iterator_is_valid (iterator))
	{
		print_progress (&last_percent, iterator, n_words);

		StardictEntry *entry = stardict_iterator_get_entry (iterator);
		for (const GList *fields = stardict_entry_get_fields (entry);
			fields; fields = fields->next)
		{
			StardictEntryField *field = fields->data;
			if (!g_ascii_islower (field->type))
				continue;

			// write() may be interrupted or accept only a part of the buffer,
			// neither of which is an error; keep going until all is written
			const gchar *cursor = field->data;
			gsize remaining = field->data_size;
			while (remaining)
			{
				ssize_t written = write (fd, cursor, remaining);
				if (written < 0 && errno == EINTR)
					continue;
				if (written <= 0)
				{
					g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
						"%s", written < 0
							? strerror (errno) : "write made no progress");
					g_object_unref (entry);
					return FALSE;
				}
				cursor += written;
				remaining -= (gsize) written;
			}
		}

		g_object_unref (entry);
		stardict_iterator_next (iterator);
	}
	printf ("\n");
	return TRUE;
}
/// Rebuild the dictionary from the filter's output.
///
/// Walks every entry of `dict` in order and, for each field whose type is
/// a lowercase ASCII letter, replaces its data with the next NUL-terminated
/// chunk taken from `filtered_file`.  Other fields are kept as they are.
/// Each updated entry is then written to `generator` under its original word.
///
/// Returns TRUE on success.  Returns FALSE with `error` set when the filtered
/// data runs out before all fields have been replaced, or when writing to the
/// generator fails.
/// NOTE(review): the iterator is never released here; if StardictIterator
/// is reference-counted it should be unreffed before returning -- confirm.
static gboolean
update_from_filter (StardictDict *dict, Generator *generator,
	GMappedFile *filtered_file, GError **error)
{
	// The contents may be NULL for an empty file, avoid arithmetic on that
	gchar *filtered = g_mapped_file_get_contents (filtered_file);
	gchar *filtered_end = filtered
		? filtered + g_mapped_file_get_length (filtered_file) : NULL;

	StardictInfo *info = stardict_dict_get_info (dict);
	gsize n_words = stardict_info_get_word_count (info);

	StardictIterator *iterator = stardict_iterator_new (dict, 0);
	gulong last_percent = -1;
	while (stardict_iterator_is_valid (iterator))
	{
		print_progress (&last_percent, iterator, n_words);

		StardictEntry *entry = stardict_iterator_get_entry (iterator);
		generator_begin_entry (generator);

		for (GList *fields = entry->fields; fields; fields = fields->next)
		{
			StardictEntryField *field = fields->data;
			if (!g_ascii_islower (field->type))
				continue;

			gchar *end = filtered
				? memchr (filtered, 0, filtered_end - filtered) : NULL;
			if (!end)
			{
				g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
					"filter seems to have ended too early");
				g_object_unref (entry);
				return FALSE;
			}

			// The size includes the terminating NUL
			g_free (field->data);
			field->data = g_strdup (filtered);
			field->data_size = end - filtered + 1;
			filtered = end + 1;
		}

		if (!generator_write_fields (generator, entry->fields, error)
		 || !generator_finish_entry (generator,
				stardict_iterator_get_word (iterator), error))
		{
			g_object_unref (entry);
			return FALSE;
		}

		g_object_unref (entry);
		stardict_iterator_next (iterator);
	}
	printf ("\n");
	return TRUE;
}
// FIXME: copied from add-pronunciation.c, should merge it somewhere (utils?)
/// Copy the contents of one StardictInfo object into another.
///
/// Only `version` and the members listed in the `_stardict_ifo_keys` table
/// are transferred; the original note for this function says the path is
/// ignored.  String members already present in `dest` are freed and replaced
/// with fresh duplicates, all other members are copied as `gulong` values.
static void
stardict_info_copy (StardictInfo *dest, const StardictInfo *src)
{
	dest->version = src->version;

	for (guint idx = 0; idx < _stardict_ifo_keys_length; idx++)
	{
		const struct stardict_ifo_key *key = &_stardict_ifo_keys[idx];
		if (key->type != IFO_STRING)
		{
			// Numeric member, a plain assignment suffices
			G_STRUCT_MEMBER (gulong, dest, key->offset) =
				G_STRUCT_MEMBER (gulong, src, key->offset);
			continue;
		}

		// String member: drop the old value and take a private copy
		gchar **target = &G_STRUCT_MEMBER (gchar *, dest, key->offset);
		gchar *source = G_STRUCT_MEMBER (gchar *, src, key->offset);
		g_free (*target);
		if (source)
			*target = g_strdup (source);
		else
			*target = NULL;
	}
}
/// Entry point: transform a dictionary by piping its textual fields through
/// an external filter and writing the results into a new dictionary.
///
/// Usage: input.ifo output-basename -- FILTER [ARG...]
///
/// The filter reads the fields from a pipe and has its standard output
/// redirected to an anonymous temporary file, which is memory-mapped once the
/// filter has exited successfully.  Any failure terminates the process
/// through fatal().
int
main (int argc, char *argv[])
{
	// The GLib help includes an ellipsis character, for some reason
	(void) setlocale (LC_ALL, "");

	GError *error = NULL;
	GOptionContext *ctx = g_option_context_new
		("input.ifo output-basename -- FILTER [ARG...]");
	g_option_context_set_summary
		(ctx, "Transform dictionaries using a filter program.");
	g_option_context_set_description (ctx, "Test?");
	if (!g_option_context_parse (ctx, &argc, &argv, &error))
		fatal ("Error: option parsing failed: %s\n", error->message);

	// GLib may leave the "--" separator in the argument vector, skip it.
	// Make sure not to look at or past the terminating NULL of argv while
	// doing so, and that there is a filter command left to execute.
	gint program_argv_start = 3;
	if (argc > program_argv_start
	 && !strcmp (argv[program_argv_start], "--"))
		program_argv_start++;
	if (argc <= program_argv_start)
		fatal ("%s", g_option_context_get_help (ctx, TRUE, FALSE));

	g_option_context_free (ctx);

	printf ("Loading the original dictionary...\n");
	StardictDict *dict = stardict_dict_new (argv[1], &error);
	if (!dict)
		fatal ("Error: opening the dictionary failed: %s\n", error->message);

	printf ("Filtering entries...\n");
	gint child_in[2];
	if (!g_unix_open_pipe (child_in, 0, &error))
		fatal ("g_unix_open_pipe: %s\n", error->message);

	// The filter's output is collected in a file so that it can be mapped
	FILE *child_out = tmpfile ();
	if (!child_out)
		fatal ("tmpfile: %s\n", strerror (errno));

	GPid pid = -1;
	if (!g_spawn_async_with_fds (NULL /* working_directory */,
		argv + program_argv_start /* forward a part of ours */, NULL /* envp */,
		G_SPAWN_SEARCH_PATH | G_SPAWN_DO_NOT_REAP_CHILD,
		NULL /* child_setup */, NULL /* user_data */,
		&pid, child_in[PIPE_READ], fileno (child_out), STDERR_FILENO, &error))
		fatal ("g_spawn: %s\n", error->message);

	// NOTE(review): our copy of the read end stays open while writing, so
	// a filter that stops reading early may leave write() blocked -- consider
	// closing it right after the spawn, together with SIGPIPE handling.
	if (!write_to_filter (dict, child_in[PIPE_WRITE], &error))
		fatal ("write_to_filter: %s\n", error->message);
	if (!g_close (child_in[PIPE_READ], &error)
	 || !g_close (child_in[PIPE_WRITE], &error))
		fatal ("g_close: %s\n", error->message);

	printf ("Waiting for the filter to finish...\n");
	int wstatus = errno = 0;
	if (waitpid (pid, &wstatus, 0) < 1
	 || !WIFEXITED (wstatus) || WEXITSTATUS (wstatus) > 0)
		fatal ("Filter failed (%s, status %d)\n", strerror (errno), wstatus);

	GMappedFile *filtered = g_mapped_file_new_from_fd (fileno (child_out),
		FALSE /* writable */, &error);
	if (!filtered)
		fatal ("g_mapped_file_new_from_fd: %s\n", error->message);

	printf ("Writing the new dictionary...\n");
	Generator *generator = generator_new (argv[2], &error);
	if (!generator)
		fatal ("Error: failed to create the output dictionary: %s\n",
			error->message);

	StardictInfo *info = generator->info;
	stardict_info_copy (info, stardict_dict_get_info (dict));

	// This gets incremented each time an entry is finished
	info->word_count = 0;

	if (!update_from_filter (dict, generator, filtered, &error)
	 || !generator_finish (generator, &error))
		fatal ("Error: failed to write the dictionary: %s\n", error->message);

	g_mapped_file_unref (filtered);
	fclose (child_out);
	generator_free (generator);
	g_object_unref (dict);
	return 0;
}

View File

@ -1,7 +1,7 @@
/*
* utils.c: miscellaneous utilities
*
* Copyright (c) 2013 - 2020, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2013 - 2015, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -20,7 +20,6 @@
#include <gio/gio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdarg.h>
#include <curses.h>
#include <termios.h>
@ -100,14 +99,3 @@ update_curses_terminal_size (void)
refresh ();
#endif // HAVE_RESIZETERM && TIOCGWINSZ
}
/// Print a fatal error message and terminate the process immediately.
///
/// `format` and the following arguments are interpreted as by printf(),
/// the message goes to standard error as is (no newline is appended),
/// and the process exits with EXIT_FAILURE.  This function does not return.
void
fatal (const gchar *format, ...)
{
	va_list ap;
	va_start (ap, format);
	vfprintf (stderr, format, ap);
	// Finish with the argument list while the call can still be reached
	va_end (ap);
	exit (EXIT_FAILURE);
}

View File

@ -1,7 +1,7 @@
/*
* utils.h: miscellaneous utilities
*
* Copyright (c) 2013 - 2020, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2013 - 2015, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@ -40,6 +40,5 @@ gboolean stream_read_all (GByteArray *ba, GInputStream *is, GError **error);
gchar *stream_read_string (GDataInputStream *dis, GError **error);
gboolean xstrtoul (unsigned long *out, const char *s, int base);
void update_curses_terminal_size (void);
void fatal (const gchar *format, ...) G_GNUC_PRINTF (1, 2) G_GNUC_NORETURN;
#endif // ! UTILS_H