/*
 * A tool to add eSpeak-generated pronunciation to dictionaries
 *
 * Here I use the `espeak' process rather than libespeak because of the GPL.
 * It's far from ideal, rather good as a starting point.
 *
 * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
 * All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
#include <glib.h>
|
|
|
|
#include <gio/gio.h>
|
|
|
|
|
|
|
|
#include "stardict.h"
|
2013-05-09 00:49:45 +02:00
|
|
|
#include "stardict-private.h"
|
|
|
|
#include "generator.h"
|
2013-05-08 20:42:35 +02:00
|
|
|
|
|
|
|
|
|
|
|
// --- Pronunciation generator -------------------------------------------------
|
|
|
|
|
|
|
|
typedef struct worker_data WorkerData;
|
|
|
|
|
|
|
|
struct worker_data
|
|
|
|
{
|
2015-02-26 23:26:52 +01:00
|
|
|
gchar **cmdline; ///< eSpeak command line
|
|
|
|
guint ignore_acronyms : 1; ///< Don't spell out acronyms
|
|
|
|
GRegex *re_stop; ///< Regex for stop sequences
|
|
|
|
GRegex *re_acronym; ///< Regex for ACRONYMS
|
|
|
|
|
|
|
|
guint32 start_entry; ///< The first entry to be processed
|
|
|
|
guint32 end_entry; ///< Past the last entry to be processed
|
|
|
|
|
|
|
|
// Reader, writer
|
|
|
|
GMutex *dict_mutex; ///< Locks the dictionary object
|
|
|
|
|
|
|
|
// Reader
|
|
|
|
GThread *main_thread; ///< A handle to the reader thread
|
|
|
|
StardictDict *dict; ///< The dictionary object
|
|
|
|
gpointer output; ///< Linked-list of pronunciation data
|
|
|
|
|
|
|
|
GMutex *remaining_mutex; ///< Locks the progress stats
|
|
|
|
GCond *remaining_cond; ///< Signals a change in progress
|
|
|
|
guint32 remaining; ///< How many entries remain
|
|
|
|
guint32 total; ///< Total number of entries
|
|
|
|
|
|
|
|
// Writer
|
|
|
|
StardictIterator *iterator; ///< Iterates over the dictionary
|
|
|
|
FILE *child_stdin; ///< Standard input of eSpeak
|
2013-05-08 20:42:35 +02:00
|
|
|
};
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Characters at which eSpeak breaks its output into separate lines.
#define LINE_SPLITTING_CHARS ".,:;?!"

/// Opening brackets; headwords are cut at these as well.
#define OTHER_STOP_CHARS "([{<"

/// Phrase fed to eSpeak in place of a headword that has nothing left to
/// pronounce; its pronunciation serves as the "not available" mark.
#define VOID_ENTRY "not present in any dictionary"
|
|
|
|
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Adds dots between characters.
|
2013-05-10 02:06:57 +02:00
|
|
|
static gboolean
|
|
|
|
writer_acronym_cb (const GMatchInfo *info, GString *res,
|
|
|
|
G_GNUC_UNUSED gpointer data)
|
|
|
|
{
|
|
|
|
gchar *preceding = g_match_info_fetch (info, 1);
|
|
|
|
g_string_append (res, preceding);
|
|
|
|
g_free (preceding);
|
|
|
|
|
|
|
|
gchar *word = g_match_info_fetch (info, 2);
|
|
|
|
|
|
|
|
g_string_append_c (res, *word);
|
|
|
|
const gchar *p;
|
|
|
|
for (p = word + 1; *p; p++)
|
|
|
|
{
|
|
|
|
g_string_append_c (res, '.');
|
|
|
|
g_string_append_c (res, *p);
|
|
|
|
}
|
|
|
|
|
|
|
|
g_free (word);
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Writes to espeak's stdin.
|
2013-05-08 20:42:35 +02:00
|
|
|
static gpointer
|
|
|
|
worker_writer (WorkerData *data)
|
|
|
|
{
|
2013-05-19 05:04:47 +02:00
|
|
|
GError *error = NULL;
|
2013-05-10 02:06:57 +02:00
|
|
|
GMatchInfo *match_info;
|
2013-05-08 20:42:35 +02:00
|
|
|
while (stardict_iterator_get_offset (data->iterator) != data->end_entry)
|
|
|
|
{
|
|
|
|
g_mutex_lock (data->dict_mutex);
|
|
|
|
const gchar *word = stardict_iterator_get_word (data->iterator);
|
|
|
|
g_mutex_unlock (data->dict_mutex);
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
word += strspn (word, LINE_SPLITTING_CHARS " \t");
|
|
|
|
gchar *x = g_strdup (word);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Cut the word if needed be
|
2013-05-10 02:06:57 +02:00
|
|
|
error = NULL;
|
|
|
|
if (g_regex_match_full (data->re_stop,
|
|
|
|
x, -1, 0, 0, &match_info, &error))
|
|
|
|
{
|
|
|
|
gint start_pos;
|
|
|
|
g_match_info_fetch_pos (match_info, 0, &start_pos, NULL);
|
|
|
|
x[start_pos] = 0;
|
|
|
|
}
|
|
|
|
g_match_info_free (match_info);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Change acronyms so that they're not pronounced as words
|
2013-05-10 02:06:57 +02:00
|
|
|
if (!error && !data->ignore_acronyms)
|
|
|
|
{
|
|
|
|
char *tmp = g_regex_replace_eval (data->re_acronym,
|
|
|
|
x, -1, 0, 0, writer_acronym_cb, NULL, &error);
|
|
|
|
g_free (x);
|
|
|
|
x = tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
{
|
|
|
|
g_printerr ("Notice: error processing '%s': %s\n",
|
|
|
|
word, error->message);
|
|
|
|
g_clear_error (&error);
|
|
|
|
*x = 0;
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// We might have accidentally cut off everything
|
2013-05-10 02:06:57 +02:00
|
|
|
if (!*x)
|
|
|
|
{
|
|
|
|
g_free (x);
|
|
|
|
x = g_strdup (VOID_ENTRY);
|
|
|
|
}
|
|
|
|
|
2013-05-08 20:42:35 +02:00
|
|
|
stardict_iterator_next (data->iterator);
|
2013-05-10 02:06:57 +02:00
|
|
|
if (fprintf (data->child_stdin, "%s\n", x) < 0)
|
2013-05-08 20:42:35 +02:00
|
|
|
g_error ("write to eSpeak failed: %s", strerror (errno));
|
2013-05-10 02:06:57 +02:00
|
|
|
|
|
|
|
g_free (x);
|
2013-05-08 20:42:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
g_object_unref (data->iterator);
|
|
|
|
return GINT_TO_POINTER (fclose (data->child_stdin));
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Get the void entry (and test if espeak works).
|
2013-05-10 02:06:57 +02:00
|
|
|
static gchar *
|
|
|
|
get_void_entry (gchar *cmdline[])
|
|
|
|
{
|
|
|
|
gchar *output;
|
|
|
|
gint exit_status;
|
|
|
|
|
2013-05-19 05:04:47 +02:00
|
|
|
GError *error = NULL;
|
2013-05-10 02:06:57 +02:00
|
|
|
if (!g_spawn_sync (NULL, cmdline, NULL,
|
|
|
|
G_SPAWN_SEARCH_PATH | G_SPAWN_STDERR_TO_DEV_NULL, NULL, NULL,
|
|
|
|
&output, NULL, &exit_status, &error))
|
|
|
|
{
|
|
|
|
g_printerr ("Error: couldn't spawn espeak: %s", error->message);
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (exit_status)
|
|
|
|
{
|
|
|
|
g_printerr ("Error: espeak returned %d\n", exit_status);
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return output;
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Reads from espeak's stdout.
|
2013-05-08 20:42:35 +02:00
|
|
|
static gpointer
|
|
|
|
worker (WorkerData *data)
|
|
|
|
{
|
2015-02-26 23:26:52 +01:00
|
|
|
// Spawn eSpeak
|
2013-05-19 05:04:47 +02:00
|
|
|
GError *error = NULL;
|
2013-05-10 02:06:57 +02:00
|
|
|
gint child_in, child_out;
|
|
|
|
if (!g_spawn_async_with_pipes (NULL, data->cmdline, NULL,
|
2013-05-08 20:42:35 +02:00
|
|
|
G_SPAWN_SEARCH_PATH, NULL, NULL,
|
|
|
|
NULL, &child_in, &child_out, NULL, &error))
|
|
|
|
g_error ("g_spawn() failed: %s", error->message);
|
|
|
|
|
|
|
|
data->child_stdin = fdopen (child_in, "wb");
|
|
|
|
if (!data->child_stdin)
|
|
|
|
perror ("fdopen");
|
|
|
|
|
|
|
|
FILE *child_stdout = fdopen (child_out, "rb");
|
|
|
|
if (!child_stdout)
|
|
|
|
perror ("fdopen");
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Spawn a writer thread
|
2013-05-08 20:42:35 +02:00
|
|
|
g_mutex_lock (data->dict_mutex);
|
|
|
|
data->iterator = stardict_iterator_new (data->dict, data->start_entry);
|
|
|
|
g_mutex_unlock (data->dict_mutex);
|
|
|
|
|
|
|
|
GThread *writer = g_thread_new ("write worker",
|
|
|
|
(GThreadFunc) worker_writer, data);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Read the output
|
2013-05-08 20:42:35 +02:00
|
|
|
g_mutex_lock (data->remaining_mutex);
|
|
|
|
guint32 remaining = data->remaining;
|
|
|
|
g_mutex_unlock (data->remaining_mutex);
|
|
|
|
|
|
|
|
data->output = NULL;
|
|
|
|
gpointer *output_end = &data->output;
|
|
|
|
while (remaining)
|
|
|
|
{
|
|
|
|
static gchar next[sizeof (gpointer)];
|
|
|
|
GString *s = g_string_new (NULL);
|
|
|
|
g_string_append_len (s, next, sizeof next);
|
|
|
|
|
|
|
|
gint c;
|
|
|
|
while ((c = fgetc (child_stdout)) != EOF && c != '\n')
|
|
|
|
g_string_append_c (s, c);
|
|
|
|
if (c == EOF)
|
|
|
|
g_error ("eSpeak process died too soon");
|
|
|
|
|
|
|
|
gchar *translation = g_string_free (s, FALSE);
|
|
|
|
*output_end = translation;
|
|
|
|
output_end = (gpointer *) translation;
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// We limit progress reporting so that
|
|
|
|
// the mutex doesn't spin like crazy
|
2013-05-10 02:06:57 +02:00
|
|
|
if ((--remaining & 255) != 0)
|
2013-05-08 20:42:35 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
g_mutex_lock (data->remaining_mutex);
|
|
|
|
data->remaining = remaining;
|
|
|
|
g_cond_broadcast (data->remaining_cond);
|
|
|
|
g_mutex_unlock (data->remaining_mutex);
|
|
|
|
}
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
if (fgetc (child_stdout) != EOF)
|
|
|
|
{
|
|
|
|
g_printerr ("Error: eSpeak has written more lines than it should. "
|
|
|
|
"The output would be corrupt, aborting.\n");
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
2013-05-08 20:42:35 +02:00
|
|
|
fclose (child_stdout);
|
|
|
|
return g_thread_join (writer);
|
|
|
|
}
|
|
|
|
|
|
|
|
// --- Main --------------------------------------------------------------------
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Copy the contents of one StardictInfo object into another. Ignores path.
|
2013-05-10 02:06:57 +02:00
|
|
|
static void
|
|
|
|
stardict_info_copy (StardictInfo *dest, const StardictInfo *src)
|
|
|
|
{
|
|
|
|
dest->version = src->version;
|
|
|
|
|
|
|
|
guint i;
|
|
|
|
for (i = 0; i < _stardict_ifo_keys_length; i++)
|
|
|
|
{
|
|
|
|
const struct stardict_ifo_key *key = &_stardict_ifo_keys[i];
|
|
|
|
if (key->type == IFO_STRING)
|
|
|
|
{
|
|
|
|
gchar **p = &G_STRUCT_MEMBER (gchar *, dest, key->offset);
|
|
|
|
gchar *q = G_STRUCT_MEMBER (gchar *, src, key->offset);
|
|
|
|
|
|
|
|
g_free (*p);
|
|
|
|
*p = q ? g_strdup (q) : NULL;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
G_STRUCT_MEMBER (gulong, dest, key->offset) =
|
|
|
|
G_STRUCT_MEMBER (gulong, src, key->offset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
/// Write a list of data fields back to a dictionary.
|
2013-05-10 02:06:57 +02:00
|
|
|
static gboolean
|
|
|
|
write_fields (Generator *generator, GList *fields, gboolean sts, GError **error)
|
|
|
|
{
|
|
|
|
while (fields)
|
|
|
|
{
|
|
|
|
StardictEntryField *field = fields->data;
|
|
|
|
if (!sts && !generator_write_type (generator, field->type, error))
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
gboolean mark_end = !sts || fields->next != NULL;
|
|
|
|
if (g_ascii_islower (field->type))
|
|
|
|
{
|
|
|
|
if (!generator_write_string (generator,
|
|
|
|
field->data, mark_end, error))
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
else if (!generator_write_raw (generator,
|
|
|
|
field->data, field->data_size, mark_end, error))
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
fields = fields->next;
|
|
|
|
}
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
2013-05-08 20:42:35 +02:00
|
|
|
int
|
|
|
|
main (int argc, char *argv[])
|
|
|
|
{
|
|
|
|
gint n_processes = 1;
|
2013-05-10 02:06:57 +02:00
|
|
|
gchar *voice = NULL;
|
|
|
|
gboolean ignore_acronyms = FALSE;
|
2013-05-08 20:42:35 +02:00
|
|
|
|
|
|
|
GOptionEntry entries[] =
|
|
|
|
{
|
|
|
|
{ "processes", 'N', G_OPTION_FLAG_IN_MAIN,
|
|
|
|
G_OPTION_ARG_INT, &n_processes,
|
2013-05-10 02:06:57 +02:00
|
|
|
"The number of espeak processes run in parallel", "PROCESSES" },
|
|
|
|
{ "voice", 'v', G_OPTION_FLAG_IN_MAIN,
|
|
|
|
G_OPTION_ARG_STRING, &voice,
|
|
|
|
"The voice to be used by eSpeak to pronounce the words", "VOICE" },
|
|
|
|
{ "ignore-acronyms", 0, G_OPTION_FLAG_IN_MAIN,
|
|
|
|
G_OPTION_ARG_NONE, &ignore_acronyms,
|
|
|
|
"Don't spell out words composed of big letters only", NULL },
|
2013-05-08 20:42:35 +02:00
|
|
|
{ NULL }
|
|
|
|
};
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
|
|
|
if (glib_check_version (2, 36, 0))
|
|
|
|
g_type_init ();
|
|
|
|
G_GNUC_END_IGNORE_DEPRECATIONS
|
|
|
|
|
2013-05-08 20:42:35 +02:00
|
|
|
GError *error = NULL;
|
|
|
|
GOptionContext *ctx = g_option_context_new
|
2013-05-10 02:06:57 +02:00
|
|
|
("input.ifo output-basename - add pronunciation to dictionaries");
|
2013-05-08 20:42:35 +02:00
|
|
|
g_option_context_add_main_entries (ctx, entries, NULL);
|
|
|
|
if (!g_option_context_parse (ctx, &argc, &argv, &error))
|
|
|
|
{
|
2013-05-10 02:06:57 +02:00
|
|
|
g_printerr ("Error: option parsing failed: %s\n", error->message);
|
2013-05-08 20:42:35 +02:00
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (argc != 3)
|
|
|
|
{
|
|
|
|
gchar *help = g_option_context_get_help (ctx, TRUE, FALSE);
|
2013-05-10 02:06:57 +02:00
|
|
|
g_printerr ("%s", help);
|
2013-05-08 20:42:35 +02:00
|
|
|
g_free (help);
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
g_option_context_free (ctx);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// See if we can run espeak
|
2013-05-10 02:06:57 +02:00
|
|
|
static gchar *cmdline[] = { "espeak", "--ipa", "-q", NULL, NULL, NULL };
|
|
|
|
|
|
|
|
if (voice)
|
|
|
|
{
|
|
|
|
cmdline[3] = "-v";
|
|
|
|
cmdline[4] = voice;
|
|
|
|
}
|
|
|
|
|
|
|
|
gchar *void_entry = g_strstrip (get_void_entry (cmdline));
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Load the dictionary
|
2013-05-10 02:06:57 +02:00
|
|
|
printf ("Loading the original dictionary...\n");
|
2013-05-08 20:42:35 +02:00
|
|
|
StardictDict *dict = stardict_dict_new (argv[1], &error);
|
|
|
|
if (!dict)
|
|
|
|
{
|
2013-05-10 02:06:57 +02:00
|
|
|
g_printerr ("Error: opening the dictionary failed: %s\n",
|
|
|
|
error->message);
|
2013-05-08 20:42:35 +02:00
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
gsize n_words = stardict_info_get_word_count
|
|
|
|
(stardict_dict_get_info (dict));
|
|
|
|
|
|
|
|
if (n_processes <= 0)
|
|
|
|
{
|
|
|
|
g_printerr ("Error: there must be at least one process\n");
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((gsize) n_processes > n_words * 1024)
|
|
|
|
{
|
|
|
|
n_processes = n_words / 1024;
|
|
|
|
if (!n_processes)
|
|
|
|
n_processes = 1;
|
|
|
|
g_printerr ("Warning: too many processes, reducing to %d\n",
|
|
|
|
n_processes);
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Spawn worker threads to generate pronunciation data
|
2013-05-08 20:42:35 +02:00
|
|
|
static GMutex dict_mutex;
|
|
|
|
|
|
|
|
static GMutex remaining_mutex;
|
|
|
|
static GCond remaining_cond;
|
|
|
|
|
|
|
|
WorkerData *data = g_alloca (sizeof *data * n_processes);
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
GRegex *re_stop = g_regex_new ("[" LINE_SPLITTING_CHARS "][ ?]"
|
|
|
|
"|\\.\\.\\.|[" OTHER_STOP_CHARS "]", G_REGEX_OPTIMIZE, 0, &error);
|
|
|
|
g_assert (re_stop != NULL);
|
|
|
|
|
|
|
|
GRegex *re_acronym = g_regex_new ("(^|\\pZ)(\\p{Lu}+)(?=\\pZ|$)",
|
|
|
|
G_REGEX_OPTIMIZE, 0, &error);
|
|
|
|
g_assert (re_acronym != NULL);
|
|
|
|
|
2013-05-08 20:42:35 +02:00
|
|
|
gint i;
|
|
|
|
for (i = 0; i < n_processes; i++)
|
|
|
|
{
|
2013-05-10 02:06:57 +02:00
|
|
|
data[i].start_entry = n_words * i / n_processes;
|
|
|
|
data[i].end_entry = n_words * (i + 1) / n_processes;
|
2013-05-08 20:42:35 +02:00
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
data[i].total = data[i].remaining =
|
|
|
|
data[i].end_entry - data[i].start_entry;
|
2013-05-08 20:42:35 +02:00
|
|
|
data[i].remaining_mutex = &remaining_mutex;
|
|
|
|
data[i].remaining_cond = &remaining_cond;
|
|
|
|
|
|
|
|
data[i].dict = dict;
|
|
|
|
data[i].dict_mutex = &dict_mutex;
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
data[i].re_stop = re_stop;
|
|
|
|
data[i].re_acronym = re_acronym;
|
|
|
|
|
|
|
|
data[i].cmdline = cmdline;
|
|
|
|
data[i].ignore_acronyms = ignore_acronyms;
|
|
|
|
data[i].main_thread =
|
|
|
|
g_thread_new ("worker", (GThreadFunc) worker, &data[i]);
|
2013-05-08 20:42:35 +02:00
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Loop while the threads still have some work to do and report status
|
2013-05-08 20:42:35 +02:00
|
|
|
g_mutex_lock (&remaining_mutex);
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
gboolean all_finished = TRUE;
|
|
|
|
printf ("\rRetrieving pronunciation... ");
|
|
|
|
for (i = 0; i < n_processes; i++)
|
|
|
|
{
|
2013-05-10 02:06:57 +02:00
|
|
|
printf ("%3u%% ", 100 - data[i].remaining * 100 / data[i].total);
|
2013-05-08 20:42:35 +02:00
|
|
|
if (data[i].remaining)
|
|
|
|
all_finished = FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all_finished)
|
|
|
|
break;
|
|
|
|
g_cond_wait (&remaining_cond, &remaining_mutex);
|
|
|
|
}
|
|
|
|
g_mutex_unlock (&remaining_mutex);
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
putchar ('\n');
|
2013-05-08 20:42:35 +02:00
|
|
|
for (i = 0; i < n_processes; i++)
|
|
|
|
g_thread_join (data[i].main_thread);
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
g_regex_unref (re_stop);
|
|
|
|
g_regex_unref (re_acronym);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Put extended entries into a new dictionary
|
2013-05-10 02:06:57 +02:00
|
|
|
Generator *generator = generator_new (argv[2], &error);
|
|
|
|
if (!generator)
|
2013-05-08 20:42:35 +02:00
|
|
|
{
|
2013-05-10 02:06:57 +02:00
|
|
|
g_printerr ("Error: failed to create the output dictionary: %s\n",
|
|
|
|
error->message);
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
StardictInfo *info = generator->info;
|
|
|
|
stardict_info_copy (info, stardict_dict_get_info (dict));
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// This gets incremented each time an entry is finished
|
2013-05-10 02:06:57 +02:00
|
|
|
info->word_count = 0;
|
|
|
|
|
|
|
|
if (info->same_type_sequence)
|
|
|
|
{
|
|
|
|
gchar *new_sts = g_strconcat ("t", info->same_type_sequence, NULL);
|
|
|
|
g_free (info->same_type_sequence);
|
|
|
|
info->same_type_sequence = new_sts;
|
|
|
|
}
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// Write out all the entries together with the pronunciation
|
2013-05-10 02:06:57 +02:00
|
|
|
for (i = 0; i < n_processes; i++)
|
|
|
|
{
|
|
|
|
StardictIterator *iterator =
|
|
|
|
stardict_iterator_new (dict, data[i].start_entry);
|
|
|
|
|
|
|
|
gpointer *output = data[i].output;
|
|
|
|
while (stardict_iterator_get_offset (iterator) != data[i].end_entry)
|
|
|
|
{
|
|
|
|
printf ("\rCreating a new dictionary... %3lu%%",
|
|
|
|
(gulong) stardict_iterator_get_offset (iterator) * 100
|
|
|
|
/ stardict_dict_get_info (dict)->word_count);
|
|
|
|
|
|
|
|
g_assert (output != NULL);
|
|
|
|
|
|
|
|
gchar *pronunciation = g_strstrip ((gchar *) (output + 1));
|
|
|
|
StardictEntry *entry = stardict_iterator_get_entry (iterator);
|
|
|
|
|
|
|
|
generator_begin_entry (generator);
|
|
|
|
|
|
|
|
if (!strcmp (pronunciation, void_entry))
|
|
|
|
*pronunciation = 0;
|
|
|
|
|
|
|
|
// g_printerr ("%s /%s/\n",
|
|
|
|
// stardict_iterator_get_word (iterator), pronunciation);
|
|
|
|
|
2015-02-26 23:26:52 +01:00
|
|
|
// For the sake of simplicity we fake a new start;
|
|
|
|
// write_fields() only iterates the list in one direction.
|
2013-05-10 02:06:57 +02:00
|
|
|
StardictEntryField field;
|
|
|
|
field.type = 't';
|
|
|
|
field.data = pronunciation;
|
|
|
|
|
|
|
|
GList start_link;
|
|
|
|
start_link.next = entry->fields;
|
|
|
|
start_link.data = &field;
|
|
|
|
|
|
|
|
if (!write_fields (generator, &start_link,
|
|
|
|
info->same_type_sequence != NULL, &error)
|
|
|
|
|| !generator_finish_entry (generator,
|
|
|
|
stardict_iterator_get_word (iterator), &error))
|
|
|
|
{
|
|
|
|
g_printerr ("Error: write failed: %s\n", error->message);
|
|
|
|
exit (EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
g_object_unref (entry);
|
|
|
|
|
|
|
|
gpointer *tmp = output;
|
|
|
|
output = *output;
|
|
|
|
g_free (tmp);
|
|
|
|
|
|
|
|
stardict_iterator_next (iterator);
|
|
|
|
}
|
|
|
|
|
|
|
|
g_assert (output == NULL);
|
|
|
|
g_object_unref (iterator);
|
|
|
|
}
|
|
|
|
|
|
|
|
putchar ('\n');
|
|
|
|
if (!generator_finish (generator, &error))
|
|
|
|
{
|
|
|
|
g_printerr ("Error: failed to write the dictionary: %s\n",
|
|
|
|
error->message);
|
|
|
|
exit (EXIT_FAILURE);
|
2013-05-08 20:42:35 +02:00
|
|
|
}
|
|
|
|
|
2013-05-10 02:06:57 +02:00
|
|
|
generator_free (generator);
|
|
|
|
g_object_unref (dict);
|
|
|
|
g_free (void_entry);
|
2013-05-08 20:42:35 +02:00
|
|
|
return 0;
|
|
|
|
}
|