Move dictionary generation to its own module

This commit is contained in:
Přemysl Eric Janouch 2013-05-09 00:49:45 +02:00
parent 28321b72e9
commit bcf933fb9e
7 changed files with 332 additions and 108 deletions

View File

@ -19,10 +19,10 @@ clean:
sdcli: src/sdcli.o src/stardict.o sdcli: src/sdcli.o src/stardict.o
$(CC) $^ -o $@ $(LDFLAGS) $(CC) $^ -o $@ $(LDFLAGS)
add-pronunciation: src/add-pronunciation.o src/stardict.o add-pronunciation: src/add-pronunciation.o src/stardict.o src/generator.o
$(CC) $^ -o $@ $(LDFLAGS) $(CC) $^ -o $@ $(LDFLAGS)
test-stardict: src/test-stardict.o src/stardict.o test-stardict: src/test-stardict.o src/stardict.o src/generator.o
$(CC) $^ -o $@ $(LDFLAGS) $(CC) $^ -o $@ $(LDFLAGS)
test: $(tests) test: $(tests)

View File

@ -29,6 +29,8 @@
#include <gio/gio.h> #include <gio/gio.h>
#include "stardict.h" #include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
// --- Pronunciation generator ------------------------------------------------- // --- Pronunciation generator -------------------------------------------------

199
src/generator.c Normal file
View File

@ -0,0 +1,199 @@
/*
* generator.c: dictionary generator
*
* Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
* All rights reserved.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include <gio/gio.h>
#include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
/**
 * Opens an output stream for the file named by @a base immediately followed
 * by @a suffix, using g_file_replace().
 *
 * @param base    Leading part of the path
 * @param suffix  Text appended verbatim to @a base (e.g. an extension)
 * @param error   Return location for an error, or NULL
 * @return The stream returned by g_file_replace(); callers treat NULL
 *         as failure
 */
static GFileOutputStream *
replace_file_by_suffix (const gchar *base, const gchar *suffix, GError **error)
{
	gchar *path = g_strconcat (base, suffix, NULL);
	GFile *target = g_file_new_for_path (path);
	g_free (path);

	GFileOutputStream *out = g_file_replace (target,
		NULL /* etag */, FALSE /* make_backup */, G_FILE_CREATE_NONE,
		NULL /* cancellable */, error);

	// The stream keeps working without our reference to the GFile
	g_object_unref (target);
	return out;
}
/**
 * Creates a StarDict dictionary generator for the specified base path.
 *
 * Opens "<base>.dict" and "<base>.idx" for writing, wraps both in big-endian
 * data output streams, and remembers "<base>.ifo" as the info file path.
 *
 * @param base   Path of the dictionary without any extension
 * @param error  Return location for an error, or NULL
 * @return A new generator, or NULL if either file could not be opened
 */
Generator *
generator_new (const gchar *base, GError **error)
{
	Generator *gen = g_malloc0 (sizeof *gen);
	gen->info = g_malloc0 (sizeof *gen->info);
	gen->info->path = g_strconcat (base, ".ifo", NULL);

	// The index file is only attempted once the dictionary file is open
	gen->dict_stream = replace_file_by_suffix (base, ".dict", error);
	if (gen->dict_stream)
		gen->idx_stream = replace_file_by_suffix (base, ".idx", error);

	if (!gen->dict_stream || !gen->idx_stream)
	{
		if (gen->dict_stream)
			g_object_unref (gen->dict_stream);

		stardict_info_free (gen->info);
		g_free (gen);
		return NULL;
	}

	GOutputStream *dict_base = G_OUTPUT_STREAM (gen->dict_stream);
	gen->dict_data = g_data_output_stream_new (dict_base);
	g_data_output_stream_set_byte_order
		(gen->dict_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);

	GOutputStream *idx_base = G_OUTPUT_STREAM (gen->idx_stream);
	gen->idx_data = g_data_output_stream_new (idx_base);
	g_data_output_stream_set_byte_order
		(gen->idx_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);

	return gen;
}
/**
 * Finishes the dictionary and writes the .ifo file.
 *
 * Records the current position of the .idx stream as the index file size,
 * sets the index offset width to 32 bits, closes the .dict and .idx streams,
 * and finally writes the fields of @c self->info to @c self->info->path.
 * String fields that are NULL and numeric fields that are zero are omitted.
 *
 * @param self   The generator whose @c info has been filled in
 * @param error  Return location for an error, or NULL
 * @return TRUE on success, FALSE if a stream could not be closed or the
 *         .ifo file could not be written
 */
gboolean
generator_finish (Generator *self, GError **error)
{
	// The position has to be read while the stream is still open
	self->info->idx_filesize = g_seekable_tell (G_SEEKABLE (self->idx_stream));
	self->info->idx_offset_bits = 32;

	// Close the streams before allocating the .ifo buffer so that
	// the failure path has nothing to release
	if (!g_output_stream_close
			(G_OUTPUT_STREAM (self->dict_stream), NULL, error)
	 || !g_output_stream_close
			(G_OUTPUT_STREAM (self->idx_stream), NULL, error))
		return FALSE;

	GString *ifo_contents = g_string_new ("StarDict's dict ifo file\n");

	// The version is not part of the key table, so it is written by hand
	if (self->info->version == SD_VERSION_3_0_0)
		g_string_append (ifo_contents, "version=3.0.0\n");
	else
		g_string_append (ifo_contents, "version=2.4.2\n");

	guint i;
	for (i = 0; i < _stardict_ifo_keys_length; i++)
	{
		const struct stardict_ifo_key *key = &_stardict_ifo_keys[i];
		if (key->type == IFO_STRING)
		{
			const gchar *value = G_STRUCT_MEMBER (const gchar *,
				self->info, key->offset);
			if (value)
				g_string_append_printf (ifo_contents, "%s=%s\n",
					key->name, value);
		}
		else
		{
			gulong value = G_STRUCT_MEMBER (gulong,
				self->info, key->offset);
			if (value)
				g_string_append_printf (ifo_contents, "%s=%lu\n",
					key->name, value);
		}
	}

	gboolean success = g_file_set_contents (self->info->path,
		ifo_contents->str, -1, error);
	g_string_free (ifo_contents, TRUE);
	return success;
}
/**
 * Starts writing a dictionary entry.
 *
 * Remembers the current position of the .dict stream in @c entry_mark so
 * that generator_finish_entry() can work out the entry's offset and size.
 *
 * @param self  The generator
 */
void
generator_begin_entry (Generator *self)
{
	GSeekable *dict = G_SEEKABLE (self->dict_stream);
	self->entry_mark = g_seekable_tell (dict);
}
/**
 * Writes the data type of an entry field as a single byte.  Meant for
 * dictionaries that have no sametypesequence.
 *
 * @param self   The generator
 * @param type   Type character of the field that follows
 * @param error  Return location for an error, or NULL
 * @return The result of the underlying byte write
 */
gboolean
generator_write_type (Generator *self, gchar type, GError **error)
{
	GDataOutputStream *out = self->dict_data;
	return g_data_output_stream_put_byte (out, type, NULL, error);
}
/**
 * Writes a raw binary field into the dictionary data.
 *
 * @param self       The generator
 * @param data       Bytes to write
 * @param data_size  Number of bytes in @a data
 * @param mark_end   When TRUE, @a data_size is written first as a 32-bit
 *                   unsigned integer, so the field carries its own length
 * @param error      Return location for an error, or NULL
 * @return TRUE if everything was written, FALSE otherwise
 */
gboolean
generator_write_raw (Generator *self,
	gpointer data, gsize data_size, gboolean mark_end, GError **error)
{
	if (mark_end)
	{
		// Length prefix goes through the data stream for its byte order
		if (!g_data_output_stream_put_uint32
				(self->dict_data, data_size, NULL, error))
			return FALSE;
	}

	gsize n_written;
	GOutputStream *dict = G_OUTPUT_STREAM (self->dict_stream);
	if (!g_output_stream_write_all (dict,
			data, data_size, &n_written, NULL, error))
		return FALSE;

	return TRUE;
}
/**
 * Writes a text string into the dictionary data.
 *
 * @param self      The generator
 * @param s         The string to write
 * @param mark_end  When TRUE, a terminating NUL byte is written after
 *                  the string
 * @param error     Return location for an error, or NULL
 * @return TRUE if everything was written, FALSE otherwise
 */
gboolean
generator_write_string (Generator *self,
	const gchar *s, gboolean mark_end, GError **error)
{
	GDataOutputStream *out = self->dict_data;

	if (!g_data_output_stream_put_string (out, s, NULL, error))
		return FALSE;
	if (!mark_end)
		return TRUE;

	if (!g_data_output_stream_put_byte (out, '\0', NULL, error))
		return FALSE;
	return TRUE;
}
/**
 * Finishes the current entry and writes it into the index.
 *
 * The index record consists of the NUL-terminated @a word, the offset
 * remembered by generator_begin_entry(), and the number of bytes written to
 * the .dict stream since then; both numbers are stored as 32-bit values.
 * On success the word count in @c self->info is incremented.
 *
 * @param self   The generator
 * @param word   The headword of the entry
 * @param error  Return location for an error, or NULL
 * @return TRUE if the index record was written, FALSE otherwise
 */
gboolean
generator_finish_entry (Generator *self, const gchar *word, GError **error)
{
	GDataOutputStream *idx = self->idx_data;

	// Index writes do not move the .dict position, so it can be read first
	goffset start = self->entry_mark;
	goffset length = g_seekable_tell (G_SEEKABLE (self->dict_stream)) - start;

	if (!g_data_output_stream_put_string (idx, word, NULL, error))
		return FALSE;
	if (!g_data_output_stream_put_byte (idx, '\0', NULL, error))
		return FALSE;
	if (!g_data_output_stream_put_uint32 (idx, start, NULL, error))
		return FALSE;
	if (!g_data_output_stream_put_uint32 (idx, length, NULL, error))
		return FALSE;

	self->info->word_count++;
	return TRUE;
}
/**
 * Destroys the generator object, freeing up system resources.
 *
 * Releases the info structure, drops the references to the data stream
 * wrappers and to the underlying file streams, and finally frees the
 * generator itself.  The pointer must not be used afterwards.
 *
 * This does not write the .ifo file; call generator_finish() for that.
 *
 * @param self  A generator obtained from generator_new()
 */
void
generator_free (Generator *self)
{
	stardict_info_free (self->info);

	// Wrappers first, then the file streams they wrap
	g_object_unref (self->dict_data);
	g_object_unref (self->idx_data);

	g_object_unref (self->dict_stream);
	g_object_unref (self->idx_stream);

	// The structure was allocated in generator_new() and was leaked before
	g_free (self);
}

56
src/generator.h Normal file
View File

@ -0,0 +1,56 @@
/*
* generator.h: dictionary generator
*
* Nothing fancy. Just something moved out of the `stardict' test to be
* conveniently reused by the included tools.
*
* Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
* All rights reserved.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#ifndef GENERATOR_H
#define GENERATOR_H
/**
 * Simplifies the task of creating a StarDict dictionary.
 *
 * Create one with generator_new(), fill in @c info, write the entries, call
 * generator_finish() to produce the .ifo file and then generator_free().
 */
typedef struct generator Generator;
struct generator
{
StardictInfo * info; //!< Dictionary information, fill it in
goffset entry_mark; //!< Start offset of the entry, set by generator_begin_entry()
GFileOutputStream * dict_stream; //!< Dictionary (.dict) file stream
GDataOutputStream * dict_data; //!< Big-endian data wrapper of dict_stream
GFileOutputStream * idx_stream; //!< Index (.idx) file stream
GDataOutputStream * idx_data; //!< Big-endian data wrapper of idx_stream
};
/** Opens "<base>.dict" and "<base>.idx" for writing; returns NULL on failure. */
Generator *generator_new (const gchar *base, GError **error);
/** Closes both streams and writes "<base>.ifo" from the generator's info. */
gboolean generator_finish (Generator *self, GError **error);
/** Releases the info structure and drops the references to all streams. */
void generator_free (Generator *self);
/** Remembers the current .dict position as the start of a new entry. */
void generator_begin_entry (Generator *self);
/** Writes a one-byte field type, for use without sametypesequence. */
gboolean generator_write_type (Generator *self, gchar type, GError **error);
/** Writes a binary field; with mark_end, a 32-bit size is written first. */
gboolean generator_write_raw (Generator *self,
gpointer data, gsize data_size, gboolean mark_end, GError **error);
/** Writes a string field; with mark_end, a NUL byte is written after it. */
gboolean generator_write_string (Generator *self,
const gchar *s, gboolean mark_end, GError **error);
/** Writes the index record for the entry begun by generator_begin_entry(). */
gboolean generator_finish_entry (Generator *self,
const gchar *word, GError **error);
#endif /* ! GENERATOR_H */

View File

@ -62,4 +62,20 @@ struct stardict_synonym_entry
guint32 original_word; //!< The original word's index guint32 original_word; //!< The original word's index
}; };
/**
 * Describes one key of an .ifo file: its name, the type of its value, and
 * where the value is stored within StardictInfo.
 */
struct stardict_ifo_key
{
const gchar *name; //!< Name of the key
enum {
IFO_STRING, //!< A @code gchar * @endcode value
IFO_NUMBER //!< A @code gulong @endcode value
} type; //!< Type of the value
size_t offset; //!< Offset within StardictInfo
};
/** Lists all the .ifo keys that map to fields of StardictInfo. */
extern const struct stardict_ifo_key _stardict_ifo_keys[];
/** Denotes the number of elements in _stardict_ifo_keys. */
extern gsize _stardict_ifo_keys_length;
#endif /* ! STARDICTPRIVATE_H */ #endif /* ! STARDICTPRIVATE_H */

View File

@ -235,6 +235,23 @@ stardict_info_free (StardictInfo *sdi)
#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) } #define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) }
// Table of the .ifo keys shared by the loader and the dictionary generator.
// Each row gives the key name as it appears in the file, the value type, and
// the StardictInfo member that holds the value.  The generator walks the
// table front to back, so the row order is also the order of keys on output.
const struct stardict_ifo_key _stardict_ifo_keys[] =
{
DEFINE_IFO_KEY ("bookname", STRING, book_name),
DEFINE_IFO_KEY ("wordcount", NUMBER, word_count),
DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count),
DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize),
DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits),
DEFINE_IFO_KEY ("author", STRING, author),
DEFINE_IFO_KEY ("email", STRING, email),
DEFINE_IFO_KEY ("website", STRING, website),
DEFINE_IFO_KEY ("description", STRING, description),
DEFINE_IFO_KEY ("date", STRING, date),
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence)
};
// Exported separately because other files see only an incomplete array type
gsize _stardict_ifo_keys_length = G_N_ELEMENTS (_stardict_ifo_keys);
static gboolean static gboolean
load_ifo (StardictInfo *sti, const gchar *path, GError **error) load_ifo (StardictInfo *sti, const gchar *path, GError **error)
{ {
@ -263,45 +280,24 @@ load_ifo (StardictInfo *sti, const gchar *path, GError **error)
goto error; goto error;
} }
static const struct
{
const gchar *name;
enum { IFO_STRING, IFO_NUMBER } type;
size_t offset;
}
ifo_keys[] =
{
DEFINE_IFO_KEY ("bookname", STRING, book_name),
DEFINE_IFO_KEY ("wordcount", NUMBER, word_count),
DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count),
DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize),
DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits),
DEFINE_IFO_KEY ("author", STRING, author),
DEFINE_IFO_KEY ("email", STRING, email),
DEFINE_IFO_KEY ("website", STRING, website),
DEFINE_IFO_KEY ("description", STRING, description),
DEFINE_IFO_KEY ("date", STRING, date),
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence)
};
gint ret; gint ret;
while ((ret = ifo_reader_read (&ir)) == 1) while ((ret = ifo_reader_read (&ir)) == 1)
{ {
guint i; guint i;
for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) for (i = 0; i < _stardict_ifo_keys_length; i++)
if (!strcmp (ir.key, ifo_keys[i].name)) if (!strcmp (ir.key, _stardict_ifo_keys[i].name))
break; break;
if (i == G_N_ELEMENTS (ifo_keys)) if (i == _stardict_ifo_keys_length)
{ {
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA,
"%s: unknown key, ignoring: %s", path, ir.key); "%s: unknown key, ignoring: %s", path, ir.key);
continue; continue;
} }
if (ifo_keys[i].type == IFO_STRING) if (_stardict_ifo_keys[i].type == IFO_STRING)
{ {
G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset) G_STRUCT_MEMBER (gchar *, sti, _stardict_ifo_keys[i].offset)
= g_strdup (ir.value); = g_strdup (ir.value);
continue; continue;
} }
@ -316,7 +312,7 @@ load_ifo (StardictInfo *sti, const gchar *path, GError **error)
goto error; goto error;
} }
G_STRUCT_MEMBER (gulong, sti, ifo_keys[i].offset) = wc; G_STRUCT_MEMBER (gulong, sti, _stardict_ifo_keys[i].offset) = wc;
} }
if (ret == -1) if (ret == -1)
@ -361,9 +357,10 @@ error:
if (!ret_val) if (!ret_val)
{ {
guint i; guint i;
for (i = 0; i < G_N_ELEMENTS (ifo_keys); i++) for (i = 0; i < _stardict_ifo_keys_length; i++)
if (ifo_keys[i].type == IFO_STRING) if (_stardict_ifo_keys[i].type == IFO_STRING)
g_free (G_STRUCT_MEMBER (gchar *, sti, ifo_keys[i].offset)); g_free (G_STRUCT_MEMBER (gchar *, sti,
_stardict_ifo_keys[i].offset));
} }
else else
sti->path = g_strdup (path); sti->path = g_strdup (path);

View File

@ -1,5 +1,5 @@
/* /*
* stardict.c: StarDict API test * test-stardict.c: StarDict API test
* *
* Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com> * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
* All rights reserved. * All rights reserved.
@ -26,6 +26,8 @@
#include <gio/gio.h> #include <gio/gio.h>
#include "stardict.h" #include "stardict.h"
#include "stardict-private.h"
#include "generator.h"
// --- Utilities --------------------------------------------------------------- // --- Utilities ---------------------------------------------------------------
@ -184,96 +186,48 @@ dictionary_create (void)
Dictionary *dict = g_malloc (sizeof *dict); Dictionary *dict = g_malloc (sizeof *dict);
dict->tmp_dir = g_file_new_for_path (tmp_dir_path); dict->tmp_dir = g_file_new_for_path (tmp_dir_path);
dict->ifo_file = g_file_get_child (dict->tmp_dir, "test.ifo");
static const gint dictionary_size = 8; gchar *base = g_build_filename (tmp_dir_path, "test", NULL);
Generator *generator = generator_new (base, &error);
g_free (base);
if (!generator)
g_error ("Failed to create a dictionary: %s", error->message);
static const guint dictionary_size = 8;
dict->data = generate_dictionary_data (dictionary_size); dict->data = generate_dictionary_data (dictionary_size);
GFile *dict_file = g_file_get_child (dict->tmp_dir, "test.dict");
GFile *idx_file = g_file_get_child (dict->tmp_dir, "test.idx");
GFileOutputStream *dict_stream = g_file_replace (dict_file, generator->info->version = SD_VERSION_3_0_0;
NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error); generator->info->book_name = g_strdup ("Test Book");
if (!dict_stream) generator->info->author = g_strdup ("Lyra Heartstrings");
g_error ("Failed to create the .dict file: %s", error->message); generator->info->email = g_strdup ("lyra@equestria.net");
generator->info->description = g_strdup ("Test dictionary");
generator->info->date = g_strdup ("21.12.2012");
generator->info->same_type_sequence = g_strdup ("mX");
GFileOutputStream *idx_stream = g_file_replace (idx_file, guint i;
NULL, FALSE, G_FILE_CREATE_NONE, NULL, &error);
if (!idx_stream)
g_error ("Failed to create the .idx file: %s", error->message);
GDataOutputStream *dict_data
= g_data_output_stream_new (G_OUTPUT_STREAM (dict_stream));
g_data_output_stream_set_byte_order
(dict_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
GDataOutputStream *idx_data
= g_data_output_stream_new (G_OUTPUT_STREAM (idx_stream));
g_data_output_stream_set_byte_order
(idx_data, G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN);
gint i;
gsize written;
for (i = 0; i < dictionary_size; i++) for (i = 0; i < dictionary_size; i++)
{ {
TestEntry *te = &g_array_index (dict->data, TestEntry, i); TestEntry *te = &g_array_index (dict->data, TestEntry, i);
goffset offset = g_seekable_tell (G_SEEKABLE (dict_stream));
if (!g_data_output_stream_put_string (dict_data, generator_begin_entry (generator);
te->meaning, NULL, &error) if (!generator_write_string (generator, te->meaning, TRUE, &error)
|| !g_data_output_stream_put_byte (dict_data, '\0', NULL, &error) || !generator_write_raw (generator,
|| !g_output_stream_write_all (G_OUTPUT_STREAM (dict_stream), te->data, te->data_size, FALSE, &error))
te->data, te->data_size, &written, NULL, &error)) g_error ("Write to dictionary data failed: %s", error->message);
g_error ("Write to dictionary failed: %s", error->message);
if (!g_data_output_stream_put_string (idx_data, if (!generator_finish_entry (generator, te->word, &error))
te->word, NULL, &error)
|| !g_data_output_stream_put_byte (idx_data, '\0', NULL, &error)
|| !g_data_output_stream_put_uint32 (idx_data, offset, NULL, &error)
|| !g_data_output_stream_put_uint32 (idx_data,
g_seekable_tell (G_SEEKABLE (dict_stream)) - offset, NULL, &error))
g_error ("Write to index failed: %s", error->message); g_error ("Write to index failed: %s", error->message);
} }
gint index_size = g_seekable_tell (G_SEEKABLE (idx_stream)); if (!generator_finish (generator, &error))
g_error ("Failed to finish the dictionary: %s", error->message);
if (!g_output_stream_close (G_OUTPUT_STREAM (dict_stream), NULL, &error))
g_error ("Failed to close the .dict file: %s", error->message);
if (!g_output_stream_close (G_OUTPUT_STREAM (idx_stream), NULL, &error))
g_error ("Failed to close the .idx file: %s", error->message);
g_object_unref (dict_data);
g_object_unref (idx_data);
g_object_unref (dict_stream);
g_object_unref (idx_stream);
gchar *ifo_contents = g_strdup_printf
("StarDict's dict ifo file\n"
"version=3.0.0\n"
"bookname=Test Book\n"
"wordcount=%d\n"
"idxfilesize=%d\n"
"idxoffsetbits=32\n"
"author=Lyra Heartstrings\n"
"email=lyra@equestria.net\n"
"website=http://equestria.net\n"
"description=Test dictionary\n"
"date=21.12.2012\n"
"sametypesequence=mX\n",
dictionary_size, index_size);
g_object_unref (dict_file);
g_object_unref (idx_file);
dict->ifo_file = g_file_get_child (dict->tmp_dir, "test.ifo");
if (!g_file_replace_contents (dict->ifo_file,
ifo_contents, strlen (ifo_contents),
NULL, FALSE, G_FILE_CREATE_NONE, NULL, NULL, &error))
g_error ("Failed to create the .ifo file: %s", error->message);
g_free (ifo_contents);
g_message ("Successfully created a test dictionary in %s", tmp_dir_path); g_message ("Successfully created a test dictionary in %s", tmp_dir_path);
g_free (tmp_dir_path);
generator_free (generator);
g_free (tmp_dir_path);
return dict; return dict;
} }