You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1384 lines
36 KiB
1384 lines
36 KiB
/* |
|
* stardict.c: StarDict API |
|
* |
|
* Copyright (c) 2013 - 2016, Přemysl Eric Janouch <p@janouch.name> |
|
* |
|
* Permission to use, copy, modify, and/or distribute this software for any |
|
* purpose with or without fee is hereby granted. |
|
* |
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
|
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
* |
|
*/ |
|
|
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <locale.h> |
|
|
|
#include <glib.h> |
|
#include <gio/gio.h> |
|
#include <glib/gi18n.h> |
|
|
|
#include <unicode/ucol.h> |
|
#include <unicode/ustring.h> |
|
#include <unicode/ubrk.h> |
|
|
|
#include "stardict.h" |
|
#include "stardict-private.h" |
|
#include "dictzip-input-stream.h" |
|
#include "utils.h" |
|
|
|
#if ! GLIB_CHECK_VERSION (2, 40, 0) |
|
#define g_info g_debug |
|
#endif |
|
|
|
|
|
// --- Utilities --------------------------------------------------------------- |
|
|
|
/// String compare function used for StarDict indexes. |
|
static inline gint |
|
stardict_strcmp (const gchar *s1, const gchar *s2) |
|
{ |
|
gint a = g_ascii_strcasecmp (s1, s2); |
|
return a ? a : strcmp (s1, s2); |
|
} |
|
|
|
// --- Errors ------------------------------------------------------------------ |
|
|
|
GQuark |
|
stardict_error_quark (void) |
|
{ |
|
return g_quark_from_static_string ("stardict-error-quark"); |
|
} |
|
|
|
// --- IFO reader -------------------------------------------------------------- |
|
|
|
/// Helper class for reading .ifo files. |
|
typedef struct ifo_reader IfoReader; |
|
|
|
struct ifo_reader |
|
{ |
|
gchar * data; ///< File data terminated with \0 |
|
gchar * data_end; ///< Where the final \0 char. is |
|
|
|
gchar * start; ///< Start of the current token |
|
|
|
gchar * key; ///< The key (points into @a data) |
|
gchar * value; ///< The value (points into @a data) |
|
}; |
|
|
|
static gboolean |
|
ifo_reader_init (IfoReader *ir, const gchar *path, GError **error) |
|
{ |
|
gsize length; |
|
gchar *contents; |
|
if (!g_file_get_contents (path, &contents, &length, error)) |
|
return FALSE; |
|
|
|
static const char first_line[] = "StarDict's dict ifo file\n"; |
|
if (length < sizeof first_line - 1 |
|
|| strncmp (contents, first_line, sizeof first_line - 1)) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("invalid header format")); |
|
return FALSE; |
|
} |
|
|
|
ir->data = contents; |
|
ir->start = contents + sizeof first_line - 1; |
|
ir->data_end = contents + length; |
|
return TRUE; |
|
} |
|
|
|
static void |
|
ifo_reader_free (IfoReader *ir) |
|
{ |
|
g_free (ir->data); |
|
} |
|
|
|
static gint |
|
ifo_reader_read (IfoReader *ir) |
|
{ |
|
ir->key = NULL; |
|
ir->value = NULL; |
|
|
|
gchar *p; |
|
for (p = ir->start; p < ir->data_end; p++) |
|
{ |
|
if (*p == '\n') |
|
{ |
|
if (!ir->key) |
|
return -1; |
|
|
|
*p = 0; |
|
ir->value = ir->start; |
|
ir->start = p + 1; |
|
return 1; |
|
} |
|
|
|
if (*p == '=') |
|
{ |
|
if (p == ir->start) |
|
return -1; |
|
|
|
*p = 0; |
|
ir->key = ir->start; |
|
ir->start = p + 1; |
|
} |
|
} |
|
|
|
if (!ir->key) |
|
{ |
|
if (p != ir->start) |
|
return -1; |
|
return 0; |
|
} |
|
|
|
ir->value = ir->start; |
|
ir->start = p; |
|
return 1; |
|
} |
|
|
|
// --- StardictInfo ------------------------------------------------------------ |
|
|
|
/// Return the filesystem path for the dictionary. |
|
const gchar * |
|
stardict_info_get_path (StardictInfo *sdi) |
|
{ |
|
return sdi->path; |
|
} |
|
|
|
/// Return the name of the dictionary. |
|
const gchar * |
|
stardict_info_get_book_name (StardictInfo *sdi) |
|
{ |
|
return sdi->book_name; |
|
} |
|
|
|
/// Return the word count of the dictionary. Note that this information comes |
|
/// from the .ifo file, while the dictionary could successfully load with |
|
/// a different count of word entries. |
|
gsize |
|
stardict_info_get_word_count (StardictInfo *sdi) |
|
{ |
|
return sdi->word_count; |
|
} |
|
|
|
/// Destroy the dictionary info object. |
|
void |
|
stardict_info_free (StardictInfo *sdi) |
|
{ |
|
g_free (sdi->path); |
|
g_free (sdi->book_name); |
|
g_free (sdi->author); |
|
g_free (sdi->email); |
|
g_free (sdi->website); |
|
g_free (sdi->description); |
|
g_free (sdi->date); |
|
g_free (sdi->same_type_sequence); |
|
|
|
g_free (sdi->collation); |
|
g_free (sdi); |
|
} |
|
|
|
#define DEFINE_IFO_KEY(n, t, e) { (n), IFO_##t, offsetof (StardictInfo, e) } |
|
|
|
const struct stardict_ifo_key _stardict_ifo_keys[] = |
|
{ |
|
DEFINE_IFO_KEY ("bookname", STRING, book_name), |
|
DEFINE_IFO_KEY ("wordcount", NUMBER, word_count), |
|
DEFINE_IFO_KEY ("synwordcount", NUMBER, syn_word_count), |
|
DEFINE_IFO_KEY ("idxfilesize", NUMBER, idx_filesize), |
|
DEFINE_IFO_KEY ("idxoffsetbits", NUMBER, idx_offset_bits), |
|
DEFINE_IFO_KEY ("author", STRING, author), |
|
DEFINE_IFO_KEY ("email", STRING, email), |
|
DEFINE_IFO_KEY ("website", STRING, website), |
|
DEFINE_IFO_KEY ("description", STRING, description), |
|
DEFINE_IFO_KEY ("date", STRING, date), |
|
DEFINE_IFO_KEY ("sametypesequence", STRING, same_type_sequence), |
|
|
|
// These are our own custom |
|
DEFINE_IFO_KEY ("collation", STRING, collation) |
|
}; |
|
|
|
gsize _stardict_ifo_keys_length = G_N_ELEMENTS (_stardict_ifo_keys); |
|
|
|
/// Copy the contents of one StardictInfo object into another. Ignores path. |
|
void |
|
stardict_info_copy (StardictInfo *dest, const StardictInfo *src) |
|
{ |
|
dest->version = src->version; |
|
|
|
guint i; |
|
for (i = 0; i < _stardict_ifo_keys_length; i++) |
|
{ |
|
const struct stardict_ifo_key *key = &_stardict_ifo_keys[i]; |
|
if (key->type == IFO_STRING) |
|
{ |
|
gchar **p = &G_STRUCT_MEMBER (gchar *, dest, key->offset); |
|
gchar *q = G_STRUCT_MEMBER (gchar *, src, key->offset); |
|
|
|
g_free (*p); |
|
*p = q ? g_strdup (q) : NULL; |
|
} |
|
else |
|
G_STRUCT_MEMBER (gulong, dest, key->offset) = |
|
G_STRUCT_MEMBER (gulong, src, key->offset); |
|
} |
|
} |
|
|
|
static gboolean |
|
load_ifo (StardictInfo *sti, const gchar *path, GError **error) |
|
{ |
|
IfoReader ir; |
|
if (!ifo_reader_init (&ir, path, error)) |
|
return FALSE; |
|
|
|
gboolean ret_val = FALSE; |
|
memset (sti, 0, sizeof *sti); |
|
|
|
if (ifo_reader_read (&ir) != 1 || strcmp (ir.key, "version")) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("version not specified")); |
|
goto error; |
|
} |
|
|
|
if (!strcmp (ir.value, "2.4.2")) |
|
sti->version = SD_VERSION_2_4_2; |
|
else if (!strcmp (ir.value, "3.0.0")) |
|
sti->version = SD_VERSION_3_0_0; |
|
else |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s: %s", path, _("invalid version"), ir.value); |
|
goto error; |
|
} |
|
|
|
gint ret; |
|
while ((ret = ifo_reader_read (&ir)) == 1) |
|
{ |
|
guint i; |
|
for (i = 0; i < _stardict_ifo_keys_length; i++) |
|
if (!strcmp (ir.key, _stardict_ifo_keys[i].name)) |
|
break; |
|
|
|
if (i == _stardict_ifo_keys_length) |
|
{ |
|
g_info ("%s: %s: %s", path, _("unknown key, ignoring"), ir.key); |
|
continue; |
|
} |
|
|
|
if (!g_utf8_validate (ir.value, -1, NULL)) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("invalid encoding, must be valid UTF-8")); |
|
goto error; |
|
} |
|
|
|
if (_stardict_ifo_keys[i].type == IFO_STRING) |
|
{ |
|
G_STRUCT_MEMBER (gchar *, sti, _stardict_ifo_keys[i].offset) |
|
= g_strdup (ir.value); |
|
continue; |
|
} |
|
|
|
// Otherwise it has to be IFO_NUMBER |
|
gchar *end; |
|
gulong wc = strtol (ir.value, &end, 10); |
|
if (*end) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("invalid integer")); |
|
goto error; |
|
} |
|
|
|
G_STRUCT_MEMBER (gulong, sti, _stardict_ifo_keys[i].offset) = wc; |
|
} |
|
|
|
if (ret == -1) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("option format error")); |
|
goto error; |
|
} |
|
|
|
// FIXME check for zeros, don't assume that 0 means "not set" |
|
if (!sti->book_name || !*sti->book_name) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("no book name specified")); |
|
goto error; |
|
} |
|
if (!sti->word_count) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("word count not specified")); |
|
goto error; |
|
} |
|
if (!sti->idx_filesize) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s", path, _("index file size not specified")); |
|
goto error; |
|
} |
|
|
|
if (!sti->idx_offset_bits) |
|
sti->idx_offset_bits = 32; |
|
else if (sti->idx_offset_bits != 32 && sti->idx_offset_bits != 64) |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
"%s: %s: %lu", path, _("invalid index offset bits"), |
|
sti->idx_offset_bits); |
|
goto error; |
|
} |
|
|
|
ret_val = TRUE; |
|
|
|
error: |
|
if (!ret_val) |
|
{ |
|
guint i; |
|
for (i = 0; i < _stardict_ifo_keys_length; i++) |
|
if (_stardict_ifo_keys[i].type == IFO_STRING) |
|
g_free (G_STRUCT_MEMBER (gchar *, sti, |
|
_stardict_ifo_keys[i].offset)); |
|
} |
|
else |
|
sti->path = g_strdup (path); |
|
|
|
ifo_reader_free (&ir); |
|
return ret_val; |
|
} |
|
|
|
/// List all dictionary files located in a path. |
|
/// @return GList<StardictInfo *>. Deallocate the list with: |
|
/// @code |
|
/// g_list_free_full ((GDestroyNotify) stardict_info_free); |
|
/// @endcode |
|
GList * |
|
stardict_list_dictionaries (const gchar *path) |
|
{ |
|
GPatternSpec *ps = g_pattern_spec_new ("*.ifo"); |
|
GDir *dir = g_dir_open (path, 0, NULL); |
|
g_return_val_if_fail (dir != NULL, NULL); |
|
|
|
GList *dicts = NULL; |
|
const gchar *name; |
|
while ((name = g_dir_read_name (dir))) |
|
{ |
|
if (!g_pattern_match_string (ps, name)) |
|
continue; |
|
|
|
gchar *filename = g_build_filename (path, name, NULL); |
|
StardictInfo *ifo = g_new (StardictInfo, 1); |
|
if (load_ifo (ifo, filename, NULL)) |
|
dicts = g_list_append (dicts, ifo); |
|
else |
|
g_free (ifo); |
|
g_free (filename); |
|
} |
|
g_dir_close (dir); |
|
g_pattern_spec_free (ps); |
|
return dicts; |
|
} |
|
|
|
// --- StardictDict ------------------------------------------------------------ |
|
|
|
struct stardict_dict_private |
|
{ |
|
StardictInfo * info; //!< General information about the dict |
|
GArray * index; //!< Word index |
|
GArray * synonyms; //!< Synonyms |
|
GStringChunk * string_allocator; //!< String allocator (index+synonyms) |
|
|
|
// The collated indexes are only permutations of their normal selves. |
|
|
|
UCollator * collator; //!< ICU index collator |
|
UCollator * collator_root; //!< ICU fallback root collator |
|
GArray * collated_synonyms; //!< Sorted indexes into @a synonyms |
|
|
|
// There are currently three ways the dictionary data can be read: |
|
// through mmap(), from a seekable GInputStream, or from a preallocated |
|
// chunk of memory that the whole dictionary has been decompressed into. |
|
// |
|
// It wouldn't be unreasonable to drop the support for regular gzip files. |
|
|
|
GInputStream * dict_stream; //!< Dictionary input stream handle |
|
GMappedFile * mapped_dict; //!< Dictionary memory map handle |
|
gpointer dict; //!< Dictionary data |
|
gsize dict_length; //!< Length of the dict data in bytes |
|
}; |
|
|
|
G_DEFINE_TYPE_WITH_CODE (StardictDict, stardict_dict, G_TYPE_OBJECT, |
|
G_ADD_PRIVATE (StardictDict)) |
|
|
|
static void |
|
stardict_dict_finalize (GObject *self) |
|
{ |
|
StardictDictPrivate *priv = STARDICT_DICT (self)->priv; |
|
|
|
if (priv->info) |
|
stardict_info_free (priv->info); |
|
|
|
g_array_free (priv->index, TRUE); |
|
g_array_free (priv->synonyms, TRUE); |
|
g_string_chunk_free (priv->string_allocator); |
|
|
|
if (priv->collator) |
|
ucol_close (priv->collator); |
|
if (priv->collator_root) |
|
ucol_close (priv->collator_root); |
|
if (priv->collated_synonyms) |
|
g_array_free (priv->collated_synonyms, TRUE); |
|
|
|
if (priv->mapped_dict) |
|
g_mapped_file_unref (priv->mapped_dict); |
|
else if (priv->dict_stream) |
|
g_object_unref (priv->dict_stream); |
|
else |
|
g_free (priv->dict); |
|
|
|
G_OBJECT_CLASS (stardict_dict_parent_class)->finalize (self); |
|
} |
|
|
|
static void |
|
stardict_dict_class_init (StardictDictClass *klass) |
|
{ |
|
G_OBJECT_CLASS (klass)->finalize = stardict_dict_finalize; |
|
} |
|
|
|
static void |
|
stardict_dict_init (StardictDict *self) |
|
{ |
|
self->priv = stardict_dict_get_instance_private (self); |
|
} |
|
|
|
/// Load a StarDict dictionary. |
|
/// @param[in] filename Path to the .ifo file |
|
StardictDict * |
|
stardict_dict_new (const gchar *filename, GError **error) |
|
{ |
|
StardictInfo *ifo = g_new (StardictInfo, 1); |
|
if (!load_ifo (ifo, filename, error)) |
|
{ |
|
g_free (ifo); |
|
return NULL; |
|
} |
|
|
|
StardictDict *sd = stardict_dict_new_from_info (ifo, error); |
|
if (!sd) stardict_info_free (ifo); |
|
return sd; |
|
} |
|
|
|
/// Return information about a loaded dictionary. The returned reference is |
|
/// only valid for the lifetime of the dictionary object. |
|
StardictInfo * |
|
stardict_dict_get_info (StardictDict *sd) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); |
|
return sd->priv->info; |
|
} |
|
|
|
/// Load a StarDict index from a GIO input stream. |
|
static gboolean |
|
load_idx_internal (StardictDict *sd, GInputStream *is, GError **error) |
|
{ |
|
StardictDictPrivate *priv = sd->priv; |
|
GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (is)); |
|
g_data_input_stream_set_byte_order (dis, |
|
G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); |
|
|
|
StardictIndexEntry entry; |
|
GError *err = NULL; |
|
// Ignoring "wordcount", just reading as long as we can |
|
gchar *name; |
|
while ((name = stream_read_string (dis, &err))) |
|
{ |
|
if (priv->info->idx_offset_bits == 32) |
|
entry.data_offset |
|
= g_data_input_stream_read_uint32 (dis, NULL, &err); |
|
else |
|
entry.data_offset |
|
= g_data_input_stream_read_uint64 (dis, NULL, &err); |
|
if (err) |
|
goto error; |
|
|
|
entry.data_size = g_data_input_stream_read_uint32 (dis, NULL, &err); |
|
if (err) |
|
goto error; |
|
|
|
entry.name = g_string_chunk_insert (sd->priv->string_allocator, name); |
|
entry.reverse_index = priv->index->len; |
|
g_array_append_val (priv->index, entry); |
|
g_free (name); |
|
} |
|
|
|
if (err != NULL) |
|
goto error; |
|
|
|
g_object_unref (dis); |
|
return TRUE; |
|
|
|
error: |
|
g_propagate_error (error, err); |
|
g_free (name); |
|
g_object_unref (dis); |
|
return FALSE; |
|
} |
|
|
|
/// Load a StarDict index. |
|
static gboolean |
|
load_idx (StardictDict *sd, const gchar *filename, |
|
gboolean gzipped, GError **error) |
|
{ |
|
gboolean ret_val = FALSE; |
|
GFile *file = g_file_new_for_path (filename); |
|
GFileInputStream *fis = g_file_read (file, NULL, error); |
|
|
|
if (!fis) |
|
goto cannot_open; |
|
|
|
if (gzipped) |
|
{ |
|
GZlibDecompressor *zd |
|
= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); |
|
GInputStream *cis = g_converter_input_stream_new |
|
(G_INPUT_STREAM (fis), G_CONVERTER (zd)); |
|
|
|
ret_val = load_idx_internal (sd, cis, error); |
|
|
|
g_object_unref (cis); |
|
g_object_unref (zd); |
|
} |
|
else |
|
ret_val = load_idx_internal (sd, G_INPUT_STREAM (fis), error); |
|
|
|
g_object_unref (fis); |
|
cannot_open: |
|
g_object_unref (file); |
|
return ret_val; |
|
} |
|
|
|
static gboolean |
|
load_syn (StardictDict *sd, const gchar *filename, GError **error) |
|
{ |
|
gboolean ret_val = FALSE; |
|
GFile *file = g_file_new_for_path (filename); |
|
GFileInputStream *fis = g_file_read (file, NULL, error); |
|
|
|
if (!fis) |
|
goto cannot_open; |
|
|
|
GDataInputStream *dis = g_data_input_stream_new (G_INPUT_STREAM (fis)); |
|
g_data_input_stream_set_byte_order (dis, |
|
G_DATA_STREAM_BYTE_ORDER_BIG_ENDIAN); |
|
|
|
StardictSynonymEntry entry; |
|
GError *err = NULL; |
|
// Ignoring "synwordcount", just reading as long as we can |
|
gchar *word; |
|
while ((word = stream_read_string (dis, &err))) |
|
{ |
|
entry.original_word = g_data_input_stream_read_uint32 (dis, NULL, &err); |
|
if (err) |
|
break; |
|
|
|
entry.word = g_string_chunk_insert (sd->priv->string_allocator, word); |
|
g_array_append_val (sd->priv->synonyms, entry); |
|
g_free (word); |
|
} |
|
|
|
if (err != NULL) |
|
{ |
|
g_free (word); |
|
g_propagate_error (error, err); |
|
} |
|
else |
|
ret_val = TRUE; |
|
|
|
g_object_unref (dis); |
|
g_object_unref (fis); |
|
cannot_open: |
|
g_object_unref (file); |
|
return ret_val; |
|
} |
|
|
|
/// Load StarDict dictionary data. |
|
static gboolean |
|
load_dict (StardictDict *sd, const gchar *filename, gboolean gzipped, |
|
GError **error) |
|
{ |
|
StardictDictPrivate *priv = sd->priv; |
|
|
|
if (gzipped) |
|
{ |
|
gboolean ret_val = FALSE; |
|
GFile *file = g_file_new_for_path (filename); |
|
GFileInputStream *fis = g_file_read (file, NULL, error); |
|
|
|
if (!fis) |
|
goto cannot_open; |
|
|
|
// As a simple workaround for GLib < 2.33.1 and the lack of support for |
|
// the GSeekable interface in GDataInputStream, disable dictzip. |
|
// |
|
// http://lists.gnu.org/archive/html/qemu-devel/2013-06/msg04690.html |
|
if (!glib_check_version (2, 33, 1)) |
|
{ |
|
// Try opening it as a dictzip file first |
|
DictzipInputStream *dzis = |
|
dictzip_input_stream_new (G_INPUT_STREAM (fis), NULL); |
|
if (dzis) |
|
{ |
|
priv->dict_stream = G_INPUT_STREAM (dzis); |
|
ret_val = TRUE; |
|
goto done; |
|
} |
|
|
|
// If unsuccessful, just read it all, as it is, into memory |
|
if (!g_seekable_seek (G_SEEKABLE (fis), 0, G_SEEK_SET, NULL, error)) |
|
goto done; |
|
} |
|
|
|
GByteArray *ba = g_byte_array_new (); |
|
GZlibDecompressor *zd |
|
= g_zlib_decompressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP); |
|
GInputStream *cis = g_converter_input_stream_new |
|
(G_INPUT_STREAM (fis), G_CONVERTER (zd)); |
|
|
|
ret_val = stream_read_all (ba, cis, error); |
|
|
|
g_object_unref (cis); |
|
g_object_unref (zd); |
|
|
|
if (ret_val) |
|
{ |
|
priv->dict_length = ba->len; |
|
priv->dict = g_byte_array_free (ba, FALSE); |
|
} |
|
else |
|
g_byte_array_free (ba, TRUE); |
|
|
|
done: |
|
g_object_unref (fis); |
|
cannot_open: |
|
g_object_unref (file); |
|
return ret_val; |
|
} |
|
|
|
priv->mapped_dict = g_mapped_file_new (filename, FALSE, error); |
|
if (!priv->mapped_dict) |
|
return FALSE; |
|
|
|
priv->dict_length = g_mapped_file_get_length (priv->mapped_dict); |
|
priv->dict = g_mapped_file_get_contents (priv->mapped_dict); |
|
return TRUE; |
|
} |
|
|
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
|
|
/// Compare the two strings by collation rules. |
|
static inline gint |
|
stardict_dict_strcoll (gconstpointer s1, gconstpointer s2, gpointer data) |
|
{ |
|
StardictDict *sd = data; |
|
UErrorCode error = U_ZERO_ERROR; |
|
|
|
#if U_ICU_VERSION_MAJOR_NUM >= 50 |
|
return ucol_strcollUTF8 (sd->priv->collator, s1, -1, s2, -1, &error); |
|
#else // U_ICU_VERSION_MAJOR_NUM >= 50 |
|
// This remarkably retarded API absolutely reeks of corporate; |
|
// I don't have to tell you that this code runs slow, do I? |
|
|
|
int32_t uc1_len = 0; |
|
int32_t uc2_len = 0; |
|
|
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8WithSub (NULL, 0, &uc1_len, s1, -1, 0xFFFD, NULL, &error); |
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8WithSub (NULL, 0, &uc2_len, s2, -1, 0xFFFD, NULL, &error); |
|
|
|
UChar uc1[uc1_len]; |
|
UChar uc2[uc2_len]; |
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8WithSub (uc1, uc1_len, NULL, s1, -1, 0xFFFD, NULL, &error); |
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8WithSub (uc2, uc2_len, NULL, s2, -1, 0xFFFD, NULL, &error); |
|
|
|
return ucol_strcoll (sd->priv->collator, uc1, uc1_len, uc2, uc2_len); |
|
#endif // U_ICU_VERSION_MAJOR_NUM >= 50 |
|
} |
|
|
|
/// Stricter stardict_dict_strcoll() used to sort the collated index. |
|
static inline gint |
|
stardict_dict_strcoll_for_sorting |
|
(gconstpointer s1, gconstpointer s2, gpointer data) |
|
{ |
|
UCollationResult a = stardict_dict_strcoll (s1, s2, data); |
|
return a ? a : strcmp (s1, s2); |
|
} |
|
|
|
static inline gint |
|
stardict_dict_index_coll_for_sorting |
|
(gconstpointer x1, gconstpointer x2, gpointer data) |
|
{ |
|
const StardictIndexEntry *e1 = x1, *e2 = x2; |
|
return stardict_dict_strcoll_for_sorting (e1->name, e2->name, data); |
|
} |
|
|
|
static inline gint |
|
stardict_dict_synonyms_coll_for_sorting |
|
(gconstpointer x1, gconstpointer x2, gpointer data) |
|
{ |
|
StardictDict *sd = data; |
|
const gchar *s1 = g_array_index |
|
(sd->priv->index, StardictSynonymEntry, *(guint32 *) x1).word; |
|
const gchar *s2 = g_array_index |
|
(sd->priv->index, StardictSynonymEntry, *(guint32 *) x2).word; |
|
return stardict_dict_strcoll_for_sorting (s1, s2, data); |
|
} |
|
|
|
static gboolean |
|
stardict_dict_set_collation (StardictDict *sd, const gchar *collation) |
|
{ |
|
StardictDictPrivate *priv = sd->priv; |
|
UErrorCode error = U_ZERO_ERROR; |
|
if (!(priv->collator = ucol_open (collation, &error))) |
|
{ |
|
// TODO: set a meaningful error |
|
g_info ("failed to create a collator for `%s'", collation); |
|
return FALSE; |
|
} |
|
|
|
// TODO: if error != U_ZERO_ERROR, report a meaningful message |
|
|
|
// Reorder the index according to the ICU locale |
|
ucol_setAttribute (priv->collator, UCOL_CASE_FIRST, UCOL_OFF, &error); |
|
g_array_sort_with_data (sd->priv->index, |
|
stardict_dict_index_coll_for_sorting, sd); |
|
|
|
// Construct a reverse index from the original index as it's used less |
|
guint32 *reverse = g_malloc_n (priv->index->len, sizeof *reverse); |
|
for (guint32 i = 0; i < priv->index->len; i++) |
|
reverse[g_array_index (priv->index, |
|
StardictIndexEntry, i).reverse_index] = i; |
|
for (guint32 i = 0; i < priv->index->len; i++) |
|
g_array_index (priv->index, |
|
StardictIndexEntry, i).reverse_index = reverse[i]; |
|
g_free (reverse); |
|
|
|
priv->collated_synonyms = g_array_sized_new (FALSE, FALSE, |
|
sizeof (guint32), priv->synonyms->len); |
|
for (guint32 i = 0; i < priv->synonyms->len; i++) |
|
g_array_append_val (priv->collated_synonyms, i); |
|
g_array_sort_with_data (sd->priv->collated_synonyms, |
|
stardict_dict_synonyms_coll_for_sorting, sd); |
|
|
|
// Make the collator something like case-insensitive, see: |
|
// http://userguide.icu-project.org/collation/concepts |
|
// We shouldn't need to sort the data anymore, and if we did, we could just |
|
// reset the strength to its default value for the given locale. |
|
ucol_setStrength (priv->collator, UCOL_SECONDARY); |
|
return TRUE; |
|
} |
|
|
|
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
|
|
/// Load a StarDict dictionary. |
|
/// @param[in] sdi Parsed .ifo data. The dictionary assumes ownership. |
|
StardictDict * |
|
stardict_dict_new_from_info (StardictInfo *sdi, GError **error) |
|
{ |
|
g_return_val_if_fail (sdi != NULL, NULL); |
|
|
|
StardictDict *sd = g_object_new (STARDICT_TYPE_DICT, NULL); |
|
StardictDictPrivate *priv = sd->priv; |
|
priv->info = sdi; |
|
priv->index = g_array_new (FALSE, FALSE, sizeof (StardictIndexEntry)); |
|
priv->synonyms = g_array_new (FALSE, FALSE, sizeof (StardictSynonymEntry)); |
|
priv->string_allocator = g_string_chunk_new ((1 << 15)); |
|
|
|
const gchar *dot = strrchr (sdi->path, '.'); |
|
gchar *base = dot ? g_strndup (sdi->path, dot - sdi->path) |
|
: g_strdup (sdi->path); |
|
|
|
gchar *base_idx = g_strconcat (base, ".idx", NULL); |
|
gboolean ret = FALSE; |
|
if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) |
|
ret = load_idx (sd, base_idx, FALSE, error); |
|
else |
|
{ |
|
gchar *base_idx_gz = g_strconcat (base_idx, ".gz", NULL); |
|
g_free (base_idx); |
|
base_idx = base_idx_gz; |
|
|
|
if (g_file_test (base_idx, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) |
|
ret = load_idx (sd, base_idx, TRUE, error); |
|
else |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, |
|
"%s: %s", sdi->path, _("cannot find .idx file")); |
|
} |
|
} |
|
g_free (base_idx); |
|
|
|
if (!ret) |
|
goto error; |
|
|
|
gchar *base_dict = g_strconcat (base, ".dict", NULL); |
|
ret = FALSE; |
|
if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) |
|
ret = load_dict (sd, base_dict, FALSE, error); |
|
else |
|
{ |
|
gchar *base_dict_dz = g_strconcat (base_dict, ".dz", NULL); |
|
g_free (base_dict); |
|
base_dict = base_dict_dz; |
|
|
|
if (g_file_test (base_dict, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) |
|
ret = load_dict (sd, base_dict, TRUE, error); |
|
else |
|
{ |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_FILE_NOT_FOUND, |
|
"%s: %s", sdi->path, _("cannot find .dict file")); |
|
} |
|
} |
|
g_free (base_dict); |
|
|
|
if (!ret) |
|
goto error; |
|
|
|
gchar *base_syn = g_strconcat (base, ".syn", NULL); |
|
if (g_file_test (base_syn, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_REGULAR)) |
|
(void) load_syn (sd, base_syn, NULL); |
|
g_free (base_syn); |
|
|
|
// We need a fallback collator to find common prefixes |
|
if (!sdi->collation || !stardict_dict_set_collation (sd, sdi->collation)) |
|
{ |
|
UErrorCode error = U_ZERO_ERROR; |
|
sd->priv->collator_root = ucol_open ("" /* root collator */, &error); |
|
} |
|
|
|
g_free (base); |
|
return sd; |
|
|
|
error: |
|
g_free (base); |
|
priv->info = NULL; |
|
g_object_unref (sd); |
|
return NULL; |
|
} |
|
|
|
static gint |
|
stardict_dict_cmp_synonym (StardictDict *sd, const gchar *word, gint i) |
|
{ |
|
GArray *collated = sd->priv->collated_synonyms; |
|
GArray *synonyms = sd->priv->synonyms; |
|
|
|
if (sd->priv->collator) |
|
return stardict_dict_strcoll (word, |
|
g_array_index (synonyms, StardictSynonymEntry, |
|
g_array_index (collated, guint32, i)).word, sd); |
|
return g_ascii_strcasecmp (word, |
|
g_array_index (synonyms, StardictSynonymEntry, i).word); |
|
} |
|
|
|
/// Return words of which the argument is a synonym or NULL |
|
/// if there are no such words. |
|
gchar ** |
|
stardict_dict_get_synonyms (StardictDict *sd, const gchar *word) |
|
{ |
|
GArray *collated = sd->priv->collated_synonyms; |
|
GArray *synonyms = sd->priv->synonyms; |
|
GArray *index = sd->priv->index; |
|
|
|
BINARY_SEARCH_BEGIN (synonyms->len - 1, |
|
stardict_dict_cmp_synonym (sd, word, imid)) |
|
|
|
// Back off to the first matching entry |
|
while (imid > 0 && !stardict_dict_cmp_synonym (sd, word, imid - 1)) |
|
imid--; |
|
|
|
GPtrArray *array = g_ptr_array_new (); |
|
|
|
// And add all matching entries from that position on to the array |
|
do |
|
{ |
|
guint32 i = sd->priv->collator |
|
? g_array_index (synonyms, StardictSynonymEntry, |
|
g_array_index (collated, guint32, imid)).original_word |
|
: g_array_index (synonyms, StardictSynonymEntry, |
|
imid).original_word; |
|
|
|
// When we use a collator this will point to the original entry, |
|
// otherwise it points to itself and this changes nothing |
|
i = g_array_index (sd->priv->index, StardictIndexEntry, |
|
i).reverse_index; |
|
|
|
g_ptr_array_add (array, |
|
g_strdup (g_array_index (index, StardictIndexEntry, i).name)); |
|
} |
|
while ((guint) ++imid < synonyms->len |
|
&& !stardict_dict_cmp_synonym (sd, word, imid)); |
|
|
|
g_ptr_array_add (array, NULL); |
|
return (gchar **) g_ptr_array_free (array, FALSE); |
|
|
|
BINARY_SEARCH_END |
|
return NULL; |
|
} |
|
|
|
static gint |
|
stardict_dict_cmp_index (StardictDict *sd, const gchar *word, gint i) |
|
{ |
|
const gchar *target = |
|
g_array_index (sd->priv->index, StardictIndexEntry, i).name; |
|
if (sd->priv->collator) |
|
return stardict_dict_strcoll (word, target, sd); |
|
return g_ascii_strcasecmp (word, target); |
|
} |
|
|
|
static size_t |
|
prefix (StardictDict *sd, const gchar *word, gint i) |
|
{ |
|
GArray *index = sd->priv->index; |
|
return (guint) i >= index->len ? 0 : |
|
stardict_longest_common_collation_prefix |
|
(sd, word, g_array_index (index, StardictIndexEntry, i).name); |
|
} |
|
|
|
/// Search for a word. The search is ASCII-case-insensitive. |
|
/// @param[in] word The word in utf-8 encoding |
|
/// @param[out] success TRUE if found |
|
/// @return An iterator object pointing to the word, or where it would be |
|
StardictIterator * |
|
stardict_dict_search (StardictDict *sd, const gchar *word, gboolean *success) |
|
{ |
|
GArray *index = sd->priv->index; |
|
|
|
BINARY_SEARCH_BEGIN (index->len - 1, |
|
stardict_dict_cmp_index (sd, word, imid)) |
|
|
|
// Back off to the first matching entry |
|
while (imid > 0 && !stardict_dict_cmp_index (sd, word, imid - 1)) |
|
imid--; |
|
|
|
if (success) *success = TRUE; |
|
return stardict_iterator_new (sd, imid); |
|
|
|
BINARY_SEARCH_END |
|
|
|
// Try to find a longer common prefix with a preceding entry. |
|
// We need to take care not to step through the entire dictionary |
|
// if not a single character matches, because it can be quite costly. |
|
size_t probe, best = prefix (sd, word, imin); |
|
while (best && imin > 0 && (probe = prefix (sd, word, imin - 1)) >= best) |
|
{ |
|
// TODO: take more care to not screw up exact matches, |
|
// use several "best"s according to quality |
|
// (the most severe issue here is ignored diacritics) |
|
if (!strcmp (word, g_array_index |
|
(index, StardictIndexEntry, imin).name)) |
|
break; |
|
|
|
best = probe; |
|
imin--; |
|
} |
|
|
|
if (success) *success = FALSE; |
|
return stardict_iterator_new (sd, imin); |
|
} |
|
|
|
/// Return the longest sequence of bytes from @a s1 that form a common prefix |
|
/// with @a s2 wrt. collation rules for this dictionary. |
|
size_t |
|
stardict_longest_common_collation_prefix (StardictDict *sd, |
|
const gchar *s1, const gchar *s2) |
|
{ |
|
UErrorCode error; |
|
int32_t uc1_len = 0; |
|
int32_t uc2_len = 0; |
|
|
|
// It sets the error to overflow each time, even during pre-flight |
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8 (NULL, 0, &uc1_len, s1, -1, &error); |
|
error = U_ZERO_ERROR; |
|
u_strFromUTF8 (NULL, 0, &uc2_len, s2, -1, &error); |
|
error = U_ZERO_ERROR; |
|
|
|
UChar uc1[uc1_len]; |
|
UChar uc2[uc2_len]; |
|
u_strFromUTF8 (uc1, uc1_len, NULL, s1, -1, &error); |
|
u_strFromUTF8 (uc2, uc2_len, NULL, s2, -1, &error); |
|
|
|
// Both inputs need to be valid UTF-8 because of all the iteration mess |
|
if (U_FAILURE (error)) |
|
return 0; |
|
|
|
UCollator *collator = sd->priv->collator; |
|
if (!collator && !(collator = sd->priv->collator_root)) |
|
return 0; |
|
|
|
// ucol_getSortKey() can't be used for these purposes, so the only |
|
// reasonable thing remaining is iterating by full graphemes. It doesn't |
|
// work entirely correctly (e.g. Czech "ch" should be regarded as a single |
|
// unit). It's just good enough for most purposes. |
|
// |
|
// Locale shouldn't matter much with graphemes, let's use the default. |
|
UBreakIterator *it1 = |
|
ubrk_open (UBRK_CHARACTER, NULL, uc1, uc1_len, &error); |
|
UBreakIterator *it2 = |
|
ubrk_open (UBRK_CHARACTER, NULL, uc2, uc2_len, &error); |
|
|
|
UCollationStrength prev_strength = ucol_getStrength (collator); |
|
ucol_setStrength (collator, UCOL_PRIMARY); |
|
|
|
int32_t longest = 0; |
|
int32_t pos1, pos2; |
|
while ((pos1 = ubrk_next (it1)) != UBRK_DONE |
|
&& (pos2 = ubrk_next (it2)) != UBRK_DONE) |
|
{ |
|
if (!ucol_strcoll (collator, uc1, pos1, uc2, pos2)) |
|
longest = pos1; |
|
} |
|
ubrk_close (it1); |
|
ubrk_close (it2); |
|
|
|
ucol_setStrength (collator, prev_strength); |
|
if (!longest) |
|
return 0; |
|
|
|
int32_t common_len = 0; |
|
u_strToUTF8 (NULL, 0, &common_len, uc1, longest, &error); |
|
|
|
// Since this heavily depends on UTF-16 <-> UTF-8 not modifying the chars |
|
// (surrogate pairs interference?), let's add some paranoia here |
|
char common[common_len]; |
|
error = U_ZERO_ERROR; |
|
u_strToUTF8 (common, common_len, NULL, uc1, longest, &error); |
|
g_return_val_if_fail (!memcmp (s1, common, common_len), 0); |
|
|
|
return (size_t) common_len; |
|
} |
|
|
|
static void |
|
stardict_entry_field_free (StardictEntryField *sef) |
|
{ |
|
g_free (sef->data); |
|
g_slice_free1 (sizeof *sef, sef); |
|
} |
|
|
|
static StardictEntryField * |
|
read_entry (gchar type, const gchar **entry_iterator, |
|
const gchar *end, gboolean is_final) |
|
{ |
|
const gchar *entry = *entry_iterator; |
|
if (g_ascii_islower (type)) |
|
{ |
|
GString *data = g_string_new (NULL); |
|
|
|
if (is_final) |
|
{ |
|
g_string_append_len (data, entry, end - entry); |
|
entry += end - entry; |
|
} |
|
else |
|
{ |
|
gint c = EOF; |
|
while (entry < end && (c = *entry++)) |
|
g_string_append_c (data, c); |
|
|
|
if (c != '\0') |
|
return (gpointer) g_string_free (data, TRUE); |
|
} |
|
|
|
StardictEntryField *sef = g_slice_alloc (sizeof *sef); |
|
sef->type = type; |
|
sef->data_size = data->len + 1; |
|
sef->data = g_string_free (data, FALSE); |
|
*entry_iterator = entry; |
|
return sef; |
|
} |
|
|
|
gsize length; |
|
if (is_final) |
|
length = end - entry; |
|
else |
|
{ |
|
if (entry + sizeof (guint32) > end) |
|
return NULL; |
|
|
|
length = GUINT32_FROM_BE (*(guint32 *) entry); |
|
entry += sizeof (guint32); |
|
|
|
if (entry + length > end) |
|
return NULL; |
|
} |
|
|
|
StardictEntryField *sef = g_slice_alloc (sizeof *sef); |
|
sef->type = type; |
|
sef->data_size = length; |
|
sef->data = memcpy (g_malloc (length), entry, length); |
|
*entry_iterator = entry + length; |
|
return sef; |
|
} |
|
|
|
static GList * |
|
read_entries (const gchar *entry, gsize entry_size, GError **error) |
|
{ |
|
const gchar *end = entry + entry_size; |
|
GList *result = NULL; |
|
|
|
while (entry < end) |
|
{ |
|
gchar type = *entry++; |
|
StardictEntryField *sef = read_entry (type, &entry, end, FALSE); |
|
if (!sef) |
|
goto error; |
|
result = g_list_append (result, sef); |
|
} |
|
|
|
return result; |
|
|
|
error: |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
_("invalid data entry")); |
|
g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); |
|
return NULL; |
|
} |
|
|
|
static GList * |
|
read_entries_sts (const gchar *entry, gsize entry_size, |
|
const gchar *sts, GError **error) |
|
{ |
|
const gchar *end = entry + entry_size; |
|
GList *result = NULL; |
|
|
|
while (*sts) |
|
{ |
|
gchar type = *sts++; |
|
StardictEntryField *sef = read_entry (type, &entry, end, !*sts); |
|
if (!sef) |
|
goto error; |
|
result = g_list_append (result, sef); |
|
} |
|
|
|
return result; |
|
|
|
error: |
|
g_set_error (error, STARDICT_ERROR, STARDICT_ERROR_INVALID_DATA, |
|
_("invalid data entry")); |
|
g_list_free_full (result, (GDestroyNotify) stardict_entry_field_free); |
|
return NULL; |
|
} |
|
|
|
/// Read entry data from GInputStream. |
|
static gchar * |
|
read_entry_data_from_stream |
|
(GInputStream *stream, guint32 offset, StardictIndexEntry *sie) |
|
{ |
|
GError *error = NULL; |
|
if (!g_seekable_seek (G_SEEKABLE (stream), sie->data_offset, |
|
G_SEEK_SET, NULL, &error)) |
|
{ |
|
g_debug ("problem seeking to entry #%" |
|
G_GUINT32_FORMAT ": %s", offset, error->message); |
|
g_error_free (error); |
|
return NULL; |
|
} |
|
|
|
gchar *data = g_malloc (sie->data_size); |
|
gssize read = g_input_stream_read (stream, |
|
data, sie->data_size, NULL, &error); |
|
if (read < sie->data_size) |
|
{ |
|
if (error) |
|
{ |
|
g_debug ("problem reading entry #%" |
|
G_GUINT32_FORMAT ": %s", offset, error->message); |
|
g_error_free (error); |
|
} |
|
else |
|
g_debug ("probably overflowing entry #%" |
|
G_GUINT32_FORMAT, offset); |
|
|
|
g_free (data); |
|
return NULL; |
|
} |
|
return data; |
|
} |
|
|
|
/// Return the data for the specified offset in the index. Unsafe. |
|
static StardictEntry * |
|
stardict_dict_get_entry (StardictDict *sd, guint32 offset) |
|
{ |
|
// TODO maybe cache the entries, maybe don't hide the errors (also above) |
|
StardictDictPrivate *priv = sd->priv; |
|
StardictIndexEntry *sie = &g_array_index (priv->index, |
|
StardictIndexEntry, offset); |
|
GError *error = NULL; |
|
|
|
gchar *data; |
|
if (priv->dict_stream) |
|
{ |
|
data = read_entry_data_from_stream (priv->dict_stream, offset, sie); |
|
if (!data) |
|
return NULL; |
|
} |
|
else |
|
{ |
|
if (sie->data_offset + sie->data_size > priv->dict_length) |
|
{ |
|
g_debug ("overflowing entry #%" G_GUINT32_FORMAT, offset); |
|
return NULL; |
|
} |
|
data = priv->dict + sie->data_offset; |
|
} |
|
|
|
GList *entries; |
|
if (priv->info->same_type_sequence) |
|
entries = read_entries_sts (data, sie->data_size, |
|
priv->info->same_type_sequence, &error); |
|
else |
|
entries = read_entries (data, sie->data_size, &error); |
|
|
|
if (error) |
|
{ |
|
g_debug ("problem processing entry #%" |
|
G_GUINT32_FORMAT ": %s", offset, error->message); |
|
g_error_free (error); |
|
} |
|
if (priv->dict_stream) |
|
g_free (data); |
|
if (!entries) |
|
return NULL; |
|
|
|
StardictEntry *se = g_object_new (STARDICT_TYPE_ENTRY, NULL); |
|
se->fields = entries; |
|
return se; |
|
} |
|
|
|
// --- StardictEntry ----------------------------------------------------------- |
|
|
|
G_DEFINE_TYPE (StardictEntry, stardict_entry, G_TYPE_OBJECT) |
|
|
|
static void |
|
stardict_entry_finalize (GObject *self) |
|
{ |
|
StardictEntry *sde = STARDICT_ENTRY (self); |
|
|
|
g_list_free_full (sde->fields, (GDestroyNotify) stardict_entry_field_free); |
|
|
|
G_OBJECT_CLASS (stardict_entry_parent_class)->finalize (self); |
|
} |
|
|
|
static void |
|
stardict_entry_class_init (StardictEntryClass *klass) |
|
{ |
|
G_OBJECT_CLASS (klass)->finalize = stardict_entry_finalize; |
|
} |
|
|
|
static void |
|
stardict_entry_init (G_GNUC_UNUSED StardictEntry *sde) |
|
{ |
|
} |
|
|
|
/// Return the entries present within the entry. |
|
/// @return GList<StardictEntryField *> |
|
const GList * |
|
stardict_entry_get_fields (StardictEntry *sde) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_ENTRY (sde), NULL); |
|
return sde->fields; |
|
} |
|
|
|
// --- StardictIterator--------------------------------------------------------- |
|
|
|
G_DEFINE_TYPE (StardictIterator, stardict_iterator, G_TYPE_OBJECT) |
|
|
|
static void |
|
stardict_iterator_finalize (GObject *self) |
|
{ |
|
StardictIterator *si = STARDICT_ITERATOR (self); |
|
|
|
g_object_unref (si->owner); |
|
|
|
G_OBJECT_CLASS (stardict_iterator_parent_class)->finalize (self); |
|
} |
|
|
|
static void |
|
stardict_iterator_class_init (StardictIteratorClass *klass) |
|
{ |
|
G_OBJECT_CLASS (klass)->finalize = stardict_iterator_finalize; |
|
} |
|
|
|
static void |
|
stardict_iterator_init (G_GNUC_UNUSED StardictIterator *sd) |
|
{ |
|
} |
|
|
|
/// Create a new iterator for the dictionary with offset @a offset. |
|
StardictIterator * |
|
stardict_iterator_new (StardictDict *sd, guint32 offset) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_DICT (sd), NULL); |
|
|
|
StardictIterator *si = g_object_new (STARDICT_TYPE_ITERATOR, NULL); |
|
si->owner = g_object_ref (sd); |
|
si->offset = offset; |
|
return si; |
|
} |
|
|
|
/// Return the word in the index that the iterator points at, or NULL. |
|
const gchar * |
|
stardict_iterator_get_word (StardictIterator *sdi) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); |
|
if (!stardict_iterator_is_valid (sdi)) |
|
return NULL; |
|
return g_array_index (sdi->owner->priv->index, |
|
StardictIndexEntry, sdi->offset).name; |
|
} |
|
|
|
/// Return the dictionary entry that the iterator points at, or NULL. |
|
StardictEntry * |
|
stardict_iterator_get_entry (StardictIterator *sdi) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), NULL); |
|
if (!stardict_iterator_is_valid (sdi)) |
|
return NULL; |
|
return stardict_dict_get_entry (sdi->owner, sdi->offset); |
|
} |
|
|
|
/// Return whether the iterator points to a valid index entry. |
|
gboolean |
|
stardict_iterator_is_valid (StardictIterator *sdi) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), FALSE); |
|
return sdi->offset >= 0 && sdi->offset < sdi->owner->priv->index->len; |
|
} |
|
|
|
/// Return the offset of the iterator within the dictionary index. |
|
gint64 |
|
stardict_iterator_get_offset (StardictIterator *sdi) |
|
{ |
|
g_return_val_if_fail (STARDICT_IS_ITERATOR (sdi), -1); |
|
return sdi->offset; |
|
} |
|
|
|
/// Set the offset of the iterator. |
|
void |
|
stardict_iterator_set_offset |
|
(StardictIterator *sdi, gint64 offset, gboolean relative) |
|
{ |
|
g_return_if_fail (STARDICT_IS_ITERATOR (sdi)); |
|
sdi->offset = relative ? sdi->offset + offset : offset; |
|
}
|
|
|