Compare commits
No commits in common. "6c364dc99766bfd3bd86cd262db37b1a766dca1e" and "03f2123447b1a5537db1cdde016eb87eb2d2371f" have entirely different histories.
6c364dc997
...
03f2123447
@ -163,7 +163,7 @@ if (gtk_FOUND)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# Tools
|
# Tools
|
||||||
set (tools tabfile add-pronunciation query-tool transform)
|
set (tools add-pronunciation query-tool transform)
|
||||||
foreach (tool ${tools})
|
foreach (tool ${tools})
|
||||||
add_executable (${tool} EXCLUDE_FROM_ALL
|
add_executable (${tool} EXCLUDE_FROM_ALL
|
||||||
src/${tool}.c ${project_common_sources})
|
src/${tool}.c ${project_common_sources})
|
||||||
|
@ -867,12 +867,11 @@ stardict_dict_cmp_synonym (StardictDict *sd, const gchar *word, gint i)
|
|||||||
g_array_index (synonyms, StardictSynonymEntry, i).word);
|
g_array_index (synonyms, StardictSynonymEntry, i).word);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return words of which the argument is a synonym or NULL
|
/// Return words for which the argument is a synonym of or NULL
|
||||||
/// if there are no such words.
|
/// if there are no such words.
|
||||||
gchar **
|
gchar **
|
||||||
stardict_dict_get_synonyms (StardictDict *sd, const gchar *word)
|
stardict_dict_get_synonyms (StardictDict *sd, const gchar *word)
|
||||||
{
|
{
|
||||||
GArray *collated = sd->priv->collated_synonyms;
|
|
||||||
GArray *synonyms = sd->priv->synonyms;
|
GArray *synonyms = sd->priv->synonyms;
|
||||||
GArray *index = sd->priv->index;
|
GArray *index = sd->priv->index;
|
||||||
|
|
||||||
@ -880,32 +879,26 @@ stardict_dict_get_synonyms (StardictDict *sd, const gchar *word)
|
|||||||
stardict_dict_cmp_synonym (sd, word, imid))
|
stardict_dict_cmp_synonym (sd, word, imid))
|
||||||
|
|
||||||
// Back off to the first matching entry
|
// Back off to the first matching entry
|
||||||
while (imid > 0 && !stardict_dict_cmp_synonym (sd, word, imid - 1))
|
while (imid > 0 && !stardict_dict_cmp_synonym (sd, word, --imid))
|
||||||
imid--;
|
;
|
||||||
|
|
||||||
GPtrArray *array = g_ptr_array_new ();
|
GPtrArray *array = g_ptr_array_new ();
|
||||||
|
|
||||||
// And add all matching entries from that position on to the array
|
// And add all matching entries from that position on to the array
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
guint32 i = sd->priv->collator
|
guint32 i = g_array_index
|
||||||
? g_array_index (synonyms, StardictSynonymEntry,
|
(synonyms, StardictSynonymEntry, ++imid).original_word;
|
||||||
g_array_index (collated, guint32, imid)).original_word
|
|
||||||
: g_array_index (synonyms, StardictSynonymEntry,
|
|
||||||
imid).original_word;
|
|
||||||
|
|
||||||
// When we use a collator this will point to the original entry,
|
// When we use a collator this will point to the original entry,
|
||||||
// otherwise it points to itself and this changes nothing
|
// otherwise it points to itself and this changes nothing
|
||||||
i = g_array_index (sd->priv->index, StardictIndexEntry,
|
i = g_array_index
|
||||||
i).reverse_index;
|
(sd->priv->index, StardictIndexEntry, i).reverse_index;
|
||||||
|
g_ptr_array_add (array, g_strdup (g_array_index
|
||||||
g_ptr_array_add (array,
|
(index, StardictIndexEntry, i).name));
|
||||||
g_strdup (g_array_index (index, StardictIndexEntry, i).name));
|
|
||||||
}
|
}
|
||||||
while ((guint) ++imid < synonyms->len
|
while ((guint) imid < synonyms->len - 1 && !stardict_strcmp (word,
|
||||||
&& !stardict_dict_cmp_synonym (sd, word, imid));
|
g_array_index (synonyms, StardictSynonymEntry, imid + 1).word));
|
||||||
|
|
||||||
g_ptr_array_add (array, NULL);
|
|
||||||
return (gchar **) g_ptr_array_free (array, FALSE);
|
return (gchar **) g_ptr_array_free (array, FALSE);
|
||||||
|
|
||||||
BINARY_SEARCH_END
|
BINARY_SEARCH_END
|
||||||
|
163
src/tabfile.c
163
src/tabfile.c
@ -1,163 +0,0 @@
|
|||||||
/*
|
|
||||||
* A clean reimplementation of StarDict's tabfile
|
|
||||||
*
|
|
||||||
* Copyright (c) 2020 - 2021, Přemysl Eric Janouch <p@janouch.name>
|
|
||||||
*
|
|
||||||
* Permission to use, copy, modify, and/or distribute this software for any
|
|
||||||
* purpose with or without fee is hereby granted.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
||||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
||||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
||||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
||||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
||||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <locale.h>
|
|
||||||
|
|
||||||
#include <glib.h>
|
|
||||||
#include <gio/gio.h>
|
|
||||||
|
|
||||||
#include "stardict.h"
|
|
||||||
#include "stardict-private.h"
|
|
||||||
#include "generator.h"
|
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
static gboolean
|
|
||||||
set_data_error (GError **error, const char *message)
|
|
||||||
{
|
|
||||||
g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA, message);
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const gchar escapes[256] = { ['n'] = '\n', ['t'] = '\t', ['\\'] = '\\' };
|
|
||||||
|
|
||||||
static gboolean
|
|
||||||
inplace_unescape (char *line, GError **error)
|
|
||||||
{
|
|
||||||
gboolean escape = FALSE;
|
|
||||||
char *dest = line;
|
|
||||||
for (char *src = line; *src; src++)
|
|
||||||
{
|
|
||||||
if (escape)
|
|
||||||
{
|
|
||||||
escape = FALSE;
|
|
||||||
if (!(*dest++ = escapes[(guchar) *src]))
|
|
||||||
return set_data_error (error, "unsupported escape");
|
|
||||||
}
|
|
||||||
else if (*src == '\\')
|
|
||||||
escape = TRUE;
|
|
||||||
else
|
|
||||||
*dest++ = *src;
|
|
||||||
}
|
|
||||||
if (escape)
|
|
||||||
return set_data_error (error, "trailing escape character");
|
|
||||||
|
|
||||||
*dest = 0;
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static gboolean
|
|
||||||
import_line (Generator *generator, char *line, size_t len, GError **error)
|
|
||||||
{
|
|
||||||
if (!len)
|
|
||||||
return TRUE;
|
|
||||||
|
|
||||||
char *separator = strchr (line, '\t');
|
|
||||||
if (!separator)
|
|
||||||
return set_data_error (error, "keyword separator not found");
|
|
||||||
|
|
||||||
*separator++ = 0;
|
|
||||||
if (strchr (line, '\\'))
|
|
||||||
// The index wouldn't be sorted correctly with our method
|
|
||||||
return set_data_error (error, "escapes not allowed in keywords");
|
|
||||||
|
|
||||||
char *newline = strpbrk (separator, "\r\n");
|
|
||||||
if (newline)
|
|
||||||
*newline = 0;
|
|
||||||
|
|
||||||
if (!inplace_unescape (line, error)
|
|
||||||
|| !inplace_unescape (separator, error))
|
|
||||||
return FALSE;
|
|
||||||
|
|
||||||
generator_begin_entry (generator);
|
|
||||||
return generator_write_string (generator, separator, TRUE, error)
|
|
||||||
&& generator_finish_entry (generator, line, error);
|
|
||||||
}
|
|
||||||
|
|
||||||
static gboolean
|
|
||||||
transform (FILE *fsorted, Generator *generator, GError **error)
|
|
||||||
{
|
|
||||||
char *line = NULL;
|
|
||||||
size_t size = 0, ln = 1;
|
|
||||||
for (ssize_t read; (read = getline (&line, &size, fsorted)) >= 0; ln++)
|
|
||||||
if (!import_line (generator, line, read, error))
|
|
||||||
break;
|
|
||||||
|
|
||||||
free (line);
|
|
||||||
if (ferror (fsorted))
|
|
||||||
{
|
|
||||||
g_set_error_literal (error, G_IO_ERROR,
|
|
||||||
g_io_error_from_errno (errno), g_strerror (errno));
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
if (!feof (fsorted))
|
|
||||||
{
|
|
||||||
// You'll only get good line number output with presorted input!
|
|
||||||
g_prefix_error (error, "line %zu: ", ln);
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
main (int argc, char *argv[])
|
|
||||||
{
|
|
||||||
// The GLib help includes an ellipsis character, for some reason
|
|
||||||
(void) setlocale (LC_ALL, "");
|
|
||||||
|
|
||||||
GError *error = NULL;
|
|
||||||
GOptionContext *ctx = g_option_context_new ("output-basename < input");
|
|
||||||
g_option_context_set_summary (ctx,
|
|
||||||
"Create a StarDict dictionary from plaintext.");
|
|
||||||
if (!g_option_context_parse (ctx, &argc, &argv, &error))
|
|
||||||
fatal ("Error: option parsing failed: %s\n", error->message);
|
|
||||||
|
|
||||||
if (argc != 2)
|
|
||||||
fatal ("%s", g_option_context_get_help (ctx, TRUE, FALSE));
|
|
||||||
g_option_context_free (ctx);
|
|
||||||
|
|
||||||
// This actually implements stardict_strcmp(), POSIX-compatibly.
|
|
||||||
// Your sort(1) is not expected to be stable by default, like bsdsort is.
|
|
||||||
FILE *fsorted = popen ("LC_ALL=C sort -t'\t' -k1f,1", "r");
|
|
||||||
if (!fsorted)
|
|
||||||
fatal ("%s: %s\n", "popen", g_strerror (errno));
|
|
||||||
|
|
||||||
Generator *generator = generator_new (argv[1], &error);
|
|
||||||
if (!generator)
|
|
||||||
fatal ("Error: failed to create the output dictionary: %s\n",
|
|
||||||
error->message);
|
|
||||||
|
|
||||||
StardictInfo *info = generator->info;
|
|
||||||
info->version = SD_VERSION_3_0_0;
|
|
||||||
info->book_name = g_strdup (argv[1]);
|
|
||||||
info->same_type_sequence = g_strdup ("m");
|
|
||||||
|
|
||||||
// This gets incremented each time an entry is finished
|
|
||||||
info->word_count = 0;
|
|
||||||
|
|
||||||
if (!transform (fsorted, generator, &error)
|
|
||||||
|| !generator_finish (generator, &error))
|
|
||||||
fatal ("Error: failed to write the dictionary: %s\n", error->message);
|
|
||||||
|
|
||||||
generator_free (generator);
|
|
||||||
fclose (fsorted);
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* The external filter needs to process NUL-separated textual entries.
|
* The external filter needs to process NUL-separated textual entries.
|
||||||
*
|
*
|
||||||
* Example: transform input.ifo output -- perl -p0e s/bullshit/soykaf/g
|
* Example: transform input.info output -- perl -p0e s/bullshit/soykaf/g
|
||||||
*
|
*
|
||||||
* Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
|
* Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
|
||||||
*
|
*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user