Add sample dictionary downloaders/builders
This commit is contained in:
parent
3881725904
commit
ed8b1bcdad
|
@ -172,6 +172,21 @@ endforeach ()
|
|||
|
||||
add_custom_target (tools DEPENDS ${tools})
|
||||
|
||||
# Example dictionaries
#
# Every shell script in dicts/ gets its own "dicts-<name>" target that runs it,
# and the umbrella "dicts" target runs all of them.  These are plain custom
# targets without ALL, so they only run when requested explicitly.
file (GLOB dicts_scripts "${PROJECT_SOURCE_DIR}/dicts/*.sh")
set (dicts_targets)
foreach (dict_script ${dicts_scripts})
	get_filename_component (dict_name "${dict_script}" NAME_WE)
	list (APPEND dicts_targets "dicts-${dict_name}")

	# The scripts invoke "tabfile" by its bare name, so the directory holding
	# the built tool is prepended to PATH.  It is taken from the target itself
	# rather than assumed to be the working directory, which keeps this
	# working with a custom runtime output directory or a multi-config
	# generator.  The directory is handed over as $1 instead of being pasted
	# into the command string, so that paths containing spaces survive.
	add_custom_target (dicts-${dict_name}
		COMMAND sh -c "PATH=\"$1:$PATH\" \"$0\""
			"${dict_script}" "$<TARGET_FILE_DIR:tabfile>"
		DEPENDS tabfile
		# The scripts write their output relative to the working directory
		WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
		COMMENT "Generating sample dictionary ${dict_name}"
		VERBATIM)
endforeach ()

add_custom_target (dicts DEPENDS ${dicts_targets})
|
||||
|
||||
# The files to be installed
|
||||
include (GNUInstallDirs)
|
||||
install (TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
|
@ -212,4 +227,3 @@ set (CPACK_SOURCE_IGNORE_FILES "/\\\\.git;/build;/CMakeLists.txt.user")
|
|||
set (CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
|
||||
|
||||
include (CPack)
|
||||
|
||||
|
|
|
@ -101,13 +101,14 @@ Dictionaries
|
|||
Unfortunately this application only really works with specific dictionaries.
|
||||
Word definitions have to be in plain text, separated by newlines.
|
||||
|
||||
The `make dicts` command will build some examples from freely available sources.
|
||||
|
||||
You may use the included transform tool to transform existing dictionaries that
|
||||
are almost useful as they are, e.g. after stripping XML tags. You might want to
|
||||
fix up the `sametypesequence` of the resulting '.ifo' file afterwards, and run
|
||||
dictzip on the resulting '.dict' file.
|
||||
|
||||
https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[
|
||||
CZ <--> { EN, DE, PL, RU } dictionaries]
|
||||
https://mega.co.nz/#!axtD0QRK!sbtBgizksyfkPqKvKEgr8GQ11rsWhtqyRgUUV0B7pwg[CZ <--> EN/DE/PL/RU dictionaries]
|
||||
|
||||
Contributing and Support
|
||||
------------------------
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
#!/bin/sh -e
# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
#
# Downloads the gzipped, tab-separated word list and feeds it to "tabfile"
# twice, once keyed by English (gnu-fdl-en-cz), once by Czech (gnu-fdl-cz-en).
# "tabfile" has to be reachable through PATH.
#
# curl -f keeps an HTTP error page from being parsed as dictionary data.
# perl -an is spelled out because -F only implies it since Perl 5.20.
curl -fLo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
zcat | grep -v '^#' | sed 's/\\//g' | perl -CSD -F\\t -anle '
	# Escape backslashes, newlines and tabs, so that an entry stays
	# on a single line with exactly one separating tab
	sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr }

	# Pipe one headword => definitions map into "tabfile gnu-fdl-<name>",
	# joining multiple definitions of a headword with newlines
	sub w {
		my ($name, $entries) = @_;
		open(my $f, "|-", "tabfile gnu-fdl-$name") or die $!;
		print $f e($k) . "\t" . e(join("\n", @$v))
			while ($k, $v) = each %$entries;
		# Closing a pipe returns false when the child has failed
		close($f)
			or die "tabfile gnu-fdl-$name: " . ($! || "wait status $?") . "\n";
	}

	# Columns of the source file, in order; only the first three are used
	my ($en, $cz, $notes, $special, $translator) = @F;
	if ($cz) {
		$notes =~ s/\w+:\s?//g;         # remove word classes
		$notes =~ s/(\w+\.)(?!])/($1)/; # quote "pl."
		push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz);
		push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en);
	} END {
		# The exit status of the pipeline is that of perl alone, so a failed
		# download can only be noticed here, as a lack of any entries
		die "No entries were read, the download has probably failed\n"
			unless %encz;
		w("en-cz", \%encz);
		w("cz-en", \%czen);
	}'
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/sh -e
# Slovník cizích slov (a Czech dictionary of foreign words),
# see https://slovnik-cizich-slov.abz.cz/web.php/o-slovniku
# TODO: Skipping the optional pronunciation field, tabfile can't handle it yet,
# but could be made to accept a lowercase sametypesequence
#
# The export is split on "|": field 0 is a ", "-separated list of headwords,
# field 2 is used as the definition, with "; " turned into escaped newlines.
# Each headword is printed on its own line and duplicates are removed
# by sort -u before the result reaches "tabfile", which has to be in PATH.
#
# curl -f keeps an HTTP error page from being parsed as dictionary data.
# perl -an is spelled out because -F only implies it since Perl 5.20.
curl -fLo- https://slovnik-cizich-slov.abz.cz/export.php | \
iconv -f latin2 -t UTF-8 | perl -CSD -F\\\| -anle '
	print "$_\t" . $F[2] =~ s/\\/\\\\/gr =~ s/; /\\n/gr for split(", ", $F[0])
' | sort -u | tabfile slovnik-cizich-slov
|
Loading…
Reference in New Issue