From f812fae922eec06235c9e566b78c7f0fb46a709b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?=
Date: Wed, 13 Oct 2021 23:57:36 +0200
Subject: [PATCH] Add the GNU/FDL German-Czech dictionary to dicts

But only build it with WANT_BAD_DICTS set to non-null.
---
 dicts/gnu-fdl-de-cz.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 dicts/gnu-fdl-de-cz.sh

diff --git a/dicts/gnu-fdl-de-cz.sh b/dicts/gnu-fdl-de-cz.sh
new file mode 100755
index 0000000..ffe56a4
--- /dev/null
+++ b/dicts/gnu-fdl-de-cz.sh
@@ -0,0 +1,58 @@
+#!/bin/sh -e
+# GNU/FDL German-Czech dictionary, see https://gnu.nemeckoceskyslovnik.cz
+#
+# Downloads the tab-separated export and feeds it to tabfile twice, producing
+# the gnu-fdl-de-cz and gnu-fdl-cz-de dictionaries.
+
+# Sometimes the domain doesn't resolve, and the contents are close to useless
+[ -n "$WANT_BAD_DICTS" ] || exit
+
+# Comment lines and all backslashes are dropped before Perl sees the data.
+# -a and -n are spelled out because -F only implies them since Perl 5.20.
+curl -Lo- 'https://gnu.nemeckoceskyslovnik.cz/index.php?id=6&sablona=export&format=zcu' | \
+grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -lane '
+    # Escape backslashes, newlines and tabs in a field written to tabfile.
+    sub tabesc { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr }
+
+    # Pipe one dictionary into tabfile: each output line is a keyword, a tab,
+    # and all of its definitions joined by (escaped) newlines.
+    sub w {
+        my ($name, $dict, $collation) = @_;
+        open(my $f, "|-", "tabfile", "--pango", "--collation=$collation",
+            "--website=https://gnu.nemeckoceskyslovnik.cz",
+            "gnu-fdl-$name") or die $!;
+        while (my ($keyword, $defs) = each %{$dict}) {
+            print $f tabesc($keyword) . "\t" . tabesc(join("\n", @$defs));
+        }
+        # Closing a pipe waits for the child; false means tabfile failed.
+        close($f)
+            or die "gnu-fdl-$name: tabfile failed: " . ($! || "status $?") . "\n";
+    }
+
+    # Entries are emitted as Pango markup, so escape the XML metacharacters.
+    sub xmlesc { shift =~ s/&/&amp;/gr =~ s/</&lt;/gr =~ s/>/&gt;/gr }
+
+    # Build one definition, with the notes appended when there are any.
+    sub entry {
+        my ($definition, $notes) = map {xmlesc($_)} @_;
+        $notes ? "$definition $notes" : $definition;
+    }
+
+    # Once an empty line is seen, skip it and everything after it: the constant
+    # 0 is compared with $. and therefore never terminates the range.
+    next if !$_ .. 0;
+
+    # Columns past the notes (special, translator) are not used.
+    my ($de, $cs, $notes) = @F;
+    if ($cs) {
+        $notes =~ s/\w+:\s?//g;  # remove word classes
+        $notes =~ s/(\w+\.)(?!])/($1)/;  # quote "pl."
+        push(@{$decs{$de}}, entry($cs, $notes));
+        push(@{$csde{$cs}}, entry($de, $notes));
+    }
+
+    # Write both directions once all input has been read.
+    END {
+        w("de-cz", \%decs, "de");
+        w("cz-de", \%csde, "cs");
+    }'