Add sample dictionary downloaders/builders
This commit is contained in:
21
dicts/gnu-fdl-en-cz.sh
Executable file
21
dicts/gnu-fdl-en-cz.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/sh -e
|
||||
# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
|
||||
curl -Lo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
|
||||
zcat | grep -v ^# | sed 's/\\//g' | perl -CSD -F\\t -le '
|
||||
sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr }
|
||||
sub w {
|
||||
open(my $f, "|-", "tabfile gnu-fdl-$_[0]") or die $!;
|
||||
print $f e($k) . "\t" . e(join("\n", @$v))
|
||||
while ($k, $v) = each %{$_[1]};
|
||||
close($f);
|
||||
}
|
||||
my ($en, $cz, $notes, $special, $translator) = @F;
|
||||
if ($cz) {
|
||||
$notes =~ s/\w+:\s?//g; # remove word classes
|
||||
$notes =~ s/(\w+\.)(?!])/($1)/; # quote "pl."
|
||||
push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz);
|
||||
push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en);
|
||||
} END {
|
||||
w("en-cz", \%encz);
|
||||
w("cz-en", \%czen);
|
||||
}'
|
||||
Reference in New Issue
Block a user