Add sample dictionary downloaders/builders
This commit is contained in:
		
							
								
								
									
										21
									
								
								dicts/gnu-fdl-en-cz.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										21
									
								
								dicts/gnu-fdl-en-cz.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,21 @@
 | 
			
		||||
#!/bin/sh -e
# GNU/FDL English-Czech dictionary, see https://www.svobodneslovniky.cz/
#
# Downloads the gzipped, tab-separated word list and feeds two dictionaries
# to tabfile: gnu-fdl-en-cz (keyed by the English column) and gnu-fdl-cz-en
# (keyed by the Czech column).
#
# Before perl sees the data, lines starting with "#" are dropped and every
# backslash is removed, so the only backslashes in the output are the
# escapes added by e() below.
#
# perl switches: -CSD selects UTF-8 for the standard streams and for handles
# opened by the script, -F\t splits each input line on tabs into @F, and -l
# strips the input newline and appends one to every print.
curl -Lo- https://www.svobodneslovniky.cz/data/en-cs.txt.gz | \
zcat | grep -v '^#' | sed 's/\\//g' | perl -CSD -F\\t -le '
	# Escape backslash, newline and tab, so that a value stays within
	# a single tab-separated output line.
	sub e { shift =~ s/\\/\\\\/gr =~ s/\n/\\n/gr =~ s/\t/\\t/gr }

	# w(NAME, HASHREF): pipe one "key<TAB>values" line per hash entry
	# into "tabfile gnu-fdl-NAME"; the values of an entry are joined
	# with newlines before being escaped.
	sub w {
		my ($name, $dict) = @_;
		open(my $f, "|-", "tabfile gnu-fdl-$name") or die $!;
		while (my ($k, $v) = each %$dict) {
			print $f e($k) . "\t" . e(join("\n", @$v));
		}
		# Closing a pipe waits for the child and reports its exit
		# status, so this is where a failing tabfile is detected.
		close($f)
			or die "tabfile gnu-fdl-$name: " . ($! || "wait status $?");
	}

	# Only the first three columns are used below.
	my ($en, $cz, $notes, $special, $translator) = @F;
	if ($cz) {
		$notes =~ s/\w+:\s?//g;          # remove word classes
		$notes =~ s/(\w+\.)(?!])/($1)/;  # quote "pl."
		# Store each translation under both directions, with the
		# notes appended when there are any.
		push(@{$encz{$en}}, $notes ? "$cz " . $notes : $cz);
		push(@{$czen{$cz}}, $notes ? "$en " . $notes : $en);
	} END {
		# All input has been read: write out both dictionaries.
		w("en-cz", \%encz);
		w("cz-en", \%czen);
	}'
 | 
			
		||||
							
								
								
									
										8
									
								
								dicts/slovnik-cizich-slov.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										8
									
								
								dicts/slovnik-cizich-slov.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
#!/bin/sh -e
# Slovník cizích slov, see https://slovnik-cizich-slov.abz.cz/web.php/o-slovniku
# TODO: Skipping the optional pronunciation field, tabfile can't handle it yet,
# but could be made to accept a lowercase sametypesequence
#
# Pipeline: download the export, convert it from latin2 to UTF-8, turn each
# "|"-separated record into tab-separated lines, deduplicate them and hand
# the result to tabfile.
curl -Lo- https://slovnik-cizich-slov.abz.cz/export.php |
	iconv -f latin2 -t UTF-8 |
	perl -CSD -F\\\| -le '
		# The third field is the text to emit: double its backslashes
		# and turn each "; " separator into a literal \n escape.
		my $definition = $F[2] =~ s/\\/\\\\/gr =~ s/; /\\n/gr;

		# The first field may list several ", "-separated keys;
		# each one gets its own output line with the same text.
		print "$_\t$definition" for split(", ", $F[0]);
	' |
	sort -u |
	tabfile slovnik-cizich-slov
 | 
			
		||||
		Reference in New Issue
	
	Block a user