Add a stupid AsciiDoc to manual page converter

Most of my projects that could need it make use of liberty.
2022-09-25 20:07:10 +02:00
parent 22a121383f
commit 9883caf849
1 changed files with 231 additions and 0 deletions
--- a/tools/asciiman.awk
+++ b/tools/asciiman.awk
@@ -0,0 +1,231 @@
 # asciiman.awk: stupid AsciiDoc to manual page converter
 #
 # Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
 # SPDX-License-Identifier: 0BSD
 #
 # This is not intended to produce great output, merely useful output.
 # As such, input documents should restrict themselves as follows:
 #
 #  - Attributes cannot be passed on the command line.
 #  - In-line formatting sequences must not overlap,
 #    cannot be escaped, and cannot span lines.
 #  - Heading underlines must match in byte length exactly.
 #  - Only a small subset of syntax is supported overall.
 #
 # Also beware that the output has only been tested with GNU troff.
 # Report an unrecoverable error at the current input position and give up.
 # The diagnostic goes to the output as a roff comment, as well as to
 # the standard error stream in plain form; the exit status is 1.
 function fatal(message,    diagnostic) {
 	diagnostic = FILENAME ":" FNR ": fatal error: " message
 	print ".\\\" " diagnostic
 	print diagnostic > "/dev/stderr"
 	exit 1
 }
 # Substitute all {name} references in s with values from the Attrs array.
 # The string is rescanned from its beginning after each substitution.
 function expand(s,   attr, before, after) {
 	# TODO: This should not expand unknown attribute names.
 	while (match(s, /[{][^{}]*[}]/)) {
 		before = substr(s, 1, RSTART - 1)
 		attr = substr(s, RSTART + 1, RLENGTH - 2)
 		after = substr(s, RSTART + RLENGTH)
 		s = before Attrs[attr] after
 	}
 	return s
 }
 # Protect characters that roff would otherwise interpret or transform:
 # backslashes, hyphens (to keep them ASCII minus signs), and periods.
 function escape(s) {
 	# Backslashes must go first, lest the ones added below get processed.
 	gsub(/\\/, "\\\\", s)
 	# The remaining two substitutions are independent of each other.
 	gsub(/[.]/, "\\.", s)
 	gsub(/-/, "\\-", s)
 	return s
 }
 # Try to parse line as an attribute entry of the form ":name: value".
 # On success, store the expanded value in Attrs[name] and return 1;
 # otherwise return 0 without touching anything.
 function readattribute(line,    attrname, attrvalue) {
 	if (!match(line, /^:[^:]*: /))
 		return 0
 	# The match spans ":name: ", i.e., the name plus three more characters.
 	attrname = substr(line, RSTART + 1, RLENGTH - 3)
 	attrvalue = substr(line, RSTART + RLENGTH)
 	Attrs[attrname] = expand(attrvalue)
 	return 1
 }
 # Parse the document header, which consists of a "name(section)" title line,
 # an underline of "=" characters of the same length, any number of attribute
 # entries, and an empty line.  Then emit the manual page preamble.
 NR == 1 {
 	nameline = $0
 	if (match(nameline, /[(][[:digit:]][)]$/)) {
 		name = substr(nameline, 1, RSTART - 1)
 		section = substr(nameline, RSTART + 1, RLENGTH - 2)
 	} else {
 		fatal("invalid header line")
 	}
 	getline
 	if (length(nameline) != length($0) || /[^=]/)
 		fatal("invalid header underline")
 	getline
 	# A failed getline leaves $0 untouched, so without the explicit check,
 	# input ending on an attribute entry would make this loop spin forever.
 	while (readattribute($0)) {
 		if (getline <= 0)
 			fatal("unexpected EOF")
 	}
 	if ($0)
 		fatal("expected an empty line after the header")
 	# Requesting tbl(1), even though we currently do not support tables.
 	# The line must read exactly: '\" t
 	print "'\\\" t"
 	print ".TH \"" toupper(name) "\" \"" section "\""
 	# Hyphenation is indeed rather annoying, in particular with long links.
 	print ".nh"
 }
 # Output one line of running text, converting in-line markup to roff.
 # An empty line is turned into a ".sp" request.  Otherwise, attributes are
 # expanded, special characters escaped, sentence ends are followed by
 # a newline, empty URL descriptions ("[]") and "+++" pass-through markers are
 # stripped, _italic_/__italic__ and *bold*/**bold** spans become font
 # escapes, and a trailing " +" becomes a ".br" request.
 function inline(line) {
 	# Compare as a string, so that a line merely saying "0" isn't lost.
 	if (line == "") {
 		print ".sp"
 		return
 	}
 	line = escape(expand(line))
 	# Enable double-spacing after the end of a sentence.
 	# Periods have already been escaped, hence the backslash in the pattern.
 	gsub(/\\[.][[:space:]]+/, ".\n", line)
 	# Strip empty URL descriptions, otherwise useful for demarking the end.
 	while (match(line, /[^[:space:]]+\[\]/)) {
 		line = substr(line, 1, RSTART + RLENGTH - 3) \
 			 substr(line, RSTART + RLENGTH)
 	}
 	# Pass-through, otherwise useful for hacks, is a lie here.
 	while (match(line, /[+][+][+][^+]+[+][+][+]/)) {
 		line = substr(line, 1, RSTART - 1) \
 			 substr(line, RSTART + 3, RLENGTH - 6) \
 			 substr(line, RSTART + RLENGTH)
 	}
 	# Italic and bold formatting doesn't respect any word boundaries.
 	# Doubled delimiters are handled before the single ones.
 	while (match(line, /__[^_]+__/)) {
 		line = substr(line, 1, RSTART - 1) \
 			 "\\fI" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
 			 substr(line, RSTART + RLENGTH)
 	}
 	while (match(line, /_[^_]+_/)) {
 		line = substr(line, 1, RSTART - 1) \
 			 "\\fI" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
 			 substr(line, RSTART + RLENGTH)
 	}
 	while (match(line, /[*][*][^*]+[*][*]/)) {
 		line = substr(line, 1, RSTART - 1) \
 			 "\\fB" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
 			 substr(line, RSTART + RLENGTH)
 	}
 	while (match(line, /[*][^*]+[*]/)) {
 		line = substr(line, 1, RSTART - 1) \
 			 "\\fB" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
 			 substr(line, RSTART + RLENGTH)
 	}
 	# A trailing " +" requests a hard line break.
 	sub(/[[:space:]]+[+]$/, "\n.br", line)
 	print line
 }
 # Process one block-level construct starting at firstline, reading ahead
 # through getline as necessary, and print the corresponding roff.
 #
 # Except for attribute entries, this always reads one line ahead into $0
 # before deciding what firstline is.
 #
 # Returns 1 iff the left-over $0 should be processed further.
 function process(firstline) {
 	# Attribute entries are consumed silently, without any read-ahead.
 	if (readattribute(firstline))
 		return 0
 	# The last line of input can only ever be output as running text.
 	if (getline <= 0) {
 		inline(firstline)
 		return 0
 	}
 	# Two-line headings: the underline is in $0, and is consumed here.
 	if (length(firstline) == length($0) && /^-+$/) {
 		print ".SH \"" escape(toupper(expand(firstline))) "\""
 		return 0
 	}
 	if (length(firstline) == length($0) && /^~+$/) {
 		print ".SS \"" escape(expand(firstline)) "\""
 		return 0
 	}
 	# Delimited listing/literal blocks: $0 already holds the first content
 	# line, and lines get copied until one equal to the opening delimiter,
 	# or until EOF.  The closing delimiter is left in $0 and dropped.
 	# NOTE(review): the first content line is printed unconditionally,
 	# so an empty "...." block looks like it would misparse -- confirm.
 	if (firstline ~ /^(-{4,}|[.]{4,})$/) {
 		print ".if n .RS 4"
 		print ".nf"
 		print ".fam C"
 		do {
 			print escape($0)
 		} while (getline > 0 && $0 != firstline)
 		print ".fam"
 		print ".fi"
 		print ".if n .RE"
 		return 0
 	}
 	# Delimited comment blocks are copied over as roff comments,
 	# with the same termination rule as above.
 	if (firstline ~ /^\/{4,}$/) {
 		do {
 			print ".\\\" " $0
 		} while (getline > 0 && $0 != firstline)
 		return 0
 	}
 	# Line comments: the read-ahead line in $0 hasn't been looked at yet.
 	if (match(firstline, /^\/\//)) {
 		print ".\\\" " firstline
 		return 1
 	}
 	# We generally assume these blocks end with a blank line.
 	if (match(firstline, /^[[:space:]]*[*][[:space:]]+/)) {
 		# Bullet magic copied over from AsciiDoc/Asciidoctor generators.
 		print ".RS 4"
 		print ".ie n \\{\\"
 		print "\\h'-04'\\(bu\\h'+03'\\c"
 		print ".\\}"
 		print ".el \\{\\"
 		print ".sp -1"
 		print ".IP \\(bu 2.3"
 		print ".\\}"
 		# RSTART/RLENGTH still describe the bullet marker matched above.
 		inline(substr(firstline, RSTART + RLENGTH))
 		# Process continuation lines recursively, until a blank line
 		# or the next bullet, which is then left in $0 for the caller.
 		# NOTE(review): a line consisting of "0" presumably also tests
 		# as false here -- confirm.
 		while ($0) {
 			# Strip indentation, and blank out list continuation markers.
 			sub(/^[[:space:]]+/, "")
 			sub(/^[+]$/, "")
 			# When the nested call consumed its read-ahead, fetch a new one.
 			if (!process($0) && getline <= 0)
 				fatal("unexpected EOF")
 			if (match($0, /^[[:space:]]*[*][[:space:]]+/))
 				break
 		}
 		print ".RE"
 		print ".sp"
 		# Only a non-empty left-over line needs further processing.
 		return !!$0
 	}
 	# Indented literal blocks run up to a blank line or EOF.
 	if (match(firstline, /^[[:space:]]+/)) {
 		print ".if n .RS 4"
 		print ".nf"
 		print ".fam C"
 		# RLENGTH stays that of the first line's indentation,
 		# the same number of characters is cut from all lines.
 		do {
 			print escape(substr(firstline, RLENGTH + 1))
 			firstline = $0
 		} while ($0 && getline > 0)
 		print ".fam"
 		print ".fi"
 		print ".if n .RE"
 		return 1
 	}
 	# Labeled (description) lists: any directly following "term::" lines
 	# are joined to the first with line breaks, then comes an indented body.
 	if (match(firstline, /::$/)) {
 		inline(substr(firstline, 1, RSTART - 1))
 		while (match($0, /::$/)) {
 			print ".br"
 			inline(substr($0, 1, RSTART - 1))
 			if (getline <= 0)
 				fatal("unexpected EOF")
 		}
 		print ".RS 4"
 		# The body is processed the same way as that of bullet items,
 		# only it is the next "term::" line that ends it early.
 		while ($0) {
 			sub(/^[[:space:]]+/, "")
 			sub(/^[+]$/, "")
 			if (!process($0) && getline <= 0)
 				fatal("unexpected EOF")
 			if (match($0, /::$/))
 				break
 		}
 		print ".RE"
 		print ".sp"
 		return !!$0
 	}
 	# Anything else is running text; the read-ahead line is yet to be seen.
 	inline(firstline)
 	return 1
 }
 # Hand each record over to process(), and for as long as it reports having
 # left an unprocessed read-ahead line in $0, hand that one over as well.
 {
 	for (;;) {
 		if (!process($0))
 			break
 	}
 }