Add a stupid AsciiDoc to manual page converter

Most of my projects that could need it make use of liberty.
2022-09-25 20:07:10 +02:00
parent 22a121383f
commit 9883caf849
1 changed files with 231 additions and 0 deletions
--- a/tools/asciiman.awk
+++ b/tools/asciiman.awk
@@ -0,0 +1,231 @@
+# asciiman.awk: stupid AsciiDoc to manual page converter
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# This is not intended to produce great output, merely useful output.
+# As such, input documents should restrict themselves as follows:
+#
+#  - Attributes cannot be passed on the command line.
+#  - In-line formatting sequences must not overlap,
+#    cannot be escaped, and cannot span lines.
+#  - Heading underlines must match in byte length exactly.
+#  - Only a small subset of syntax is supported overall.
+#
+# Also beware that the output has only been tested with GNU troff.
+
+# Reports a fatal error at the current input position and terminates.
+#
+# The report is written twice: as a roff comment on standard output,
+# so that it shows up in the generated page, and in plain form on
+# standard error.  The program then exits with status 1.
+function fatal(message,    report) {
+	report = FILENAME ":" FNR ": fatal error: " message
+	print ".\\\" " report
+	print report > "/dev/stderr"
+	exit 1
+}
+
+# Returns s with every {name} reference replaced by the value stored
+# in the global Attrs array.  The string is rescanned from the start
+# after each replacement, until no reference is left.
+function expand(s,   key, before, after) {
+	# TODO: This should not expand unknown attribute names.
+	while (match(s, /[{][^{}]*[}]/)) {
+		before = substr(s, 1, RSTART - 1)
+		key = substr(s, RSTART + 1, RLENGTH - 2)
+		after = substr(s, RSTART + RLENGTH)
+		s = before Attrs[key] after
+	}
+	return s
+}
+
+# Returns s escaped so that troff reproduces it literally:
+# backslashes are doubled, hyphens and dots are prefixed with a backslash,
+# and a leading apostrophe is neutralized.
+function escape(s) {
+	# "&" stands for the matched text.  Spelling the replacement as "\\\\"
+	# is unreliable: under POSIX rules (as followed by current gawk),
+	# the replacement collapses to a single backslash, making it a no-op.
+	gsub(/\\/, "&&", s)
+	gsub(/-/, "\\-", s)
+	gsub(/[.]/, "\\.", s)
+
+	# An apostrophe in the first column would start a troff control line,
+	# the zero-width \& in front of it prevents that.
+	if (substr(s, 1, 1) == "'")
+		s = "\\&" s
+	return s
+}
+
+# Tries to parse line as an attribute entry of the form ":name: value".
+#
+# On success, stores the expanded value in the global Attrs array
+# under the given name, and returns 1.  Otherwise returns nothing,
+# which callers see as false.
+function readattribute(line,    key, value) {
+	if (!match(line, /^:[^:]*: /))
+		return
+
+	# The match spans both colons and the trailing space, hence minus three.
+	key = substr(line, RSTART + 1, RLENGTH - 3)
+	value = substr(line, RSTART + RLENGTH)
+	Attrs[key] = expand(value)
+	return 1
+}
+
+# Parses the document header, which must start on the very first line:
+#
+#   name(N)
+#   =======
+#   :attribute: value
+#   <empty line>
+#
+# Sets the globals name and section, collects any attributes,
+# and emits the manual page preamble.  The empty line terminating
+# the header is left in $0 for the general rule to pick up.
+NR == 1 {
+	nameline = $0
+	if (!match(nameline, /[(][[:digit:]][)]$/))
+		fatal("invalid header line")
+	name = substr(nameline, 1, RSTART - 1)
+	section = substr(nameline, RSTART + 1, RLENGTH - 2)
+
+	if ((getline) <= 0 || length(nameline) != length($0) || /[^=]/)
+		fatal("invalid header underline")
+
+	# A failed getline leaves $0 untouched, so without the EOF check,
+	# a document ending in an attribute line would be re-read forever.
+	do {
+		if ((getline) <= 0)
+			fatal("unexpected EOF")
+	} while (readattribute($0))
+
+	# Compare as a string: a bare $0 reading "0" would count as false.
+	if ($0 != "")
+		fatal("expected an empty line after the header")
+
+	# Requesting tbl(1), even though we currently do not support tables.
+	# This prints the conventional preprocessor hint: '\" t
+	print "'\\\" t"
+	print ".TH \"" toupper(name) "\" \"" section "\""
+
+	# Hyphenation is indeed rather annoying, in particular with long links.
+	print ".nh"
+}
+
+# Prints one line of running text, converted to roff.
+#
+# An empty line becomes a vertical space (.sp).  Anything else has its
+# attributes expanded, gets escaped, and has the supported in-line
+# AsciiDoc sequences translated: empty URL descriptions, +++pass-through+++,
+# _italic_/__italic__, *bold*/**bold**, and the trailing " +" line break.
+function inline(line) {
+	# Compare as a string: !line would also be true for a line reading "0".
+	if (line == "") {
+		print ".sp"
+		return
+	}
+
+	line = escape(expand(line))
+
+	# Enable double-spacing after the end of a sentence.
+	# escape() has turned each dot into "\.", which is what we look for.
+	gsub(/\\[.][[:space:]]+/, ".\n", line)
+
+	# Strip empty URL descriptions, otherwise useful for demarking the end.
+	while (match(line, /[^[:space:]]+\[\]/)) {
+		line = substr(line, 1, RSTART + RLENGTH - 3) \
+			 substr(line, RSTART + RLENGTH)
+	}
+
+	# Pass-through, otherwise useful for hacks, is a lie here.
+	while (match(line, /[+][+][+][^+]+[+][+][+]/)) {
+		line = substr(line, 1, RSTART - 1) \
+			 substr(line, RSTART + 3, RLENGTH - 6) \
+			 substr(line, RSTART + RLENGTH)
+	}
+
+	# Italic and bold formatting doesn't respect any word boundaries.
+	while (match(line, /__[^_]+__/)) {
+		line = substr(line, 1, RSTART - 1) \
+			 "\\fI" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
+			 substr(line, RSTART + RLENGTH)
+	}
+	while (match(line, /_[^_]+_/)) {
+		line = substr(line, 1, RSTART - 1) \
+			 "\\fI" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
+			 substr(line, RSTART + RLENGTH)
+	}
+	while (match(line, /[*][*][^*]+[*][*]/)) {
+		line = substr(line, 1, RSTART - 1) \
+			 "\\fB" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \
+			 substr(line, RSTART + RLENGTH)
+	}
+	while (match(line, /[*][^*]+[*]/)) {
+		line = substr(line, 1, RSTART - 1) \
+			 "\\fB" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \
+			 substr(line, RSTART + RLENGTH)
+	}
+
+	# An apostrophe at the start of an output line would begin a troff
+	# control line, so put a zero-width \& in front of it.  The insertion
+	# point is right before the apostrophe, i.e., the last matched character.
+	while (match(line, /(^|\n)'/)) {
+		line = substr(line, 1, RSTART + RLENGTH - 2) \
+			 "\\&" substr(line, RSTART + RLENGTH - 1)
+	}
+
+	sub(/[[:space:]]+[+]$/, "\n.br", line)
+	print line
+}
+
+# Converts the block starting with firstline, reading as much further input
+# as the block needs through getline.  Recognized are, in this order:
+# attribute entries, ----/.... listing blocks, //// comment blocks,
+# headings underlined with - or ~, // line comments, "*" list items,
+# indented literal blocks, "term::" definitions, and plain text lines.
+#
+# Returns 1 iff the left-over $0 should be processed further.
+function process(firstline,    indent, more) {
+	if (readattribute(firstline))
+		return 0
+	if ((getline) <= 0) {
+		inline(firstline)
+		return 0
+	}
+
+	# Block delimiters are checked ahead of headings, or else a block
+	# whose first line is an equally long run of - or ~ would be mistaken
+	# for a heading.  Intervals such as {4,} are spelled out,
+	# because not all AWK implementations support them.
+	if (firstline ~ /^(----+|[.][.][.][.]+)$/) {
+		print ".if n .RS 4"
+		print ".nf"
+		print ".fam C"
+		# Testing before printing keeps empty blocks empty.
+		while ($0 != firstline) {
+			print escape($0)
+			if ((getline) <= 0)
+				break
+		}
+		print ".fam"
+		print ".fi"
+		print ".if n .RE"
+		return 0
+	}
+	if (firstline ~ /^\/\/\/\/+$/) {
+		while ($0 != firstline) {
+			print ".\\\" " $0
+			if ((getline) <= 0)
+				break
+		}
+		return 0
+	}
+
+	if (length(firstline) == length($0) && /^-+$/) {
+		print ".SH \"" escape(toupper(expand(firstline))) "\""
+		return 0
+	}
+	if (length(firstline) == length($0) && /^~+$/) {
+		print ".SS \"" escape(expand(firstline)) "\""
+		return 0
+	}
+	if (match(firstline, /^\/\//)) {
+		print ".\\\" " firstline
+		return 1
+	}
+
+	# We generally assume these blocks end with a blank line.
+	if (match(firstline, /^[[:space:]]*[*][[:space:]]+/)) {
+		# Bullet magic copied over from AsciiDoc/Asciidoctor generators.
+		print ".RS 4"
+		print ".ie n \\{\\"
+		print "\\h'-04'\\(bu\\h'+03'\\c"
+		print ".\\}"
+		print ".el \\{\\"
+		print ".sp -1"
+		print ".IP \\(bu 2.3"
+		print ".\\}"
+
+		inline(substr(firstline, RSTART + RLENGTH))
+
+		# The next item needs to be detected before recursing as well,
+		# otherwise directly adjacent items would nest within each other,
+		# and the first one would swallow whatever follows the list.
+		while ($0 != "" && !match($0, /^[[:space:]]*[*][[:space:]]+/)) {
+			sub(/^[[:space:]]+/, "")
+			sub(/^[+]$/, "")
+			if (!process($0) && (getline) <= 0)
+				fatal("unexpected EOF")
+		}
+		print ".RE"
+		print ".sp"
+		return $0 != ""
+	}
+	if (match(firstline, /^[[:space:]]+/)) {
+		# All lines get stripped by the indentation of the first one.
+		indent = RLENGTH
+		more = 1
+
+		print ".if n .RS 4"
+		print ".nf"
+		print ".fam C"
+		print escape(substr(firstline, indent + 1))
+		while ($0 != "") {
+			print escape(substr($0, indent + 1))
+			# At EOF, everything has been printed already,
+			# and $0 must not be handed out for further processing.
+			if ((getline) <= 0) {
+				more = 0
+				break
+			}
+		}
+		print ".fam"
+		print ".fi"
+		print ".if n .RE"
+		return more
+	}
+	if (match(firstline, /::$/)) {
+		inline(substr(firstline, 1, RSTART - 1))
+		while (match($0, /::$/)) {
+			print ".br"
+			inline(substr($0, 1, RSTART - 1))
+			if ((getline) <= 0)
+				fatal("unexpected EOF")
+		}
+
+		print ".RS 4"
+		while ($0 != "" && !match($0, /::$/)) {
+			sub(/^[[:space:]]+/, "")
+			sub(/^[+]$/, "")
+			if (!process($0) && (getline) <= 0)
+				fatal("unexpected EOF")
+		}
+		print ".RE"
+		print ".sp"
+		return $0 != ""
+	}
+	inline(firstline)
+	return 1
+}
+
+# Every record starts a new block.  process() reads ahead on its own,
+# and asks to be invoked again whenever it leaves an unconsumed line in $0.
+{
+	while (process($0))
+		continue
+}