# asciiman.awk: stupid AsciiDoc to manual page converter # # Copyright (c) 2022, Přemysl Eric Janouch # SPDX-License-Identifier: 0BSD # # This is not intended to produce great output, merely useful output. # As such, input documents should restrict themselves as follows: # # - Attributes cannot be passed on the command line. # - In-line formatting sequences must not overlap, # cannot be escaped, and cannot span lines. # - Heading underlines must match in byte length exactly. # - Only a small subset of syntax is supported overall. # # Also beware that the output has only been tested with GNU troff. function fatal(message) { print ".\\\" " FILENAME ":" FNR ": fatal error: " message print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr" exit 1 } function expand(s, attr) { # TODO: This should not expand unknown attribute names. while (match(s, /[{][^{}]*[}]/)) { attr = substr(s, RSTART + 1, RLENGTH - 2) s = substr(s, 1, RSTART - 1) Attrs[attr] substr(s, RSTART + RLENGTH) } return s } function escape(s) { gsub(/\\/, "\\\\", s) gsub(/-/, "\\-", s) sub(/^[.']/, "\\\\\\&&", s) return s } function readattribute(line, attrname, attrvalue) { if (match(line, /^:[^:]*: /)) { attrname = substr(line, RSTART + 1, RLENGTH - 3) attrvalue = substr(line, RSTART + RLENGTH) Attrs[attrname] = expand(attrvalue) return 1 } } NR == 1 { nameline = $0 if (match(nameline, /[(][[:digit:]][)]$/)) { name = substr(nameline, 1, RSTART - 1) section = substr(nameline, RSTART + 1, RLENGTH - 2) } else { fatal("invalid header line") } getline if (length(nameline) != length($0) || /[^=]/) fatal("invalid header underline") getline while (readattribute($0)) getline if ($0) fatal("expected an empty line after the header") # Requesting tbl(1), even though we currently do not support tables. print "'\\\" t" print ".TH \"" toupper(name) "\" \"" section "\"" # Hyphenation is indeed rather annoying, in particular with long links. print ".nh" } function inline(line) { if (!line) { print ".sp" return } line = escape(expand(line)) # Strip empty URL descriptions, otherwise useful for demarking the end. while (match(line, /[^[:space:]]+\[\]/)) { line = substr(line, 1, RSTART + RLENGTH - 3) \ substr(line, RSTART + RLENGTH) } # Pass-through, otherwise useful for hacks, is a lie here. while (match(line, /[+][+][+][^+]+[+][+][+]/)) { line = substr(line, 1, RSTART - 1) \ substr(line, RSTART + 3, RLENGTH - 6) \ substr(line, RSTART + RLENGTH) } # Italic and bold formatting doesn't respect any word boundaries. while (match(line, /__[^_]+__/)) { line = substr(line, 1, RSTART - 1) \ "\\fI" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \ substr(line, RSTART + RLENGTH) } while (match(line, /_[^_]+_/)) { line = substr(line, 1, RSTART - 1) \ "\\fI" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \ substr(line, RSTART + RLENGTH) } while (match(line, /[*][*][^*]+[*][*]/)) { line = substr(line, 1, RSTART - 1) \ "\\fB" substr(line, RSTART + 2, RLENGTH - 4) "\\fP" \ substr(line, RSTART + RLENGTH) } while (match(line, /[*][^*]+[*]/)) { line = substr(line, 1, RSTART - 1) \ "\\fB" substr(line, RSTART + 1, RLENGTH - 2) "\\fP" \ substr(line, RSTART + RLENGTH) } # Enable double-spacing after the end of a sentence. gsub(/[.][[:space:]]+/, ".\n", line) gsub(/[!][[:space:]]+/, "!\n", line) gsub(/[?][[:space:]]+/, "?\n", line) # Quote commands resulting from that, as well as from expand(). gsub(/\n[.]/, "\n\\\\\\&.", line) gsub(/\n[']/, "\n\\\\\\&'", line) sub(/[[:space:]]+[+]$/, "\n.br", line) print line } # Returns 1 iff the left-over $0 should be processed further. function process(firstline) { if (readattribute(firstline)) return 0 if (getline <= 0) { inline(firstline) return 0 } if (length(firstline) == length($0) && /^-+$/) { print ".SH \"" escape(toupper(expand(firstline))) "\"" return 0 } if (length(firstline) == length($0) && /^~+$/) { print ".SS \"" escape(expand(firstline)) "\"" return 0 } if (firstline ~ /^(-{4,}|[.]{4,})$/) { print ".if n .RS 4" print ".nf" print ".fam C" do { print escape($0) } while (getline > 0 && $0 != firstline) print ".fam" print ".fi" print ".if n .RE" return 0 } if (firstline ~ /^\/{4,}$/) { do { print ".\\\" " $0 } while (getline > 0 && $0 != firstline) return 0 } if (match(firstline, /^\/\//)) { print ".\\\" " firstline return 1 } # We generally assume these block end with a blank line. if (match(firstline, /^[[:space:]]*[*][[:space:]]+/)) { # Bullet magic copied over from AsciiDoc/Asciidoctor generators. print ".RS 4" print ".ie n \\{\\" print "\\h'-04'\\(bu\\h'+03'\\c" print ".\\}" print ".el \\{\\" print ".sp -1" print ".IP \\(bu 2.3" print ".\\}" inline(substr(firstline, RSTART + RLENGTH)) while ($0) { sub(/^[[:space:]]+/, "") sub(/^[+]$/, "") if (!process($0) && getline <= 0) fatal("unexpected EOF") if (match($0, /^[[:space:]]*[*][[:space:]]+/)) break } print ".RE" print ".sp" return !!$0 } if (match(firstline, /^[[:space:]]+/)) { print ".if n .RS 4" print ".nf" print ".fam C" do { print escape(substr(firstline, RLENGTH + 1)) firstline = $0 } while ($0 && getline > 0) print ".fam" print ".fi" print ".if n .RE" return 1 } if (match(firstline, /::$/)) { inline(substr(firstline, 1, RSTART - 1)) while (match($0, /::$/)) { print ".br" inline(substr($0, 1, RSTART - 1)) if (getline <= 0) fatal("unexpected EOF") } print ".RS 4" while ($0) { sub(/^[[:space:]]+/, "") sub(/^[+]$/, "") if (!process($0) && getline <= 0) fatal("unexpected EOF") if (match($0, /::$/)) break } print ".RE" print ".sp" return !!$0 } inline(firstline) return 1 } { while (process($0)) {} }