From 228c3f3914108a49e02a0e7b490f69c3c5a870d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Janouch?= <p@janouch.name>
Date: Sun, 7 Oct 2018 18:07:18 +0200
Subject: [PATCH] hasp: add a libasciidoc preprocessor

---
 README       |  5 +++
 hasp/main.go | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 hasp/main.go
diff --git a/README b/README
index ccdd176..e32d012 100644
--- a/README
+++ b/README
@@ -185,6 +185,11 @@ An improved replacement for autocutsel in selection synchronization "mode":
 
 Only UTF8_STRING-convertible selections are synchronized.
 
+hasp -- (lib)asciidoc syntax preprocessor
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Provisional tool to make libasciidoc understand more syntax, namely two-line/
+underlined titles for my Gitea projects.
+
 ht -- terminal emulator
 ~~~~~~~~~~~~~~~~~~~~~~~
 Similar scope to st(1).  Clever display of internal padding for better looks.
diff --git a/hasp/main.go b/hasp/main.go
new file mode 100644
index 0000000..e19d48a
--- /dev/null
+++ b/hasp/main.go
@@ -0,0 +1,91 @@
+// Program hasp is a preprocessor for libasciidoc to make it understand
+// two-line/underlined titles, intended to be used in Gitea.
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/xml"
+	"io"
+	"io/ioutil"
+	"os"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/bytesparadise/libasciidoc"
+	"github.com/bytesparadise/libasciidoc/pkg/renderer"
+)
+
+// isTitle returns the title level (1 to 5) if the lines seem to form a title,
+// zero otherwise. Input lines may include trailing newlines.
+func isTitle(line1, line2 []byte) int {
+	// This is a very naïve method, we should target graphemes (thus at least
+	// NFC normalize the lines first) and account for wide characters.
+	diff := utf8.RuneCount(line1) - utf8.RuneCount(line2)
+	if len(line2) < 2 || diff < -1 || diff > 1 { // lengths may differ by at most one rune
+		return 0
+	}
+
+	// "Don't be fooled by back-to-back delimited blocks."
+	// Still gets fooled by other things, though.
+	if bytes.IndexFunc(line1, func(r rune) bool {
+		return unicode.IsLetter(r) || unicode.IsNumber(r)
+	}) < 0 { // the title text has no letter or number at all
+		return 0
+	}
+
+	// The underline must be homogeneous (compared bytewise, not by runes).
+	for _, r := range bytes.TrimRight(line2, "\r\n") {
+		if r != line2[0] {
+			return 0
+		}
+	}
+	return 1 + strings.IndexByte("=-~^+", line2[0]) // 0 for any other character
+}
+
+func writeLine(w *io.PipeWriter, cur, next []byte) []byte { // writes cur, returns the line to handle next
+	if level := isTitle(cur, next); level > 0 {
+		w.Write(append(bytes.Repeat([]byte{'='}, level), ' '))
+		next = nil // the underline is consumed here so that it is never written out
+	}
+	w.Write(cur) // NOTE(review): Write errors are ignored in this function
+	return next
+}
+
+// ConvertTitles converts AsciiDoc two-line (underlined) titles to single-line.
+func ConvertTitles(w *io.PipeWriter, input []byte) {
+	var last []byte // the previous line, held back until its successor is known
+	for _, cur := range bytes.SplitAfter(input, []byte{'\n'}) {
+		last = writeLine(w, last, cur)
+	}
+	writeLine(w, last, nil) // flush the final held-back line, nothing follows it
+}
+
+func main() { // reads AsciiDoc from standard input, writes HTML to standard output
+	input, err := ioutil.ReadAll(os.Stdin)
+	if err != nil {
+		panic(err)
+	}
+
+	pr, pw := io.Pipe() // carries the preprocessed text over to the converter
+	go func() {
+		defer pw.Close() // makes the reading side see EOF after the last line
+		ConvertTitles(pw, input)
+	}()
+
+	// io.Copy(os.Stdout, pr) // presumably a debugging aid: dump the preprocessed text
+	// return
+
+	_, err = libasciidoc.ConvertToHTML(context.Background(), pr, os.Stdout,
+		renderer.IncludeHeaderFooter(true))
+	if err != nil { // NOTE(review): stdout may already hold partial output here, confirm
+		// Fallback: output all the text sanitized for direct inclusion.
+		os.Stdout.WriteString("<pre>")
+		for _, line := range bytes.Split(input, []byte{'\n'}) { // raw input, not the preprocessed text
+			xml.EscapeText(os.Stdout, line)
+			os.Stdout.WriteString("\n") // also follows the last piece, so one extra newline is emitted
+		}
+		os.Stdout.WriteString("</pre>")
+	}
+}