hswg: split out asciidoc.go

This commit is contained in:
Přemysl Eric Janouch 2021-06-29 03:15:41 +02:00
parent 5b432fcc0b
commit 61083027a3
Signed by: p
GPG Key ID: A0420B94F92B9493
2 changed files with 54 additions and 48 deletions

54
hswg/asciidoc.go Normal file
View File

@ -0,0 +1,54 @@
package main
import (
"bytes"
"io"
"strings"
"unicode"
"unicode/utf8"
)
// isTitle returns the title level if the lines seem to form a two-line
// (underlined) title, zero otherwise. Input lines may include trailing
// newlines, which take no part in any of the checks.
//
// The level is one plus the position of the underline character
// within "=-~^+".
func isTitle(line1, line2 []byte) int {
	// Strip line terminators first: a final line lacking a newline,
	// or mixed line endings, would otherwise skew the length comparison.
	title := bytes.TrimRight(line1, "\r\n")
	underline := bytes.TrimRight(line2, "\r\n")
	if len(underline) == 0 {
		return 0
	}

	// This is a very naïve method, we should target graphemes (thus at least
	// NFC normalize the lines first) and account for wide characters.
	diff := utf8.RuneCount(title) - utf8.RuneCount(underline)
	if diff < -1 || diff > 1 {
		return 0
	}

	// "Don't be fooled by back-to-back delimited blocks."
	// Still gets fooled by other things, though.
	if bytes.IndexFunc(title, func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsNumber(r)
	}) < 0 {
		return 0
	}

	// The underline must be homogeneous. Comparing bytes rather than runes
	// suffices, because all recognized underline characters are ASCII.
	for _, b := range underline {
		if b != underline[0] {
			return 0
		}
	}

	// IndexByte yields -1 for any unrecognized character, making this zero.
	return 1 + strings.IndexByte("=-~^+", underline[0])
}
// writeLine writes the line cur to w, and returns the line to be passed
// as cur in the following call.
//
// When isTitle reports that cur and next form a two-line title, cur is
// prefixed with as many '=' characters as the title level, plus a space,
// and the underline is consumed: nil is returned in place of next.
//
// Write errors are ignored, as the signature has no means of reporting them.
func writeLine(w *io.PipeWriter, cur, next []byte) []byte {
	if level := isTitle(cur, next); level > 0 {
		_, _ = w.Write(append(bytes.Repeat([]byte{'='}, level), ' '))
		next = nil
	}

	// cur is empty on the very first call, after a consumed underline,
	// and for the empty tail of an input ending in a newline.
	// Even a zero-length write to a pipe blocks until a reader picks it up,
	// and makes that Read return zero bytes, so skip those.
	if len(cur) > 0 {
		_, _ = w.Write(cur)
	}
	return next
}
// ConvertTitles rewrites AsciiDoc two-line (underlined) titles found in input
// as single-line titles, writing the converted document to w.
//
// The input is processed line by line with a lookahead of one line,
// which is what writeLine needs to recognize a title and its underline.
func ConvertTitles(w *io.PipeWriter, input []byte) {
	lines := bytes.SplitAfter(input, []byte("\n"))

	// The trailing nil sentinel flushes the last pending line,
	// which has nothing following it.
	var pending []byte
	for _, lookahead := range append(lines, nil) {
		pending = writeLine(w, pending, lookahead)
	}
}

View File

@ -16,11 +16,8 @@ import (
"path/filepath"
"regexp"
"sort"
"strings"
"syscall"
"time"
"unicode"
"unicode/utf8"
"github.com/bytesparadise/libasciidoc/pkg/configuration"
"github.com/bytesparadise/libasciidoc/pkg/parser"
@ -30,51 +27,6 @@ import (
"github.com/bytesparadise/libasciidoc/pkg/validator"
)
// isTitle returns the title level if the lines seem to form a two-line
// (underlined) title, zero otherwise. Input lines may include trailing
// newlines, which take no part in any of the checks.
//
// The level is one plus the position of the underline character
// within "=-~^+".
func isTitle(line1, line2 []byte) int {
	// Strip line terminators first: a final line lacking a newline,
	// or mixed line endings, would otherwise skew the length comparison.
	title := bytes.TrimRight(line1, "\r\n")
	underline := bytes.TrimRight(line2, "\r\n")
	if len(underline) == 0 {
		return 0
	}

	// This is a very naïve method, we should target graphemes (thus at least
	// NFC normalize the lines first) and account for wide characters.
	diff := utf8.RuneCount(title) - utf8.RuneCount(underline)
	if diff < -1 || diff > 1 {
		return 0
	}

	// "Don't be fooled by back-to-back delimited blocks."
	// Still gets fooled by other things, though.
	if bytes.IndexFunc(title, func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsNumber(r)
	}) < 0 {
		return 0
	}

	// The underline must be homogeneous. Comparing bytes rather than runes
	// suffices, because all recognized underline characters are ASCII.
	for _, b := range underline {
		if b != underline[0] {
			return 0
		}
	}

	// IndexByte yields -1 for any unrecognized character, making this zero.
	return 1 + strings.IndexByte("=-~^+", underline[0])
}
// writeLine writes the line cur to w, and returns the line to be passed
// as cur in the following call.
//
// When isTitle reports that cur and next form a two-line title, cur is
// prefixed with as many '=' characters as the title level, plus a space,
// and the underline is consumed: nil is returned in place of next.
//
// Write errors are ignored, as the signature has no means of reporting them.
func writeLine(w *io.PipeWriter, cur, next []byte) []byte {
	if level := isTitle(cur, next); level > 0 {
		_, _ = w.Write(append(bytes.Repeat([]byte{'='}, level), ' '))
		next = nil
	}

	// cur is empty on the very first call, after a consumed underline,
	// and for the empty tail of an input ending in a newline.
	// Even a zero-length write to a pipe blocks until a reader picks it up,
	// and makes that Read return zero bytes, so skip those.
	if len(cur) > 0 {
		_, _ = w.Write(cur)
	}
	return next
}
// ConvertTitles rewrites AsciiDoc two-line (underlined) titles found in input
// as single-line titles, writing the converted document to w.
//
// The input is processed line by line with a lookahead of one line,
// which is what writeLine needs to recognize a title and its underline.
func ConvertTitles(w *io.PipeWriter, input []byte) {
	lines := bytes.SplitAfter(input, []byte("\n"))

	// The trailing nil sentinel flushes the last pending line,
	// which has nothing following it.
	var pending []byte
	for _, lookahead := range append(lines, nil) {
		pending = writeLine(w, pending, lookahead)
	}
}
// Metadata contains select metadata about a rendered document.
type Metadata struct {
types.Metadata