haven/hswg/main.go

255 lines
6.5 KiB
Go
Raw Normal View History

2020-08-15 03:08:02 +02:00
// Program hswg is a static website generator employing libasciidoc with added
// support for two-line/underlined titles, and postprocessing "wiki" InterLinks.
package main
import (
"bytes"
"encoding/xml"
2020-08-15 03:08:02 +02:00
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"unicode"
"unicode/utf8"
"github.com/bytesparadise/libasciidoc"
"github.com/bytesparadise/libasciidoc/pkg/configuration"
"github.com/bytesparadise/libasciidoc/pkg/types"
)
// isTitle reports whether line2 looks like an AsciiDoc underline for line1.
// It returns the title level (1 for '=', 2 for '-', 3 for '~', 4 for '^',
// 5 for '+'), or zero when the pair does not form a two-line title.
// Input lines may include trailing newlines.
func isTitle(line1, line2 []byte) int {
	if len(line2) < 2 {
		return 0
	}

	// This is a very naïve comparison: it counts runes rather than graphemes
	// (the lines are not even NFC-normalized) and ignores wide characters.
	// The two lengths may differ by at most one.
	delta := utf8.RuneCount(line1) - utf8.RuneCount(line2)
	if delta < -1 || delta > 1 {
		return 0
	}

	// "Don't be fooled by back-to-back delimited blocks": the title line
	// must contain at least one letter or number.
	// Still gets fooled by other things, though.
	hasText := false
	for _, r := range string(line1) {
		if unicode.IsLetter(r) || unicode.IsNumber(r) {
			hasText = true
			break
		}
	}
	if !hasText {
		return 0
	}

	// The underline must be homogeneous: every byte before the line
	// terminator has to equal the first one.
	marker := line2[0]
	underline := bytes.TrimRight(line2, "\r\n")
	if bytes.Count(underline, []byte{marker}) != len(underline) {
		return 0
	}

	switch marker {
	case '=':
		return 1
	case '-':
		return 2
	case '~':
		return 3
	case '^':
		return 4
	case '+':
		return 5
	}
	return 0
}
// writeLine emits cur to w, looking ahead at next to decide whether the two
// lines form a two-line (underlined) title. When they do, cur is prefixed
// with the matching number of '=' characters and a space, and the underline
// is swallowed: nil is returned in its place. Otherwise next is returned
// unchanged so that the caller can pass it back in as the following cur.
//
// Empty lines (the initial nil, a swallowed underline, or the empty piece
// after a final newline) are not written at all: a zero-length write to
// a pipe would only hand the reader a Read returning (0, nil).
func writeLine(w *io.PipeWriter, cur, next []byte) []byte {
	if level := isTitle(cur, next); level > 0 {
		prefix := append(bytes.Repeat([]byte{'='}, level), ' ')
		// A pipe write can only fail once the read end has been closed,
		// at which point there is nobody left to report the error to.
		_, _ = w.Write(prefix)
		next = nil
	}
	if len(cur) > 0 {
		_, _ = w.Write(cur)
	}
	return next
}
// ConvertTitles rewrites AsciiDoc two-line (underlined) titles found in input
// as single-line titles, streaming the result into w. All other lines are
// passed through unchanged.
func ConvertTitles(w *io.PipeWriter, input []byte) {
	// Each line is written one step late, so that the line following it is
	// available as lookahead. The nil sentinel appended at the end flushes
	// the very last line.
	pieces := append(bytes.SplitAfter(input, []byte("\n")), nil)

	var pending []byte
	for i := range pieces {
		pending = writeLine(w, pending, pieces[i])
	}
}
// Render converts an io.Reader with an AsciiDoc document to HTML.
//
// The document is read in full, its two-line titles are converted to
// single-line ones by ConvertTitles, and the result is passed to libasciidoc
// using the given configuration.
//
// If reading doc fails, the error is returned together with an empty buffer.
// If the conversion fails, the error is returned as well, but html then
// holds the whole input escaped and wrapped in <pre>, so that the caller
// may still choose to show something.
func Render(doc io.Reader, config configuration.Configuration) (
	html *bytes.Buffer, meta types.Metadata, err error) {
	html = bytes.NewBuffer(nil)

	var input []byte
	if input, err = ioutil.ReadAll(doc); err != nil {
		return
	}

	pr, pw := io.Pipe()
	// Should the converter give up before draining the pipe, closing the
	// read end makes the pending and all further writes fail, so that
	// the goroutine below terminates rather than blocking forever.
	defer pr.Close()
	go func() {
		defer pw.Close()
		ConvertTitles(pw, input)
	}()

	meta, err = libasciidoc.ConvertToHTML(pr, html, config)
	if err != nil {
		// Fallback: output all the text sanitized for direct inclusion.
		// Escaping goes line by line because xml.EscapeText would also
		// turn the newlines themselves into character references.
		html.Reset()
		_, _ = html.WriteString("<pre>")
		for _, line := range bytes.Split(input, []byte{'\n'}) {
			_ = xml.EscapeText(html, line)
			_, _ = html.WriteString("\n")
		}
		_, _ = html.WriteString("</pre>")
	}
	return
}
2020-08-15 03:08:02 +02:00
// entry contains all context information about a single page.
// Entries are kept in a map keyed by the page name, which is the base name
// of the source file with its extension stripped.
type entry struct {
path string // path of the source file, as matched by a command line glob
mtime time.Time // modification time of the source file
metadata types.Metadata // metadata returned by Render for the document
document []byte // inner document with expanded LinkWords
backlinks []string // names of the documents that link back here
}
// extRE matches the extension at the very end of a path: the last dot
// together with everything after it, provided that no slash follows the dot.
var extRE = regexp.MustCompile(`\.[^/.]*$`)

// stripExtension returns path without its final extension, if it has any.
func stripExtension(path string) string {
	loc := extRE.FindStringIndex(path)
	if loc == nil {
		return path
	}
	return path[:loc[0]]
}
// resultPath returns the path of the HTML file generated for the source file
// at path: the final extension, if any, is replaced with ".html".
func resultPath(path string) string {
	stem := path
	if loc := extRE.FindStringIndex(path); loc != nil {
		stem = path[:loc[0]]
	}
	return stem + ".html"
}
// makeLink returns an HTML anchor labelled with name and pointing to the
// generated page of the entry of that name. The name must be present in m.
func makeLink(m *map[string]*entry, name string) string {
	target := resultPath((*m)[name].path)
	return "<a href='" + target + "'>" + name + "</a>"
}
// linkWordRE matches candidate LinkWords: an uppercase letter followed by
// any number of letters, delimited by word boundaries.
// NOTE(review): \b in Go regular expressions only considers ASCII word
// characters, so words beginning or ending with a non-ASCII letter
// are presumably never matched -- confirm whether that matters.
var linkWordRE = regexp.MustCompile(`\b\p{Lu}\p{L}*\b`)

// expand replaces every word in chunk that names an entry in m with a link
// to that entry, and returns the result. Each replacement also appends name,
// the page that chunk comes from, to the backlinks of the linked entry,
// once per occurrence.
func expand(m *map[string]*entry, name string, chunk []byte) []byte {
	pages := *m
	linkify := func(word []byte) []byte {
		target, known := pages[string(word)]
		if !known {
			return word
		}
		target.backlinks = append(target.backlinks, name)
		return []byte(makeLink(m, string(word)))
	}
	return linkWordRE.ReplaceAllFunc(chunk, linkify)
}
// singleFile renders the document on the standard input to the standard
// output using the default configuration. A rendering error is only logged,
// and whatever Render has produced is still printed; otherwise a non-empty
// document title is printed first, as a level 1 heading.
func singleFile() {
	html, meta, err := Render(os.Stdin, configuration.NewConfiguration())

	switch out := os.Stdout; {
	case err != nil:
		log.Println(err)
	case meta.Title != "":
		_, _ = out.WriteString("<h1>")
		_ = xml.EscapeText(out, []byte(meta.Title))
		_, _ = out.WriteString("</h1>\n")
	}

	_, _ = io.Copy(os.Stdout, html)
}
2020-08-15 03:08:02 +02:00
func main() {
if len(os.Args) < 2 {
singleFile()
return
}
2020-08-15 03:08:02 +02:00
if len(os.Args) < 3 {
log.Fatalf("usage: %s TEMPLATE GLOB...\n", os.Args[0])
}
// Read the common page header.
header, err := ioutil.ReadFile(os.Args[1])
if err != nil {
log.Fatalln(err)
}
// Create a map from document names to their page entries.
entries := map[string]*entry{}
for _, glob := range os.Args[2:] {
matches, err := filepath.Glob(glob)
if err != nil {
log.Fatalf("%s: %s\n", glob, err)
}
for _, path := range matches {
name := stripExtension(filepath.Base(path))
if conflict, ok := entries[name]; ok {
log.Fatalf("%s: conflicts with %s\n", name, conflict.path)
}
entries[name] = &entry{path: path}
}
}
tagRE := regexp.MustCompile(`<[^<>]+>`)
for name, e := range entries {
f, err := os.Open(e.path)
if err != nil {
log.Fatalln(err)
}
if i, err := f.Stat(); err != nil {
log.Fatalln(err)
} else {
e.mtime = i.ModTime()
}
var html *bytes.Buffer
if html, e.metadata, err = Render(f, configuration.NewConfiguration(
2020-08-15 03:08:02 +02:00
configuration.WithFilename(e.path),
configuration.WithLastUpdated(e.mtime),
)); err != nil {
2020-08-15 03:08:02 +02:00
log.Fatalln(err)
}
// Expand LinkWords anywhere between <tags>.
// We want something like the inverse of Regexp.ReplaceAllStringFunc.
raw, last, expanded := html.Bytes(), 0, bytes.NewBuffer(nil)
2020-08-15 03:08:02 +02:00
for _, where := range tagRE.FindAllIndex(raw, -1) {
_, _ = expanded.Write(expand(&entries, name, raw[last:where[0]]))
_, _ = expanded.Write(raw[where[0]:where[1]])
last = where[1]
}
_, _ = expanded.Write(expand(&entries, name, raw[last:]))
e.document = expanded.Bytes()
}
for name, e := range entries {
f, err := os.Create(resultPath(e.path))
if err != nil {
log.Fatalln(err)
}
_, _ = f.Write(header)
title := e.metadata.Title
if title == "" {
title = name
}
_, _ = f.WriteString(fmt.Sprintf("<title>%s</title>\n", title))
_, _ = f.WriteString(fmt.Sprintf("<h1>%s</h1>\n", title))
sort.Strings(e.backlinks)
backlinks := []string{}
for _, name := range e.backlinks {
backlinks = append(backlinks, makeLink(&entries, name))
}
if len(backlinks) > 0 {
_, _ = f.WriteString(fmt.Sprintf("<p id=links>Links here: %s</p>\n",
strings.Join(backlinks, ", ")))
}
_, _ = f.Write(e.document)
_, _ = f.WriteString(fmt.Sprintf("<p id=footer>Last updated: %s"+
" &mdash; <a href='%s'>Source</p>\n",
e.metadata.LastUpdated, e.path))
}
}