haven/hswg/main.go

530 lines
13 KiB
Go
Raw Normal View History

2020-08-15 03:08:02 +02:00
// Program hswg is a static website generator employing libasciidoc with added
// support for two-line/underlined titles, and postprocessing "wiki" InterLinks.
package main
import (
"bytes"
"encoding/binary"
"encoding/xml"
2020-08-15 03:08:02 +02:00
"fmt"
"html/template"
2020-08-15 03:08:02 +02:00
"io"
"io/ioutil"
"log"
"os"
"os/signal"
2020-08-15 03:08:02 +02:00
"path/filepath"
"regexp"
"sort"
"strings"
2022-03-03 12:57:21 +01:00
"sync"
"syscall"
2020-08-15 03:08:02 +02:00
"time"
"github.com/bytesparadise/libasciidoc/pkg/configuration"
"github.com/bytesparadise/libasciidoc/pkg/parser"
"github.com/bytesparadise/libasciidoc/pkg/renderer/sgml/html5"
2020-08-15 03:08:02 +02:00
"github.com/bytesparadise/libasciidoc/pkg/types"
"github.com/bytesparadise/libasciidoc/pkg/validator"
2020-08-15 03:08:02 +02:00
)
// Metadata contains select metadata about a rendered document.
// Render fills the embedded part from the HTML5 renderer's result,
// and Attributes from the configuration used for rendering.
type Metadata struct {
	// Metadata as reported by the libasciidoc renderer.
	types.Metadata

	// Note that this includes entries from the front-matter
	// (see parser.ApplySubstitutions <- parser.ParseDocument).
	Attributes types.Attributes
}
2020-09-21 18:30:11 +02:00
// IsDraft reports whether the document carries the "draft" attribute,
// in which case no other page should link to it.
func (m *Metadata) IsDraft() bool {
	return m.Attributes.Has("draft")
}
// Attr looks up a document attribute by name and returns it as a string.
// An empty string is returned as the default value.
func (m *Metadata) Attr(name string) string {
	const fallback = ""
	return m.Attributes.GetAsStringWithDefault(name, fallback)
}
// AttrList works like Attr, but treats the attribute value as
// a comma-separated list: it returns the individual items with surrounding
// whitespace removed. It returns nil when the attribute is not present.
func (m *Metadata) AttrList(name string) []string {
	if !m.Attributes.Has(name) {
		return nil
	}

	items := strings.Split(m.Attr(name), ",")
	for i, item := range items {
		items[i] = strings.TrimSpace(item)
	}
	return items
}
// Render converts an io.Reader with an AsciiDoc document to HTML. So long as
// the file could be read at all, it will always return a non-empty document.
2022-03-03 14:28:15 +01:00
func Render(r io.Reader, config *configuration.Configuration) (
html *bytes.Buffer, meta Metadata, err error) {
html = bytes.NewBuffer(nil)
var input []byte
if input, err = ioutil.ReadAll(r); err != nil {
return
}
pr, pw := io.Pipe()
go func() {
defer pw.Close()
ConvertTitles(pw, input)
}()
// io.Copy(os.Stdout, pr)
// return
2022-03-03 14:28:15 +01:00
var doc *types.Document
if doc, err = parser.ParseDocument(pr, config); err == nil {
2022-03-03 14:28:15 +01:00
doctype := config.Attributes.GetAsStringWithDefault(
types.AttrDocType, "article")
problems, verr := validator.Validate(doc, doctype)
if verr != nil {
fmt.Fprintln(os.Stderr, verr)
2021-06-02 22:54:37 +02:00
}
for _, problem := range problems {
fmt.Fprintln(os.Stderr, problem.Message)
}
2022-10-06 19:12:42 +02:00
meta.Metadata, err = html5.Render(doc, config, html)
}
if err != nil {
// Fallback: output all the text sanitized for direct inclusion.
html.Reset()
_, _ = html.WriteString("<pre>")
for _, line := range bytes.Split(input, []byte{'\n'}) {
_ = xml.EscapeText(html, line)
_, _ = html.WriteString("\n")
}
_, _ = html.WriteString("</pre>")
}
2022-03-03 14:28:15 +01:00
meta.Attributes = config.Attributes
return
}
// Entry contains all context information about a single page.
type Entry struct {
2021-06-22 01:29:21 +02:00
Metadata // metadata
PathSource string // path to source AsciiDoc
PathDestination string // path to destination HTML
mtime time.Time // modification time
raw []byte // raw inner document
2021-06-22 01:29:21 +02:00
Content template.HTML // inner document with expanded LinkWords
backlinks map[string]bool // what documents link back here
Backlinks []template.HTML
2020-08-15 03:08:02 +02:00
}
// Published returns the date when the entry was published, or nil if unknown.
func (e *Entry) Published() *time.Time {
2022-10-06 19:12:42 +02:00
if d, ok := e.Attributes.GetAsString("date"); !ok {
return nil
} else if t, err := time.Parse(time.RFC3339, d); err == nil {
return &t
} else if t, err := time.Parse("2006-01-02", d); err == nil {
return &t
} else {
log.Printf("%s: date: %s\n", e.PathSource, err)
return nil
}
}
var (
	// globs lists the filename patterns of source documents;
	// it is used both for loading entries and for filtering watch events.
	globs = []string{"*.adoc", "*.asciidoc"}
	// extRE matches the extension at the very end of a path: a dot followed
	// by any number of characters that are neither dots nor slashes.
	extRE = regexp.MustCompile(`\.[^/.]*$`)
)
// pathToName derives the name of a document from the path to its file:
// the last path element, without its extension.
func pathToName(path string) string {
	base := filepath.Base(path)
	return stripExtension(base)
}
2020-08-15 03:08:02 +02:00
// stripExtension returns path without its trailing extension, if it has any.
func stripExtension(path string) string {
	// extRE is anchored to the end of the string, and so it can match
	// at most once: cutting at the start of the match removes it.
	if loc := extRE.FindStringIndex(path); loc != nil {
		return path[:loc[0]]
	}
	return path
}
// resultPath returns the path of the HTML file generated for the given
// source path: any trailing extension is replaced with ".html",
// and paths without an extension simply have ".html" appended.
func resultPath(path string) string {
	return extRE.ReplaceAllString(path, "") + ".html"
}
func makeLink(m *map[string]*Entry, name string) string {
2020-08-15 03:08:02 +02:00
e := (*m)[name]
return fmt.Sprintf("<a href='%s'>%s</a>",
filepath.Clean(e.PathDestination), name)
2020-08-15 03:08:02 +02:00
}
// linkWordRE matches candidates for LinkWords: an uppercase letter followed
// by any number of letters, delimited by word boundaries (\b) on both sides.
var linkWordRE = regexp.MustCompile(`\b\p{Lu}\p{L}*\b`)
func expand(m *map[string]*Entry, name string, chunk []byte) []byte {
2020-08-15 03:08:02 +02:00
return linkWordRE.ReplaceAllFunc(chunk, func(match []byte) []byte {
2020-09-21 18:30:11 +02:00
if link, ok := (*m)[string(match)]; ok && string(match) != name &&
!link.IsDraft() {
2021-06-22 01:29:21 +02:00
link.backlinks[name] = true
2020-08-15 03:08:02 +02:00
return []byte(makeLink(m, string(match)))
}
return match
})
}
// tagRE matches a single run of text enclosed in angle brackets, with no
// other angle brackets inside. It is used to find the <tags> in rendered
// documents, so that LinkWords only get expanded in the text between them.
var tagRE = regexp.MustCompile(`<[^<>]+>`)
func renderEntry(name string, e *Entry) error {
f, err := os.Open(e.PathSource)
if err != nil {
return err
}
if i, err := f.Stat(); err != nil {
return err
} else {
e.mtime = i.ModTime()
}
var html *bytes.Buffer
if html, e.Metadata, err = Render(f, configuration.NewConfiguration(
configuration.WithFilename(e.PathSource),
configuration.WithLastUpdated(e.mtime),
2022-10-05 20:38:13 +02:00
configuration.WithAttribute("toc", "preamble"),
2022-10-06 19:12:42 +02:00
configuration.WithAttribute("toc-title", "<h2>Contents</h2>"),
2022-10-05 20:38:13 +02:00
configuration.WithAttribute("source-highlighter", "chroma"),
)); err != nil {
return err
2020-08-15 03:08:02 +02:00
}
// Every page needs to have a title.
if e.Title == "" {
e.Title = name
2020-08-15 03:08:02 +02:00
}
e.raw = html.Bytes()
return nil
}
// makeEntry creates a fresh, not yet rendered entry for the source document
// at the given path, with the destination path derived from it.
func makeEntry(path string) *Entry {
	e := new(Entry)
	e.PathSource = path
	e.PathDestination = resultPath(path)
	return e
}
// loadEntries creates a map from document names to their page entries.
func loadEntries(dirname string) (map[string]*Entry, error) {
entries := map[string]*Entry{}
for _, glob := range globs {
matches, err := filepath.Glob(filepath.Join(dirname, glob))
2020-08-15 03:08:02 +02:00
if err != nil {
return nil, fmt.Errorf("%s: %s", dirname, err)
2020-08-15 03:08:02 +02:00
}
for _, path := range matches {
name := pathToName(path)
2020-08-15 03:08:02 +02:00
if conflict, ok := entries[name]; ok {
return nil, fmt.Errorf("%s: conflicts with %s",
name, conflict.PathSource)
}
entries[name] = makeEntry(path)
2020-08-15 03:08:02 +02:00
}
}
return entries, nil
}
2020-08-15 03:08:02 +02:00
func writeEntry(e *Entry, t *template.Template,
entries *map[string]*Entry) error {
f, err := os.Create(e.PathDestination)
if err != nil {
return err
}
2021-06-22 01:29:21 +02:00
backlinks := []string{}
for name := range e.backlinks {
backlinks = append(backlinks, name)
}
sort.Strings(backlinks)
for _, name := range backlinks {
e.Backlinks =
append(e.Backlinks, template.HTML(makeLink(entries, name)))
}
return t.Execute(f, e)
}
// writeIndex executes the template over a sorted list of all entries,
// writing the result to the given path.
func writeIndex(path string, t *template.Template,
	entries *map[string]*Entry) error {
	// Reorder entries reversely, primarily by date, secondarily by filename.
	// Entries without a known date come before all dated ones.
	ordered := make([]*Entry, 0, len(*entries))
	for _, e := range *entries {
		ordered = append(ordered, e)
	}

	sort.Slice(ordered, func(i, j int) bool {
		a, b := ordered[i], ordered[j]
		p1, p2 := a.Published(), b.Published()
		if p1 == nil && p2 != nil {
			return true
		}
		if p1 == nil && p2 == nil {
			return a.PathSource > b.PathSource
		}
		if p2 == nil {
			return false
		}
		if p1.Equal(*p2) {
			return a.PathSource > b.PathSource
		}
		return p2.Before(*p1)
	})

	f, err := os.Create(path)
	if err != nil {
		return err
	}

	// TODO(p): Splitting content to categories would be nice. Or tags.
	if err := t.Execute(f, ordered); err != nil {
		_ = f.Close()
		return err
	}
	// Write errors may only show up when closing the file.
	return f.Close()
}
func finalizeEntries(entries *map[string]*Entry, t *template.Template,
indexPath string, indexT *template.Template) {
2022-03-03 12:57:21 +01:00
// The initial render of a large amount of entries is resource-intensive.
var wg sync.WaitGroup
for name, e := range *entries {
e.backlinks = map[string]bool{}
2022-03-03 12:57:21 +01:00
if e.raw != nil {
continue
}
wg.Add(1)
go func(name string, e *Entry) {
defer wg.Done()
if err := renderEntry(name, e); err != nil {
log.Printf("%s: %s\n", name, err)
}
2022-03-03 12:57:21 +01:00
}(name, e)
}
2022-03-03 12:57:21 +01:00
wg.Wait()
for name, e := range *entries {
// Expand LinkWords anywhere between <tags>.
// We want something like the inverse of Regexp.ReplaceAllStringFunc.
raw, last, expanded := e.raw, 0, bytes.NewBuffer(nil)
for _, where := range tagRE.FindAllIndex(raw, -1) {
_, _ = expanded.Write(expand(entries, name, raw[last:where[0]]))
_, _ = expanded.Write(raw[where[0]:where[1]])
last = where[1]
}
_, _ = expanded.Write(expand(entries, name, raw[last:]))
e.Content = template.HTML(expanded.String())
}
for name, e := range *entries {
// Don't overwrite failed renders.
if e.raw == nil {
continue
}
if err := writeEntry(e, t, entries); err != nil {
log.Printf("%s: %s\n", name, err)
}
}
if err := writeIndex(indexPath, indexT, entries); err != nil {
log.Printf("%s: %s\n", indexPath, err)
}
}
// watchEvent describes a change to a single source document within
// the watched directory.
type watchEvent struct {
	path    string // the path of the target
	present bool   // if not, the file has been removed
}
// dispatchEvents decodes raw inotify events from the reader, and sends
// a watchEvent to the channel for each event that concerns a file matching
// one of the known globs. A nil event is sent when the inotify queue has
// overflown, meaning that everything needs to be reloaded.
//
// It returns nil once the reader has been exhausted, and an error when
// the data cannot be decoded, or when the kernel has removed the watch.
func dispatchEvents(dirname string, r io.Reader, ch chan<- *watchEvent) error {
	var e syscall.InotifyEvent
	for {
		// FIXME(p): This has to respect the machine's endianness.
		// Perhaps use the unsafe package.
		err := binary.Read(r, binary.LittleEndian, &e)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		// A single Read is allowed to return fewer bytes than requested,
		// so insist on receiving the whole name.
		base := make([]byte, e.Len)
		if _, err := io.ReadFull(r, base); err == io.ErrUnexpectedEOF {
			return fmt.Errorf("short read")
		} else if err != nil {
			return err
		}

		switch {
		case e.Mask&syscall.IN_IGNORED != 0:
			return fmt.Errorf("watch removed by kernel")
		case e.Mask&syscall.IN_Q_OVERFLOW != 0:
			log.Println("inotify: queue overflowed")
			ch <- nil
			continue
		case e.Len == 0:
			continue
		}

		// The name is expected to be padded with NUL bytes,
		// but make sure not to panic when it is not.
		basename := string(base)
		if end := bytes.IndexByte(base, 0); end >= 0 {
			basename = string(base[:end])
		}

		interesting := false
		for _, glob := range globs {
			// Match can only fail on malformed patterns.
			if matches, _ := filepath.Match(glob, basename); matches {
				interesting = true
				break
			}
		}
		if !interesting {
			continue
		}

		// Each send uses its own value, so that the receiver never
		// observes an event that is being modified.
		path := filepath.Join(dirname, basename)
		if e.Mask&(syscall.IN_MODIFY|syscall.IN_MOVED_TO|
			syscall.IN_CLOSE_WRITE) != 0 {
			ch <- &watchEvent{path: path, present: true}
		}
		if e.Mask&(syscall.IN_DELETE|syscall.IN_MOVED_FROM) != 0 {
			ch <- &watchEvent{path: path, present: false}
		}
	}
}
// watchDirectory starts watching a directory using inotify, and returns
// a channel of events concerning the source documents within.
//
// The first event sent is always nil, so as to trigger an initial rendering
// run. The channel is closed when the watch can no longer be serviced.
func watchDirectory(dirname string) (<-chan *watchEvent, error) {
	inotifyFD, err := syscall.InotifyInit1(0)
	if err != nil {
		return nil, err
	}

	// We're ignoring IN_CREATE, as it doesn't seem to be useful,
	// and we're leaving out IN_MODIFY since VIM always triggers IN_CLOSE_WRITE,
	// saving us from having to coalesce plentiful similar events.
	_, err = syscall.InotifyAddWatch(inotifyFD, dirname, syscall.IN_ONLYDIR|
		syscall.IN_MOVE|syscall.IN_DELETE|syscall.IN_CLOSE_WRITE)
	if err != nil {
		// Don't leak the descriptor when the watch cannot be set up.
		_ = syscall.Close(inotifyFD)
		return nil, err
	}

	inotifyFile := os.NewFile(uintptr(inotifyFD), "inotify")
	buf := make([]byte, syscall.SizeofInotifyEvent+syscall.PathMax+1)

	ch := make(chan *watchEvent)
	go func() {
		// Nothing will read from the descriptor once we're done here.
		defer inotifyFile.Close()
		defer close(ch)

		// Trigger an initial rendering run.
		ch <- nil

		for {
			n, err := inotifyFile.Read(buf)
			if err != nil {
				log.Println(err)
				return
			}

			err = dispatchEvents(dirname, bytes.NewReader(buf[:n]), ch)
			if err != nil {
				log.Printf("inotify: %s\n", err)
				return
			}
		}
	}()
	return ch, nil
}
// funcs lists additional functions made available to templates.
var funcs = template.FuncMap{
	// contains reports whether needle equals any element of haystack.
	"contains": func(needle string, haystack []string) bool {
		found := false
		for i := 0; i < len(haystack) && !found; i++ {
			found = haystack[i] == needle
		}
		return found
	},
}
// singleFile renders a single document from the standard input to
// the standard output, using the default configuration. When the document
// has rendered successfully and has a title, the title is output first
// as a top-level heading. Render errors are logged, and whatever Render
// has produced is output regardless.
func singleFile() {
	html, meta, err := Render(os.Stdin, configuration.NewConfiguration())
	switch {
	case err != nil:
		log.Println(err)
	case meta.Title != "":
		_, _ = os.Stdout.WriteString("<h1>")
		_ = xml.EscapeText(os.Stdout, []byte(meta.Title))
		_, _ = os.Stdout.WriteString("</h1>\n")
	}
	_, _ = io.Copy(os.Stdout, html)
}
func main() {
if len(os.Args) < 2 {
singleFile()
return
}
if len(os.Args) != 4 {
log.Fatalf("usage: %s TEMPLATE INDEX DIRECTORY\n", os.Args[0])
}
argTemplate, argIndex, argDirectory := os.Args[1], os.Args[2], os.Args[3]
// Read a template for entries.
header, err := ioutil.ReadFile(argTemplate)
if err != nil {
log.Fatalln(err)
}
tmplEntry, err := template.New("entry").Funcs(funcs).Parse(string(header))
if err != nil {
log.Fatalln(err)
}
// Read a template for the index from the standard input.
index, err := ioutil.ReadAll(os.Stdin)
if err != nil {
log.Fatalln(err)
}
tmplIndex, err := template.New("index").Funcs(funcs).Parse(string(index))
if err != nil {
log.Fatalln(err)
}
// Re-render as needed, avoid having to trigger anything manually.
var entries map[string]*Entry
directoryWatch, err := watchDirectory(argDirectory)
if err != nil {
log.Fatalln(err)
}
signals := make(chan os.Signal, 1)
signal.Notify(signals, syscall.SIGINT, syscall.SIGHUP, syscall.SIGTERM)
for {
select {
case <-signals:
os.Exit(0)
case event, ok := <-directoryWatch:
if !ok {
os.Exit(1)
}
if event == nil {
log.Println("reloading all files")
if entries, err = loadEntries(argDirectory); err != nil {
log.Println(err)
}
} else if event.present {
log.Printf("updating %s\n", event.path)
entries[pathToName(event.path)] = makeEntry(event.path)
} else {
log.Printf("removing %s\n", event.path)
delete(entries, pathToName(event.path))
os.Remove(resultPath(event.path))
}
finalizeEntries(&entries, tmplEntry, argIndex, tmplIndex)
log.Println("done")
}
2020-08-15 03:08:02 +02:00
}
}