haven/hswg/main.go

// Program hswg is a static website generator employing libasciidoc with added
// support for two-line/underlined titles, and postprocessing "wiki" InterLinks.
package main

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"html/template"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/bytesparadise/libasciidoc/pkg/configuration"
	"github.com/bytesparadise/libasciidoc/pkg/parser"
	"github.com/bytesparadise/libasciidoc/pkg/renderer"
	"github.com/bytesparadise/libasciidoc/pkg/renderer/sgml/html5"
	"github.com/bytesparadise/libasciidoc/pkg/types"
	"github.com/bytesparadise/libasciidoc/pkg/validator"
)

// isTitle returns the title level if the lines seem to form a two-line title
// (line1 being the text and line2 its underline), zero otherwise.
// The level is given by the underline character: 1 for '=', 2 for '-',
// 3 for '~', 4 for '^', and 5 for '+'.
//
// Input lines may include trailing newlines. These are disregarded, so that
// the verdict doesn't depend on how the individual lines are terminated
// (consider the last line of a file, or mixed LF and CRLF line endings).
func isTitle(line1, line2 []byte) int {
	title := bytes.TrimRight(line1, "\r\n")
	underline := bytes.TrimRight(line2, "\r\n")
	if len(underline) == 0 {
		return 0
	}

	// This is a very naïve method, we should target graphemes (thus at least
	// NFC normalize the lines first) and account for wide characters.
	// The underline may be at most one character shorter or longer.
	diff := utf8.RuneCount(title) - utf8.RuneCount(underline)
	if diff < -1 || diff > 1 {
		return 0
	}

	// "Don't be fooled by back-to-back delimited blocks."
	// Still gets fooled by other things, though.
	if bytes.IndexFunc(title, func(r rune) bool {
		return unicode.IsLetter(r) || unicode.IsNumber(r)
	}) < 0 {
		return 0
	}

	// The underline must be homogeneous.
	marker := underline[0]
	for _, b := range underline {
		if b != marker {
			return 0
		}
	}

	// IndexByte yields -1 for anything that isn't an underline character,
	// which conveniently turns the result into zero.
	return 1 + strings.IndexByte("=-~^+", marker)
}

// writeLine writes the line cur to w, using the line next as a lookahead.
// If the two form an underlined title, cur is written out as a single-line
// title ("=" repeated level times, a space, and the text), and the underline
// is consumed. It returns the line to be passed as cur in the following call,
// which is nil when next has been consumed.
func writeLine(w *io.PipeWriter, cur, next []byte) []byte {
	if level := isTitle(cur, next); level > 0 {
		// Assemble the heading in a fresh slice, so that cur, which aliases
		// the caller's input, stays intact.
		heading := make([]byte, 0, level+1+len(cur))
		heading = append(heading, bytes.Repeat([]byte{'='}, level)...)
		heading = append(heading, ' ')
		heading = append(heading, cur...)
		cur, next = heading, nil
	}

	// A zero-length write on a pipe still has to be matched with a read,
	// which then tells the reader that nothing has happened. Skip those.
	if len(cur) > 0 {
		// The error is deliberately ignored: there is no way of reporting it
		// from here, and it is up to the reading side to deal with failures.
		_, _ = w.Write(cur)
	}
	return next
}

// ConvertTitles copies the AsciiDoc document in input to w, rewriting any
// two-line (underlined) titles as single-line ones on the way.
func ConvertTitles(w *io.PipeWriter, input []byte) {
	lines := bytes.SplitAfter(input, []byte{'\n'})

	// Output runs one line behind the input, since telling whether a line
	// is a title requires having seen the line that comes after it.
	var pending []byte
	for i := range lines {
		pending = writeLine(w, pending, lines[i])
	}

	// Whatever remains has nothing following it.
	writeLine(w, pending, nil)
}

// Metadata contains select metadata about a rendered document.
type Metadata struct {
	// The metadata as returned by the HTML5 renderer (see Render).
	types.Metadata

	// Attributes of the parsed document, as assigned by Render.
	// Note that this includes entries from the front-matter
	// (see parser.ApplySubstitutions <- parser.ParseDocument).
	Attributes types.Attributes
}

// IsDraft reports whether the document has the "draft" attribute set.
// Drafts should not be linked from anywhere else.
func (m *Metadata) IsDraft() bool {
	draft := m.Attributes.Has("draft")
	return draft
}

// Render converts an io.Reader with an AsciiDoc document to HTML. So long as
// the file could be read at all, it will always return a non-empty document.
//
// Two-line titles are converted to single-line ones before parsing.
// When either parsing or rendering fails, the returned document consists
// of the escaped input wrapped in a <pre> element, and the error that has
// caused this is returned alongside it. Problems found by the validator
// are merely printed to the standard error output.
//
// The returned metadata carry the attributes of the parsed document,
// and, if rendering has been attempted, also the renderer's metadata.
func Render(r io.Reader, config configuration.Configuration) (
	html *bytes.Buffer, meta Metadata, err error) {
	html = bytes.NewBuffer(nil)

	var input []byte
	if input, err = ioutil.ReadAll(r); err != nil {
		return html, meta, err
	}

	pr, pw := io.Pipe()
	// Should the parser give up before reaching the end of the document,
	// the goroutine below would stay blocked on a write forever.
	// Closing the reading side makes all such writes fail instead.
	defer pr.Close()
	go func() {
		defer pw.Close()
		ConvertTitles(pw, input)
	}()

	var doc types.Document
	if doc, err = parser.ParseDocument(pr, config); err == nil {
		// Validation errors are kept in a variable of their own:
		// they must neither shadow nor overwrite the function's result,
		// or rendering errors would not reach the fallback below.
		problems, verr := validator.Validate(&doc)
		if verr != nil {
			fmt.Fprintln(os.Stderr, verr)
		}
		for _, problem := range problems {
			fmt.Fprintln(os.Stderr, problem.Message)
		}
		ctx := renderer.NewContext(doc, config)
		meta.Metadata, err = html5.Render(ctx, doc, html)
	}
	if err != nil {
		// Fallback: output all the text sanitized for direct inclusion.
		// This also discards any partial output of the renderer.
		html.Reset()

		_, _ = html.WriteString("<pre>")
		for _, line := range bytes.Split(input, []byte{'\n'}) {
			_ = xml.EscapeText(html, line)
			_, _ = html.WriteString("\n")
		}
		_, _ = html.WriteString("</pre>")
	}
	meta.Attributes = doc.Attributes
	return html, meta, err
}

// Entry contains all context information about a single page.
// Exported fields and methods are what the page and index templates
// get to work with.
type Entry struct {
	Metadata                        // metadata of the rendered document
	PathSource      string          // path to source AsciiDoc
	PathDestination string          // path to destination HTML
	mtime           time.Time       // modification time of the source file
	raw             []byte          // raw inner document, as rendered
	Content         template.HTML   // inner document with expanded LinkWords
	backlinks       map[string]bool // what documents link back here (by name)
	Backlinks       []template.HTML // links to those documents, sorted by name
}

// Published returns the date when the entry was published, as stated by its
// "date" attribute, or nil if no date can be obtained from there.
// The attribute may contain either an RFC 3339 timestamp, or a plain date
// in the YYYY-MM-DD format.
func (e *Entry) Published() *time.Time {
	date, _, err := e.Attributes.GetAsString("date")
	if err != nil {
		return nil
	}

	// Layouts are tried in this order, the first one to parse wins.
	for _, layout := range []string{time.RFC3339, "2006-01-02"} {
		if parsed, err := time.Parse(layout, date); err == nil {
			return &parsed
		}
	}
	return nil
}

// extRE matches the extension of a path: the last dot together with anything
// that follows it, so long as no slash comes after that dot.
var extRE = regexp.MustCompile(`\.[^/.]*$`)

// stripExtension returns the path with its extension removed, if it has one.
func stripExtension(path string) string {
	loc := extRE.FindStringIndex(path)
	if loc == nil {
		return path
	}
	return path[:loc[0]]
}

// resultPath returns the path of the HTML file corresponding to the given
// source path: any extension is replaced with ".html", and paths without
// an extension simply have it appended.
func resultPath(path string) string {
	return stripExtension(path) + ".html"
}

// makeLink returns an HTML link to the entry of the given name within m,
// with the name itself used for the link's text.
//
// The result is meant for direct inclusion in HTML, which is why both
// the destination path and the name are escaped: neither is guaranteed to be
// free of characters that are special in HTML, being derived from filenames.
// If m contains no entry for the name, only the escaped name is returned.
func makeLink(m *map[string]*Entry, name string) string {
	label := template.HTMLEscapeString(name)
	e, ok := (*m)[name]
	if !ok || e == nil {
		return label
	}

	// The escaping covers apostrophes, so they cannot end the attribute.
	return fmt.Sprintf("<a href='%s'>%s</a>",
		template.HTMLEscapeString(e.PathDestination), label)
}

// linkWordRE matches candidates for LinkWords: whole words consisting
// of letters only, the first of which is in upper case.
// NOTE(review): \b in Go regular expressions only knows about ASCII word
// characters, so words that begin or end with a non-ASCII letter may not be
// delimited as expected -- confirm whether that matters for the content.
var linkWordRE = regexp.MustCompile(`\b\p{Lu}\p{L}*\b`)

// expand replaces all words within chunk that name an entry in m with links
// to that entry, and returns the result. The chunk is a part of the document
// called name: the document's own name is left alone, as are names of drafts.
// Each linked entry has the document recorded amongst its backlinks.
func expand(m *map[string]*Entry, name string, chunk []byte) []byte {
	linkify := func(match []byte) []byte {
		word := string(match)
		target, known := (*m)[word]
		if !known || word == name || target.IsDraft() {
			return match
		}
		target.backlinks[name] = true
		return []byte(makeLink(m, word))
	}
	return linkWordRE.ReplaceAllFunc(chunk, linkify)
}

// tagRE matches anything enclosed in a pair of angle brackets that has
// no other angle brackets inside, which is used to tell HTML tags from text.
var tagRE = regexp.MustCompile(`<[^<>]+>`)

// renderEntry renders the source document of the entry e, filling in its
// modification time, metadata, and raw contents. The name of the entry
// serves as the title of documents that do not specify any.
// It returns an error if the source cannot be opened, examined or rendered.
func renderEntry(name string, e *Entry) error {
	f, err := os.Open(e.PathSource)
	if err != nil {
		return err
	}
	// The file is only ever read from, so failures to close it are of
	// no consequence, but it must not be left open.
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return err
	}
	e.mtime = info.ModTime()

	var html *bytes.Buffer
	if html, e.Metadata, err = Render(f, configuration.NewConfiguration(
		configuration.WithFilename(e.PathSource),
		configuration.WithLastUpdated(e.mtime),
	)); err != nil {
		return err
	}

	// Every page needs to have a title.
	if e.Title == "" {
		e.Title = name
	}

	e.raw = html.Bytes()
	return nil
}

// loadEntries finds all documents matching any of the glob patterns,
// and renders them. It returns a map from document names, which are
// the base names of their files without the extension, to their entries.
//
// An error is returned when a pattern cannot be processed, when two files
// share the same document name, or when any of the documents fails to render.
func loadEntries(globs []string) (map[string]*Entry, error) {
	// Create a map from document names to their page entries.
	entries := map[string]*Entry{}
	for _, glob := range globs {
		matches, err := filepath.Glob(glob)
		if err != nil {
			// No trailing newlines in errors: whoever reports them
			// terminates the line on their own.
			return nil, fmt.Errorf("%s: %s", glob, err)
		}
		for _, path := range matches {
			name := stripExtension(filepath.Base(path))
			if conflict, ok := entries[name]; ok {
				return nil, fmt.Errorf("%s: conflicts with %s",
					name, conflict.PathSource)
			}
			entries[name] = &Entry{
				PathSource:      path,
				PathDestination: resultPath(path),
				backlinks:       map[string]bool{},
			}
		}
	}

	// Rendering only starts once all names are known to be unique.
	for name, e := range entries {
		if err := renderEntry(name, e); err != nil {
			return nil, err
		}
	}
	return entries, nil
}

// writeEntry executes the template t on the entry e, writing the output
// to the entry's destination path, which is created or truncated.
// Before that, it fills in the entry's Backlinks with links to all documents
// that refer to it, ordered by name; entries is used to resolve those names.
// It returns any error from creating, writing to, or closing the file.
func writeEntry(e *Entry, t *template.Template,
	entries *map[string]*Entry) error {
	f, err := os.Create(e.PathDestination)
	if err != nil {
		return err
	}

	names := make([]string, 0, len(e.backlinks))
	for name := range e.backlinks {
		names = append(names, name)
	}
	sort.Strings(names)

	// Start from scratch, so that the links don't pile up on repeated calls.
	e.Backlinks = nil
	for _, name := range names {
		e.Backlinks =
			append(e.Backlinks, template.HTML(makeLink(entries, name)))
	}

	if err := t.Execute(f, e); err != nil {
		// The template error is the one worth reporting.
		_ = f.Close()
		return err
	}
	// Write errors may only show up at this point, don't lose them.
	return f.Close()
}

// finalizeEntries produces the final Content of all entries from their raw
// documents by expanding LinkWords in them. As a side effect of expansion,
// the backlinks of the entries being linked to are collected.
func finalizeEntries(entries *map[string]*Entry) {
	for name, e := range *entries {
		// LinkWords are only expanded in what is found between <tags>,
		// the tags themselves are copied over verbatim. It's something like
		// the inverse of Regexp.ReplaceAllFunc: the non-matches get replaced.
		var out bytes.Buffer
		pos := 0
		for _, span := range tagRE.FindAllIndex(e.raw, -1) {
			start, end := span[0], span[1]
			_, _ = out.Write(expand(entries, name, e.raw[pos:start]))
			_, _ = out.Write(e.raw[start:end])
			pos = end
		}

		// And finally the text following the last tag, if any.
		_, _ = out.Write(expand(entries, name, e.raw[pos:]))
		e.Content = template.HTML(out.String())
	}
}

// writeIndex executes the template t on a sorted list of all entries,
// writing the output to the standard output.
//
// Entries without a publication date go first, followed by the dated ones
// from the newest to the oldest. Entries that cannot be told apart like this
// are ordered by their source paths, descending.
func writeIndex(t *template.Template, entries *map[string]*Entry) error {
	ordered := make([]*Entry, 0, len(*entries))
	for _, e := range *entries {
		ordered = append(ordered, e)
	}

	// Reorder entries reversely, primarily by date, secondarily by filename.
	sort.Slice(ordered, func(i, j int) bool {
		a, b := ordered[i], ordered[j]
		pa, pb := a.Published(), b.Published()
		switch {
		case pa == nil && pb == nil:
			return a.PathSource > b.PathSource
		case pa == nil:
			return true
		case pb == nil:
			return false
		case pa.Equal(*pb):
			return a.PathSource > b.PathSource
		default:
			return pb.Before(*pa)
		}
	})

	// TODO(p): Splitting content to categories would be nice.
	return t.Execute(os.Stdout, ordered)
}

// singleFile renders a document from the standard input to the standard
// output. Rendering errors are logged, otherwise the document's title,
// if it has one, is put in a first-level heading in front of the contents.
func singleFile() {
	html, meta, err := Render(os.Stdin, configuration.NewConfiguration())
	switch {
	case err != nil:
		log.Println(err)
	case meta.Title != "":
		_, _ = os.Stdout.WriteString("<h1>")
		_ = xml.EscapeText(os.Stdout, []byte(meta.Title))
		_, _ = os.Stdout.WriteString("</h1>\n")
	}

	// Whatever has been rendered is output in any case.
	_, _ = io.Copy(os.Stdout, html)
}

// main runs the generator. Without arguments, it merely converts a single
// document from the standard input to the standard output. Otherwise,
// it expects a path to the page template followed by globs for documents,
// writes out a page for each document found, and then writes an index
// of them to the standard output, using a template read from the standard
// input. Any error along the way is fatal.
func main() {
	args := os.Args
	switch {
	case len(args) < 2:
		singleFile()
		return
	case len(args) < 3:
		log.Fatalf("usage: %s TEMPLATE GLOB...\n", args[0])
	}
	templatePath, globs := args[1], args[2:]

	// Read the template shared by all pages.
	header, err := ioutil.ReadFile(templatePath)
	if err != nil {
		log.Fatalln(err)
	}
	page, err := template.New("page").Parse(string(header))
	if err != nil {
		log.Fatalln(err)
	}

	// Process all entries: links can only be expanded once all documents
	// are known, and pages can only be written once all links are known.
	entries, err := loadEntries(globs)
	if err != nil {
		log.Fatalln(err)
	}
	finalizeEntries(&entries)
	for _, e := range entries {
		if err := writeEntry(e, page, &entries); err != nil {
			log.Fatalln(err)
		}
	}

	// Read a template from the standard input, write an index.
	source, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		log.Fatalln(err)
	}
	index, err := template.New("-").Parse(string(source))
	if err != nil {
		log.Fatalln(err)
	}
	if err := writeIndex(index, &entries); err != nil {
		log.Fatalln(err)
	}
}