commit 04639942af2275574bb240e00bd9018fbb3eeca1
Author: Přemysl Janouch
Date:   Sun Jul 10 14:35:33 2016 +0200

    Initial commit

diff --git a/assembler.go b/assembler.go
new file mode 100644
index 0000000..21bebba
--- /dev/null
+++ b/assembler.go
@@ -0,0 +1,314 @@
+package main
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+const (
+	WORD    = iota // [A-Za-z_][A-Za-z0-9_]*
+	NUMBER         // [0-9]+
+	NEWLINE        // \n
+	ERROR          // Error
+)
+
+type location struct {
+	line   int
+	column int
+}
+
+type token struct {
+	location location // Position of the token
+	value    string   // Text content of the token
+	kind     int      // Kind of the token
+}
+
+type tokenizer struct {
+	line   int // Current line
+	column int // Current column
+
+	value  []byte        // Current token string
+	reader *bufio.Reader // Reader
+	tokens chan<- token  // Token channel
+}
+
+// -----------------------------------------------------------------------------
+
+func isSpace(c byte) bool {
+	return c == ' ' || c == '\r' || c == '\t'
+}
+
+func isNumber(c byte) bool {
+	return c >= '0' && c <= '9'
+}
+
+func isWordHead(c byte) bool {
+	if c >= 'a' && c <= 'z' { c -= 32 }
+	return c >= 'A' && c <= 'Z' || c == '_'
+}
+
+func isWordTail(c byte) bool {
+	return isWordHead(c) || isNumber(c)
+}
+
+// -----------------------------------------------------------------------------
+
+func (t *tokenizer) send(kind int) {
+	// FIXME: track the beginning of the token
+	t.tokens <- token{location{t.line, t.column}, string(t.value), kind}
+	t.value = []byte{}
+}
+
+func (t *tokenizer) peek() (byte, error) {
+	buf, err := t.reader.Peek(1)
+	if err != nil { return 0, err }
+	return buf[0], nil
+}
+
+func (t *tokenizer) eat() (byte, error) {
+	c, err := t.reader.ReadByte()
+	if err != nil { return 0, err }
+
+	if c == '\n' {
+		t.line++
+		t.column = 0
+	} else {
+		t.column++
+	}
+	return c, nil
+}
+
+// -----------------------------------------------------------------------------
+
+func (t *tokenizer) step() error {
+	t.value = []byte{}
+	c, err := t.peek()
+
+	// Propagate io.EOF so that tokenize() knows when to stop
+	if err != nil { return err }
+
+	switch {
+	case isSpace(c):
+		t.eat()
+	case c == '\n':
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		t.send(NEWLINE)
+	case isNumber(c):
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if !isNumber(c) { break }
+
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(NUMBER)
+	case isWordHead(c):
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if !isWordTail(c) { break }
+
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(WORD)
+	case c == '/':
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		c, err = t.peek()
+		if err != nil { return err }
+
+		if c != '/' {
+			return errors.New("unrecognized input")
+		}
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if c == '\n' { break }
+			t.eat()
+		}
+	default:
+		return errors.New("unrecognized input")
+	}
+	return nil
+}
+
+func tokenize(r io.Reader, tokens chan<- token) {
+	t := tokenizer{
+		line:   1,
+		column: 0,
+		tokens: tokens,
+		reader: bufio.NewReader(r),
+	}
+	for {
+		err := t.step()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.tokens <- token{location{t.line, t.column},
+				fmt.Sprintf("line %d, column %d: %s",
+					t.line, t.column, err.Error()), ERROR}
+			break
+		}
+	}
+	close(tokens)
+}
+
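+// For illustration: given the input "ADD 5\n", the tokenizer above sends a
+// WORD token "ADD", a NUMBER token "5" and a NEWLINE token down the channel,
+// then closes the channel at the end of input; a lexing failure is reported
+// as a single ERROR token whose value holds the position and the message.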
+
+// -----------------------------------------------------------------------------
+
+const (
+	IHALT = iota
+	IADD
+	ISUBTRACT
+	ISTORE
+	ILOAD
+	IBRANCH
+	IBRANCH_IF_ZERO
+	IBRANCH_IF_POSITIVE
+	IINPUT
+	IOUTPUT
+	IDATA
+)
+
+var instructions = map[string]int{
+	"HLT": IHALT,
+	"COB": IHALT,
+	"ADD": IADD,
+	"SUB": ISUBTRACT,
+	"STA": ISTORE,
+	"LDA": ILOAD,
+	"BRA": IBRANCH,
+	"BRZ": IBRANCH_IF_ZERO,
+	"BRP": IBRANCH_IF_POSITIVE,
+	"INP": IINPUT,
+	"OUT": IOUTPUT,
+	"DAT": IDATA,
+}
+
+type instruction struct {
+	id     int
+	target string
+	number int
+}
+
+// -----------------------------------------------------------------------------
+
+type assembler struct {
+	tokens chan token
+	output []instruction
+	labels map[string]int
+}
+
+func (a *assembler) step() (bool, error) {
+	token, ok := <-a.tokens
+	if !ok { return false, nil }
+
+	// TODO: add token location information to returned errors
+
+	switch token.kind {
+	case WORD:
+		canonical := strings.ToUpper(token.value)
+		instr, found := instructions[canonical]
+
+		// Not found in the instruction list
+		// Assume it is a label
+		if !found {
+			if _, dup := a.labels[canonical]; dup {
+				return false, fmt.Errorf("Duplicate label: %s", canonical)
+			}
+			a.labels[canonical] = len(a.output)
+
+			token, ok = <-a.tokens
+			if !ok {
+				return false, errors.New("Unexpected end of file")
+			}
+			if token.kind != WORD {
+				return false, errors.New("Expected word")
+			}
+
+			// XXX: it might be better to classify this in the lexer
+			canonical = strings.ToUpper(token.value)
+			instr, found = instructions[canonical]
+		}
+
+		if !found {
+			return false, fmt.Errorf("Unknown instruction: %s", canonical)
+		}
+
+		instrHolder := instruction{id: instr}
+		token, ok := <-a.tokens
+		if !ok {
+			// This is fine, just assume zero
+			a.output = append(a.output, instrHolder)
+			break
+		}
+
+		switch token.kind {
+		case WORD:
+			instrHolder.target = strings.ToUpper(token.value)
+		case NEWLINE:
+			// This is fine, just assume zero
+		case NUMBER:
+			instrHolder.number, _ = strconv.Atoi(token.value)
+		case ERROR:
+			return false, errors.New(token.value)
+		}
+		a.output = append(a.output, instrHolder)
+	case NEWLINE:
+		// Ignore empty lines
+	case NUMBER:
+		return false, errors.New("Unexpected number")
+	case ERROR:
+		return false, errors.New(token.value)
+	}
+	return true, nil
+}
+
+func Assemble(r io.Reader) (code []int16, err error) {
+	a := assembler{
+		tokens: make(chan token),
+		labels: make(map[string]int),
+	}
+	go tokenize(r, a.tokens)
+
+	for {
+		cont, err := a.step()
+		if err != nil {
+			return nil, err
+		}
+		if !cont {
+			break
+		}
+	}
+
+	for _, x := range a.output {
+		n := x.id * 100
+		if len(x.target) != 0 {
+			if resolved, ok := a.labels[x.target]; !ok {
+				return nil, errors.New("Unknown label")
+			} else {
+				n += resolved
+			}
+		} else {
+			n += x.number
+		}
+		code = append(code, int16(n))
+	}
+	return code, nil
+}
diff --git a/machine.go b/machine.go
new file mode 100644
index 0000000..a4b188d
--- /dev/null
+++ b/machine.go
@@ -0,0 +1,11 @@
+package main
+
+func Run(code []int16) {
+	// TODO: assert that the code is 100 boxes long
+
+	pc := 0
+	for pc < len(code) {
+		// TODO: decode and execute code[pc]; just skip over it for now
+		pc++
+	}
+
+	// TODO: throw an exception
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c277d00
--- /dev/null
+++ b/main.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"fmt"
+	"os"
+)
+
+func main() {
+	code, err := Assemble(os.Stdin)
+	if err != nil {
+		fmt.Printf("Assembly failed: %s\n", err)
+		os.Exit(1)
+	}
+	Run(code)
+}
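Not part of the commit: a minimal smoke-test sketch for the assembler on its own, since machine.go is still a stub. The file name assembler_test.go and the expected slice are assumptions; the numbers follow from the instructions map and from Assemble packing every instruction as id*100 plus either a resolved label address or a literal operand, which is also why DAT still comes out as 10xx in this initial version.

// assembler_test.go (hypothetical, not included in the commit above)
package main

import (
	"reflect"
	"strings"
	"testing"
)

func TestAssemble(t *testing.T) {
	// INP=8, ADD=1, OUT=9, HLT=0, DAT=10; the label FIVE resolves to box 4
	src := "INP\nADD FIVE\nOUT\nHLT\nFIVE DAT 5\n"
	want := []int16{800, 104, 900, 0, 1005}

	code, err := Assemble(strings.NewReader(src))
	if err != nil {
		t.Fatalf("Assemble: %s", err)
	}
	if !reflect.DeepEqual(code, want) {
		t.Fatalf("got %v, want %v", code, want)
	}
}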