commit 04639942af2275574bb240e00bd9018fbb3eeca1
Author: Přemysl Janouch
Date: Sun Jul 10 14:35:33 2016 +0200
Initial commit
diff --git a/assembler.go b/assembler.go
new file mode 100644
index 0000000..21bebba
--- /dev/null
+++ b/assembler.go
@@ -0,0 +1,314 @@
+package main
+
+import (
+ "errors"
+ "io"
+ "fmt"
+ "bufio"
+ "strings"
+ "strconv"
+)
+
+// Kinds of token produced by the tokenizer.
+const (
+ WORD = iota // letter or '_' head, letters/digits/'_' tail (see isWordHead/isWordTail)
+ NUMBER // [0-9]+
+ NEWLINE // \n (significant: it terminates instructions)
+ ERROR // tokenization failed; the token value holds the message
+)
+
+// location is a position within the input text.
+type location struct {
+ line int // line number, starts at 1 (see tokenize)
+ column int // column number, reset to 0 after each newline (see eat)
+}
+
+// token is a single lexical element passed over the token channel.
+type token struct {
+ location location // Position of the token
+ value string // Text content of the token
+ kind int // Kind of the token (WORD, NUMBER, NEWLINE or ERROR)
+}
+
+// tokenizer holds the state of a single tokenization run over a reader;
+// tokens are delivered asynchronously over the channel (see tokenize).
+type tokenizer struct {
+ line int // Current line
+ column int // Current column
+
+ value []byte // Current token string
+ reader *bufio.Reader // Reader
+ tokens chan<- token // Token channel
+}
+
+// -----------------------------------------------------------------------------
+
+// isSpace reports whether c is insignificant horizontal whitespace
+// (space, carriage return or tab); '\n' is significant and excluded.
+func isSpace(c byte) bool {
+	switch c {
+	case ' ', '\r', '\t':
+		return true
+	}
+	return false
+}
+
+// isNumber reports whether c is an ASCII decimal digit.
+func isNumber(c byte) bool {
+	return '0' <= c && c <= '9'
+}
+
+// isWordHead reports whether c may start a word token: an ASCII letter
+// of either case, or an underscore.
+func isWordHead(c byte) bool {
+	// Fold lower case onto upper case before testing the range.
+	if c >= 'a' && c <= 'z' { c -= 32 }
+	// BUG FIX: after folding, letters land in 'A'-'Z', so that is the
+	// range to test; the previous 'a'-'z' test rejected every letter,
+	// leaving '_' as the only accepted word head.
+	return c >= 'A' && c <= 'Z' || c == '_'
+}
+
+// isWordTail reports whether c may continue a word token: anything a
+// word may start with, plus decimal digits.
+func isWordTail(c byte) bool {
+	if isNumber(c) {
+		return true
+	}
+	return isWordHead(c)
+}
+
+// -----------------------------------------------------------------------------
+
+// send emits the accumulated token text as a token of the given kind
+// and clears the accumulator for the next token.
+func (t *tokenizer) send(kind int) {
+	// FIXME: track the beginning of the token
+	tok := token{
+		location: location{t.line, t.column},
+		value:    string(t.value),
+		kind:     kind,
+	}
+	t.tokens <- tok
+	t.value = []byte{}
+}
+
+// peek returns the next input byte without consuming it.  On failure
+// (including io.EOF) it returns a zero byte and the error.
+func (t *tokenizer) peek() (byte, error) {
+	buf, err := t.reader.Peek(1)
+	// BUG FIX: at EOF bufio.Reader.Peek returns an empty slice, so
+	// indexing it unconditionally panicked; check the error first.
+	if err != nil {
+		return 0, err
+	}
+	return buf[0], nil
+}
+
+// eat consumes and returns the next input byte, maintaining the
+// tokenizer's line and column counters as it goes.
+func (t *tokenizer) eat() (byte, error) {
+	c, err := t.reader.ReadByte()
+	if err != nil {
+		return 0, err
+	}
+	switch c {
+	case '\n':
+		t.line++
+		t.column = 0
+	default:
+		t.column++
+	}
+	return c, nil
+}
+
+// -----------------------------------------------------------------------------
+
+// step consumes one lexical element from the input and, when it forms
+// a token, sends it on the token channel.  It returns io.EOF at the
+// end of input and a descriptive error on invalid input.
+func (t *tokenizer) step() error {
+	t.value = []byte{}
+	c, err := t.peek()
+
+	// BUG FIX: io.EOF used to be translated to nil here, so the
+	// caller's "err == io.EOF" test never fired and tokenize() would
+	// loop forever at the end of input; propagate it instead.
+	if err != nil {
+		return err
+	}
+
+	switch {
+	case isSpace(c):
+		// Insignificant whitespace, just skip it.
+		t.eat()
+	case c == '\n':
+		// peek succeeded above, so this read cannot fail.
+		c, _ = t.eat()
+		t.value = append(t.value, c)
+
+		t.send(NEWLINE)
+	case isNumber(c):
+		c, _ = t.eat()
+		t.value = append(t.value, c)
+
+		// Accumulate the remaining digits.
+		for {
+			c, err = t.peek()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return err
+			}
+			if !isNumber(c) {
+				break
+			}
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(NUMBER)
+	case isWordHead(c):
+		c, _ = t.eat()
+		t.value = append(t.value, c)
+
+		// Accumulate the rest of the word.
+		for {
+			c, err = t.peek()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return err
+			}
+			if !isWordTail(c) {
+				break
+			}
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(WORD)
+	case c == '/':
+		c, _ = t.eat()
+		t.value = append(t.value, c)
+
+		// Only "//" comments exist; a lone slash is an error.
+		c, err = t.peek()
+		if err != nil {
+			return err
+		}
+		if c != '/' {
+			return errors.New("unrecognized input")
+		}
+		// Skip the comment up to, but not including, the newline,
+		// which still gets its own token.
+		for {
+			c, err = t.peek()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				return err
+			}
+			if c == '\n' {
+				break
+			}
+			t.eat()
+		}
+	default:
+		return errors.New("unrecognized input")
+	}
+	return nil
+}
+
+// tokenize reads r to its end, sending tokens on the given channel and
+// closing it when done.  Errors are reported in-band as a final token
+// of kind ERROR.
+func tokenize(r io.Reader, tokens chan<- token) {
+	t := tokenizer{
+		line:   1,
+		column: 0,
+		tokens: tokens,
+		reader: bufio.NewReader(r),
+	}
+	for {
+		err := t.step()
+		if err == nil {
+			continue
+		}
+		if err != io.EOF {
+			message := fmt.Sprintf("line %d, column %d: %s",
+				t.line, t.column, err.Error())
+			t.tokens <- token{location{t.line, t.column}, message, ERROR}
+		}
+		break
+	}
+	close(tokens)
+}
+
+// -----------------------------------------------------------------------------
+
+// Instruction identifiers.  Assemble encodes instruction X as
+// X*100 plus its numeric operand or resolved label address.
+const (
+ IHALT = iota
+ IADD
+ ISUBTRACT
+ ISTORE
+ ILOAD
+ IBRANCH
+ IBRANCH_IF_ZERO
+ IBRANCH_IF_POSITIVE
+ IINPUT
+ IOUTUT // NOTE(review): looks like a typo for IOUTPUT — rename together with its use in the instructions map
+ IDATA
+)
+
+// instructions maps upper-case mnemonics to instruction identifiers;
+// the assembler upper-cases words before looking them up here.
+var instructions = map[string]int {
+ "HLT": IHALT,
+ "COB": IHALT,
+ "ADD": IADD,
+ "SUB": ISUBTRACT,
+ "STA": ISTORE,
+ "LDA": ILOAD,
+ "BRA": IBRANCH,
+ "BRZ": IBRANCH_IF_ZERO,
+ "BRP": IBRANCH_IF_POSITIVE,
+ "INP": IINPUT,
+ "OUT": IOUTUT,
+ "DAT": IDATA,
+}
+
+// instruction is one assembled instruction before label resolution.
+type instruction struct {
+ id int // instruction identifier (IHALT...IDATA)
+ target string // upper-cased label operand, empty when absent
+ number int // numeric operand, used when target is empty
+}
+
+// -----------------------------------------------------------------------------
+
+// assembler consumes tokens and builds the instruction list together
+// with a map from label names to instruction indexes.
+type assembler struct {
+ tokens chan token
+ output []instruction
+ labels map[string]int // NOTE(review): a map must be made before the first label is stored into it
+}
+
+// step processes one instruction's worth of tokens: an optional label,
+// a mnemonic, and an optional operand (label word or number).  It
+// returns false once the token channel has been closed, and an error
+// on invalid input.
+func (a *assembler) step() (bool, error) {
+ token, ok := <-a.tokens
+ if !ok { return false, nil }
+
+ // TODO: add token location information to returned errors
+
+ switch token.kind {
+ case WORD:
+ canonical := strings.ToUpper(token.value)
+ instr, found := instructions[canonical]
+
+ // Not found in the instruction list
+ // Assume it is a label
+ if !found {
+ if _, dup := a.labels[canonical]; dup {
+ return false, fmt.Errorf("Duplicate label: %s", canonical)
+ }
+ // The label refers to the next instruction to be emitted.
+ a.labels[canonical] = len(a.output)
+
+ token, ok = <-a.tokens
+ if !ok {
+ return false, errors.New("Unexpected end of file")
+ }
+ if token.kind != WORD {
+ return false, errors.New("Expected word")
+ }
+
+ // XXX: it might be better to classify this in the lexer
+ canonical = strings.ToUpper(token.value)
+ instr, found = instructions[canonical]
+ }
+
+ if !found {
+ return false, fmt.Errorf("Unknown instruction: %s", canonical)
+ }
+
+ // Read the optional operand; the shadowing ":=" is intentional.
+ instrHolder := instruction{id: instr}
+ token, ok := <-a.tokens
+ if !ok {
+ // This is fine, just assume zero
+ break
+ }
+
+ switch token.kind {
+ case WORD:
+ // A label reference; resolved later by Assemble.
+ instrHolder.target = strings.ToUpper(token.value)
+ case NEWLINE:
+ // This is fine, just assume zero
+ case NUMBER:
+ // NUMBER tokens are all-digit by construction, so Atoi
+ // can only fail on range, which is ignored here.
+ instrHolder.number, _ = strconv.Atoi(token.value)
+ case ERROR:
+ return false, errors.New(token.value)
+ }
+ a.output = append(a.output, instrHolder)
+ case NEWLINE:
+ // Ignore empty lines
+ case NUMBER:
+ return false, errors.New("Unexpected number")
+ case ERROR:
+ return false, errors.New(token.value)
+ }
+ return true, nil
+}
+
+// Assemble reads assembly language text from r and translates it into
+// numeric machine code: each instruction becomes id*100 plus either
+// its numeric operand or the address of its target label.
+func Assemble(r io.Reader) (code []int16, err error) {
+	a := assembler{
+		tokens: make(chan token),
+		// BUG FIX: the labels map was left nil, so storing the first
+		// label panicked with "assignment to entry in nil map".
+		labels: make(map[string]int),
+	}
+	go tokenize(r, a.tokens)
+
+	// First pass: collect instructions and label addresses.
+	for {
+		cont, err := a.step()
+		if err != nil {
+			return nil, err
+		}
+		if !cont {
+			break
+		}
+	}
+
+	// Second pass: resolve label references and encode.
+	for _, x := range a.output {
+		n := x.id * 100
+		if len(x.target) != 0 {
+			resolved, ok := a.labels[x.target]
+			if !ok {
+				return nil, fmt.Errorf("Unknown label: %s", x.target)
+			}
+			n += resolved
+		} else {
+			n += x.number
+		}
+		code = append(code, int16(n))
+	}
+	return code, nil
+}
diff --git a/machine.go b/machine.go
new file mode 100644
index 0000000..a4b188d
--- /dev/null
+++ b/machine.go
@@ -0,0 +1,11 @@
+package main
+
+// Run executes the given machine code.
+// NOTE(review): this is an unfinished stub — the loop body is empty
+// and pc never advances, so any non-empty program spins forever.
+func Run(code []int16) {
+ // TODO: assert that the code is 100 boxes long
+
+ pc := 0
+ for pc < len(code) {
+ }
+
+ // TODO: throw an exception
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c277d00
--- /dev/null
+++ b/main.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+ "os"
+ "fmt"
+)
+
+// main assembles the program from standard input and runs it.
+func main() {
+	code, err := Assemble(os.Stdin)
+	if err != nil {
+		// Diagnostics belong on stderr, terminated by a newline.
+		fmt.Fprintf(os.Stderr, "Assembly failed: %s\n", err)
+		os.Exit(1)
+	}
+	Run(code)
+}