344 lines
6.7 KiB
Go
344 lines
6.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// Token kinds assigned to token.kind.
const (
	// WORD is an identifier: a letter or underscore followed by any number
	// of letters, digits or underscores.
	WORD = iota
	// INSTRUCTION is a word whose upper-cased text is a known mnemonic.
	INSTRUCTION
	// NUMBER is a run of decimal digits.
	NUMBER
	// NEWLINE is a single line feed.
	NEWLINE
	// ERROR reports a tokenizer failure; the message is in the token value.
	ERROR
)
|
|
|
|
// location is a line/column position within the source text.
type location struct {
	line, column int
}
|
|
|
|
type token struct {
|
|
location location // Position of the token
|
|
value string // Text content of the token
|
|
instruction int // INSTRUCTION ID
|
|
kind int // Kind of the token
|
|
}
|
|
|
|
type tokenizer struct {
|
|
location location // Current position
|
|
value []byte // Current token string
|
|
reader *bufio.Reader // Reader
|
|
tokens chan<- token // Output token channel
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// isSpace reports whether c is horizontal whitespace: a space, a tab or a
// carriage return. A line feed is not whitespace; it is its own token.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r':
		return true
	}
	return false
}
|
|
|
|
// isNumber reports whether c is an ASCII decimal digit.
func isNumber(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
|
|
|
|
// isWordHead reports whether c may start a word: an ASCII letter of either
// case or an underscore.
func isWordHead(c byte) bool {
	switch {
	case c == '_':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
|
|
|
|
func isWordTail(c byte) bool {
|
|
return isWordHead(c) || isNumber(c)
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// Opcode bases. Each instruction owns a block of one hundred values; the
// assembler adds the operand to the base. The block starting at 400 is
// deliberately left unassigned.
const (
	IHALT               = 0
	IADD                = 100
	ISUBTRACT           = 200
	ISTORE              = 300
	ILOAD               = 500
	IBRANCH             = 600
	IBRANCH_IF_ZERO     = 700
	IBRANCH_IF_POSITIVE = 800
	IIO                 = 900
)
|
|
|
|
// Sub-opcodes added to IIO to select the I/O operation. Zero is unused.
const (
	IO_INPUT  = 1
	IO_OUTPUT = 2
)
|
|
|
|
var instructions = map[string]int{
|
|
"HLT": IHALT,
|
|
"COB": IHALT,
|
|
"ADD": IADD,
|
|
"SUB": ISUBTRACT,
|
|
"STA": ISTORE,
|
|
"LDA": ILOAD,
|
|
"BRA": IBRANCH,
|
|
"BRZ": IBRANCH_IF_ZERO,
|
|
"BRP": IBRANCH_IF_POSITIVE,
|
|
"INP": IIO + IO_INPUT,
|
|
"OUT": IIO + IO_OUTPUT,
|
|
"DAT": 0,
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
func (t *tokenizer) send(start location, kind int) {
|
|
tok := token{start, strings.ToUpper(string(t.value)), 0, kind}
|
|
if kind == WORD {
|
|
if instr, found := instructions[tok.value]; found {
|
|
tok.kind = INSTRUCTION
|
|
tok.instruction = instr
|
|
}
|
|
}
|
|
t.tokens <- tok
|
|
t.value = []byte{}
|
|
}
|
|
|
|
// XXX: the handling could probably be simplified by extending the "byte"
|
|
// to also include a special out-of-band value for errors
|
|
func (t *tokenizer) peek() (byte, error) {
|
|
if buf, err := t.reader.Peek(1); err != nil {
|
|
return '?', err
|
|
} else {
|
|
return buf[0], nil
|
|
}
|
|
}
|
|
|
|
func (t *tokenizer) eat() (byte, error) {
|
|
c, err := t.reader.ReadByte()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if c == '\n' {
|
|
t.location.line++
|
|
t.location.column = 1
|
|
} else {
|
|
t.location.column++
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
func (t *tokenizer) step() error {
|
|
start := t.location
|
|
t.value = []byte{}
|
|
|
|
c, err := t.peek()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
switch {
|
|
case isSpace(c):
|
|
c, err = t.eat()
|
|
case c == '\n':
|
|
c, err = t.eat()
|
|
t.value = append(t.value, c)
|
|
|
|
t.send(start, NEWLINE)
|
|
case isNumber(c):
|
|
for isNumber(c) {
|
|
c, err = t.eat()
|
|
t.value = append(t.value, c)
|
|
|
|
c, err = t.peek()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
t.send(start, NUMBER)
|
|
case isWordHead(c):
|
|
for isWordTail(c) {
|
|
c, err = t.eat()
|
|
t.value = append(t.value, c)
|
|
|
|
c, err = t.peek()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
t.send(start, WORD)
|
|
case c == '/':
|
|
c, err = t.eat()
|
|
c, err = t.peek()
|
|
if err == io.EOF {
|
|
return errors.New("unexpected EOF")
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if c != '/' {
|
|
return errors.New(fmt.Sprintf("unrecognized input: '%c'", c))
|
|
}
|
|
for c != '\n' {
|
|
c, err = t.eat()
|
|
c, err = t.peek()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
default:
|
|
return errors.New(fmt.Sprintf("unrecognized input: '%c'", c))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func tokenize(r io.Reader, tokens chan<- token) {
|
|
t := tokenizer{
|
|
location: location{line: 1, column: 1},
|
|
tokens: tokens,
|
|
reader: bufio.NewReader(r),
|
|
}
|
|
for {
|
|
if err := t.step(); err == io.EOF {
|
|
break
|
|
} else if err != nil {
|
|
t.tokens <- token{t.location, err.Error(), 0, ERROR}
|
|
break
|
|
}
|
|
}
|
|
close(tokens)
|
|
}
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
// instruction is one assembled statement whose operand is not yet resolved.
type instruction struct {
	// id is the opcode taken from the instruction token.
	id int
	// target is the label named as the operand, or empty if there is none.
	target string
	// number is the numeric operand, or zero if there is none.
	number int
}
|
|
|
|
type assembler struct {
|
|
tokens chan token // Where tokens come from
|
|
output []instruction // The assembled program
|
|
labels map[string]int // Addresses of labels
|
|
}
|
|
|
|
func (a *assembler) step() (bool, error) {
|
|
token, ok := <-a.tokens
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
|
|
mkerr := func(format string, a ...interface{}) error {
|
|
prefix := fmt.Sprintf("line %d, column %d: ",
|
|
token.location.line, token.location.column)
|
|
return errors.New(prefix + fmt.Sprintf(format, a...))
|
|
}
|
|
switch token.kind {
|
|
case WORD:
|
|
if _, dup := a.labels[token.value]; dup {
|
|
return false, mkerr("duplicate label: %s", token.value)
|
|
}
|
|
a.labels[token.value] = len(a.output)
|
|
|
|
if token, ok = <-a.tokens; !ok {
|
|
return false, mkerr("unexpected end of file")
|
|
}
|
|
if token.kind != INSTRUCTION {
|
|
return false, mkerr("expected instruction name after label")
|
|
}
|
|
fallthrough
|
|
case INSTRUCTION:
|
|
instrHolder := instruction{id: token.instruction}
|
|
|
|
token, ok := <-a.tokens
|
|
eol := false
|
|
switch {
|
|
case token.kind == WORD:
|
|
instrHolder.target = strings.ToUpper(token.value)
|
|
case token.kind == NUMBER:
|
|
// TODO: we should check the number
|
|
instrHolder.number, _ = strconv.Atoi(token.value)
|
|
case token.kind == ERROR:
|
|
return false, errors.New(token.value)
|
|
case !ok:
|
|
fallthrough
|
|
case token.kind == NEWLINE:
|
|
// This is fine, just assume zero
|
|
eol = true
|
|
}
|
|
a.output = append(a.output, instrHolder)
|
|
|
|
if !eol {
|
|
token, ok := <-a.tokens
|
|
switch {
|
|
case !ok:
|
|
case token.kind == NEWLINE:
|
|
case token.kind == ERROR:
|
|
return false, mkerr("%s", token.value)
|
|
default:
|
|
return false, mkerr("expected end of line")
|
|
}
|
|
}
|
|
case NEWLINE:
|
|
// Ignore empty lines
|
|
case NUMBER:
|
|
return false, mkerr("unexpected number")
|
|
case ERROR:
|
|
return false, mkerr("%s", token.value)
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
func Assemble(r io.Reader) (code []int16, err error) {
|
|
a := assembler{tokens: make(chan token), labels: make(map[string]int)}
|
|
go tokenize(r, a.tokens)
|
|
|
|
for {
|
|
if cont, err := a.step(); err != nil {
|
|
return nil, err
|
|
} else if !cont {
|
|
break
|
|
}
|
|
}
|
|
|
|
code = make([]int16, 100)
|
|
for i, x := range a.output {
|
|
if i >= len(code) {
|
|
return nil, errors.New("program too long")
|
|
}
|
|
n := x.id
|
|
switch {
|
|
case x.id%100 != 0:
|
|
// TODO: we could complain that arguments aren't allowed
|
|
case len(x.target) != 0:
|
|
// Resolve targets to code locations
|
|
if resolved, ok := a.labels[x.target]; !ok {
|
|
return nil, errors.New("unknown label")
|
|
} else {
|
|
n += resolved
|
|
}
|
|
default:
|
|
n += x.number
|
|
}
|
|
code[i] = int16(n)
|
|
}
|
|
return code, nil
|
|
}
|