
344 lines
6.7 KiB
Raw Normal View History

2016-07-10 14:35:33 +02:00
package main
import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)
// Token kinds carried in token.kind.
//
// NEWLINE is declared last so that the numeric values of the other kinds are
// not shifted by its addition.
const (
	WORD        = iota // [A-Za-z_][A-Za-z0-9_]* that is not an instruction name
	INSTRUCTION        // Instruction word
	NUMBER             // [0-9]+
	ERROR              // Error
	NEWLINE            // \n
)
2016-07-10 14:35:33 +02:00
// location is a position within the source text.
type location struct {
	line   int // Line number
	column int // Column number within the line
}
type token struct {
2016-10-20 01:31:15 +02:00
location location // Position of the token
value string // Text content of the token
instruction int // INSTRUCTION ID
kind int // Kind of the token
2016-07-10 14:35:33 +02:00
type tokenizer struct {
location location // Current position
value []byte // Current token string
reader *bufio.Reader // Reader
tokens chan<- token // Output token channel
2016-07-10 14:35:33 +02:00
// -----------------------------------------------------------------------------
// isSpace reports whether c is a blank, carriage return or tab.
// A line feed is not counted as space.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\r', '\t':
		return true
	}
	return false
}
// isNumber reports whether c is an ASCII decimal digit.
func isNumber(c byte) bool {
	return '0' <= c && c <= '9'
}
// isWordHead reports whether c may start a word: an ASCII letter of either
// case, or an underscore.
func isWordHead(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return c == '_'
}
2016-07-10 14:35:33 +02:00
func isWordTail(c byte) bool {
return isWordHead(c) || isNumber(c)
// -----------------------------------------------------------------------------
2016-10-20 01:31:15 +02:00
// Constants derived from iota * 100; IHALT, the first entry, is 0.
// NOTE(review): only the first entry of this group is visible in this view —
// the remaining entries and the closing parenthesis need to be confirmed
// against the full file.
const (
IHALT = iota * 100
// Second iota-based constant group; the first value (0) is discarded through
// the blank identifier.
// NOTE(review): the remaining entries and the closing parenthesis are not
// visible in this view — confirm against the full file.
const (
_ = iota
// instructions maps a mnemonic to its instruction ID. The tokenizer looks up
// upper-cased WORD tokens here to decide whether they are instruction names.
// NOTE(review): only the "DAT" entry is visible in this view — the remaining
// entries and the closing brace need to be confirmed against the full file.
var instructions = map[string]int{
"DAT": 0,
// -----------------------------------------------------------------------------
func (t *tokenizer) send(start location, kind int) {
2016-10-20 01:31:15 +02:00
tok := token{start, strings.ToUpper(string(t.value)), 0, kind}
if kind == WORD {
if instr, found := instructions[tok.value]; found {
tok.kind = INSTRUCTION
tok.instruction = instr
t.tokens <- tok
2016-07-10 14:35:33 +02:00
t.value = []byte{}
// XXX: the handling could probably be simplified by extending the "byte"
2016-10-20 00:12:24 +02:00
// to also include a special out-of-band value for errors
2016-07-10 14:35:33 +02:00
func (t *tokenizer) peek() (byte, error) {
2016-10-20 01:31:15 +02:00
if buf, err := t.reader.Peek(1); err != nil {
2016-10-20 00:12:24 +02:00
return '?', err
2016-10-20 01:31:15 +02:00
} else {
return buf[0], nil
2016-10-20 00:12:24 +02:00
2016-07-10 14:35:33 +02:00
func (t *tokenizer) eat() (byte, error) {
c, err := t.reader.ReadByte()
if err != nil {
return 0, err
2016-07-10 14:35:33 +02:00
if c == '\n' {
2016-10-20 01:47:07 +02:00
t.location.column = 1
2016-07-10 14:35:33 +02:00
} else {
2016-07-10 14:35:33 +02:00
return c, nil
// -----------------------------------------------------------------------------
func (t *tokenizer) step() error {
2016-10-20 00:12:24 +02:00
start := t.location
2016-07-10 14:35:33 +02:00
t.value = []byte{}
2016-10-20 00:12:24 +02:00
c, err := t.peek()
if err != nil {
return err
2016-07-10 14:35:33 +02:00
switch {
case isSpace(c):
c, err = t.eat()
2016-07-10 14:35:33 +02:00
case c == '\n':
c, err = t.eat()
t.value = append(t.value, c)
t.send(start, NEWLINE)
2016-07-10 14:35:33 +02:00
case isNumber(c):
2016-10-20 00:12:24 +02:00
for isNumber(c) {
c, err = t.eat()
t.value = append(t.value, c)
2016-07-10 14:35:33 +02:00
c, err = t.peek()
if err == io.EOF {
if err != nil {
return err
2016-07-10 14:35:33 +02:00
t.send(start, NUMBER)
2016-07-10 14:35:33 +02:00
case isWordHead(c):
2016-10-20 00:12:24 +02:00
for isWordTail(c) {
c, err = t.eat()
t.value = append(t.value, c)
2016-07-10 14:35:33 +02:00
c, err = t.peek()
if err == io.EOF {
if err != nil {
return err
2016-07-10 14:35:33 +02:00
t.send(start, WORD)
2016-07-10 14:35:33 +02:00
case c == '/':
c, err = t.eat()
c, err = t.peek()
2016-10-20 00:12:24 +02:00
if err == io.EOF {
return errors.New("unexpected EOF")
if err != nil {
return err
2016-07-10 14:35:33 +02:00
if c != '/' {
2016-10-20 00:12:24 +02:00
return errors.New(fmt.Sprintf("unrecognized input: '%c'", c))
2016-07-10 14:35:33 +02:00
2016-10-20 00:12:24 +02:00
for c != '\n' {
c, err = t.eat()
2016-07-10 14:35:33 +02:00
c, err = t.peek()
if err == io.EOF {
if err != nil {
return err
2016-07-10 14:35:33 +02:00
2016-10-20 00:12:24 +02:00
return errors.New(fmt.Sprintf("unrecognized input: '%c'", c))
2016-07-10 14:35:33 +02:00
return nil
func tokenize(r io.Reader, tokens chan<- token) {
t := tokenizer{
2016-10-20 01:47:07 +02:00
location: location{line: 1, column: 1},
tokens: tokens,
reader: bufio.NewReader(r),
2016-07-10 14:35:33 +02:00
for {
2016-10-20 01:31:15 +02:00
if err := t.step(); err == io.EOF {
2016-07-10 14:35:33 +02:00
2016-10-20 01:31:15 +02:00
} else if err != nil {
2016-10-20 01:47:07 +02:00
t.tokens <- token{t.location, err.Error(), 0, ERROR}
2016-07-10 14:35:33 +02:00
// -----------------------------------------------------------------------------
// instruction is one assembled statement before label resolution.
type instruction struct {
	id     int    // What instruction this is
	target string // Label name
	number int    // Immediate value
}
2016-07-10 14:35:33 +02:00
type assembler struct {
2016-10-20 00:12:24 +02:00
tokens chan token // Where tokens come from
output []instruction // The assembled program
labels map[string]int // Addresses of labels
2016-07-10 14:35:33 +02:00
func (a *assembler) step() (bool, error) {
token, ok := <-a.tokens
if !ok {
return false, nil
2016-07-10 14:35:33 +02:00
2016-10-20 01:47:07 +02:00
mkerr := func(format string, a ...interface{}) error {
prefix := fmt.Sprintf("line %d, column %d: ",
token.location.line, token.location.column)
return errors.New(prefix + fmt.Sprintf(format, a...))
2016-07-10 14:35:33 +02:00
switch token.kind {
case WORD:
2016-10-20 01:31:15 +02:00
if _, dup := a.labels[token.value]; dup {
2016-10-20 01:47:07 +02:00
return false, mkerr("duplicate label: %s", token.value)
2016-10-20 01:31:15 +02:00
a.labels[token.value] = len(a.output)
2016-07-10 14:35:33 +02:00
2016-10-20 01:31:15 +02:00
if token, ok = <-a.tokens; !ok {
2016-10-20 01:47:07 +02:00
return false, mkerr("unexpected end of file")
2016-07-10 14:35:33 +02:00
2016-10-20 01:31:15 +02:00
if token.kind != INSTRUCTION {
2016-10-20 01:47:07 +02:00
return false, mkerr("expected instruction name after label")
2016-07-10 14:35:33 +02:00
2016-10-20 01:31:15 +02:00
instrHolder := instruction{id: token.instruction}
2016-07-10 14:35:33 +02:00
2016-10-20 00:12:24 +02:00
token, ok := <-a.tokens
eol := false
switch {
case token.kind == WORD:
2016-07-10 14:35:33 +02:00
instrHolder.target = strings.ToUpper(token.value)
2016-10-20 00:12:24 +02:00
case token.kind == NUMBER:
2016-10-20 01:31:15 +02:00
// TODO: we should check the number
2016-07-10 14:35:33 +02:00
instrHolder.number, _ = strconv.Atoi(token.value)
2016-10-20 00:12:24 +02:00
case token.kind == ERROR:
2016-07-10 14:35:33 +02:00
return false, errors.New(token.value)
2016-10-20 00:12:24 +02:00
case !ok:
case token.kind == NEWLINE:
// This is fine, just assume zero
eol = true
2016-07-10 14:35:33 +02:00
a.output = append(a.output, instrHolder)
2016-10-20 00:12:24 +02:00
if !eol {
token, ok := <-a.tokens
switch {
case !ok:
case token.kind == NEWLINE:
case token.kind == ERROR:
2016-10-20 01:47:07 +02:00
return false, mkerr("%s", token.value)
2016-10-20 00:12:24 +02:00
2016-10-20 01:47:07 +02:00
return false, mkerr("expected end of line")
2016-10-20 00:12:24 +02:00
2016-07-10 14:35:33 +02:00
// Ignore empty lines
case NUMBER:
2016-10-20 01:47:07 +02:00
return false, mkerr("unexpected number")
2016-07-10 14:35:33 +02:00
case ERROR:
2016-10-20 01:47:07 +02:00
return false, mkerr("%s", token.value)
2016-07-10 14:35:33 +02:00
return true, nil
func Assemble(r io.Reader) (code []int16, err error) {
2016-10-20 00:12:24 +02:00
a := assembler{tokens: make(chan token), labels: make(map[string]int)}
2016-07-10 14:35:33 +02:00
go tokenize(r, a.tokens)
for {
2016-10-20 01:31:15 +02:00
if cont, err := a.step(); err != nil {
2016-07-10 14:35:33 +02:00
return nil, err
2016-10-20 01:31:15 +02:00
} else if !cont {
2016-07-10 14:35:33 +02:00
2016-10-20 00:12:24 +02:00
code = make([]int16, 100)
for i, x := range a.output {
if i >= len(code) {
2016-10-20 01:47:07 +02:00
return nil, errors.New("program too long")
2016-10-20 00:12:24 +02:00
2016-10-20 01:31:15 +02:00
n := x.id
2016-10-20 00:12:24 +02:00
switch {
2016-10-20 01:31:15 +02:00
case x.id%100 != 0:
// TODO: we could complain that arguments aren't allowed
2016-10-20 00:12:24 +02:00
case len(x.target) != 0:
// Resolve targets to code locations
2016-07-10 14:35:33 +02:00
if resolved, ok := a.labels[x.target]; !ok {
2016-10-20 01:47:07 +02:00
return nil, errors.New("unknown label")
2016-07-10 14:35:33 +02:00
} else {
n += resolved
2016-10-20 00:12:24 +02:00
2016-07-10 14:35:33 +02:00
n += x.number
2016-10-20 00:12:24 +02:00
code[i] = int16(n)
2016-07-10 14:35:33 +02:00
return code, nil