commit 04639942af2275574bb240e00bd9018fbb3eeca1
Author: Přemysl Janouch
Date:   Sun Jul 10 14:35:33 2016 +0200

    Initial commit

diff --git a/assembler.go b/assembler.go
new file mode 100644
index 0000000..21bebba
--- /dev/null
+++ b/assembler.go
@@ -0,0 +1,314 @@
+package main
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+)
+
+const (
+	WORD    = iota // [A-Za-z_][A-Za-z0-9_]*
+	NUMBER         // [0-9]+
+	NEWLINE        // \n
+	ERROR          // Error
+)
+
+type location struct {
+	line   int
+	column int
+}
+
+type token struct {
+	location location // Position of the token
+	value    string   // Text content of the token
+	kind     int      // Kind of the token
+}
+
+type tokenizer struct {
+	line   int // Current line
+	column int // Current column
+
+	value  []byte        // Current token string
+	reader *bufio.Reader // Reader
+	tokens chan<- token  // Token channel
+}
+
+// -----------------------------------------------------------------------------
+
+func isSpace(c byte) bool {
+	return c == ' ' || c == '\r' || c == '\t'
+}
+
+func isNumber(c byte) bool {
+	return c >= '0' && c <= '9'
+}
+
+func isWordHead(c byte) bool {
+	if c >= 'a' && c <= 'z' { c -= 32 }
+	return c >= 'A' && c <= 'Z' || c == '_'
+}
+
+func isWordTail(c byte) bool {
+	return isWordHead(c) || isNumber(c)
+}
+
+// -----------------------------------------------------------------------------
+
+func (t *tokenizer) send(kind int) {
+	// FIXME: track the beginning of the token
+	t.tokens <- token{location{t.line, t.column}, string(t.value), kind}
+	t.value = []byte{}
+}
+
+func (t *tokenizer) peek() (byte, error) {
+	buf, err := t.reader.Peek(1)
+	if err != nil { return 0, err }
+	return buf[0], nil
+}
+
+func (t *tokenizer) eat() (byte, error) {
+	c, err := t.reader.ReadByte()
+	if err != nil { return 0, err }
+
+	if c == '\n' {
+		t.line++
+		t.column = 0
+	} else {
+		t.column++
+	}
+	return c, nil
+}
+
+// -----------------------------------------------------------------------------
+
+func (t *tokenizer) step() error {
+	t.value = []byte{}
+	c, err := t.peek()
+
+	// Propagate io.EOF so that tokenize() knows when to stop
+	if err != nil { return err }
+
+	switch {
+	case isSpace(c):
+		t.eat()
+	case c == '\n':
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		t.send(NEWLINE)
+	case isNumber(c):
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if !isNumber(c) { break }
+
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(NUMBER)
+	case isWordHead(c):
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if !isWordTail(c) { break }
+
+			c, _ = t.eat()
+			t.value = append(t.value, c)
+		}
+		t.send(WORD)
+	case c == '/':
+		c, err = t.eat()
+		t.value = append(t.value, c)
+
+		c, err = t.peek()
+		if err != nil { return err }
+
+		if c != '/' {
+			return errors.New("unrecognized input")
+		}
+		for {
+			c, err = t.peek()
+			if err == io.EOF { break }
+			if err != nil { return err }
+
+			if c == '\n' { break }
+			t.eat()
+		}
+	default:
+		return errors.New("unrecognized input")
+	}
+	return nil
+}
+
+func tokenize(r io.Reader, tokens chan<- token) {
+	t := tokenizer{
+		line:   1,
+		column: 0,
+		tokens: tokens,
+		reader: bufio.NewReader(r),
+	}
+	for {
+		err := t.step()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.tokens <- token{location{t.line, t.column},
+				fmt.Sprintf("line %d, column %d: %s",
+					t.line, t.column, err.Error()), ERROR}
+			break
+		}
+	}
+	close(tokens)
+}
+
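+// For illustration: given the input "ADD 5\n", the tokenizer above sends a
+// WORD token "ADD", a NUMBER token "5" and a NEWLINE token down the channel,
+// then closes the channel at the end of input; a lexing failure is reported
+// as a single ERROR token whose value holds the position and the message.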
+
+// -----------------------------------------------------------------------------
+
+const (
+	IHALT = iota
+	IADD
+	ISUBTRACT
+	ISTORE
+	ILOAD
+	IBRANCH
+	IBRANCH_IF_ZERO
+	IBRANCH_IF_POSITIVE
+	IINPUT
+	IOUTPUT
+	IDATA
+)
+
+var instructions = map[string]int{
+	"HLT": IHALT,
+	"COB": IHALT,
+	"ADD": IADD,
+	"SUB": ISUBTRACT,
+	"STA": ISTORE,
+	"LDA": ILOAD,
+	"BRA": IBRANCH,
+	"BRZ": IBRANCH_IF_ZERO,
+	"BRP": IBRANCH_IF_POSITIVE,
+	"INP": IINPUT,
+	"OUT": IOUTPUT,
+	"DAT": IDATA,
+}
+
+type instruction struct {
+	id     int
+	target string
+	number int
+}
+
+// -----------------------------------------------------------------------------
+
+type assembler struct {
+	tokens chan token
+	output []instruction
+	labels map[string]int
+}
+
+func (a *assembler) step() (bool, error) {
+	token, ok := <-a.tokens
+	if !ok { return false, nil }
+
+	// TODO: add token location information to returned errors
+
+	switch token.kind {
+	case WORD:
+		canonical := strings.ToUpper(token.value)
+		instr, found := instructions[canonical]
+
+		// Not found in the instruction list
+		// Assume it is a label
+		if !found {
+			if _, dup := a.labels[canonical]; dup {
+				return false, fmt.Errorf("Duplicate label: %s", canonical)
+			}
+			a.labels[canonical] = len(a.output)
+
+			token, ok = <-a.tokens
+			if !ok {
+				return false, errors.New("Unexpected end of file")
+			}
+			if token.kind != WORD {
+				return false, errors.New("Expected word")
+			}
+
+			// XXX: it might be better to classify this in the lexer
+			canonical = strings.ToUpper(token.value)
+			instr, found = instructions[canonical]
+		}
+
+		if !found {
+			return false, fmt.Errorf("Unknown instruction: %s", canonical)
+		}
+
+		instrHolder := instruction{id: instr}
+		token, ok := <-a.tokens
+		if !ok {
+			// This is fine, just assume zero
+			a.output = append(a.output, instrHolder)
+			break
+		}
+
+		switch token.kind {
+		case WORD:
+			instrHolder.target = strings.ToUpper(token.value)
+		case NEWLINE:
+			// This is fine, just assume zero
+		case NUMBER:
+			instrHolder.number, _ = strconv.Atoi(token.value)
+		case ERROR:
+			return false, errors.New(token.value)
+		}
+		a.output = append(a.output, instrHolder)
+	case NEWLINE:
+		// Ignore empty lines
+	case NUMBER:
+		return false, errors.New("Unexpected number")
+	case ERROR:
+		return false, errors.New(token.value)
+	}
+	return true, nil
+}
+
+func Assemble(r io.Reader) (code []int16, err error) {
+	a := assembler{
+		tokens: make(chan token),
+		labels: make(map[string]int),
+	}
+	go tokenize(r, a.tokens)
+
+	for {
+		cont, err := a.step()
+		if err != nil {
+			return nil, err
+		}
+		if !cont {
+			break
+		}
+	}
+
+	for _, x := range a.output {
+		n := x.id * 100
+		if len(x.target) != 0 {
+			if resolved, ok := a.labels[x.target]; !ok {
+				return nil, errors.New("Unknown label")
+			} else {
+				n += resolved
+			}
+		} else {
+			n += x.number
+		}
+		code = append(code, int16(n))
+	}
+	return code, nil
+}
diff --git a/machine.go b/machine.go
new file mode 100644
index 0000000..a4b188d
--- /dev/null
+++ b/machine.go
@@ -0,0 +1,11 @@
+package main
+
+func Run(code []int16) {
+	// TODO: assert that the code is 100 boxes long
+
+	pc := 0
+	for pc < len(code) {
+		// TODO: decode and execute code[pc]; just skip over it for now
+		pc++
+	}
+
+	// TODO: throw an exception
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c277d00
--- /dev/null
+++ b/main.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"fmt"
+	"os"
+)
+
+func main() {
+	code, err := Assemble(os.Stdin)
+	if err != nil {
+		fmt.Printf("Assembly failed: %s\n", err)
+		os.Exit(1)
+	}
+	Run(code)
+}
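Not part of the commit: a minimal smoke-test sketch for the assembler on its own, since machine.go is still a stub. The file name assembler_test.go and the expected slice are assumptions; the numbers follow from the instructions map and from Assemble packing every instruction as id*100 plus either a resolved label address or a literal operand, which is also why DAT still comes out as 10xx in this initial version.

// assembler_test.go (hypothetical, not included in the commit above)
package main

import (
	"reflect"
	"strings"
	"testing"
)

func TestAssemble(t *testing.T) {
	// INP=8, ADD=1, OUT=9, HLT=0, DAT=10; the label FIVE resolves to box 4
	src := "INP\nADD FIVE\nOUT\nHLT\nFIVE DAT 5\n"
	want := []int16{800, 104, 900, 0, 1005}

	code, err := Assemble(strings.NewReader(src))
	if err != nil {
		t.Fatalf("Assemble: %s", err)
	}
	if !reflect.DeepEqual(code, want) {
		t.Fatalf("got %v, want %v", code, want)
	}
}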