package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Token kinds produced by the tokenizer.
const (
	WORD        = iota // [A-Za-z_][A-Za-z_0-9]* that is not an instruction name
	INSTRUCTION        // A word found in the instructions table
	NUMBER             // [0-9]+
	NEWLINE            // \n
	ERROR              // Tokenizing failed; the token value holds the message
)

// location is a line/column position in the source text; both start at 1.
type location struct {
	line   int
	column int
}

// errorf builds an error whose message is prefixed with the position l.
func (l location) errorf(format string, args ...interface{}) error {
	return fmt.Errorf("line %d, column %d: %s", l.line, l.column, fmt.Sprintf(format, args...))
}

// token is one lexical unit handed from the tokenizer to the assembler.
type token struct {
	location    location // Position of the token
	value       string   // Upper-cased text of the token, or the message for ERROR
	instruction int      // Opcode from the instructions table (INSTRUCTION only)
	kind        int      // Kind of the token
}

// tokenizer splits the bytes of reader into tokens and delivers them on
// the tokens channel.
type tokenizer struct {
	location location        // Current position
	value    []byte          // Text of the token being built
	reader   *bufio.Reader   // Reader
	tokens   chan<- token    // Output token channel
	done     <-chan struct{} // Closed by the consumer to abandon tokenizing; may be nil
	stopped  bool            // Set once done has been observed closed
}

// -----------------------------------------------------------------------------

// isSpace reports whether c is horizontal whitespace. A newline is not
// whitespace here because it is a token of its own.
func isSpace(c byte) bool {
	return c == ' ' || c == '\r' || c == '\t'
}

// isNumber reports whether c is an ASCII decimal digit.
func isNumber(c byte) bool {
	return c >= '0' && c <= '9'
}

// isWordHead reports whether c may start a word: an ASCII letter or '_'.
func isWordHead(c byte) bool {
	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
}

// isWordTail reports whether c may continue a word: a word head or a digit.
func isWordTail(c byte) bool {
	return isWordHead(c) || isNumber(c)
}

// -----------------------------------------------------------------------------

// Opcodes. Every opcode is a multiple of 100 so that the operand can be
// added into the low two decimal digits. The value 400 is skipped.
const (
	IHALT = iota * 100
	IADD
	ISUBTRACT
	ISTORE
	_
	ILOAD
	IBRANCH
	IBRANCH_IF_ZERO
	IBRANCH_IF_POSITIVE
	IIO
)

// Sub-operations that are added to IIO.
const (
	_ = iota
	IO_INPUT
	IO_OUTPUT
)

// instructions maps upper-case mnemonics to their opcodes. DAT maps to 0 so
// that its operand becomes the whole cell value.
var instructions = map[string]int{
	"HLT": IHALT,
	"COB": IHALT,
	"ADD": IADD,
	"SUB": ISUBTRACT,
	"STA": ISTORE,
	"LDA": ILOAD,
	"BRA": IBRANCH,
	"BRZ": IBRANCH_IF_ZERO,
	"BRP": IBRANCH_IF_POSITIVE,
	"INP": IIO + IO_INPUT,
	"OUT": IIO + IO_OUTPUT,
	"DAT": 0,
}

// -----------------------------------------------------------------------------

// emit delivers tok to the consumer, or marks the tokenizer as stopped if
// the consumer has closed the done channel. A nil done channel never fires,
// which turns emit into a plain blocking send.
func (t *tokenizer) emit(tok token) {
	select {
	case t.tokens <- tok:
	case <-t.done:
		t.stopped = true
	}
}

// send emits the accumulated text as a token of the given kind that started
// at start. The text is upper-cased; a WORD that names an instruction is
// turned into an INSTRUCTION token carrying its opcode.
func (t *tokenizer) send(start location, kind int) {
	tok := token{start, strings.ToUpper(string(t.value)), 0, kind}
	if kind == WORD {
		if instr, found := instructions[tok.value]; found {
			tok.kind = INSTRUCTION
			tok.instruction = instr
		}
	}
	t.emit(tok)
	// The string conversion above copied the bytes, so the buffer can be
	// reused for the next token.
	t.value = t.value[:0]
}

// peek returns the next input byte without consuming it.
//
// XXX: the handling could probably be simplified by extending the "byte"
// to also include a special out-of-band value for errors
func (t *tokenizer) peek() (byte, error) {
	buf, err := t.reader.Peek(1)
	if err != nil {
		return '?', err
	}
	return buf[0], nil
}

// eat consumes and returns the next input byte and advances the current
// location: a newline moves to column 1 of the next line, any other byte
// moves one column to the right.
func (t *tokenizer) eat() (byte, error) {
	c, err := t.reader.ReadByte()
	if err != nil {
		return 0, err
	}
	if c == '\n' {
		t.location.line++
		t.location.column = 1
	} else {
		t.location.column++
	}
	return c, nil
}

// takeWhile appends input bytes to the current token text for as long as
// the next byte satisfies accept. Reaching the end of input ends the run
// normally; any other read error is returned.
func (t *tokenizer) takeWhile(accept func(byte) bool) error {
	for {
		c, err := t.peek()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if !accept(c) {
			return nil
		}
		if _, err := t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
	}
}

// skipComment consumes a "//" comment up to, but not including, the newline
// that ends it. It is called with the first '/' still unread. A single '/'
// is an error.
func (t *tokenizer) skipComment() error {
	if _, err := t.eat(); err != nil {
		return err
	}
	c, err := t.peek()
	if err == io.EOF {
		return errors.New("unexpected EOF")
	}
	if err != nil {
		return err
	}
	if c != '/' {
		return fmt.Errorf("unrecognized input: '%c'", c)
	}
	for c != '\n' {
		if _, err = t.eat(); err != nil {
			return err
		}
		c, err = t.peek()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// -----------------------------------------------------------------------------

// step consumes one lexical element from the input. Whitespace and comments
// produce no token; newlines, numbers and words are sent to the consumer.
// It returns io.EOF at the end of input and a descriptive error for input
// it does not recognize.
func (t *tokenizer) step() error {
	start := t.location
	t.value = t.value[:0]

	c, err := t.peek()
	if err != nil {
		return err
	}

	switch {
	case isSpace(c):
		_, err = t.eat()
		return err
	case c == '\n':
		if _, err = t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
		t.send(start, NEWLINE)
	case isNumber(c):
		if err = t.takeWhile(isNumber); err != nil {
			return err
		}
		t.send(start, NUMBER)
	case isWordHead(c):
		if err = t.takeWhile(isWordTail); err != nil {
			return err
		}
		t.send(start, WORD)
	case c == '/':
		return t.skipComment()
	default:
		return fmt.Errorf("unrecognized input: '%c'", c)
	}
	return nil
}

// run tokenizes until the end of input, the first error, or until the
// consumer closes the done channel, and then closes the token channel. An
// error other than io.EOF is delivered as a final ERROR token located at
// the current position.
func (t *tokenizer) run() {
	defer close(t.tokens)
	for !t.stopped {
		err := t.step()
		if err == io.EOF {
			return
		}
		if err != nil {
			t.emit(token{t.location, err.Error(), 0, ERROR})
			return
		}
	}
}

// tokenize reads r to the end, sends its tokens on the tokens channel and
// closes the channel when finished. The consumer must keep receiving until
// the channel is closed, otherwise tokenize blocks on its next send.
func tokenize(r io.Reader, tokens chan<- token) {
	t := tokenizer{
		location: location{line: 1, column: 1},
		tokens:   tokens,
		reader:   bufio.NewReader(r),
	}
	t.run()
}

// -----------------------------------------------------------------------------

// instruction is one assembled line before label resolution.
type instruction struct {
	id     int    // What instruction this is
	target string // Label name
	number int    // Immediate value
}

// assembler turns a token stream into a list of instructions and records
// the address of every label it encounters.
type assembler struct {
	tokens chan token     // Where tokens come from
	output []instruction  // The assembled program
	labels map[string]int // Addresses of labels
}

// step assembles one source line. It returns false once the token stream is
// exhausted, and an error prefixed with the source position when the line
// is malformed.
func (a *assembler) step() (bool, error) {
	tok, ok := <-a.tokens
	if !ok {
		return false, nil
	}

	switch tok.kind {
	case WORD:
		// A leading word defines a label for the instruction that follows.
		if _, dup := a.labels[tok.value]; dup {
			return false, tok.location.errorf("duplicate label: %s", tok.value)
		}
		a.labels[tok.value] = len(a.output)

		next, ok := <-a.tokens
		switch {
		case !ok:
			// There is no following token, so report at the label itself.
			return false, tok.location.errorf("unexpected end of file")
		case next.kind == ERROR:
			return false, next.location.errorf("%s", next.value)
		case next.kind != INSTRUCTION:
			return false, next.location.errorf("expected instruction name after label")
		}
		return a.assembleInstruction(next)
	case INSTRUCTION:
		return a.assembleInstruction(tok)
	case NUMBER:
		return false, tok.location.errorf("unexpected number")
	case ERROR:
		return false, tok.location.errorf("%s", tok.value)
	}
	// NEWLINE: an empty line needs no action.
	return true, nil
}

// assembleInstruction appends the instruction named by op to the output,
// after reading its optional operand and the end of the line.
func (a *assembler) assembleInstruction(op token) (bool, error) {
	instr := instruction{id: op.instruction}

	operand, ok := <-a.tokens
	endOfLine := false
	switch {
	// The closed-channel case must come first: the zero token it yields has
	// kind 0, which is WORD.
	case !ok, operand.kind == NEWLINE:
		// No operand; it stays zero.
		endOfLine = true
	case operand.kind == WORD:
		instr.target = strings.ToUpper(operand.value)
	case operand.kind == NUMBER:
		// Opcode 0 (DAT, HLT) stores the number as the whole cell, which is
		// an int16. Other opcodes only have two decimal digits of room.
		limit := 99
		if op.instruction == 0 {
			limit = 1<<15 - 1
		}
		n, err := strconv.Atoi(operand.value)
		// Opcodes that are not a multiple of 100 never use their operand,
		// so it is not validated for them.
		if op.instruction%100 == 0 && (err != nil || n < 0 || n > limit) {
			return false, operand.location.errorf("number out of range: %s", operand.value)
		}
		instr.number = n
	case operand.kind == ERROR:
		return false, operand.location.errorf("%s", operand.value)
	default:
		// For example an instruction name used as an operand.
		return false, operand.location.errorf("expected label or number")
	}
	a.output = append(a.output, instr)

	if endOfLine {
		return true, nil
	}
	next, ok := <-a.tokens
	switch {
	case !ok, next.kind == NEWLINE:
	case next.kind == ERROR:
		return false, next.location.errorf("%s", next.value)
	default:
		return false, next.location.errorf("expected end of line")
	}
	return true, nil
}

// Assemble reads assembly source from r and returns the 100-cell memory
// image. Each line has the form "[label] MNEMONIC [label|number]", and "//"
// starts a comment that runs to the end of the line. It returns an error
// for malformed source, for an operand that does not fit, for a reference
// to an undefined label, and for a program longer than the image.
func Assemble(r io.Reader) (code []int16, err error) {
	tokens := make(chan token)
	done := make(chan struct{})
	// Closing done releases the tokenizer goroutine when we return before
	// it has reached the end of the input.
	defer close(done)

	t := tokenizer{
		location: location{line: 1, column: 1},
		tokens:   tokens,
		reader:   bufio.NewReader(r),
		done:     done,
	}
	go t.run()

	a := assembler{tokens: tokens, labels: make(map[string]int)}
	for {
		var cont bool
		if cont, err = a.step(); err != nil {
			return nil, err
		}
		if !cont {
			break
		}
	}

	code = make([]int16, 100)
	for i, x := range a.output {
		if i >= len(code) {
			return nil, errors.New("program too long")
		}
		n := x.id
		switch {
		case x.id%100 != 0:
			// The opcode already carries its low digits.
			// TODO: we could complain that arguments aren't allowed
		case len(x.target) != 0:
			// Resolve targets to code locations
			addr, ok := a.labels[x.target]
			if !ok {
				return nil, fmt.Errorf("unknown label: %s", x.target)
			}
			n += addr
		default:
			n += x.number
		}
		code[i] = int16(n)
	}
	return code, nil
}