// Package main contains a small tokenizer and a two-pass assembler for a
// mnemonic-based assembly language (HLT, ADD, SUB, STA, LDA, BRA, BRZ, BRP,
// INP, OUT, DAT) that targets a 100-cell memory of int16 values.
package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Token kinds produced by the tokenizer.
const (
	WORD    = iota // [A-Za-z_][A-Za-z0-9_]*
	NUMBER         // [0-9]+
	NEWLINE        // \n
	ERROR          // Error; the token value carries the message
)

// location is a position in the input. Lines start at 1, columns at 0.
type location struct {
	line   int
	column int
}

// token is a single lexical unit handed from the tokenizer to the assembler.
type token struct {
	location location // Position of the token
	value    string   // Text content of the token
	kind     int      // Kind of the token
}

// tokenizer holds the state of one tokenizing run.
type tokenizer struct {
	location location        // Current position
	value    []byte          // Current token string
	reader   *bufio.Reader   // Reader
	tokens   chan<- token    // Output token channel
	done     <-chan struct{} // Closed by the consumer to abandon the run; may be nil
}

// errStopped is returned internally once the consumer has closed done.
var errStopped = errors.New("tokenizer stopped")

// -----------------------------------------------------------------------------

// isSpace reports whether c is horizontal whitespace. '\n' is deliberately
// excluded because newlines are tokens of their own.
func isSpace(c byte) bool {
	return c == ' ' || c == '\r' || c == '\t'
}

// isNumber reports whether c is an ASCII decimal digit.
func isNumber(c byte) bool {
	return c >= '0' && c <= '9'
}

// isWordHead reports whether c may start a word: an ASCII letter or '_'.
func isWordHead(c byte) bool {
	return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
}

// isWordTail reports whether c may continue a word: a word head or a digit.
func isWordTail(c byte) bool {
	return isWordHead(c) || isNumber(c)
}

// -----------------------------------------------------------------------------

// emit delivers tok to the consumer. It returns errStopped instead of
// blocking forever when the consumer has closed done. A nil done channel
// never becomes ready, so in that case emit simply blocks on the send.
func (t *tokenizer) emit(tok token) error {
	select {
	case t.tokens <- tok:
		return nil
	case <-t.done:
		return errStopped
	}
}

// send emits the bytes accumulated in t.value as a token of the given kind
// that started at start, then resets the accumulator.
func (t *tokenizer) send(start location, kind int) error {
	err := t.emit(token{start, string(t.value), kind})
	// string() copied the bytes, so the backing array can be reused.
	t.value = t.value[:0]
	return err
}

// XXX: the handling could probably be simplified by extending the "byte"
// to also include a special out-of-band value for errors

// peek returns the next input byte without consuming it.
func (t *tokenizer) peek() (byte, error) {
	buf, err := t.reader.Peek(1)
	if err != nil {
		return '?', err
	}
	return buf[0], nil
}

// eat consumes one input byte and advances the current location.
func (t *tokenizer) eat() (byte, error) {
	c, err := t.reader.ReadByte()
	if err != nil {
		return 0, err
	}
	if c == '\n' {
		t.location.line++
		t.location.column = 0
	} else {
		t.location.column++
	}
	return c, nil
}

// scan appends input bytes to t.value for as long as accept approves of the
// next byte. Reaching end of input ends the token normally; any other read
// error is returned.
func (t *tokenizer) scan(accept func(byte) bool) error {
	for {
		c, err := t.peek()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if !accept(c) {
			return nil
		}
		if c, err = t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
	}
}

// skipComment consumes a "//" comment up to, but not including, the
// terminating newline. A lone '/' is rejected.
func (t *tokenizer) skipComment() error {
	if _, err := t.eat(); err != nil {
		return err
	}
	c, err := t.peek()
	if err == io.EOF {
		return errors.New("unexpected EOF")
	}
	if err != nil {
		return err
	}
	if c != '/' {
		return fmt.Errorf("unrecognized input: '%c'", c)
	}
	for c != '\n' {
		if _, err = t.eat(); err != nil {
			return err
		}
		c, err = t.peek()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// -----------------------------------------------------------------------------

// step consumes one lexical element: a run of nothing but a single
// whitespace byte, a newline, a number, a word, or a comment. It returns
// io.EOF at end of input and errStopped when the consumer went away.
func (t *tokenizer) step() error {
	start := t.location
	t.value = t.value[:0]

	c, err := t.peek()
	if err != nil {
		return err
	}

	switch {
	case isSpace(c):
		_, err = t.eat()
		return err
	case c == '\n':
		if c, err = t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
		return t.send(start, NEWLINE)
	case isNumber(c):
		if err = t.scan(isNumber); err != nil {
			return err
		}
		return t.send(start, NUMBER)
	case isWordHead(c):
		if err = t.scan(isWordTail); err != nil {
			return err
		}
		return t.send(start, WORD)
	case c == '/':
		return t.skipComment()
	default:
		return fmt.Errorf("unrecognized input: '%c'", c)
	}
}

// tokenize reads r to the end, sending tokens on the channel, and closes the
// channel when finished. A lexical or read error is delivered as a final
// ERROR token. The caller must keep receiving until the channel is closed.
func tokenize(r io.Reader, tokens chan<- token) {
	tokenizeUntil(r, tokens, nil)
}

// tokenizeUntil behaves like tokenize but additionally gives up as soon as
// done is closed, so that a consumer which stops early does not leave the
// tokenizing goroutine blocked on a send forever.
func tokenizeUntil(r io.Reader, tokens chan<- token, done <-chan struct{}) {
	defer close(tokens)

	t := tokenizer{
		location: location{line: 1, column: 0},
		tokens:   tokens,
		reader:   bufio.NewReader(r),
		done:     done,
	}
	for {
		err := t.step()
		if err == nil {
			continue
		}
		if err != io.EOF && err != errStopped {
			msg := fmt.Sprintf("line %d, column %d: %s",
				t.location.line, t.location.column, err)
			// The run ends here either way, so a stopped consumer is fine.
			_ = t.emit(token{t.location, msg, ERROR})
		}
		return
	}
}

// -----------------------------------------------------------------------------

// Instruction identifiers. For every instruction except INP, OUT and DAT the
// identifier multiplied by 100 is the base of the encoded machine word.
//
// NOTE(review): ILOAD encodes as 4xx and the gap sits at 5; the commonly
// documented Little Man Computer set uses 5xx for LDA and leaves 4 unused.
// Left unchanged here -- confirm against the interpreter before touching it.
const (
	IHALT = iota
	IADD
	ISUBTRACT
	ISTORE
	ILOAD
	_
	IBRANCH
	IBRANCH_IF_ZERO
	IBRANCH_IF_POSITIVE
	IINPUT
	IOUTPUT
	IDATA
)

// Limits applied while encoding.
const (
	memorySize = 100       // Number of cells in the assembled image
	maxAddress = 99        // Largest operand that fits below the opcode digit
	maxData    = 1<<15 - 1 // Largest DAT value that fits an int16 cell
)

// instructions maps upper-cased mnemonics to instruction identifiers.
var instructions = map[string]int{
	"HLT": IHALT,
	"COB": IHALT,
	"ADD": IADD,
	"SUB": ISUBTRACT,
	"STA": ISTORE,
	"LDA": ILOAD,
	"BRA": IBRANCH,
	"BRZ": IBRANCH_IF_ZERO,
	"BRP": IBRANCH_IF_POSITIVE,
	"INP": IINPUT,
	"OUT": IOUTPUT,
	"DAT": IDATA,
}

// instruction is one parsed statement awaiting label resolution.
type instruction struct {
	id     int    // What instruction this is
	target string // Label name
	number int    // Immediate value
}

// -----------------------------------------------------------------------------

// assembler collects parsed instructions and label addresses.
type assembler struct {
	tokens chan token     // Where tokens come from
	output []instruction  // The assembled program
	labels map[string]int // Addresses of labels
}

// step parses one input line. It returns false once the token stream is
// exhausted, and a non-nil error on the first syntax or lexical error.
func (a *assembler) step() (bool, error) {
	tok, ok := <-a.tokens
	if !ok {
		return false, nil
	}

	// TODO: add token location information to returned errors
	switch tok.kind {
	case WORD:
		if err := a.statement(tok); err != nil {
			return false, err
		}
	case NEWLINE:
		// Ignore empty lines
	case NUMBER:
		return false, errors.New("Unexpected number")
	case ERROR:
		return false, errors.New(tok.value)
	}
	return true, nil
}

// statement parses "[label] mnemonic [operand]" up to and including the end
// of the line, given the first word of that line, and appends the resulting
// instruction to a.output.
func (a *assembler) statement(first token) error {
	name := strings.ToUpper(first.value)
	id, found := instructions[name]

	// Not found in the instruction list: assume it is a label, which must be
	// followed by the actual mnemonic.
	if !found {
		if _, dup := a.labels[name]; dup {
			return fmt.Errorf("Duplicate label: %s", name)
		}
		a.labels[name] = len(a.output)

		next, ok := <-a.tokens
		switch {
		case !ok:
			return errors.New("Unexpected end of file")
		case next.kind == ERROR:
			// Surface the lexer's message rather than a generic one.
			return errors.New(next.value)
		case next.kind != WORD:
			return errors.New("Expected word")
		}

		// XXX: it might be better to classify this in the lexer
		name = strings.ToUpper(next.value)
		if id, found = instructions[name]; !found {
			return fmt.Errorf("Unknown instruction: %s", name)
		}
	}

	instr := instruction{id: id}

	// The closed-channel case is tested first: a closed channel yields a
	// zero token whose kind equals WORD.
	operand, ok := <-a.tokens
	switch {
	case !ok, operand.kind == NEWLINE:
		// No operand; this is fine, just assume zero.
		a.output = append(a.output, instr)
		return nil
	case operand.kind == ERROR:
		return errors.New(operand.value)
	case operand.kind == WORD:
		instr.target = strings.ToUpper(operand.value)
	case operand.kind == NUMBER:
		n, err := strconv.Atoi(operand.value)
		if err != nil {
			return fmt.Errorf("Invalid number: %s", operand.value)
		}
		instr.number = n
	}
	a.output = append(a.output, instr)

	end, ok := <-a.tokens
	switch {
	case !ok, end.kind == NEWLINE:
		return nil
	case end.kind == ERROR:
		return errors.New(end.value)
	default:
		return errors.New("Expected end of line")
	}
}

// encode turns one parsed instruction into its machine word, resolving a
// label operand through labels and rejecting numeric operands that would
// spill into the opcode digit or overflow the int16 cell.
func encode(x instruction, labels map[string]int) (int16, error) {
	// XXX: we should be able to handle the strange INP and OUT better
	switch x.id {
	case IINPUT:
		return 901, nil
	case IOUTPUT:
		return 902, nil
	}

	base, limit := x.id*100, maxAddress
	// XXX: this also stinks
	if x.id == IDATA {
		base, limit = 0, maxData
	}

	if len(x.target) != 0 {
		// Resolve targets to code locations
		resolved, ok := labels[x.target]
		if !ok {
			return 0, errors.New("Unknown label")
		}
		return int16(base + resolved), nil
	}

	if x.number < 0 || x.number > limit {
		return 0, fmt.Errorf("Operand out of range: %d", x.number)
	}
	return int16(base + x.number), nil
}

// Assemble reads assembly source from r and returns the assembled memory
// image, which always has 100 cells; cells past the end of the program are
// zero. It returns a nil image and an error on the first lexical, syntax,
// label or range error, or when the program has more than 100 instructions.
func Assemble(r io.Reader) (code []int16, err error) {
	tokens := make(chan token)
	done := make(chan struct{})
	// Releases the tokenizer goroutine when returning early on an error.
	defer close(done)
	go tokenizeUntil(r, tokens, done)

	a := assembler{tokens: tokens, labels: make(map[string]int)}
	for {
		more, stepErr := a.step()
		if stepErr != nil {
			return nil, stepErr
		}
		if !more {
			break
		}
	}

	image := make([]int16, memorySize)
	for i, x := range a.output {
		if i >= len(image) {
			return nil, errors.New("Program too long")
		}
		word, encErr := encode(x, a.labels)
		if encErr != nil {
			return nil, encErr
		}
		image[i] = word
	}
	return image, nil
}