package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

const (
	WORD    = iota // [A-Za-z_][A-Za-z0-9_]*
	NUMBER         // [0-9]+
	NEWLINE        // \n
	ERROR          // Error
)

type location struct {
	line   int
	column int
}

type token struct {
	location location // Position of the token
	value    string   // Text content of the token
	kind     int      // Kind of the token
}

type tokenizer struct {
	line   int           // Current line
	column int           // Current column
	value  []byte        // Current token string
	reader *bufio.Reader // Reader
	tokens chan<- token  // Token channel
}

// -----------------------------------------------------------------------------

func isSpace(c byte) bool {
	return c == ' ' || c == '\r' || c == '\t'
}

func isNumber(c byte) bool {
	return c >= '0' && c <= '9'
}

func isWordHead(c byte) bool {
	if c >= 'A' && c <= 'Z' {
		c += 32 // normalize to lower case
	}
	return c >= 'a' && c <= 'z' || c == '_'
}

func isWordTail(c byte) bool {
	return isWordHead(c) || isNumber(c)
}

// -----------------------------------------------------------------------------

func (t *tokenizer) send(kind int) {
	// FIXME: track the beginning of the token
	t.tokens <- token{location{t.line, t.column}, string(t.value), kind}
	t.value = []byte{}
}

func (t *tokenizer) peek() (byte, error) {
	buf, err := t.reader.Peek(1)
	if err != nil {
		return 0, err
	}
	return buf[0], nil
}

func (t *tokenizer) eat() (byte, error) {
	c, err := t.reader.ReadByte()
	if err != nil {
		return 0, err
	}
	if c == '\n' {
		t.line++
		t.column = 0
	} else {
		t.column++
	}
	return c, nil
}

// -----------------------------------------------------------------------------

func (t *tokenizer) step() error {
	t.value = []byte{}

	c, err := t.peek()
	if err != nil {
		return err
	}

	switch {
	case isSpace(c):
		t.eat()
	case c == '\n':
		c, _ = t.eat()
		t.value = append(t.value, c)
		t.send(NEWLINE)
	case isNumber(c):
		c, _ = t.eat()
		t.value = append(t.value, c)
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if !isNumber(c) {
				break
			}
			c, _ = t.eat()
			t.value = append(t.value, c)
		}
		t.send(NUMBER)
	case isWordHead(c):
		c, _ = t.eat()
		t.value = append(t.value, c)
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if !isWordTail(c) {
				break
			}
			c, _ = t.eat()
			t.value = append(t.value, c)
		}
		t.send(WORD)
	case c == '/':
		c, _ = t.eat()
		t.value = append(t.value, c)
		c, err = t.peek()
		if err != nil {
			return err
		}
		if c != '/' {
			return errors.New("unrecognized input")
		}
		// Skip the comment until the end of the line
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if c == '\n' {
				break
			}
			t.eat()
		}
	default:
		return errors.New("unrecognized input")
	}
	return nil
}

func tokenize(r io.Reader, tokens chan<- token) {
	t := tokenizer{
		line:   1,
		column: 0,
		tokens: tokens,
		reader: bufio.NewReader(r),
	}
	for {
		err := t.step()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.tokens <- token{location{t.line, t.column},
				fmt.Sprintf("line %d, column %d: %s", t.line, t.column, err.Error()), ERROR}
			break
		}
	}
	close(tokens)
}

// -----------------------------------------------------------------------------

const (
	IHALT = iota
	IADD
	ISUBTRACT
	ISTORE
	ILOAD
	IBRANCH
	IBRANCH_IF_ZERO
	IBRANCH_IF_POSITIVE
	IINPUT
	IOUTPUT
	IDATA
)

var instructions = map[string]int{
	"HLT": IHALT,
	"COB": IHALT,
	"ADD": IADD,
	"SUB": ISUBTRACT,
	"STA": ISTORE,
	"LDA": ILOAD,
	"BRA": IBRANCH,
	"BRZ": IBRANCH_IF_ZERO,
	"BRP": IBRANCH_IF_POSITIVE,
	"INP": IINPUT,
	"OUT": IOUTPUT,
	"DAT": IDATA,
}

type instruction struct {
	id     int
	target string
	number int
}

// -----------------------------------------------------------------------------

type assembler struct {
	tokens chan token
	output []instruction
	labels map[string]int
}

func (a *assembler) step() (bool, error) {
	token, ok := <-a.tokens
	if !ok {
		return false, nil
	}

	// TODO: add token location information to returned errors
	switch token.kind {
	case WORD:
		canonical := strings.ToUpper(token.value)
		instr, found := instructions[canonical]

		// Not found in the instruction list, assume it is a label
		if !found {
			if _, dup := a.labels[canonical]; dup {
				return false, fmt.Errorf("duplicate label: %s", canonical)
			}
			a.labels[canonical] = len(a.output)

			token, ok = <-a.tokens
			if !ok {
				return false, errors.New("unexpected end of file")
			}
			if token.kind != WORD {
				return false, errors.New("expected word")
			}

			// XXX: it might be better to classify this in the lexer
			canonical = strings.ToUpper(token.value)
			instr, found = instructions[canonical]
		}
		if !found {
			return false, fmt.Errorf("unknown instruction: %s", canonical)
		}

		instrHolder := instruction{id: instr}

		// Read the operand, if any
		token, ok = <-a.tokens
		if !ok {
			// End of input: this is fine, just assume a zero operand
			a.output = append(a.output, instrHolder)
			break
		}
		switch token.kind {
		case WORD:
			instrHolder.target = strings.ToUpper(token.value)
		case NEWLINE:
			// This is fine, just assume zero
		case NUMBER:
			instrHolder.number, _ = strconv.Atoi(token.value)
		case ERROR:
			return false, errors.New(token.value)
		}
		a.output = append(a.output, instrHolder)
	case NEWLINE:
		// Ignore empty lines
	case NUMBER:
		return false, errors.New("unexpected number")
	case ERROR:
		return false, errors.New(token.value)
	}
	return true, nil
}

func Assemble(r io.Reader) (code []int16, err error) {
	a := assembler{
		tokens: make(chan token),
		labels: map[string]int{},
	}
	go tokenize(r, a.tokens)

	for {
		cont, err := a.step()
		if err != nil {
			return nil, err
		}
		if !cont {
			break
		}
	}

	for _, x := range a.output {
		n := x.id * 100
		if len(x.target) != 0 {
			resolved, ok := a.labels[x.target]
			if !ok {
				return nil, fmt.Errorf("unknown label: %s", x.target)
			}
			n += resolved
		} else {
			n += x.number
		}
		code = append(code, int16(n))
	}
	return code, nil
}
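
// -----------------------------------------------------------------------------

// The file declares package main but contains no entry point, so the function
// below is a minimal, hypothetical driver sketching how Assemble might be
// invoked. The sample program and the output format are assumptions for
// illustration only; they are not part of the assembler itself.
func main() {
	// A small source text: labels in the first column, one instruction per line.
	source := `
	        INP
	        STA value
	        LDA value
	        ADD one
	        OUT
	        HLT
	one     DAT 1
	value   DAT 0
	`
	code, err := Assemble(strings.NewReader(source))
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	// Print each assembled word with its address.
	for address, word := range code {
		fmt.Printf("%02d: %04d\n", address, word)
	}
}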