package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Token kinds emitted by the tokenizer on its output channel.
const (
	WORD    = iota // [A-Za-z_][A-Za-z0-9_]* (see isWordHead/isWordTail)
	NUMBER         // [0-9]+
	NEWLINE        // \n
	ERROR          // Tokenizer failure; the token value carries the message
)

// location is a position within the input being tokenized.
// The tokenizer starts at line 1, column 0; consuming '\n' increments line
// and resets column to 0, and consuming any other byte increments column.
type location struct {
	line   int
	column int
}

// token is a single lexical unit passed from the tokenizer to the assembler.
// For ERROR tokens, value holds a formatted error message and location is
// the tokenizer's position at the time the error was reported.
type token struct {
	location location // Position of the token
	value    string   // Text content of the token
	kind     int      // Kind of the token (WORD, NUMBER, NEWLINE or ERROR)
}

// tokenizer holds the state needed to split buffered input into tokens and
// deliver them on a channel.
type tokenizer struct {
	location location      // Current position
	value    []byte        // Current token string (bytes accumulated so far)
	reader   *bufio.Reader // Reader
	tokens   chan<- token  // Output token channel
}

// -----------------------------------------------------------------------------

// isSpace reports whether c is horizontal whitespace that the tokenizer
// skips: space, carriage return or tab. A line feed is not included because
// it is a token in its own right.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\r', '\t':
		return true
	}
	return false
}

// isNumber reports whether c is an ASCII decimal digit.
func isNumber(c byte) bool {
	return '0' <= c && c <= '9'
}

// isWordHead reports whether c may begin a word: an ASCII letter of either
// case, or an underscore.
func isWordHead(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return c == '_'
	}
}

func isWordTail(c byte) bool {
	return isWordHead(c) || isNumber(c)
}

// -----------------------------------------------------------------------------

// send emits the bytes accumulated in t.value as a token of the given kind,
// positioned at start, and then clears the accumulator. The channel send
// blocks until the consumer receives the token.
func (t *tokenizer) send(start location, kind int) {
	t.tokens <- token{
		location: start,
		value:    string(t.value),
		kind:     kind,
	}
	t.value = make([]byte, 0)
}

// peek returns the next input byte without consuming it. If the reader
// cannot supply a byte, the placeholder '?' is returned along with the
// reader's error.
//
// XXX: the handling could probably be simplified by extending the "byte"
//   to also include a special out-of-band value for errors
func (t *tokenizer) peek() (byte, error) {
	next, err := t.reader.Peek(1)
	if err == nil {
		return next[0], nil
	}
	return '?', err
}

// eat consumes and returns the next input byte, advancing the current
// location: a line feed moves to column 0 of the next line, any other byte
// moves one column to the right. On a read error the location is left
// untouched and a zero byte is returned with the error.
func (t *tokenizer) eat() (byte, error) {
	b, err := t.reader.ReadByte()
	switch {
	case err != nil:
		return 0, err
	case b == '\n':
		t.location.line++
		t.location.column = 0
	default:
		t.location.column++
	}
	return b, nil
}

// -----------------------------------------------------------------------------

// step consumes one lexical element from the input and, where applicable,
// sends the matching token on the output channel:
//
//   - space, tab and carriage return are skipped silently;
//   - '\n' produces a NEWLINE token;
//   - a run of digits produces a NUMBER token;
//   - a letter or underscore followed by letters, digits or underscores
//     produces a WORD token;
//   - "//" starts a comment that is discarded up to, but not including, the
//     next line feed.
//
// It returns io.EOF when no input is left, the reader's error on a read
// failure, or a descriptive error for input it does not recognize.
func (t *tokenizer) step() error {
	start := t.location
	t.value = t.value[:0]

	c, err := t.peek()
	if err != nil {
		return err
	}

	switch {
	case isSpace(c):
		_, err = t.eat()
		return err
	case c == '\n':
		if c, err = t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
		t.send(start, NEWLINE)
	case isNumber(c):
		if err = t.takeWhile(isNumber); err != nil {
			return err
		}
		t.send(start, NUMBER)
	case isWordHead(c):
		// The head character also satisfies isWordTail, so a single
		// predicate covers the whole word.
		if err = t.takeWhile(isWordTail); err != nil {
			return err
		}
		t.send(start, WORD)
	case c == '/':
		return t.skipComment()
	default:
		return fmt.Errorf("unrecognized input: '%c'", c)
	}
	return nil
}

// takeWhile appends input bytes to t.value for as long as accept approves of
// the next byte. Reaching the end of input ends the run without an error, so
// the caller can still emit the token gathered so far.
func (t *tokenizer) takeWhile(accept func(byte) bool) error {
	for {
		c, err := t.peek()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if !accept(c) {
			return nil
		}
		if _, err = t.eat(); err != nil {
			return err
		}
		t.value = append(t.value, c)
	}
}

// skipComment consumes a "//" comment, leaving the terminating line feed in
// the input so that it is still tokenized as NEWLINE. A '/' that is not
// followed by a second '/' is reported as an error.
func (t *tokenizer) skipComment() error {
	// Consume the first slash; the caller has only peeked at it.
	if _, err := t.eat(); err != nil {
		return err
	}

	c, err := t.peek()
	if err == io.EOF {
		return errors.New("unexpected EOF")
	}
	if err != nil {
		return err
	}
	if c != '/' {
		return fmt.Errorf("unrecognized input: '%c'", c)
	}

	for c != '\n' {
		if _, err = t.eat(); err != nil {
			return err
		}
		c, err = t.peek()
		if err == io.EOF {
			// A comment on the last line needs no trailing line feed.
			break
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// tokenize reads r to the end, sending every token found to tokens, and
// closes the channel when done. Scanning stops at the first failure other
// than end of input; that failure is delivered as a final ERROR token whose
// value is the message prefixed with the current line and column.
func tokenize(r io.Reader, tokens chan<- token) {
	lexer := tokenizer{
		reader:   bufio.NewReader(r),
		tokens:   tokens,
		location: location{line: 1, column: 0},
	}

	var err error
	for err == nil {
		err = lexer.step()
	}

	// io.EOF is the normal way for the loop to finish; anything else is
	// reported to the consumer.
	if err != io.EOF {
		pos := lexer.location
		msg := fmt.Sprintf("line %d, column %d: %s",
			pos.line, pos.column, err.Error())
		lexer.tokens <- token{pos, msg, ERROR}
	}
	close(tokens)
}

// -----------------------------------------------------------------------------

// Instruction identifiers. Assemble multiplies the identifier by 100 to form
// the base of the machine word, except for IINPUT, IOUTPUT and IDATA, which
// it encodes specially (901, 902 and a bare value respectively).
const (
	IHALT = iota
	IADD
	ISUBTRACT
	ISTORE
	ILOAD
	_ // Value 5 is skipped; no mnemonic maps to it
	IBRANCH
	IBRANCH_IF_ZERO
	IBRANCH_IF_POSITIVE
	IINPUT
	IOUTPUT
	IDATA
)

// instructions maps upper-case mnemonics to instruction identifiers.
// The assembler upper-cases words before looking them up here, and treats
// a leading word that is missing from this table as a label.
var instructions = map[string]int{
	"HLT": IHALT,
	"COB": IHALT, // Alternative spelling of HLT
	"ADD": IADD,
	"SUB": ISUBTRACT,
	"STA": ISTORE,
	"LDA": ILOAD,
	"BRA": IBRANCH,
	"BRZ": IBRANCH_IF_ZERO,
	"BRP": IBRANCH_IF_POSITIVE,
	"INP": IINPUT,
	"OUT": IOUTPUT,
	"DAT": IDATA,
}

// instruction is one parsed source line awaiting label resolution.
// At most one of target and number is set by the assembler; when neither
// operand is present both keep their zero values.
type instruction struct {
	id     int    // What instruction this is
	target string // Label name (upper-cased), empty if the operand is not a label
	number int    // Immediate value
}

// -----------------------------------------------------------------------------

// assembler collects parsed instructions and label definitions while
// consuming tokens produced by the tokenizer.
type assembler struct {
	tokens chan token     // Where tokens come from
	output []instruction  // The assembled program
	labels map[string]int // Addresses of labels (index into output), keyed by upper-cased name
}

// step parses one source line from the token stream.
//
// A line is either empty or has the form
//
//	[label] mnemonic [operand]
//
// where the operand is a label name or a number. A leading word that is not
// a known mnemonic is recorded as a label for the address of the instruction
// that follows it.
//
// It returns true while more input may follow, false with a nil error once
// the token channel has been closed, and false with an error if the line is
// malformed or the tokenizer reported a failure.
func (a *assembler) step() (bool, error) {
	tok, ok := <-a.tokens
	if !ok {
		return false, nil
	}

	// TODO: add token location information to returned errors

	switch tok.kind {
	case WORD:
		name := strings.ToUpper(tok.value)
		id, known := instructions[name]

		// Not found in the instruction list
		// Assume it is a label
		if !known {
			if _, dup := a.labels[name]; dup {
				return false, fmt.Errorf("Duplicate label: %s", name)
			}
			a.labels[name] = len(a.output)

			tok, ok = <-a.tokens
			switch {
			case !ok:
				return false, errors.New("Unexpected end of file")
			case tok.kind == ERROR:
				// Surface the tokenizer's own message rather than
				// masking it with a generic parse error.
				return false, errors.New(tok.value)
			case tok.kind != WORD:
				return false, errors.New("Expected word")
			}

			// XXX: it might be better to classify this in the lexer
			name = strings.ToUpper(tok.value)
			if id, known = instructions[name]; !known {
				return false, fmt.Errorf("Unknown instruction: %s", name)
			}
		}

		parsed := instruction{id: id}

		// Optional operand. The closed-channel case must be tested first:
		// a receive from a closed channel yields a zero token, and the zero
		// kind is WORD.
		tok, ok = <-a.tokens
		eol := false
		switch {
		case !ok, tok.kind == NEWLINE:
			// This is fine, just assume zero
			eol = true
		case tok.kind == WORD:
			parsed.target = strings.ToUpper(tok.value)
		case tok.kind == NUMBER:
			// NUMBER tokens contain digits only, so a failure here means
			// the literal does not fit in an int.
			n, err := strconv.Atoi(tok.value)
			if err != nil {
				return false, fmt.Errorf("Invalid number: %s", tok.value)
			}
			parsed.number = n
		case tok.kind == ERROR:
			return false, errors.New(tok.value)
		}
		a.output = append(a.output, parsed)

		// Nothing but the end of the line may follow an operand.
		if !eol {
			tok, ok = <-a.tokens
			switch {
			case !ok, tok.kind == NEWLINE:
				// Proper end of line or end of input
			case tok.kind == ERROR:
				return false, errors.New(tok.value)
			default:
				return false, errors.New("Expected end of line")
			}
		}
	case NEWLINE:
		// Ignore empty lines
	case NUMBER:
		return false, errors.New("Unexpected number")
	case ERROR:
		return false, errors.New(tok.value)
	}
	return true, nil
}

// Assemble reads assembly source from r and translates it into machine code.
//
// On success it returns a slice of exactly 100 words. Word i holds the i-th
// instruction of the program and the remaining words are zero. An ordinary
// instruction is encoded as its identifier times 100 plus its operand, where
// a label operand is replaced by the address the label was defined at.
// INP and OUT are always encoded as 901 and 902, and DAT is encoded as its
// bare operand.
//
// An error is returned for tokenizer and parse failures, for programs longer
// than 100 instructions, and for references to undefined labels.
//
// NOTE(review): operands are not range-checked before being added to the
// opcode and narrowed to int16 — confirm whether callers rely on that.
func Assemble(r io.Reader) (code []int16, err error) {
	const memorySize = 100

	a := assembler{tokens: make(chan token), labels: make(map[string]int)}
	go tokenize(r, a.tokens)

	for {
		cont, stepErr := a.step()
		if stepErr != nil {
			// The tokenizer sends on an unbuffered channel and may still
			// be running. Keep receiving in the background until it closes
			// the channel, otherwise its goroutine would block forever on
			// the next send. Note that r may therefore still be read from
			// for a while after this function has returned.
			go func(rest <-chan token) {
				for range rest {
				}
			}(a.tokens)
			return nil, stepErr
		}
		if !cont {
			break
		}
	}

	code = make([]int16, memorySize)
	for i, x := range a.output {
		if i >= len(code) {
			return nil, errors.New("Program too long")
		}
		n := x.id * 100
		// XXX: this also stinks
		if x.id == IDATA {
			n = 0
		}
		// XXX: we should be able to handle the strange INP and OUT better
		switch {
		case x.id == IINPUT:
			n = 901
		case x.id == IOUTPUT:
			n = 902
		case len(x.target) != 0:
			// Resolve targets to code locations
			resolved, ok := a.labels[x.target]
			if !ok {
				return nil, errors.New("Unknown label")
			}
			n += resolved
		default:
			n += x.number
		}
		code[i] = int16(n)
	}
	return code, nil
}