package main

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

const (
	WORD    = iota // [A-Za-z_][A-Za-z0-9_]*
	NUMBER         // [0-9]+
	NEWLINE        // \n
	ERROR          // Error
)

type location struct {
	line   int
	column int
}

type token struct {
	location location // Position of the token
	value    string   // Text content of the token
	kind     int      // Kind of the token
}

type tokenizer struct {
	line   int           // Current line
	column int           // Current column
	value  []byte        // Current token string
	reader *bufio.Reader // Reader
	tokens chan<- token  // Token channel
}

// -----------------------------------------------------------------------------

func isSpace(c byte) bool {
	return c == ' ' || c == '\r' || c == '\t'
}

func isNumber(c byte) bool {
	return c >= '0' && c <= '9'
}

func isWordHead(c byte) bool {
	if c >= 'A' && c <= 'Z' {
		c += 32 // normalize to lower case
	}
	return c >= 'a' && c <= 'z' || c == '_'
}

func isWordTail(c byte) bool {
	return isWordHead(c) || isNumber(c)
}

// -----------------------------------------------------------------------------

func (t *tokenizer) send(kind int) {
	// FIXME: track the beginning of the token
	t.tokens <- token{location{t.line, t.column}, string(t.value), kind}
	t.value = []byte{}
}

func (t *tokenizer) peek() (byte, error) {
	buf, err := t.reader.Peek(1)
	if err != nil {
		return 0, err
	}
	return buf[0], nil
}

func (t *tokenizer) eat() (byte, error) {
	c, err := t.reader.ReadByte()
	if err != nil {
		return 0, err
	}
	if c == '\n' {
		t.line++
		t.column = 0
	} else {
		t.column++
	}
	return c, nil
}

// -----------------------------------------------------------------------------

func (t *tokenizer) step() error {
	t.value = []byte{}

	c, err := t.peek()
	if err != nil {
		return err
	}

	switch {
	case isSpace(c):
		t.eat()
	case c == '\n':
		c, _ = t.eat()
		t.value = append(t.value, c)
		t.send(NEWLINE)
	case isNumber(c):
		c, _ = t.eat()
		t.value = append(t.value, c)
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if !isNumber(c) {
				break
			}
			c, _ = t.eat()
			t.value = append(t.value, c)
		}
		t.send(NUMBER)
	case isWordHead(c):
		c, _ = t.eat()
		t.value = append(t.value, c)
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if !isWordTail(c) {
				break
			}
			c, _ = t.eat()
			t.value = append(t.value, c)
		}
		t.send(WORD)
	case c == '/':
		c, _ = t.eat()
		t.value = append(t.value, c)
		c, err = t.peek()
		if err != nil {
			return err
		}
		if c != '/' {
			return errors.New("unrecognized input")
		}
		// Skip the comment until the end of the line
		for {
			c, err = t.peek()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			if c == '\n' {
				break
			}
			t.eat()
		}
	default:
		return errors.New("unrecognized input")
	}
	return nil
}

func tokenize(r io.Reader, tokens chan<- token) {
	t := tokenizer{
		line:   1,
		column: 0,
		tokens: tokens,
		reader: bufio.NewReader(r),
	}
	for {
		err := t.step()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.tokens <- token{location{t.line, t.column},
				fmt.Sprintf("line %d, column %d: %s", t.line, t.column, err.Error()), ERROR}
			break
		}
	}
	close(tokens)
}

// -----------------------------------------------------------------------------

const (
	IHALT = iota
	IADD
	ISUBTRACT
	ISTORE
	ILOAD
	IBRANCH
	IBRANCH_IF_ZERO
	IBRANCH_IF_POSITIVE
	IINPUT
	IOUTPUT
	IDATA
)

var instructions = map[string]int{
	"HLT": IHALT,
	"COB": IHALT,
	"ADD": IADD,
	"SUB": ISUBTRACT,
	"STA": ISTORE,
	"LDA": ILOAD,
	"BRA": IBRANCH,
	"BRZ": IBRANCH_IF_ZERO,
	"BRP": IBRANCH_IF_POSITIVE,
	"INP": IINPUT,
	"OUT": IOUTPUT,
	"DAT": IDATA,
}

type instruction struct {
	id     int
	target string
	number int
}

// -----------------------------------------------------------------------------

type assembler struct {
	tokens chan token
	output []instruction
	labels map[string]int
}

func (a *assembler) step() (bool, error) {
	token, ok := <-a.tokens
	if !ok {
		return false, nil
	}

	// TODO: add token location information to returned errors
	switch token.kind {
	case WORD:
		canonical := strings.ToUpper(token.value)
		instr, found := instructions[canonical]

		// Not found in the instruction list, assume it is a label
		if !found {
			if _, dup := a.labels[canonical]; dup {
				return false, fmt.Errorf("duplicate label: %s", canonical)
			}
			a.labels[canonical] = len(a.output)

			token, ok = <-a.tokens
			if !ok {
				return false, errors.New("unexpected end of file")
			}
			if token.kind != WORD {
				return false, errors.New("expected word")
			}

			// XXX: it might be better to classify this in the lexer
			canonical = strings.ToUpper(token.value)
			instr, found = instructions[canonical]
		}
		if !found {
			return false, fmt.Errorf("unknown instruction: %s", canonical)
		}

		instrHolder := instruction{id: instr}

		// Read the operand, if any
		token, ok = <-a.tokens
		if !ok {
			// End of input: this is fine, just assume a zero operand
			a.output = append(a.output, instrHolder)
			break
		}
		switch token.kind {
		case WORD:
			instrHolder.target = strings.ToUpper(token.value)
		case NEWLINE:
			// This is fine, just assume zero
		case NUMBER:
			instrHolder.number, _ = strconv.Atoi(token.value)
		case ERROR:
			return false, errors.New(token.value)
		}
		a.output = append(a.output, instrHolder)
	case NEWLINE:
		// Ignore empty lines
	case NUMBER:
		return false, errors.New("unexpected number")
	case ERROR:
		return false, errors.New(token.value)
	}
	return true, nil
}

func Assemble(r io.Reader) (code []int16, err error) {
	a := assembler{
		tokens: make(chan token),
		labels: map[string]int{},
	}
	go tokenize(r, a.tokens)

	for {
		cont, err := a.step()
		if err != nil {
			return nil, err
		}
		if !cont {
			break
		}
	}

	for _, x := range a.output {
		n := x.id * 100
		if len(x.target) != 0 {
			resolved, ok := a.labels[x.target]
			if !ok {
				return nil, fmt.Errorf("unknown label: %s", x.target)
			}
			n += resolved
		} else {
			n += x.number
		}
		code = append(code, int16(n))
	}
	return code, nil
}
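
// -----------------------------------------------------------------------------

// The file declares package main but contains no entry point, so the function
// below is a minimal, hypothetical driver sketching how Assemble might be
// invoked. The sample program and the output format are assumptions for
// illustration only; they are not part of the assembler itself.
func main() {
	// A small source text: labels in the first column, one instruction per line.
	source := `
	        INP
	        STA value
	        LDA value
	        ADD one
	        OUT
	        HLT
	one     DAT 1
	value   DAT 0
	`
	code, err := Assemble(strings.NewReader(source))
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	// Print each assembled word with its address.
	for address, word := range code {
		fmt.Printf("%02d: %04d\n", address, word)
	}
}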