From 128fb157b302f0f0881844fc7a2fa5653c74a197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Thu, 22 Dec 2016 22:58:20 +0100 Subject: [PATCH] Initial commit --- LICENSE | 15 + Makefile | 12 + README.adoc | 49 +++ bfc-amd64-linux.c | 723 +++++++++++++++++++++++++++++++ interpreters/Makefile | 14 + interpreters/README.adoc | 15 + interpreters/bf-faster-loops.c | 151 +++++++ interpreters/bf-jit-opt.c | 495 +++++++++++++++++++++ interpreters/bf-jit-unsafe-opt.c | 617 ++++++++++++++++++++++++++ interpreters/bf-jit-unsafe.c | 495 +++++++++++++++++++++ interpreters/bf-jit.c | 327 ++++++++++++++ interpreters/bf-optimizing.c | 213 +++++++++ interpreters/bf.c | 160 +++++++ 13 files changed, 3286 insertions(+) create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.adoc create mode 100644 bfc-amd64-linux.c create mode 100644 interpreters/Makefile create mode 100644 interpreters/README.adoc create mode 100644 interpreters/bf-faster-loops.c create mode 100644 interpreters/bf-jit-opt.c create mode 100644 interpreters/bf-jit-unsafe-opt.c create mode 100644 interpreters/bf-jit-unsafe.c create mode 100644 interpreters/bf-jit.c create mode 100644 interpreters/bf-optimizing.c create mode 100644 interpreters/bf.c diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ce263ae --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ + Copyright (c) 2016, Přemysl Janouch + All rights reserved. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5bb5db1 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +# All we need is C99 and POSIX, which this should make available +CFLAGS = -std=gnu99 +NAMES = bfc-amd64-linux + +all: $(NAMES) + +%: %.c + $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ +clean: + rm -f $(NAMES) + +.PHONY: all clean diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..660c38c --- /dev/null +++ b/README.adoc @@ -0,0 +1,49 @@ +bfc +=== + +'bfc' is a small, fast, self-contained, optimizing Brainfuck compiler for Linux +on Intel x86-64. + +Also included are several interpreters in various states of sophistication that +document my progress as I was writing this, from the simplest approach to an +optimizing JIT compiler. + +It's pretty easy to retarget the compiler, it just means redoing half the work. +The compiler itself is platform agnostic. + +Building +-------- +Build dependencies: a C99 compiler + +Runtime dependencies: Linux + + $ git clone https://github.com/pjanouch/bfc.git + $ cd bfc + $ make + +To obtain dumps of the intermediate representation, compile with `-DDEBUG`: + + $ make CPPFLAGS=-DDEBUG + +Usage +----- + + ./bfc-amd64-linux [INPUT-FILE] [OUTPUT-FILE] + +When no input file is specified, stdin is used. Similarly, the default output +filename is a.out. The resulting file can be run on the target platform. + +Contributing and Support +------------------------ +Use this project's GitHub to report any bugs, request features, or submit pull +requests. If you want to discuss this project, or maybe just hang out with +the developer, feel free to join me at irc://irc.janouch.name, channel #dev. + +License +------- +'bfc' is written by Přemysl Janouch . + +You may use the software under the terms of the ISC license, the text of which +is included within the package, or, at your option, you may relicense the work +under the MIT or the Modified BSD License, as listed at the following site: + +http://www.gnu.org/licenses/license-list.html diff --git a/bfc-amd64-linux.c b/bfc-amd64-linux.c new file mode 100644 index 0000000..039e224 --- /dev/null +++ b/bfc-amd64-linux.c @@ -0,0 +1,723 @@ +// This is an exercise in futility more than anything else +#include +#include +#include +#include +#include +#include +#include + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command +{ + RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END, + SET, EAT, INCACC, DECACC +}; + +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; int offset; size_t arg; }; +#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) } + +// - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump_instruction (FILE *fp, const struct instruction *in) +{ + const char *name; + switch (in->cmd) + { + case RIGHT: name = "RIGHT "; break; + case LEFT: name = "LEFT "; break; + case INC: name = "INC "; break; + case DEC: name = "DEC "; break; + case OUT: name = "OUT "; break; + case IN: name = "IN "; break; + case BEGIN: name = "BEGIN "; break; + case END: name = "END "; break; + case SET: name = "SET "; break; + case EAT: name = "EAT "; break; + case INCACC: name = "INCACC"; break; + case DECACC: name = "DECACC"; break; + } + fprintf (fp, "%s %zu", name, in->arg); + if (in->offset != 0) + fprintf (fp, " [%d]", in->offset); + fprintf (fp, "\n"); +} + +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ + FILE *fp = fopen (filename, "w"); + long indent = 0; + for (size_t i = 0; i < len; i++) + { + if (in[i].cmd == END) + indent--; + for (long k = 0; k < indent; k++) + fputs (" ", fp); + debug_dump_instruction (fp, &in[i]); + if (in[i].cmd == BEGIN) + indent++; + } + fclose (fp); +} +#else +#define debug_dump(...) +#endif + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static size_t +optimize_assignment (struct instruction *irb, size_t irb_len) +{ + size_t in = 0, out = 0; + for (; in < irb_len; in++, out++) + { + if (in + 2 < irb_len + && irb[in ].cmd == BEGIN + && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1 + && irb[in + 2].cmd == END) + { + irb[out] = INSTRUCTION (SET, 0, 0); + in += 2; + } + else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC) + irb[--out].arg += irb[in].arg; + else if (out != in) + irb[out] = irb[in]; + } + return out; +} + +// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT +// and compress the LEFT/RIGHT sequences +static size_t +optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len) +{ + size_t in = 0, out = 0; + for (in = 0, out = 0; in < irb_len; in++, out++) + { + intptr_t dir = 0; + if (irb[in].cmd == RIGHT) + dir = irb[in].arg; + else if (irb[in].cmd == LEFT) + dir = -(intptr_t) irb[in].arg; + else + { + irb[out] = irb[in]; + continue; + } + + while (in + 2 < irb_len) + { + // An immediate offset has its limits on x86-64 + if (dir < INT8_MIN || dir > INT8_MAX) + break; + + intptr_t diff; + if (irb[in + 2].cmd == RIGHT) + diff = irb[in + 2].arg; + else if (irb[in + 2].cmd == LEFT) + diff = -(intptr_t) irb[in + 2].arg; + else + break; + + int cmd = irb[in + 1].cmd; + if (cmd != INC && cmd != DEC && cmd != SET) + break; + + irb[out] = irb[in + 1]; + irb[out].offset = dir; + + dir += diff; + out += 1; + in += 2; + } + + for (; in + 1 < irb_len; in++) + { + if (irb[in + 1].cmd == RIGHT) + dir += irb[in + 1].arg; + else if (irb[in + 1].cmd == LEFT) + dir -= (intptr_t) irb[in + 1].arg; + else + break; + } + + if (!dir) + out--; + else if (dir > 0) + irb[out] = INSTRUCTION (RIGHT, 0, dir); + else + irb[out] = INSTRUCTION (LEFT, 0, -dir); + } + return out; +} + +// Try to eliminate loops that eat a cell and add/subtract its value +// to/from some other cell +static size_t +optimize_inc_dec_loops (struct instruction *irb, size_t irb_len) +{ + size_t in = 0, out = 0; + for (in = 0, out = 0; in < irb_len; in++, out++) + { + irb[out] = irb[in]; + if (irb[in].cmd != BEGIN) + continue; + + bool ok = false; + size_t count = 0; + for (size_t k = in + 1; k < irb_len; k++) + { + if (irb[k].cmd == END) + { + ok = true; + break; + } + if (irb[k].cmd != INC + && irb[k].cmd != DEC) + break; + count++; + } + if (!ok) + continue; + + // Stable sort operations by their offsets, put [0] first + bool sorted; + do + { + sorted = true; + for (size_t k = 1; k < count; k++) + { + if (irb[in + k].offset == 0) + continue; + if (irb[in + k + 1].offset != 0 + && irb[in + k].offset <= irb[in + k + 1].offset) + continue; + + struct instruction tmp = irb[in + k + 1]; + irb[in + k + 1] = irb[in + k]; + irb[in + k] = tmp; + sorted = false; + } + } + while (!sorted); + + // Abort the optimization on duplicate offsets (complication with [0]) + for (size_t k = 1; k < count; k++) + if (irb[in + k].offset == irb[in + k + 1].offset) + ok = false; + // XXX: can't make the code longer either + for (size_t k = 1; k <= count; k++) + if (irb[in + k].arg != 1) + ok = false; + if (!ok + || irb[in + 1].cmd != DEC + || irb[in + 1].offset != 0) + continue; + + int min_safe_left_offset = 0; + if (in > 1 && irb[in - 1].cmd == RIGHT) + min_safe_left_offset = -irb[in - 1].arg; + + bool cond_needed_for_safety = false; + for (size_t k = 0; k < count; k++) + if (irb[in + k + 1].offset < min_safe_left_offset) + { + cond_needed_for_safety = true; + break; + } + + in++; + if (cond_needed_for_safety) + out++; + + irb[out] = INSTRUCTION (EAT, 0, 0); + for (size_t k = 1; k < count; k++) + irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC + ? INCACC : DECACC, irb[in + k].offset, 0); + + in += count; + out += count; + + if (cond_needed_for_safety) + irb[out] = INSTRUCTION (END, 0, 0); + else + out--; + } + return out; +} + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static void +pair_loops (struct instruction *irb, size_t irb_len) +{ + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, irb_len); + for (size_t i = 0; i < irb_len; i++) + { + switch (irb[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + if (nesting <= 0) + exit_fatal ("unbalanced loops\n"); + + --nesting; + irb[stack[nesting]].arg = i + 1; + + // Looping can be disabled by optimizations + if (irb[i].arg) + irb[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + + if (nesting != 0) + exit_fatal ("unbalanced loops\n"); +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +int +main (int argc, char *argv[]) +{ + if (argc > 3) + exit_fatal ("usage: %s [INPUT-FILE]\n", argv[0]); + + FILE *input_file = stdin; + if (argc > 1 && !(input_file = fopen (argv[1], "r"))) + exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno)); + + const char *output_path = "a.out"; + if (argc > 2) + output_path = argv[2]; + + struct str buffer; + str_init (&buffer); + + int c; + while ((c = fgetc (input_file)) != EOF) + str_append_c (&buffer, c); + if (ferror (input_file)) + exit_fatal ("can't read program\n"); + fclose (input_file); + +// - - Decode, group and optimize - - - - - - - - - - - - - - - - - - - - - - - + + // This is our Intermediate Representation Buffer + struct instruction *irb = xcalloc (sizeof *irb, buffer.len); + size_t irb_len = 0; + + for (size_t i = 0; i < buffer.len; i++) + { + enum command cmd; + switch (buffer.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + // The most basic optimization is to group identical commands together + if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd) + irb_len++; + + irb[irb_len - 1].cmd = cmd; + irb[irb_len - 1].arg++; + } + + debug_dump ("bf-no-opt.txt", irb, irb_len); + irb_len = optimize_assignment (irb, irb_len); + debug_dump ("bf-pre-offsets.txt", irb, irb_len); + irb_len = optimize_offseted_inc_dec (irb, irb_len); + debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len); + irb_len = optimize_inc_dec_loops (irb, irb_len); + debug_dump ("bf-optimized.txt", irb, irb_len); + pair_loops (irb, irb_len); + debug_dump ("bf-final.txt", irb, irb_len); + +// - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + str_init (&buffer); + size_t *offsets = xcalloc (sizeof *offsets, irb_len + 1); + bool *sets_flags = xcalloc (sizeof *sets_flags, irb_len); + +#define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); } +#define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 } +#define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); } +#define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); } +#define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); } +#define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); } + + enum + { + ELF_LOAD_CODE = 0x400000, // where code is loaded (usual) + ELF_LOAD_DATA = 0x800000 // where the tape is placed + }; + + CODE ("\xB8") DD (ELF_LOAD_DATA) // mov rax, "ELF_LOAD_DATA" + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < irb_len; i++) + { + offsets[i] = buffer.len; + + size_t arg = irb[i].arg; + assert (arg <= UINT32_MAX); + + int offset = irb[i].offset; + assert (offset <= INT8_MAX && offset >= INT8_MIN); + + // Don't save what we've just loaded + if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT) + if (i < 2 || i + 1 >= irb_len + || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT) + || irb[i - 1].cmd != BEGIN + || irb[i + 1].cmd != END) + CODE ("\x88\x18") // mov [rax], bl + + switch (irb[i].cmd) + { + case RIGHT: + // add rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) { CODE ("\x48\x05") DD (arg) } + else { CODE ("\x48\x83\xC0") DB (arg) } + break; + case LEFT: + // sub rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) { CODE ("\x48\x2D") DD (arg) } + else { CODE ("\x48\x83\xE8") DB (arg) } + break; + + case EAT: + // NOTE: the kernel destroys rcx and r11 on syscalls, + // there must be no OUT or IN between EAT and INCACC/DECACC + CODE ("\x88\xD9" "\x30\xDB") // mov cl, bl; xor bl, bl + sets_flags[i] = true; + break; + case INCACC: + if (offset) + { + CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl + } + else + { + CODE ("\x00\xCB") // add bl, cl + sets_flags[i] = true; + } + break; + case DECACC: + if (offset) + { + CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl + } + else + { + CODE ("\x28\xCB") // sub bl, cl + sets_flags[i] = true; + } + break; + + case INC: + if (offset) + { + CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg" + } + else + { + CODE ("\x80\xC3") // add bl, "arg" + sets_flags[i] = true; + } + DB (arg) + break; + case DEC: + if (offset) + { + CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg" + } + else + { + CODE ("\x80\xEB") // sub bl, "arg" + sets_flags[i] = true; + } + DB (arg) + break; + case SET: + if (offset) + { + CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg" + } + else + CODE ("\xB3") // mov bl, "arg" + DB (arg) + break; + + case OUT: + CODE ("\xE8") DD (0) // call "write" + break; + case IN: + CODE ("\xE8") DD (0) // call "read" + break; + + case BEGIN: + // Don't test the register when the flag has been set already; + // this doesn't have much of an effect in practice + if (!i || !sets_flags[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]" + break; + case END: + // We know that the cell is zero, make this an "if", not a "loop"; + // this doesn't have much of an effect in practice + if (!arg) + break; + + if (!i || !sets_flags[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]" + break; + } + + // No sense in reading it out when we overwrite it immediately; + // this doesn't have much of an effect in practice + if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT) + if (i + 1 >= irb_len + || irb[i + 1].cmd != SET + || irb[i + 1].offset != 0) + CODE ("\x8A\x18") // mov bl, [rax] + } + // When there is a loop at the end we need to be able to jump past it + offsets[irb_len] = buffer.len; + + // Write an epilog which handles all the OS interfacing + // + // System V x86-64 ABI: + // rax <-> both syscall number and return value + // args -> rdi, rsi, rdx, r10, r8, r9 + // trashed <- rcx, r11 + + enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 }; + + CODE ("\xB8") DD (SYS_EXIT) // mov eax, 0x3c + CODE ("\x48\x31\xFF") // xor rdi, rdi + CODE ("\x0F\x05") // syscall + + size_t fatal_offset = buffer.len; + CODE ("\x48\x89\xF7") // mov rdi, rsi -- use the string in rsi + CODE ("\x30\xC0") // xor al, al -- look for the nil byte + CODE ("\x48\x31\xC9") // xor rcx, rcx + CODE ("\x48\xF7\xD1") // not rcx -- start from -1 + CODE ("\xFC" "\xF2\xAE") // cld; repne scasb -- decrement until found + CODE ("\x48\xF7\xD1") // not rcx + CODE ("\x48\x8D\x51\xFF") // lea rdx, [rcx-1] -- save length in rdx + CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE" + CODE ("\xBF") DD (2) // mov edi, "STDERR_FILENO" + CODE ("\x0F\x05") // syscall + + CODE ("\xB8") DD (SYS_EXIT) // mov eax, "SYS_EXIT" + CODE ("\xBF") DD (1) // mov edi, "EXIT_FAILURE" + CODE ("\x0F\x05") // syscall + + size_t read_offset = buffer.len; + CODE ("\x50") // push rax -- save tape position + CODE ("\xB8") DD (SYS_READ) // mov eax, "SYS_READ" + CODE ("\x48\x89\xC7") // mov rdi, rax -- STDIN_FILENO + CODE ("\x66\x6A\x00") // push word 0 -- the default value for EOF + CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp + CODE ("\xBA") DD (1) // mov edx, 1 -- count + CODE ("\x0F\x05") // syscall + CODE ("\x66\x5B") // pop bx + + CODE ("\x48\x83\xF8\x00") // cmp rax, 0 + CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel read_message] + CODE ("\x7C") // jl "fatal_offset" -- write failure message + DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1)) + CODE ("\x58") // pop rax -- restore tape position + CODE ("\xC3") // ret + CODE ("fatal: read failed\n\0") + + size_t write_offset = buffer.len; + CODE ("\x50") // push rax -- save tape position + CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE" + CODE ("\x48\x89\xC7") // mov rdi, rax -- STDOUT_FILENO + CODE ("\x66\x53") // push bx + CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp + CODE ("\xBA") DD (1) // mov edx, 1 -- count + CODE ("\x0F\x05") // syscall + CODE ("\x66\x5B") // pop bx + + CODE ("\x48\x83\xF8\x00") // cmp rax, 0 + CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel write_message] + CODE ("\x7C") // jl "fatal_offset" -- write failure message + DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1)) + CODE ("\x58") // pop rax -- restore tape position + CODE ("\xC3") // ret + CODE ("fatal: write failed\n\0") + + // Now that we know where each instruction is, fill in relative jumps + for (size_t i = 0; i < irb_len; i++) + { + if (!irb[i].arg) + continue; + + // This must accurately reflect the code generators + intptr_t target, fixup = offsets[i]; + if (irb[i].cmd == BEGIN || irb[i].cmd == END) + { + fixup += (i && sets_flags[i - 1]) ? 2 : 4; + target = offsets[irb[i].arg]; + } + else if (irb[i].cmd == IN) { fixup++; target = read_offset; } + else if (irb[i].cmd == OUT) { fixup++; target = write_offset; } + else continue; + + uint64_t v = target - (fixup + 4); + memcpy (buffer.str + fixup, LE (v), 4); + } + free (offsets); + free (sets_flags); + +// - - Output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Now that we know how long the machine code is, we can write the header. + // Note that for PIE we would need to depend on the dynamic linker, so no. + // + // Recommended reading: + // http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html + // man 5 elf + + struct str code = buffer; + str_init (&buffer); + + enum + { + ELF_HEADER_SIZE = 64, // size of the ELF header + ELF_PROGRAM_ENTRY_SIZE = 56, // size of a program header + ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE + }; + + // ELF header + CODE ("\x7F" "ELF\x02\x01\x01") // ELF, 64-bit, little endian, v1 + CODE ("\x00\x00" "\0\0\0\0\0\0\0") // Unix System V ABI, v0, padding + DW (2) DW (62) DD (1) // executable, x86-64, v1 + DQ (ELF_LOAD_CODE + ELF_META_SIZE) // entry point address + DQ (ELF_HEADER_SIZE) DQ (0) // program, section header offset + DD (0) // no processor-specific flags + DW (ELF_HEADER_SIZE) // ELF header size + DW (ELF_PROGRAM_ENTRY_SIZE) DW (2) // program hdr tbl entry size, count + DW (0) DW (0) // section hdr tbl entry size, count + DW (0) // no section index for strings + + // Program header for code + // The entry point address seems to require alignment, so map start of file + DD (1) DD (5) // PT_LOAD, PF_R | PF_X + DQ (0) // offset within the file + DQ (ELF_LOAD_CODE) // address in virtual memory + DQ (ELF_LOAD_CODE) // address in physical memory + DQ (code.len + ELF_META_SIZE) // length within the file + DQ (code.len + ELF_META_SIZE) // length within memory + DQ (4096) // segment alignment + + // Program header for the tape + DD (1) DD (6) // PT_LOAD, PF_R | PF_W + DQ (0) // offset within the file + DQ (ELF_LOAD_DATA) // address in virtual memory + DQ (ELF_LOAD_DATA) // address in physical memory + DQ (0) // length within the file + DQ (1 << 20) // one megabyte of memory + DQ (4096) // segment alignment + + // The section header table is optional and we don't need it for anything + + FILE *output_file; + if (!(output_file = fopen (output_path, "w"))) + exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno)); + + fwrite (buffer.str, buffer.len, 1, output_file); + fwrite (code.str, code.len, 1, output_file); + fclose (output_file); + return 0; +} diff --git a/interpreters/Makefile b/interpreters/Makefile new file mode 100644 index 0000000..3e73cf5 --- /dev/null +++ b/interpreters/Makefile @@ -0,0 +1,14 @@ +CC = c99 +CFLAGS = -O3 + +NAMES = bf bf-faster-loops bf-optimizing \ + bf-jit bf-jit-opt bf-jit-unsafe bf-jit-unsafe-opt + +all: $(NAMES) + +%: %.c + $(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@ +clean: + rm -f $(NAMES) + +.PHONY: all clean diff --git a/interpreters/README.adoc b/interpreters/README.adoc new file mode 100644 index 0000000..ecde608 --- /dev/null +++ b/interpreters/README.adoc @@ -0,0 +1,15 @@ +This directory contains several Brainfuck interpreters in various states of +sophistication, from the simplest approach to an optimizing JIT compiler: + + * `bf.c` is the stupidest one and the oldest by far + * `bf-faster-loops.c` precomputes loop jumps + * `bf-optimizing.c` improves on that by changing `[-]+` loops into assignments + * `bf-jit.c` adds JIT compilation for Intel x86-64 + * `bf-jit-opt.c` tries a bit harder to avoid looping on the current value + * `bf-jit-unsafe.c` abolishes all boundary checks when moving across the tape + * `bf-jit-unsafe-opt.c` makes use of immediate offsets to modify values + +I recommend using a tool such as 'meld' to view the differences. + +Just run `make` in this directory to have them all built, and append +`CPPFLAGS=-DDEBUG` to get dumps of the IR for the more sophisticated JITs. diff --git a/interpreters/bf-faster-loops.c b/interpreters/bf-faster-loops.c new file mode 100644 index 0000000..e301d95 --- /dev/null +++ b/interpreters/bf-faster-loops.c @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xmalloc (size_t n) +{ + void *p = malloc (n); + if (!p) + exit_fatal ("malloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->alloc = 16; + self->len = 0; + self->str = strcpy (xmalloc (self->alloc), ""); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + struct str program; str_init (&program); + struct str data; str_init (&data); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + + FILE *input = fopen ("/dev/tty", "rb"); + if (!input) + exit_fatal ("can't open terminal for reading\n"); + + size_t *pairs = xmalloc (sizeof *pairs * program.len); + size_t *stack = xmalloc (sizeof *stack * program.len); + + size_t nesting = 0; + for (size_t i = 0; i < program.len; i++) + { + switch (program.str[i]) + { + case '[': + stack[nesting++] = i; + break; + case ']': + assert (nesting > 0); + + --nesting; + pairs[stack[nesting]] = i; + pairs[i] = stack[nesting]; + } + } + assert (nesting == 0); + + size_t dataptr = 0; + str_append_c (&data, 0); + + for (size_t i = 0; i < program.len; i++) + { + switch (program.str[i]) + { + case '>': + assert (dataptr != SIZE_MAX); + if (++dataptr == data.len) + str_append_c (&data, 0); + break; + case '<': + assert (dataptr != 0); + dataptr--; + break; + + case '+': data.str[dataptr]++; break; + case '-': data.str[dataptr]--; break; + + case '.': + fputc (data.str[dataptr], stdout); + break; + case ',': + data.str[dataptr] = c = fgetc (input); + assert (c != EOF); + break; + + case '[': if (!data.str[dataptr]) i = pairs[i]; break; + case ']': if ( data.str[dataptr]) i = pairs[i]; break; + + default: + break; + } + } + return 0; +} diff --git a/interpreters/bf-jit-opt.c b/interpreters/bf-jit-opt.c new file mode 100644 index 0000000..3b24ad3 --- /dev/null +++ b/interpreters/bf-jit-opt.c @@ -0,0 +1,495 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ + #include +#else + #error Platform not supported +#endif + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +struct str data; ///< Data tape +volatile size_t dataptr; ///< Current location on the tape +FILE *input; ///< User input + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, + EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; +#define INSTRUCTION(c, a) (struct instruction) { (c), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// Some things I just really don't want to write in assembly even though it +// is effectively a big performance hit, eliminating the advantage of JIT + +static void +right (size_t arg) +{ + assert (SIZE_MAX - dataptr > arg); + dataptr += arg; + + while (dataptr >= data.len) + str_append_c (&data, 0); +} + +static void +left (size_t arg) +{ + assert (dataptr >= arg); + dataptr -= arg; +} + +static void +cin (void) +{ + int c; + data.str[dataptr] = c = fgetc (input); + assert (c != EOF); +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ + FILE *fp = fopen (filename, "w"); + long indent = 0; + for (size_t i = 0; i < len; i++) + { + if (in[i].cmd == END) + indent--; + for (long k = 0; k < indent; k++) + fprintf (fp, " "); + + switch (in[i].cmd) + { + case RIGHT: fprintf (fp, "RIGHT %zu\n", in[i].arg); break; + case LEFT: fprintf (fp, "LEFT %zu\n", in[i].arg); break; + case INC: fprintf (fp, "INC %zu\n", in[i].arg); break; + case DEC: fprintf (fp, "DEC %zu\n", in[i].arg); break; + case OUT: fprintf (fp, "OUT %zu\n", in[i].arg); break; + case IN: fprintf (fp, "IN %zu\n", in[i].arg); break; + case BEGIN: fprintf (fp, "BEGIN %zu\n", in[i].arg); break; + case END: fprintf (fp, "END %zu\n", in[i].arg); break; + case SET: fprintf (fp, "SET %zu\n", in[i].arg); break; + case EAT: fprintf (fp, "EAT %zu\n", in[i].arg); break; + case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break; + case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break; + } + if (in[i].cmd == BEGIN) + indent++; + } + fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + + struct str program; + str_init (&program); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + if (!(input = fopen ("/dev/tty", "rb"))) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + // The most basic optimization is to group identical commands together + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + + debug_dump ("bf-no-opt.txt", parsed, parsed_len); + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + // This shows up in mandelbrot.bf a lot but actually helps hanoi.bf + if (in + 5 < parsed_len + && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + + && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT + && parsed[in + 2].arg == parsed[in + 4].arg + + && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) + && parsed[in + 3].arg == 1) + { + // This mustn't make the move when the cell is zero already + parsed[out] = parsed[in]; + parsed[out + 1] = INSTRUCTION (EAT, 0); + parsed[out + 2] = parsed[in + 2]; + parsed[out + 3] = INSTRUCTION + (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); + parsed[out + 4] = parsed[in + 4]; + // This disables the looping further in the code; + // this doesn't have much of an effect in practice + parsed[out + 5] = INSTRUCTION (END, 0); + in += 5; + out += 5; + } + // The simpler case that cannot crash and thus can avoid the loop + else if (in + 5 < parsed_len + && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + + && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT + && parsed[in + 2].arg == parsed[in + 4].arg + + && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) + && parsed[in + 3].arg == 1) + { + parsed[out] = INSTRUCTION (EAT, 0); + parsed[out + 1] = parsed[in + 2]; + parsed[out + 2] = INSTRUCTION + (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); + parsed[out + 3] = parsed[in + 4]; + in += 5; + out += 3; + } + else if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out] = INSTRUCTION (SET, 0); + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + parsed_len = out; + + for (in = 0, out = 0; in < parsed_len; in++, out++) + { + ssize_t dir = 0; + if (parsed[in].cmd == RIGHT) + dir = parsed[in].arg; + else if (parsed[in].cmd == LEFT) + dir = -(ssize_t) parsed[in].arg; + else + { + parsed[out] = parsed[in]; + continue; + } + + for (; in + 1 < parsed_len; in++) + { + if (parsed[in + 1].cmd == RIGHT) + dir += parsed[in + 1].arg; + else if (parsed[in + 1].cmd == LEFT) + dir -= (ssize_t) parsed[in + 1].arg; + else + break; + } + + if (!dir) + out--; + else if (dir > 0) + parsed[out] = INSTRUCTION (RIGHT, dir); + else + parsed[out] = INSTRUCTION (LEFT, -dir); + } + parsed_len = out; + + debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i + 1; + + // Looping can be disabled by optimizations + if (parsed[i].arg) + parsed[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + assert (nesting == 0); + + debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; + // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + + str_init (&program); + size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + uint8_t *arith = xcalloc (sizeof *arith, parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + + CODE ("\x49\xBD") WORD (&dataptr) // mov r13, qword "&dataptr" + CODE ("\x49\xBF") WORD (&data.str) // mov r15, qword "&data.str" + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < parsed_len; i++) + { + offsets[i] = program.len; + + size_t arg = parsed[i].arg; + assert (arg <= UINT32_MAX); + switch (parsed[i].cmd) + { + case RIGHT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\xBF") DWRD (arg) // mov edi, "arg" + CODE ("\x48\xB8") WORD (right) // mov rax, "right" + CODE ("\xFF\xD0") // call rax + + // The data could get reallocated, so reload the address + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x4D\x03\x75\x00") // add r14, [r13] + break; + case LEFT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\xBF") DWRD (arg) // mov edi, "arg" + CODE ("\x49\x29\xFE") // sub r14, rdi -- optimistic + CODE ("\x48\xB8") WORD (left) // mov rax, "left" + CODE ("\xFF\xD0") // call rax + break; + + case EAT: + CODE ("\x41\x88\xDC") // mov r12b, bl + CODE ("\x30\xDB") // xor bl, bl + arith[i] = 1; + break; + case INCACC: + CODE ("\x44\x00\xE3") // add bl, r12b + arith[i] = 1; + break; + case DECACC: + CODE ("\x44\x28\xE3") // sub bl, r12b + arith[i] = 1; + break; + + case INC: + CODE ("\x80\xC3") // add bl, "arg" + str_append_c (&program, arg); + arith[i] = 1; + break; + case DEC: + CODE ("\x80\xEB") // sub bl, "arg" + str_append_c (&program, arg); + arith[i] = 1; + break; + case SET: + CODE ("\xB3") // mov bl, "arg" + str_append_c (&program, arg); + break; + + case OUT: + CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl + CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout" + CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc" + CODE ("\xFF\xD0") // call rax + break; + case IN: + CODE ("\x48\xB8") WORD (cin) // mov rax, "cin" + CODE ("\xFF\xD0") // call rax + CODE ("\x41\x8A\x1E") // mov bl, [r14] + break; + + case BEGIN: + // Don't test the register when the flag has been set already; + // this doesn't have much of an effect in practice + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" + break; + case END: + // We know that the cell is zero, make this an "if", not a "loop"; + // this doesn't have much of an effect in practice + if (!arg) + break; + + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" + break; + } + + // No sense in reading it out when we overwrite it immediately; + // this doesn't have much of an effect in practice + if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) + if (i + 1 >= parsed_len + || parsed[i + 1].cmd != SET) + CODE ("\x41\x8A\x1E") // mov bl, [r14] + } + // When there is a loop at the end we need to be able to jump past it + offsets[parsed_len] = program.len; + str_append_c (&program, '\xC3'); // ret + + // Now that we know where each instruction is, fill in relative jumps; + // this must accurately reflect code generators for BEGIN and END + for (size_t i = 0; i < parsed_len; i++) + { + if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) + || !parsed[i].arg) + continue; + + size_t fixup = offsets[i] + 2; + if (!i || !arith[i - 1]) + fixup += 2; + + *(int32_t *)(program.str + fixup) = + ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); + } + free (offsets); + free (arith); + +#ifdef DEBUG + FILE *bin = fopen ("bf-jit.bin", "w"); + fwrite (program.str, program.len, 1, bin); + fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Some systems may have W^X + void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!executable) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memcpy (executable, program.str, program.len); + if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + str_init (&data); + str_append_c (&data, 0); + ((void (*) (void)) executable)(); + return 0; +} diff --git a/interpreters/bf-jit-unsafe-opt.c b/interpreters/bf-jit-unsafe-opt.c new file mode 100644 index 0000000..88a7980 --- /dev/null +++ b/interpreters/bf-jit-unsafe-opt.c @@ -0,0 +1,617 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ + #include + #include +#else + #error Platform not supported +#endif + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, + EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; int offset; size_t arg; }; +#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +FILE *input; ///< User input + +static int +cin (void) +{ + int c = fgetc (input); + assert (c != EOF); + return c; +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ + FILE *fp = fopen (filename, "w"); + long indent = 0; + for (size_t i = 0; i < len; i++) + { + if (in[i].cmd == END) + indent--; + for (long k = 0; k < indent; k++) + fprintf (fp, " "); + + switch (in[i].cmd) + { + case RIGHT: fputs ("RIGHT ", fp); break; + case LEFT: fputs ("LEFT ", fp); break; + case INC: fputs ("INC ", fp); break; + case DEC: fputs ("DEC ", fp); break; + case OUT: fputs ("OUT ", fp); break; + case IN: fputs ("IN ", fp); break; + case BEGIN: fputs ("BEGIN ", fp); break; + case END: fputs ("END ", fp); break; + case SET: fputs ("SET ", fp); break; + case EAT: fputs ("EAT ", fp); break; + case INCACC: fputs ("INCACC", fp); break; + case DECACC: fputs ("DECACC", fp); break; + } + fprintf (fp, " %zu [%d]\n", in[i].arg, in[i].offset); + if (in[i].cmd == BEGIN) + indent++; + } + fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + + struct str program; + str_init (&program); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + if (!(input = fopen ("/dev/tty", "rb"))) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + // The most basic optimization is to group identical commands together + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + + debug_dump ("bf-no-opt.txt", parsed, parsed_len); + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out] = INSTRUCTION (SET, 0, 0); + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + parsed_len = out; + + debug_dump ("bf-pre-offsets.txt", parsed, parsed_len); + + // Add offsets to INC/DEC/SET stuck between LEFT/RIGHT + // and compress the LEFT/RIGHT sequences + for (in = 0, out = 0; in < parsed_len; in++, out++) + { + ssize_t dir = 0; + if (parsed[in].cmd == RIGHT) + dir = parsed[in].arg; + else if (parsed[in].cmd == LEFT) + dir = -(ssize_t) parsed[in].arg; + else + { + parsed[out] = parsed[in]; + continue; + } + + while (in + 2 < parsed_len) + { + // An immediate offset has its limits + if (dir < INT8_MIN || dir > INT8_MAX) + break; + + ssize_t diff; + if (parsed[in + 2].cmd == RIGHT) + diff = parsed[in + 2].arg; + else if (parsed[in + 2].cmd == LEFT) + diff = -(ssize_t) parsed[in + 2].arg; + else + break; + + int cmd = parsed[in + 1].cmd; + if (cmd != INC && cmd != DEC && cmd != SET) + break; + + parsed[out] = parsed[in + 1]; + parsed[out].offset = dir; + + dir += diff; + out += 1; + in += 2; + } + + for (; in + 1 < parsed_len; in++) + { + if (parsed[in + 1].cmd == RIGHT) + dir += parsed[in + 1].arg; + else if (parsed[in + 1].cmd == LEFT) + dir -= (ssize_t) parsed[in + 1].arg; + else + break; + } + + if (!dir) + out--; + else if (dir > 0) + parsed[out] = INSTRUCTION (RIGHT, 0, dir); + else + parsed[out] = INSTRUCTION (LEFT, 0, -dir); + } + parsed_len = out; + + debug_dump ("bf-pre-incdec-unloop.txt", parsed, parsed_len); + + // Try to eliminate loops that eat a cell and add/subtract its value + // to/from some other cell + for (in = 0, out = 0; in < parsed_len; in++, out++) + { + parsed[out] = parsed[in]; + if (parsed[in].cmd != BEGIN) + continue; + + bool ok = false; + size_t count = 0; + for (size_t k = in + 1; k < parsed_len; k++) + { + if (parsed[k].cmd == END) + { + ok = true; + break; + } + if (parsed[k].cmd != INC + && parsed[k].cmd != DEC) + break; + count++; + } + if (!ok) + continue; + + // Stable sort operations by their offsets, put [0] first + bool sorted; + do + { + sorted = true; + for (size_t k = 1; k < count; k++) + { + if (parsed[in + k].offset == 0) + continue; + if (parsed[in + k + 1].offset != 0 + && parsed[in + k].offset <= parsed[in + k + 1].offset) + continue; + + struct instruction tmp = parsed[in + k + 1]; + parsed[in + k + 1] = parsed[in + k]; + parsed[in + k] = tmp; + sorted = false; + } + } + while (!sorted); + + // Abort the optimization on duplicate offsets (complication with [0]) + for (size_t k = 1; k < count; k++) + if (parsed[in + k].offset == parsed[in + k + 1].offset) + ok = false; + // XXX: can't make the code longer either + for (size_t k = 1; k <= count; k++) + if (parsed[in + k].arg != 1) + ok = false; + if (!ok + || parsed[in + 1].cmd != DEC + || parsed[in + 1].offset != 0) + continue; + + int min_safe_left_offset = 0; + if (in > 1 && parsed[in - 1].cmd == RIGHT) + min_safe_left_offset = -parsed[in - 1].arg; + + bool cond_needed_for_safety = false; + for (size_t k = 0; k < count; k++) + if (parsed[in + k + 1].offset < min_safe_left_offset) + { + cond_needed_for_safety = true; + break; + } + + in++; + if (cond_needed_for_safety) + out++; + + parsed[out] = INSTRUCTION (EAT, 0, 0); + for (size_t k = 1; k < count; k++) + parsed[out + k] = INSTRUCTION (parsed[in + k].cmd == INC + ? INCACC : DECACC, parsed[in + k].offset, 0); + + in += count; + out += count; + + if (cond_needed_for_safety) + parsed[out] = INSTRUCTION (END, 0, 0); + else + out--; + } + parsed_len = out; + + debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i + 1; + + // Looping can be disabled by optimizations + if (parsed[i].arg) + parsed[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + assert (nesting == 0); + + debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; + // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + + str_init (&program); + size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + uint8_t *arith = xcalloc (sizeof *arith, parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + + CODE ("\x48\x89\xF8") // mov rax, rdi + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < parsed_len; i++) + { + offsets[i] = program.len; + + size_t arg = parsed[i].arg; + assert (arg <= UINT32_MAX); + + int offset = parsed[i].offset; + assert (offset <= INT8_MAX && offset >= INT8_MIN); + + // Don't save what we've just loaded + if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) + if (i < 2 || i + 1 >= parsed_len + || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT) + || parsed[i - 1].cmd != BEGIN + || parsed[i + 1].cmd != END) + CODE ("\x88\x18") // mov [rax], bl + + switch (parsed[i].cmd) + { + case RIGHT: + // add rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) + { CODE ("\x48\x05") DWRD (arg) } + else + { CODE ("\x48\x83\xC0") str_append_c (&program, arg); } + break; + case LEFT: + // sub rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) + { CODE ("\x48\x2D") DWRD (arg) } + else + { CODE ("\x48\x83\xE8") str_append_c (&program, arg); } + break; + + case EAT: + CODE ("\x41\x88\xDC") // mov r12b, bl + CODE ("\x30\xDB") // xor bl, bl + arith[i] = 1; + break; + case INCACC: + if (offset) + { + CODE ("\x44\x00\x60") // add [rax+"offset"], r12b + str_append_c (&program, offset); + } + else + { + CODE ("\x44\x00\xE3") // add bl, r12b + arith[i] = 1; + } + break; + case DECACC: + if (offset) + { + CODE ("\x44\x28\x60") // sub [rax+"offset"], r12b + str_append_c (&program, offset); + } + else + { + CODE ("\x44\x28\xE3") // sub bl, r12b + arith[i] = 1; + } + break; + + case INC: + if (offset) + { + CODE ("\x80\x40") // add byte [rax+"offset"], "arg" + str_append_c (&program, offset); + } + else + { + arith[i] = 1; + CODE ("\x80\xC3") // add bl, "arg" + } + str_append_c (&program, arg); + break; + case DEC: + if (offset) + { + CODE ("\x80\x68") // sub byte [rax+"offset"], "arg" + str_append_c (&program, offset); + } + else + { + arith[i] = 1; + CODE ("\x80\xEB") // sub bl, "arg" + } + str_append_c (&program, arg); + break; + case SET: + if (offset) + { + CODE ("\xC6\x40") // mov byte [rax+"offset"], "arg" + str_append_c (&program, offset); + } + else + CODE ("\xB3") // mov bl, "arg" + str_append_c (&program, arg); + break; + + case OUT: + CODE ("\x50\x53") // push rax, push rbx + CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl + CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout" + CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc" + CODE ("\xFF\xD0") // call rax + CODE ("\x5B\x58") // pop rbx, pop rax + break; + case IN: + CODE ("\x50") // push rax + CODE ("\x48\xB8") WORD (cin) // mov rax, "cin" + CODE ("\xFF\xD0") // call rax + CODE ("\x88\xC3") // mov bl, al + CODE ("\x58") // pop rax + break; + + case BEGIN: + // Don't test the register when the flag has been set already; + // this doesn't have much of an effect in practice + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" + break; + case END: + // We know that the cell is zero, make this an "if", not a "loop"; + // this doesn't have much of an effect in practice + if (!arg) + break; + + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" + break; + } + + // No sense in reading it out when we overwrite it immediately; + // this doesn't have much of an effect in practice + if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) + if (i + 1 >= parsed_len + || parsed[i + 1].cmd != SET + || parsed[i + 1].offset != 0) + CODE ("\x8A\x18") // mov bl, [rax] + } + // When there is a loop at the end we need to be able to jump past it + offsets[parsed_len] = program.len; + str_append_c (&program, '\xC3'); // ret + + // Now that we know where each instruction is, fill in relative jumps; + // this must accurately reflect code generators for BEGIN and END + for (size_t i = 0; i < parsed_len; i++) + { + if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) + || !parsed[i].arg) + continue; + + size_t fixup = offsets[i] + 2; + if (!i || !arith[i - 1]) + fixup += 2; + + *(int32_t *)(program.str + fixup) = + ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); + } + free (offsets); + free (arith); + +#ifdef DEBUG + FILE *bin = fopen ("bf-jit.bin", "w"); + fwrite (program.str, program.len, 1, bin); + fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Some systems may have W^X + void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!executable) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memcpy (executable, program.str, program.len); + if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + // We create crash zones on both ends of the tape for some minimum safety + long pagesz = sysconf (_SC_PAGESIZE); + assert (pagesz > 0); + + const size_t tape_len = (1 << 20) + 2 * pagesz; + char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!tape) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memset (tape, 0, tape_len); + if (mprotect (tape, pagesz, PROT_NONE) + || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + ((void (*) (char *)) executable)(tape + pagesz); + return 0; +} diff --git a/interpreters/bf-jit-unsafe.c b/interpreters/bf-jit-unsafe.c new file mode 100644 index 0000000..63fbd7e --- /dev/null +++ b/interpreters/bf-jit-unsafe.c @@ -0,0 +1,495 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ + #include + #include +#else + #error Platform not supported +#endif + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END, + EAT, INCACC, DECACC }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; +#define INSTRUCTION(c, a) (struct instruction) { (c), (a) } + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +FILE *input; ///< User input + +static int +cin (void) +{ + int c = fgetc (input); + assert (c != EOF); + return c; +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +#ifdef DEBUG +static void +debug_dump (const char *filename, struct instruction *in, size_t len) +{ + FILE *fp = fopen (filename, "w"); + long indent = 0; + for (size_t i = 0; i < len; i++) + { + if (in[i].cmd == END) + indent--; + for (long k = 0; k < indent; k++) + fprintf (fp, " "); + + switch (in[i].cmd) + { + case RIGHT: fprintf (fp, "RIGHT %zu\n", in[i].arg); break; + case LEFT: fprintf (fp, "LEFT %zu\n", in[i].arg); break; + case INC: fprintf (fp, "INC %zu\n", in[i].arg); break; + case DEC: fprintf (fp, "DEC %zu\n", in[i].arg); break; + case OUT: fprintf (fp, "OUT %zu\n", in[i].arg); break; + case IN: fprintf (fp, "IN %zu\n", in[i].arg); break; + case BEGIN: fprintf (fp, "BEGIN %zu\n", in[i].arg); break; + case END: fprintf (fp, "END %zu\n", in[i].arg); break; + case SET: fprintf (fp, "SET %zu\n", in[i].arg); break; + case EAT: fprintf (fp, "EAT %zu\n", in[i].arg); break; + case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break; + case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break; + } + if (in[i].cmd == BEGIN) + indent++; + } + fclose (fp); +} +#else +#define debug_dump(...) +#endif + +int +main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + + struct str program; + str_init (&program); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + if (!(input = fopen ("/dev/tty", "rb"))) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + // The most basic optimization is to group identical commands together + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - - + + debug_dump ("bf-no-opt.txt", parsed, parsed_len); + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + // This shows up in mandelbrot.bf a lot but actually helps hanoi.bf + if (in + 5 < parsed_len + && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + + && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT + && parsed[in + 2].arg == parsed[in + 4].arg + + && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) + && parsed[in + 3].arg == 1) + { + // This mustn't make the move when the cell is zero already + parsed[out] = parsed[in]; + parsed[out + 1] = INSTRUCTION (EAT, 0); + parsed[out + 2] = parsed[in + 2]; + parsed[out + 3] = INSTRUCTION + (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); + parsed[out + 4] = parsed[in + 4]; + // This disables the looping further in the code; + // this doesn't have much of an effect in practice + parsed[out + 5] = INSTRUCTION (END, 0); + in += 5; + out += 5; + } + // The simpler case that cannot crash and thus can avoid the loop + else if (in + 5 < parsed_len + && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + + && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT + && parsed[in + 2].arg == parsed[in + 4].arg + + && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC) + && parsed[in + 3].arg == 1) + { + parsed[out] = INSTRUCTION (EAT, 0); + parsed[out + 1] = parsed[in + 2]; + parsed[out + 2] = INSTRUCTION + (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0); + parsed[out + 3] = parsed[in + 4]; + in += 5; + out += 3; + } + else if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out] = INSTRUCTION (SET, 0); + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + parsed_len = out; + + for (in = 0, out = 0; in < parsed_len; in++, out++) + { + ssize_t dir = 0; + if (parsed[in].cmd == RIGHT) + dir = parsed[in].arg; + else if (parsed[in].cmd == LEFT) + dir = -(ssize_t) parsed[in].arg; + else + { + parsed[out] = parsed[in]; + continue; + } + + for (; in + 1 < parsed_len; in++) + { + if (parsed[in + 1].cmd == RIGHT) + dir += parsed[in + 1].arg; + else if (parsed[in + 1].cmd == LEFT) + dir -= (ssize_t) parsed[in + 1].arg; + else + break; + } + + if (!dir) + out--; + else if (dir > 0) + parsed[out] = INSTRUCTION (RIGHT, dir); + else + parsed[out] = INSTRUCTION (LEFT, -dir); + } + parsed_len = out; + + debug_dump ("bf-optimized.txt", parsed, parsed_len); + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i + 1; + + // Looping can be disabled by optimizations + if (parsed[i].arg) + parsed[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + assert (nesting == 0); + + debug_dump ("bf-final.txt", parsed, parsed_len); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; + // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + + str_init (&program); + size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + uint8_t *arith = xcalloc (sizeof *arith, parsed_len); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } +#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); } + + CODE ("\x48\x89\xF8") // mov rax, rdi + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < parsed_len; i++) + { + offsets[i] = program.len; + + size_t arg = parsed[i].arg; + assert (arg <= UINT32_MAX); + + // Don't save what we've just loaded + if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) + if (i < 2 || i + 1 >= parsed_len + || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT) + || parsed[i - 1].cmd != BEGIN + || parsed[i + 1].cmd != END) + CODE ("\x88\x18") // mov [rax], bl + + switch (parsed[i].cmd) + { + case RIGHT: + // add rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) + { CODE ("\x48\x05") DWRD (arg) } + else + { CODE ("\x48\x83\xC0") str_append_c (&program, arg); } + break; + case LEFT: + // sub rax, "arg" -- optimistic, no boundary checking + if (arg > INT8_MAX) + { CODE ("\x48\x2D") DWRD (arg) } + else + { CODE ("\x48\x83\xE8") str_append_c (&program, arg); } + break; + + case EAT: + CODE ("\x41\x88\xDC") // mov r12b, bl + CODE ("\x30\xDB") // xor bl, bl + arith[i] = 1; + break; + case INCACC: + CODE ("\x44\x00\xE3") // add bl, r12b + arith[i] = 1; + break; + case DECACC: + CODE ("\x44\x28\xE3") // sub bl, r12b + arith[i] = 1; + break; + + case INC: + CODE ("\x80\xC3") // add bl, "arg" + str_append_c (&program, arg); + arith[i] = 1; + break; + case DEC: + CODE ("\x80\xEB") // sub bl, "arg" + str_append_c (&program, arg); + arith[i] = 1; + break; + case SET: + CODE ("\xB3") // mov bl, "arg" + str_append_c (&program, arg); + break; + + case OUT: + CODE ("\x50\x53") // push rax, push rbx + CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl + CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout" + CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc" + CODE ("\xFF\xD0") // call rax + CODE ("\x5B\x58") // pop rbx, pop rax + break; + case IN: + CODE ("\x50") // push rax + CODE ("\x48\xB8") WORD (cin) // mov rax, "cin" + CODE ("\xFF\xD0") // call rax + CODE ("\x88\xC3") // mov bl, al + CODE ("\x58") // pop rax + break; + + case BEGIN: + // Don't test the register when the flag has been set already; + // this doesn't have much of an effect in practice + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" + break; + case END: + // We know that the cell is zero, make this an "if", not a "loop"; + // this doesn't have much of an effect in practice + if (!arg) + break; + + if (!i || !arith[i - 1]) + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" + break; + } + + // No sense in reading it out when we overwrite it immediately; + // this doesn't have much of an effect in practice + if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT) + if (i + 1 >= parsed_len + || parsed[i + 1].cmd != SET) + CODE ("\x8A\x18") // mov bl, [rax] + } + // When there is a loop at the end we need to be able to jump past it + offsets[parsed_len] = program.len; + str_append_c (&program, '\xC3'); // ret + + // Now that we know where each instruction is, fill in relative jumps; + // this must accurately reflect code generators for BEGIN and END + for (size_t i = 0; i < parsed_len; i++) + { + if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END) + || !parsed[i].arg) + continue; + + size_t fixup = offsets[i] + 2; + if (!i || !arith[i - 1]) + fixup += 2; + + *(int32_t *)(program.str + fixup) = + ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); + } + free (offsets); + free (arith); + +#ifdef DEBUG + FILE *bin = fopen ("bf-jit.bin", "w"); + fwrite (program.str, program.len, 1, bin); + fclose (bin); +#endif + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Some systems may have W^X + void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!executable) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memcpy (executable, program.str, program.len); + if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + // We create crash zones on both ends of the tape for some minimum safety + long pagesz = sysconf (_SC_PAGESIZE); + assert (pagesz > 0); + + const size_t tape_len = (1 << 20) + 2 * pagesz; + char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!tape) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memset (tape, 0, tape_len); + if (mprotect (tape, pagesz, PROT_NONE) + || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + ((void (*) (char *)) executable)(tape + pagesz); + return 0; +} diff --git a/interpreters/bf-jit.c b/interpreters/bf-jit.c new file mode 100644 index 0000000..0be6d94 --- /dev/null +++ b/interpreters/bf-jit.c @@ -0,0 +1,327 @@ +// This is an exercise in futility more than anything else +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#if (defined __x86_64__ || defined __amd64__) && defined __unix__ + #include +#else + #error Platform not supported +#endif + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Application ------------------------------------------------------------- + +struct str data; ///< Data tape +volatile size_t dataptr; ///< Current location on the tape +FILE *input; ///< User input + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; + +// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// Some things I just really don't want to write in assembly even though it +// is effectively a big performance hit, eliminating the advantage of JIT + +static void +right (size_t arg) +{ + assert (SIZE_MAX - dataptr > arg); + dataptr += arg; + + while (dataptr >= data.len) + str_append_c (&data, 0); +} + +static void +left (size_t arg) +{ + assert (dataptr >= arg); + dataptr -= arg; +} + +static int +cin (void) +{ + int c = fgetc (input); + assert (c != EOF); + return c; +} + +// - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +int +main (int argc, char *argv[]) +{ + (void) argc; + (void) argv; + + struct str program; + str_init (&program); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + if (!(input = fopen ("/dev/tty", "rb"))) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Simple optimization pass - - - - - - - - - - - - - - - - - - - - - - - - + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out].cmd = SET; + parsed[out].arg = 0; + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + + parsed_len = out; + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i + 1; + parsed[i].arg = stack[nesting] + 1; + default: + break; + } + } + free (stack); + assert (nesting == 0); + +// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15; + // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + + str_init (&program); + size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1); + +#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); } +#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); } + + CODE ("\x49\xBD") WORD (&dataptr) // mov r13, qword "&dataptr" + CODE ("\x49\xBF") WORD (&data.str) // mov r15, qword "&data.str" + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x30\xDB") // xor bl, bl + + for (size_t i = 0; i < parsed_len; i++) + { + offsets[i] = program.len; + + size_t arg = parsed[i].arg; + switch (parsed[i].cmd) + { + case RIGHT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\x48\xBF") WORD (arg) // mov rdi, "arg" + CODE ("\x48\xB8") WORD (right) // mov rax, "right" + CODE ("\xFF\xD0") // call rax + + // The data could get reallocated, so reload the address + CODE ("\x4D\x8B\x37") // mov r14, qword [r15] + CODE ("\x4D\x03\x75\x00") // add r14, [r13] + CODE ("\x41\x8A\x1E") // mov bl, [r14] + break; + case LEFT: + CODE ("\x41\x88\x1E") // mov [r14], bl + CODE ("\x48\xBF") WORD (arg) // mov rdi, "arg" + CODE ("\x49\x29\xFE") // sub r14, rdi -- optimistic + CODE ("\x48\xB8") WORD (left) // mov rax, "left" + CODE ("\xFF\xD0") // call rax + CODE ("\x41\x8A\x1E") // mov bl, [r14] + break; + + case INC: + CODE ("\x80\xC3") // add bl, "arg" + str_append_c (&program, arg); + break; + case DEC: + CODE ("\x80\xEB") // sub bl, "arg" + str_append_c (&program, arg); + break; + case SET: + CODE ("\xB3") // mov bl, "arg" + str_append_c (&program, arg); + break; + + case OUT: + CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl + CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout" + CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc" + CODE ("\xFF\xD0") // call rax + break; + case IN: + CODE ("\x48\xB8") WORD (cin) // mov rax, "cin" + CODE ("\xFF\xD0") // call rax + CODE ("\x88\xC3") // mov bl, al + break; + + case BEGIN: + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]" + break; + case END: + CODE ("\x84\xDB") // test bl, bl + CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]" + break; + } + } + // When there is a loop at the end we need to be able to jump past it + offsets[parsed_len] = program.len; + str_append_c (&program, '\xC3'); // ret + + // Now that we know where each instruction is, fill in relative jumps + for (size_t i = 0; i < parsed_len; i++) + { + if (parsed[i].cmd != BEGIN && parsed[i].cmd != END) + continue; + size_t fixup = offsets[i] + 4; + *(int32_t *)(program.str + fixup) = + ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4)); + } + free (offsets); + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + // Some systems may have W^X + void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!executable) + exit_fatal ("mmap: %s\n", strerror (errno)); + + memcpy (executable, program.str, program.len); + if (mprotect (executable, program.len, PROT_READ | PROT_EXEC)) + exit_fatal ("mprotect: %s\n", strerror (errno)); + + str_init (&data); + str_append_c (&data, 0); + ((void (*) (void)) executable)(); + return 0; +} diff --git a/interpreters/bf-optimizing.c b/interpreters/bf-optimizing.c new file mode 100644 index 0000000..b37c0f2 --- /dev/null +++ b/interpreters/bf-optimizing.c @@ -0,0 +1,213 @@ +#include +#include +#include +#include +#include +#include +#include + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xcalloc (size_t m, size_t n) +{ + void *p = calloc (m, n); + if (!p) + exit_fatal ("calloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->len = 0; + self->str = xcalloc (1, (self->alloc = 16)); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->str[self->len += n] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +struct str program; ///< Raw program +struct str data; ///< Data tape + +enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END }; +bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0 }; +struct instruction { enum command cmd; size_t arg; }; + +int +main (int argc, char *argv[]) +{ + (void) argc; str_init (&program); + (void) argv; str_init (&data); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + + FILE *input = fopen ("/dev/tty", "rb"); + if (!input) + exit_fatal ("can't open terminal for reading\n"); + +// - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + struct instruction *parsed = xcalloc (sizeof *parsed, program.len); + size_t parsed_len = 0; + + for (size_t i = 0; i < program.len; i++) + { + enum command cmd; + switch (program.str[i]) + { + case '>': cmd = RIGHT; break; + case '<': cmd = LEFT; break; + case '+': cmd = INC; break; + case '-': cmd = DEC; break; + case '.': cmd = OUT; break; + case ',': cmd = IN; break; + case '[': cmd = BEGIN; break; + case ']': cmd = END; break; + default: continue; + } + + if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd) + parsed_len++; + + parsed[parsed_len - 1].cmd = cmd; + parsed[parsed_len - 1].arg++; + } + +// - - Simple optimization pass - - - - - - - - - - - - - - - - - - - - - - - - + + size_t in = 0, out = 0; + for (; in < parsed_len; in++, out++) + { + if (in + 2 < parsed_len + && parsed[in ].cmd == BEGIN + && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1 + && parsed[in + 2].cmd == END) + { + parsed[out].cmd = SET; + parsed[out].arg = 0; + in += 2; + } + else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC) + parsed[--out].arg += parsed[in].arg; + else if (out != in) + parsed[out] = parsed[in]; + } + + parsed_len = out; + +// - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t nesting = 0; + size_t *stack = xcalloc (sizeof *stack, parsed_len); + for (size_t i = 0; i < parsed_len; i++) + { + switch (parsed[i].cmd) + { + case BEGIN: + stack[nesting++] = i; + break; + case END: + assert (nesting > 0); + + --nesting; + parsed[stack[nesting]].arg = i; + parsed[i].arg = stack[nesting]; + default: + break; + } + } + assert (nesting == 0); + +// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + size_t dataptr = 0; + str_append_c (&data, 0); + + for (size_t i = 0; i < parsed_len; i++) + { + size_t arg = parsed[i].arg; + switch (parsed[i].cmd) + { + case RIGHT: + assert (SIZE_MAX - dataptr > arg); + dataptr += arg; + + while (dataptr >= data.len) + str_append_c (&data, 0); + break; + case LEFT: + assert (dataptr >= arg); + dataptr -= arg; + break; + + case INC: data.str[dataptr] += arg; break; + case DEC: data.str[dataptr] -= arg; break; + case SET: data.str[dataptr] = arg; break; + + case OUT: + fputc (data.str[dataptr], stdout); + break; + case IN: + data.str[dataptr] = c = fgetc (input); + assert (c != EOF); + break; + + case BEGIN: if (!data.str[dataptr]) i = arg; break; + case END: if ( data.str[dataptr]) i = arg; break; + } + } + return 0; +} diff --git a/interpreters/bf.c b/interpreters/bf.c new file mode 100644 index 0000000..606c609 --- /dev/null +++ b/interpreters/bf.c @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include +#include + +#define exit_fatal(...) \ + do { \ + fprintf (stderr, "fatal: " __VA_ARGS__); \ + exit (EXIT_FAILURE); \ + } while (0) + +// --- Safe memory management -------------------------------------------------- + +static void * +xmalloc (size_t n) +{ + void *p = malloc (n); + if (!p) + exit_fatal ("malloc: %s\n", strerror (errno)); + return p; +} + +static void * +xrealloc (void *o, size_t n) +{ + void *p = realloc (o, n); + if (!p && n) + exit_fatal ("realloc: %s\n", strerror (errno)); + return p; +} + +// --- Dynamically allocated strings ------------------------------------------- + +struct str +{ + char *str; ///< String data, null terminated + size_t alloc; ///< How many bytes are allocated + size_t len; ///< How long the string actually is +}; + +static void +str_init (struct str *self) +{ + self->alloc = 16; + self->len = 0; + self->str = strcpy (xmalloc (self->alloc), ""); +} + +static void +str_ensure_space (struct str *self, size_t n) +{ + // We allocate at least one more byte for the terminating null character + size_t new_alloc = self->alloc; + while (new_alloc <= self->len + n) + new_alloc <<= 1; + if (new_alloc != self->alloc) + self->str = xrealloc (self->str, (self->alloc = new_alloc)); +} + +static void +str_append_data (struct str *self, const void *data, size_t n) +{ + str_ensure_space (self, n); + memcpy (self->str + self->len, data, n); + self->len += n; + self->str[self->len] = '\0'; +} + +static void +str_append_c (struct str *self, char c) +{ + str_append_data (self, &c, 1); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + struct str program; str_init (&program); + struct str data; str_init (&data); + + int c; + while ((c = fgetc (stdin)) != EOF) + str_append_c (&program, c); + if (ferror (stdin)) + exit_fatal ("can't read program\n"); + + FILE *input = fopen ("/dev/tty", "rb"); + if (!input) + exit_fatal ("can't open terminal for reading\n"); + + size_t dataptr = 0; + str_append_c (&data, 0); + + for (size_t i = 0; i < program.len; i++) + { + switch (program.str[i]) + { + long pairs; + case '>': + assert (dataptr != SIZE_MAX); + dataptr++; + if (dataptr == data.len) + str_append_c (&data, 0); + break; + case '<': + assert (dataptr != 0); + dataptr--; + break; + + case '+': data.str[dataptr]++; break; + case '-': data.str[dataptr]--; break; + + case '.': + fputc (data.str[dataptr], stdout); + break; + case ',': + data.str[dataptr] = c = fgetc (input); + assert (c != EOF); + break; + + case '[': + if (data.str[dataptr]) break; + + for (pairs = 0; i < program.len; i++) + { + switch (program.str[i]) + { + case '[': pairs++; break; + case ']': pairs--; break; + } + if (!pairs) + break; + } + assert (!pairs); + break; + case ']': + if (!data.str[dataptr]) break; + + for (pairs = 0; i != SIZE_MAX; i--) + { + switch (program.str[i]) + { + case '[': pairs--; break; + case ']': pairs++; break; + } + if (!pairs) + break; + } + assert (!pairs); + break; + default: + break; + } + } + return 0; +}