Browse Source

Initial commit

Přemysl Janouch 2 years ago
commit
128fb157b3
Signed by: Přemysl Janouch <p.janouch@gmail.com> GPG Key ID: B715679E3A361BE6

+ 15
- 0
LICENSE View File

@@ -0,0 +1,15 @@
1
+ Copyright (c) 2016, Přemysl Janouch <p.janouch@gmail.com>
2
+ All rights reserved.
3
+ 
4
+ Permission to use, copy, modify, and/or distribute this software for any
5
+ purpose with or without fee is hereby granted, provided that the above
6
+ copyright notice and this permission notice appear in all copies.
7
+ 
8
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
11
+ SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
13
+ OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
14
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+

+ 12
- 0
Makefile View File

@@ -0,0 +1,12 @@
1
+# All we need is C99 and POSIX, which this should make available
2
+CFLAGS = -std=gnu99
3
+NAMES = bfc-amd64-linux
4
+
5
+all: $(NAMES)
6
+
7
+%: %.c
8
+	$(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@
9
+clean:
10
+	rm -f $(NAMES)
11
+
12
+.PHONY: all clean

+ 49
- 0
README.adoc View File

@@ -0,0 +1,49 @@
1
+bfc
2
+===
3
+
4
+'bfc' is a small, fast, self-contained, optimizing Brainfuck compiler for Linux
5
+on Intel x86-64.
6
+
7
+Also included are several interpreters in various states of sophistication that
8
+document my progress as I was writing this, from the simplest approach to an
9
+optimizing JIT compiler.
10
+
11
+It's pretty easy to retarget the compiler; it just means redoing half the work.
12
+The compiler itself is platform agnostic.
13
+
14
+Building
15
+--------
16
+Build dependencies: a C99 compiler +
17
+Runtime dependencies: Linux
18
+
19
+ $ git clone https://github.com/pjanouch/bfc.git
20
+ $ cd bfc
21
+ $ make
22
+
23
+To obtain dumps of the intermediate representation, compile with `-DDEBUG`:
24
+
25
+ $ make CPPFLAGS=-DDEBUG
26
+
27
+Usage
28
+-----
29
+
30
+ ./bfc-amd64-linux [INPUT-FILE] [OUTPUT-FILE]
31
+
32
+When no input file is specified, stdin is used.  Similarly, the default output
33
+filename is a.out.  The resulting file can be run on the target platform.
34
+
35
+Contributing and Support
36
+------------------------
37
+Use this project's GitHub to report any bugs, request features, or submit pull
38
+requests.  If you want to discuss this project, or maybe just hang out with
39
+the developer, feel free to join me at irc://irc.janouch.name, channel #dev.
40
+
41
+License
42
+-------
43
+'bfc' is written by Přemysl Janouch <p.janouch@gmail.com>.
44
+
45
+You may use the software under the terms of the ISC license, the text of which
46
+is included within the package, or, at your option, you may relicense the work
47
+under the MIT or the Modified BSD License, as listed at the following site:
48
+
49
+http://www.gnu.org/licenses/license-list.html

+ 723
- 0
bfc-amd64-linux.c View File

@@ -0,0 +1,723 @@
1
+// This is an exercise in futility more than anything else
2
+#include <stdio.h>
3
+#include <stdlib.h>
4
+#include <string.h>
5
+#include <stdint.h>
6
+#include <stdbool.h>
7
+#include <assert.h>
8
+#include <errno.h>
9
+
10
+#define exit_fatal(...)                                                        \
11
+	do {                                                                       \
12
+		fprintf (stderr, "fatal: " __VA_ARGS__);                               \
13
+		exit (EXIT_FAILURE);                                                   \
14
+	} while (0)
15
+
16
+// --- Safe memory management --------------------------------------------------
17
+
18
/// Allocate a zeroed array of @a m items, each @a n bytes long.
/// Running out of memory terminates the program with a diagnostic.
/// For a zero-sized request the result may legitimately be a null pointer.
static void *
xcalloc (size_t m, size_t n)
{
	void *p = calloc (m, n);
	// calloc() is allowed to return null for empty requests, which must not
	// be mistaken for a failure; this mirrors the check in xrealloc()
	if (!p && m && n)
		exit_fatal ("calloc: %s\n", strerror (errno));
	return p;
}
26
+
27
/// Resize the allocation @a o to @a n bytes and return its new address.
/// A null result only counts as a failure for non-zero sizes, in which case
/// the program is terminated with a diagnostic.
static void *
xrealloc (void *o, size_t n)
{
	void *resized = realloc (o, n);
	if (n != 0 && resized == NULL)
		exit_fatal ("realloc: %s\n", strerror (errno));
	return resized;
}
35
+
36
+// --- Dynamically allocated strings -------------------------------------------
37
+
38
/// A growable byte buffer that is kept null-terminated by its helpers
struct str
{
	char *str;                          ///< Heap-allocated contents
	size_t alloc;                       ///< Capacity of the buffer in bytes
	size_t len;                         ///< Bytes in use, sans the terminator
};
44
+
45
+static void
46
+str_init (struct str *self)
47
+{
48
+	self->len = 0;
49
+	self->str = xcalloc (1, (self->alloc = 16));
50
+}
51
+
52
+static void
53
+str_ensure_space (struct str *self, size_t n)
54
+{
55
+	// We allocate at least one more byte for the terminating null character
56
+	size_t new_alloc = self->alloc;
57
+	while (new_alloc <= self->len + n)
58
+		new_alloc <<= 1;
59
+	if (new_alloc != self->alloc)
60
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
61
+}
62
+
63
+static void
64
+str_append_data (struct str *self, const void *data, size_t n)
65
+{
66
+	str_ensure_space (self, n);
67
+	memcpy (self->str + self->len, data, n);
68
+	self->str[self->len += n] = '\0';
69
+}
70
+
71
/// Append the single character @a c to @a self
static void
str_append_c (struct str *self, char c)
{
	const char byte[1] = { c };
	str_append_data (self, byte, sizeof byte);
}
76
+
77
+// --- Application -------------------------------------------------------------
78
+
79
/// Operations of the intermediate representation.  The first eight map
/// directly onto Brainfuck commands, the remaining ones are only ever
/// produced by the optimization passes.
enum command
{
	RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END,
	SET, EAT, INCACC, DECACC
};

/// Which commands get folded into one instruction with a repeat count
/// when they immediately follow each other in the source.
///
/// I/O must never be folded: the code generator emits exactly one call for
/// each IN and OUT instruction regardless of its argument, so grouping ",,"
/// would make the program consume one character of input instead of two.
bool grouped[DECACC + 1] =
{
	[RIGHT] = true, [LEFT] = true, [INC] = true, [DEC] = true,
};

/// One instruction: the command, the cell offset relative to the data pointer
/// that it applies to, and an argument whose meaning depends on the command
/// (repeat count, value to assign, or the index of an instruction to jump to)
struct instruction { enum command cmd; int offset; size_t arg; };
#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) }
88
+
89
+// - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
90
+
91
+#ifdef DEBUG
92
+static void
93
+debug_dump_instruction (FILE *fp, const struct instruction *in)
94
+{
95
+	const char *name;
96
+	switch (in->cmd)
97
+	{
98
+	case RIGHT:  name = "RIGHT "; break;
99
+	case LEFT:   name = "LEFT  "; break;
100
+	case INC:    name = "INC   "; break;
101
+	case DEC:    name = "DEC   "; break;
102
+	case OUT:    name = "OUT   "; break;
103
+	case IN:     name = "IN    "; break;
104
+	case BEGIN:  name = "BEGIN "; break;
105
+	case END:    name = "END   "; break;
106
+	case SET:    name = "SET   "; break;
107
+	case EAT:    name = "EAT   "; break;
108
+	case INCACC: name = "INCACC"; break;
109
+	case DECACC: name = "DECACC"; break;
110
+	}
111
+	fprintf (fp, "%s %zu", name, in->arg);
112
+	if (in->offset != 0)
113
+		fprintf (fp, " [%d]", in->offset);
114
+	fprintf (fp, "\n");
115
+}
116
+
117
+static void
118
+debug_dump (const char *filename, struct instruction *in, size_t len)
119
+{
120
+	FILE *fp = fopen (filename, "w");
121
+	long indent = 0;
122
+	for (size_t i = 0; i < len; i++)
123
+	{
124
+		if (in[i].cmd == END)
125
+			indent--;
126
+		for (long k = 0; k < indent; k++)
127
+			fputs ("  ", fp);
128
+		debug_dump_instruction (fp, &in[i]);
129
+		if (in[i].cmd == BEGIN)
130
+			indent++;
131
+	}
132
+	fclose (fp);
133
+}
134
+#else
135
+#define debug_dump(...)
136
+#endif
137
+
138
+// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
139
+
140
+static size_t
141
+optimize_assignment (struct instruction *irb, size_t irb_len)
142
+{
143
+	size_t in = 0, out = 0;
144
+	for (; in < irb_len; in++, out++)
145
+	{
146
+		if (in + 2 < irb_len
147
+		 && irb[in    ].cmd == BEGIN
148
+		 && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1
149
+		 && irb[in + 2].cmd == END)
150
+		{
151
+			irb[out] = INSTRUCTION (SET, 0, 0);
152
+			in += 2;
153
+		}
154
+		else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC)
155
+			irb[--out].arg += irb[in].arg;
156
+		else if (out != in)
157
+			irb[out] = irb[in];
158
+	}
159
+	return out;
160
+}
161
+
162
+// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT
163
+// and compress the LEFT/RIGHT sequences
164
+static size_t
165
+optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len)
166
+{
167
+	size_t in = 0, out = 0;
168
+	for (in = 0, out = 0; in < irb_len; in++, out++)
169
+	{
170
+		intptr_t dir = 0;
171
+		if (irb[in].cmd == RIGHT)
172
+			dir = irb[in].arg;
173
+		else if (irb[in].cmd == LEFT)
174
+			dir = -(intptr_t) irb[in].arg;
175
+		else
176
+		{
177
+			irb[out] = irb[in];
178
+			continue;
179
+		}
180
+
181
+		while (in + 2 < irb_len)
182
+		{
183
+			// An immediate offset has its limits on x86-64
184
+			if (dir < INT8_MIN || dir > INT8_MAX)
185
+				break;
186
+
187
+			intptr_t diff;
188
+			if (irb[in + 2].cmd == RIGHT)
189
+				diff = irb[in + 2].arg;
190
+			else if (irb[in + 2].cmd == LEFT)
191
+				diff = -(intptr_t) irb[in + 2].arg;
192
+			else
193
+				break;
194
+
195
+			int cmd = irb[in + 1].cmd;
196
+			if (cmd != INC && cmd != DEC && cmd != SET)
197
+				break;
198
+
199
+			irb[out] = irb[in + 1];
200
+			irb[out].offset = dir;
201
+
202
+			dir += diff;
203
+			out += 1;
204
+			in += 2;
205
+		}
206
+
207
+		for (; in + 1 < irb_len; in++)
208
+		{
209
+			if (irb[in + 1].cmd == RIGHT)
210
+				dir += irb[in + 1].arg;
211
+			else if (irb[in + 1].cmd == LEFT)
212
+				dir -= (intptr_t) irb[in + 1].arg;
213
+			else
214
+				break;
215
+		}
216
+
217
+		if (!dir)
218
+			out--;
219
+		else if (dir > 0)
220
+			irb[out] = INSTRUCTION (RIGHT, 0, dir);
221
+		else
222
+			irb[out] = INSTRUCTION (LEFT, 0, -dir);
223
+	}
224
+	return out;
225
+}
226
+
227
+// Try to eliminate loops that eat a cell and add/subtract its value
228
+// to/from some other cell
229
+static size_t
230
+optimize_inc_dec_loops (struct instruction *irb, size_t irb_len)
231
+{
232
+	size_t in = 0, out = 0;
233
+	for (in = 0, out = 0; in < irb_len; in++, out++)
234
+	{
235
+		irb[out] = irb[in];
236
+		if (irb[in].cmd != BEGIN)
237
+			continue;
238
+
239
+		bool ok = false;
240
+		size_t count = 0;
241
+		for (size_t k = in + 1; k < irb_len; k++)
242
+		{
243
+			if (irb[k].cmd == END)
244
+			{
245
+				ok = true;
246
+				break;
247
+			}
248
+			if (irb[k].cmd != INC
249
+			 && irb[k].cmd != DEC)
250
+				break;
251
+			count++;
252
+		}
253
+		if (!ok)
254
+			continue;
255
+
256
+		// Stable sort operations by their offsets, put [0] first
257
+		bool sorted;
258
+		do
259
+		{
260
+			sorted = true;
261
+			for (size_t k = 1; k < count; k++)
262
+			{
263
+				if (irb[in + k].offset == 0)
264
+					continue;
265
+				if (irb[in + k + 1].offset != 0
266
+				 && irb[in + k].offset <= irb[in + k + 1].offset)
267
+					continue;
268
+
269
+				struct instruction tmp = irb[in + k + 1];
270
+				irb[in + k + 1] = irb[in + k];
271
+				irb[in + k] = tmp;
272
+				sorted = false;
273
+			}
274
+		}
275
+		while (!sorted);
276
+
277
+		// Abort the optimization on duplicate offsets (complication with [0])
278
+		for (size_t k = 1; k < count; k++)
279
+			if (irb[in + k].offset == irb[in + k + 1].offset)
280
+				ok = false;
281
+		// XXX: can't make the code longer either
282
+		for (size_t k = 1; k <= count; k++)
283
+			if (irb[in + k].arg != 1)
284
+				ok = false;
285
+		if (!ok
286
+		 || irb[in + 1].cmd != DEC
287
+		 || irb[in + 1].offset != 0)
288
+			continue;
289
+
290
+		int min_safe_left_offset = 0;
291
+		if (in > 1 && irb[in - 1].cmd == RIGHT)
292
+			min_safe_left_offset = -irb[in - 1].arg;
293
+
294
+		bool cond_needed_for_safety = false;
295
+		for (size_t k = 0; k < count; k++)
296
+			if (irb[in + k + 1].offset < min_safe_left_offset)
297
+			{
298
+				cond_needed_for_safety = true;
299
+				break;
300
+			}
301
+
302
+		in++;
303
+		if (cond_needed_for_safety)
304
+			out++;
305
+
306
+		irb[out] = INSTRUCTION (EAT, 0, 0);
307
+		for (size_t k = 1; k < count; k++)
308
+			irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC
309
+				? INCACC : DECACC, irb[in + k].offset, 0);
310
+
311
+		in += count;
312
+		out += count;
313
+
314
+		if (cond_needed_for_safety)
315
+			irb[out] = INSTRUCTION (END, 0, 0);
316
+		else
317
+			out--;
318
+	}
319
+	return out;
320
+}
321
+
322
+// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
323
+
324
+static void
325
+pair_loops (struct instruction *irb, size_t irb_len)
326
+{
327
+	size_t nesting = 0;
328
+	size_t *stack = xcalloc (sizeof *stack, irb_len);
329
+	for (size_t i = 0; i < irb_len; i++)
330
+	{
331
+		switch (irb[i].cmd)
332
+		{
333
+		case BEGIN:
334
+			stack[nesting++] = i;
335
+			break;
336
+		case END:
337
+			if (nesting <= 0)
338
+				exit_fatal ("unbalanced loops\n");
339
+
340
+			--nesting;
341
+			irb[stack[nesting]].arg = i + 1;
342
+
343
+			// Looping can be disabled by optimizations
344
+			if (irb[i].arg)
345
+				irb[i].arg = stack[nesting] + 1;
346
+		default:
347
+			break;
348
+		}
349
+	}
350
+	free (stack);
351
+
352
+	if (nesting != 0)
353
+		exit_fatal ("unbalanced loops\n");
354
+}
355
+
356
+// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357
+
358
+int
359
+main (int argc, char *argv[])
360
+{
361
+	if (argc > 3)
362
+		exit_fatal ("usage: %s [INPUT-FILE]\n", argv[0]);
363
+
364
+	FILE *input_file = stdin;
365
+	if (argc > 1 && !(input_file = fopen (argv[1], "r")))
366
+		exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno));
367
+
368
+	const char *output_path = "a.out";
369
+	if (argc > 2)
370
+		output_path = argv[2];
371
+
372
+	struct str buffer;
373
+	str_init (&buffer);
374
+
375
+	int c;
376
+	while ((c = fgetc (input_file)) != EOF)
377
+		str_append_c (&buffer, c);
378
+	if (ferror (input_file))
379
+		exit_fatal ("can't read program\n");
380
+	fclose (input_file);
381
+
382
+// - - Decode, group and optimize  - - - - - - - - - - - - - - - - - - - - - - -
383
+
384
+	// This is our Intermediate Representation Buffer
385
+	struct instruction *irb = xcalloc (sizeof *irb, buffer.len);
386
+	size_t irb_len = 0;
387
+
388
+	for (size_t i = 0; i < buffer.len; i++)
389
+	{
390
+		enum command cmd;
391
+		switch (buffer.str[i])
392
+		{
393
+		case '>': cmd = RIGHT; break;
394
+		case '<': cmd = LEFT;  break;
395
+		case '+': cmd = INC;   break;
396
+		case '-': cmd = DEC;   break;
397
+		case '.': cmd = OUT;   break;
398
+		case ',': cmd = IN;    break;
399
+		case '[': cmd = BEGIN; break;
400
+		case ']': cmd = END;   break;
401
+		default:  continue;
402
+		}
403
+
404
+		// The most basic optimization is to group identical commands together
405
+		if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd)
406
+			irb_len++;
407
+
408
+		irb[irb_len - 1].cmd = cmd;
409
+		irb[irb_len - 1].arg++;
410
+	}
411
+
412
+	debug_dump ("bf-no-opt.txt",            irb, irb_len);
413
+	irb_len = optimize_assignment          (irb, irb_len);
414
+	debug_dump ("bf-pre-offsets.txt",       irb, irb_len);
415
+	irb_len = optimize_offseted_inc_dec    (irb, irb_len);
416
+	debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len);
417
+	irb_len = optimize_inc_dec_loops       (irb, irb_len);
418
+	debug_dump ("bf-optimized.txt",         irb, irb_len);
419
+	pair_loops                             (irb, irb_len);
420
+	debug_dump ("bf-final.txt",             irb, irb_len);
421
+
422
+// - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
423
+
424
+	str_init (&buffer);
425
+	size_t *offsets    = xcalloc (sizeof *offsets,    irb_len + 1);
426
+	bool   *sets_flags = xcalloc (sizeof *sets_flags, irb_len);
427
+
428
+#define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); }
429
+#define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 }
430
+#define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); }
431
+#define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); }
432
+#define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); }
433
+#define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); }
434
+
435
+	enum
436
+	{
437
+		ELF_LOAD_CODE = 0x400000,             // where code is loaded (usual)
438
+		ELF_LOAD_DATA = 0x800000              // where the tape is placed
439
+	};
440
+
441
+	CODE ("\xB8") DD (ELF_LOAD_DATA)          // mov rax, "ELF_LOAD_DATA"
442
+	CODE ("\x30\xDB")                         // xor bl, bl
443
+
444
+	for (size_t i = 0; i < irb_len; i++)
445
+	{
446
+		offsets[i] = buffer.len;
447
+
448
+		size_t arg = irb[i].arg;
449
+		assert (arg <= UINT32_MAX);
450
+
451
+		int offset = irb[i].offset;
452
+		assert (offset <= INT8_MAX && offset >= INT8_MIN);
453
+
454
+		// Don't save what we've just loaded
455
+		if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
456
+			if (i < 2 || i + 1 >= irb_len
457
+			 || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT)
458
+			 || irb[i - 1].cmd != BEGIN
459
+			 || irb[i + 1].cmd != END)
460
+				CODE ("\x88\x18")             // mov [rax], bl
461
+
462
+		switch (irb[i].cmd)
463
+		{
464
+		case RIGHT:
465
+			// add rax, "arg" -- optimistic, no boundary checking
466
+			if (arg > INT8_MAX) { CODE ("\x48\x05")     DD (arg) }
467
+			else                { CODE ("\x48\x83\xC0") DB (arg) }
468
+			break;
469
+		case LEFT:
470
+			// sub rax, "arg" -- optimistic, no boundary checking
471
+			if (arg > INT8_MAX) { CODE ("\x48\x2D")     DD (arg) }
472
+			else                { CODE ("\x48\x83\xE8") DB (arg) }
473
+			break;
474
+
475
+		case EAT:
476
+			// NOTE: the kernel destroys rcx and r11 on syscalls,
477
+			//   there must be no OUT or IN between EAT and INCACC/DECACC
478
+			CODE ("\x88\xD9" "\x30\xDB")      // mov cl, bl; xor bl, bl
479
+			sets_flags[i] = true;
480
+			break;
481
+		case INCACC:
482
+			if (offset)
483
+			{
484
+				CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl
485
+			}
486
+			else
487
+			{
488
+				CODE ("\x00\xCB")             // add bl, cl
489
+				sets_flags[i] = true;
490
+			}
491
+			break;
492
+		case DECACC:
493
+			if (offset)
494
+			{
495
+				CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl
496
+			}
497
+			else
498
+			{
499
+				CODE ("\x28\xCB")             // sub bl, cl
500
+				sets_flags[i] = true;
501
+			}
502
+			break;
503
+
504
+		case INC:
505
+			if (offset)
506
+			{
507
+				CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg"
508
+			}
509
+			else
510
+			{
511
+				CODE ("\x80\xC3")             // add bl, "arg"
512
+				sets_flags[i] = true;
513
+			}
514
+			DB (arg)
515
+			break;
516
+		case DEC:
517
+			if (offset)
518
+			{
519
+				CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg"
520
+			}
521
+			else
522
+			{
523
+				CODE ("\x80\xEB")             // sub bl, "arg"
524
+				sets_flags[i] = true;
525
+			}
526
+			DB (arg)
527
+			break;
528
+		case SET:
529
+			if (offset)
530
+			{
531
+				CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg"
532
+			}
533
+			else
534
+				CODE ("\xB3")                 // mov bl, "arg"
535
+			DB (arg)
536
+			break;
537
+
538
+		case OUT:
539
+			CODE ("\xE8") DD (0)              // call "write"
540
+			break;
541
+		case IN:
542
+			CODE ("\xE8") DD (0)              // call "read"
543
+			break;
544
+
545
+		case BEGIN:
546
+			// Don't test the register when the flag has been set already;
547
+			// this doesn't have much of an effect in practice
548
+			if (!i || !sets_flags[i - 1])
549
+				CODE ("\x84\xDB")             // test bl, bl
550
+			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]"
551
+			break;
552
+		case END:
553
+			// We know that the cell is zero, make this an "if", not a "loop";
554
+			// this doesn't have much of an effect in practice
555
+			if (!arg)
556
+				break;
557
+
558
+			if (!i || !sets_flags[i - 1])
559
+				CODE ("\x84\xDB")             // test bl, bl
560
+			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]"
561
+			break;
562
+		}
563
+
564
+		// No sense in reading it out when we overwrite it immediately;
565
+		// this doesn't have much of an effect in practice
566
+		if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
567
+			if (i + 1 >= irb_len
568
+			 || irb[i + 1].cmd != SET
569
+			 || irb[i + 1].offset != 0)
570
+				CODE ("\x8A\x18")             // mov bl, [rax]
571
+	}
572
+	// When there is a loop at the end we need to be able to jump past it
573
+	offsets[irb_len] = buffer.len;
574
+
575
+	// Write an epilog which handles all the OS interfacing
576
+	//
577
+	// System V x86-64 ABI:
578
+	//   rax <-> both syscall number and return value
579
+	//   args -> rdi, rsi, rdx, r10, r8, r9
580
+	//   trashed <- rcx, r11
581
+
582
+	enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 };
583
+
584
+	CODE ("\xB8") DD (SYS_EXIT)  // mov eax, 0x3c
585
+	CODE ("\x48\x31\xFF")        // xor rdi, rdi
586
+	CODE ("\x0F\x05")            // syscall
587
+
588
+	size_t fatal_offset = buffer.len;
589
+	CODE ("\x48\x89\xF7")        // mov rdi, rsi -- use the string in rsi
590
+	CODE ("\x30\xC0")            // xor al, al -- look for the nil byte
591
+	CODE ("\x48\x31\xC9")        // xor rcx, rcx
592
+	CODE ("\x48\xF7\xD1")        // not rcx -- start from -1
593
+	CODE ("\xFC" "\xF2\xAE")     // cld; repne scasb -- decrement until found
594
+	CODE ("\x48\xF7\xD1")        // not rcx
595
+	CODE ("\x48\x8D\x51\xFF")    // lea rdx, [rcx-1] -- save length in rdx
596
+	CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
597
+	CODE ("\xBF") DD (2)         // mov edi, "STDERR_FILENO"
598
+	CODE ("\x0F\x05")            // syscall
599
+
600
+	CODE ("\xB8") DD (SYS_EXIT)  // mov eax, "SYS_EXIT"
601
+	CODE ("\xBF") DD (1)         // mov edi, "EXIT_FAILURE"
602
+	CODE ("\x0F\x05")            // syscall
603
+
604
+	size_t read_offset = buffer.len;
605
+	CODE ("\x50")                // push rax -- save tape position
606
+	CODE ("\xB8") DD (SYS_READ)  // mov eax, "SYS_READ"
607
+	CODE ("\x48\x89\xC7")        // mov rdi, rax -- STDIN_FILENO
608
+	CODE ("\x66\x6A\x00")        // push word 0 -- the default value for EOF
609
+	CODE ("\x48\x89\xE6")        // mov rsi, rsp -- the char starts at rsp
610
+	CODE ("\xBA") DD (1)         // mov edx, 1 -- count
611
+	CODE ("\x0F\x05")            // syscall
612
+	CODE ("\x66\x5B")            // pop bx
613
+
614
+	CODE ("\x48\x83\xF8\x00")    // cmp rax, 0
615
+	CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel read_message]
616
+	CODE ("\x7C")                // jl "fatal_offset" -- write failure message
617
+	DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
618
+	CODE ("\x58")                // pop rax -- restore tape position
619
+	CODE ("\xC3")                // ret
620
+	CODE ("fatal: read failed\n\0")
621
+
622
+	size_t write_offset = buffer.len;
623
+	CODE ("\x50")                // push rax -- save tape position
624
+	CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
625
+	CODE ("\x48\x89\xC7")        // mov rdi, rax -- STDOUT_FILENO
626
+	CODE ("\x66\x53")            // push bx
627
+	CODE ("\x48\x89\xE6")        // mov rsi, rsp -- the char starts at rsp
628
+	CODE ("\xBA") DD (1)         // mov edx, 1 -- count
629
+	CODE ("\x0F\x05")            // syscall
630
+	CODE ("\x66\x5B")            // pop bx
631
+
632
+	CODE ("\x48\x83\xF8\x00")    // cmp rax, 0
633
+	CODE ("\x48\x8D\x35") DD (4) // lea rsi, [rel write_message]
634
+	CODE ("\x7C")                // jl "fatal_offset" -- write failure message
635
+	DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
636
+	CODE ("\x58")                // pop rax -- restore tape position
637
+	CODE ("\xC3")                // ret
638
+	CODE ("fatal: write failed\n\0")
639
+
640
+	// Now that we know where each instruction is, fill in relative jumps
641
+	for (size_t i = 0; i < irb_len; i++)
642
+	{
643
+		if (!irb[i].arg)
644
+			continue;
645
+
646
+		// This must accurately reflect the code generators
647
+		intptr_t target, fixup = offsets[i];
648
+		if (irb[i].cmd == BEGIN || irb[i].cmd == END)
649
+		{
650
+			fixup += (i && sets_flags[i - 1]) ? 2 : 4;
651
+			target = offsets[irb[i].arg];
652
+		}
653
+		else if (irb[i].cmd == IN)  { fixup++; target = read_offset;  }
654
+		else if (irb[i].cmd == OUT) { fixup++; target = write_offset; }
655
+		else continue;
656
+
657
+		uint64_t v = target - (fixup + 4);
658
+		memcpy (buffer.str + fixup, LE (v), 4);
659
+	}
660
+	free (offsets);
661
+	free (sets_flags);
662
+
663
+// - - Output  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
664
+
665
+	// Now that we know how long the machine code is, we can write the header.
666
+	// Note that for PIE we would need to depend on the dynamic linker, so no.
667
+	//
668
+	// Recommended reading:
669
+	//   http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
670
+	//   man 5 elf
671
+
672
+	struct str code = buffer;
673
+	str_init (&buffer);
674
+
675
+	enum
676
+	{
677
+		ELF_HEADER_SIZE = 64,           // size of the ELF header
678
+		ELF_PROGRAM_ENTRY_SIZE = 56,    // size of a program header
679
+		ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE
680
+	};
681
+
682
+	// ELF header
683
+	CODE ("\x7F" "ELF\x02\x01\x01")     // ELF, 64-bit, little endian, v1
684
+	CODE ("\x00\x00" "\0\0\0\0\0\0\0")  // Unix System V ABI, v0, padding
685
+	DW (2) DW (62) DD (1)               // executable, x86-64, v1
686
+	DQ (ELF_LOAD_CODE + ELF_META_SIZE)  // entry point address
687
+	DQ (ELF_HEADER_SIZE) DQ (0)         // program, section header offset
688
+	DD (0)                              // no processor-specific flags
689
+	DW (ELF_HEADER_SIZE)                // ELF header size
690
+	DW (ELF_PROGRAM_ENTRY_SIZE) DW (2)  // program hdr tbl entry size, count
691
+	DW (0) DW (0)                       // section hdr tbl entry size, count
692
+	DW (0)                              // no section index for strings
693
+
694
+	// Program header for code
695
+	// The entry point address seems to require alignment, so map start of file
696
+	DD (1) DD (5)                       // PT_LOAD, PF_R | PF_X
697
+	DQ (0)                              // offset within the file
698
+	DQ (ELF_LOAD_CODE)                  // address in virtual memory
699
+	DQ (ELF_LOAD_CODE)                  // address in physical memory
700
+	DQ (code.len + ELF_META_SIZE)       // length within the file
701
+	DQ (code.len + ELF_META_SIZE)       // length within memory
702
+	DQ (4096)                           // segment alignment
703
+
704
+	// Program header for the tape
705
+	DD (1) DD (6)                       // PT_LOAD, PF_R | PF_W
706
+	DQ (0)                              // offset within the file
707
+	DQ (ELF_LOAD_DATA)                  // address in virtual memory
708
+	DQ (ELF_LOAD_DATA)                  // address in physical memory
709
+	DQ (0)                              // length within the file
710
+	DQ (1 << 20)                        // one megabyte of memory
711
+	DQ (4096)                           // segment alignment
712
+
713
+	// The section header table is optional and we don't need it for anything
714
+
715
+	FILE *output_file;
716
+	if (!(output_file = fopen (output_path, "w")))
717
+		exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno));
718
+
719
+	fwrite (buffer.str, buffer.len, 1, output_file);
720
+	fwrite (code.str, code.len, 1, output_file);
721
+	fclose (output_file);
722
+	return 0;
723
+}

+ 14
- 0
interpreters/Makefile View File

@@ -0,0 +1,14 @@
1
+CC = c99
2
+CFLAGS = -O3
3
+
4
+NAMES = bf bf-faster-loops bf-optimizing \
5
+	bf-jit bf-jit-opt bf-jit-unsafe bf-jit-unsafe-opt
6
+
7
+all: $(NAMES)
8
+
9
+%: %.c
10
+	$(CC) $(CPPFLAGS) $(CFLAGS) $< -o $@
11
+clean:
12
+	rm -f $(NAMES)
13
+
14
+.PHONY: all clean

+ 15
- 0
interpreters/README.adoc View File

@@ -0,0 +1,15 @@
1
+This directory contains several Brainfuck interpreters in various states of
2
+sophistication, from the simplest approach to an optimizing JIT compiler:
3
+
4
+ * `bf.c` is the stupidest one and the oldest by far
5
+ * `bf-faster-loops.c` precomputes loop jumps
6
+ * `bf-optimizing.c` improves on that by changing `[-]+` loops into assignments
7
+ * `bf-jit.c` adds JIT compilation for Intel x86-64
8
+ * `bf-jit-opt.c` tries a bit harder to avoid looping on the current value
9
+ * `bf-jit-unsafe.c` abolishes all boundary checks when moving across the tape
10
+ * `bf-jit-unsafe-opt.c` makes use of immediate offsets to modify values
11
+
12
+I recommend using a tool such as 'meld' to view the differences.
13
+
14
+Just run `make` in this directory to have them all built, and append
15
+`CPPFLAGS=-DDEBUG` to get dumps of the IR for the more sophisticated JITs.

+ 151
- 0
interpreters/bf-faster-loops.c View File

@@ -0,0 +1,151 @@
1
+#include <stdio.h>
2
+#include <stdlib.h>
3
+#include <string.h>
4
+#include <stdint.h>
5
+#include <assert.h>
6
+#include <errno.h>
7
+
8
+#define exit_fatal(...)                                                        \
9
+	do {                                                                       \
10
+		fprintf (stderr, "fatal: " __VA_ARGS__);                               \
11
+		exit (EXIT_FAILURE);                                                   \
12
+	} while (0)
13
+
14
+// --- Safe memory management --------------------------------------------------
15
+
16
/// Allocate @a n bytes of uninitialized memory.
/// Running out of memory terminates the program with a diagnostic.
/// For a zero-sized request the result may legitimately be a null pointer.
static void *
xmalloc (size_t n)
{
	void *p = malloc (n);
	// malloc() is allowed to return null for empty requests, which must not
	// be mistaken for a failure; this mirrors the check in xrealloc()
	if (!p && n)
		exit_fatal ("malloc: %s\n", strerror (errno));
	return p;
}
23
+}
24
+
25
/// Change the size of the allocation @a o to @a n bytes.
/// Failing to obtain a non-empty block terminates the program.
static void *
xrealloc (void *o, size_t n)
{
	void *moved = realloc (o, n);
	if (moved != NULL || n == 0)
		return moved;

	exit_fatal ("realloc: %s\n", strerror (errno));
	return NULL;
}
33
+
34
+// --- Dynamically allocated strings -------------------------------------------
35
+
36
/// A growable byte buffer that is kept null-terminated by its helpers
struct str
{
	char *str;                          ///< Heap-allocated contents
	size_t alloc;                       ///< Capacity of the buffer in bytes
	size_t len;                         ///< Bytes in use, sans the terminator
};
42
+
43
+static void
44
+str_init (struct str *self)
45
+{
46
+	self->alloc = 16;
47
+	self->len = 0;
48
+	self->str = strcpy (xmalloc (self->alloc), "");
49
+}
50
+
51
+static void
52
+str_ensure_space (struct str *self, size_t n)
53
+{
54
+	// We allocate at least one more byte for the terminating null character
55
+	size_t new_alloc = self->alloc;
56
+	while (new_alloc <= self->len + n)
57
+		new_alloc <<= 1;
58
+	if (new_alloc != self->alloc)
59
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
60
+}
61
+
62
+static void
63
+str_append_data (struct str *self, const void *data, size_t n)
64
+{
65
+	str_ensure_space (self, n);
66
+	memcpy (self->str + self->len, data, n);
67
+	self->str[self->len += n] = '\0';
68
+}
69
+
70
/// Append the single character @a c to @a self
static void
str_append_c (struct str *self, char c)
{
	const char byte[1] = { c };
	str_append_data (self, byte, sizeof byte);
}
75
+
76
+// --- Main --------------------------------------------------------------------
77
+
78
+int
79
+main (int argc, char *argv[])
80
+{
81
+	struct str program; str_init (&program);
82
+	struct str data;    str_init (&data);
83
+
84
+	int c;
85
+	while ((c = fgetc (stdin)) != EOF)
86
+		str_append_c (&program, c);
87
+	if (ferror (stdin))
88
+		exit_fatal ("can't read program\n");
89
+
90
+	FILE *input = fopen ("/dev/tty", "rb");
91
+	if (!input)
92
+		exit_fatal ("can't open terminal for reading\n");
93
+
94
+	size_t *pairs = xmalloc (sizeof *pairs * program.len);
95
+	size_t *stack = xmalloc (sizeof *stack * program.len);
96
+
97
+	size_t nesting = 0;
98
+	for (size_t i = 0; i < program.len; i++)
99
+	{
100
+		switch (program.str[i])
101
+		{
102
+		case '[':
103
+			stack[nesting++] = i;
104
+			break;
105
+		case ']':
106
+			assert (nesting > 0);
107
+
108
+			--nesting;
109
+			pairs[stack[nesting]] = i;
110
+			pairs[i] = stack[nesting];
111
+		}
112
+	}
113
+	assert (nesting == 0);
114
+
115
+	size_t dataptr = 0;
116
+	str_append_c (&data, 0);
117
+
118
+	for (size_t i = 0; i < program.len; i++)
119
+	{
120
+		switch (program.str[i])
121
+		{
122
+		case '>':
123
+			assert (dataptr != SIZE_MAX);
124
+			if (++dataptr == data.len)
125
+				str_append_c (&data, 0);
126
+			break;
127
+		case '<':
128
+			assert (dataptr != 0);
129
+			dataptr--;
130
+			break;
131
+
132
+		case '+': data.str[dataptr]++; break;
133
+		case '-': data.str[dataptr]--; break;
134
+
135
+		case '.':
136
+			fputc (data.str[dataptr], stdout);
137
+			break;
138
+		case ',':
139
+			data.str[dataptr] = c = fgetc (input);
140
+			assert (c != EOF);
141
+			break;
142
+
143
+		case '[': if (!data.str[dataptr]) i = pairs[i]; break;
144
+		case ']': if ( data.str[dataptr]) i = pairs[i]; break;
145
+
146
+		default:
147
+			break;
148
+		}
149
+	}
150
+	return 0;
151
+}

+ 495
- 0
interpreters/bf-jit-opt.c View File

@@ -0,0 +1,495 @@
1
+// This is an exercise in futility more than anything else
2
+#define _GNU_SOURCE
3
+
4
+#include <stdio.h>
5
+#include <stdlib.h>
6
+#include <string.h>
7
+#include <stdint.h>
8
+#include <stdbool.h>
9
+#include <assert.h>
10
+#include <errno.h>
11
+
12
+#if (defined __x86_64__ || defined __amd64__) && defined __unix__
13
+	#include <sys/mman.h>
14
+#else
15
+	#error Platform not supported
16
+#endif
17
+
18
+#define exit_fatal(...)                                                        \
19
+	do {                                                                       \
20
+		fprintf (stderr, "fatal: " __VA_ARGS__);                               \
21
+		exit (EXIT_FAILURE);                                                   \
22
+	} while (0)
23
+
24
+// --- Safe memory management --------------------------------------------------
25
+
26
/// Allocate a zeroed array of @a m items, each @a n bytes long, or die trying.
/// calloc() is allowed to return NULL for zero-sized requests, so a null
/// result is only fatal when a non-empty block was asked for; this mirrors
/// what xrealloc() does.
static void *
xcalloc (size_t m, size_t n)
{
	void *p = calloc (m, n);
	if (!p && m && n)
		exit_fatal ("calloc: %s\n", strerror (errno));
	return p;
}
34
+
35
/// Resize the block at @a o to @a n bytes, terminating the program on failure.
/// A null result for a zero-sized request is not considered an error.
static void *
xrealloc (void *o, size_t n)
{
	void *resized = realloc (o, n);
	if (n != 0 && resized == NULL)
		exit_fatal ("realloc: %s\n", strerror (errno));
	return resized;
}
43
+
44
+// --- Dynamically allocated strings -------------------------------------------
45
+
46
/// Growable byte buffer; the appending functions keep a null character right
/// behind the data, which is not counted in `len`
struct str
{
	char *str;                          ///< String data, null terminated
	size_t alloc;                       ///< How many bytes are allocated
	size_t len;                         ///< How long the string actually is
};
52
+
53
+static void
54
+str_init (struct str *self)
55
+{
56
+	self->len = 0;
57
+	self->str = xcalloc (1, (self->alloc = 16));
58
+}
59
+
60
+static void
61
+str_ensure_space (struct str *self, size_t n)
62
+{
63
+	// We allocate at least one more byte for the terminating null character
64
+	size_t new_alloc = self->alloc;
65
+	while (new_alloc <= self->len + n)
66
+		new_alloc <<= 1;
67
+	if (new_alloc != self->alloc)
68
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
69
+}
70
+
71
+static void
72
+str_append_data (struct str *self, const void *data, size_t n)
73
+{
74
+	str_ensure_space (self, n);
75
+	memcpy (self->str + self->len, data, n);
76
+	self->str[self->len += n] = '\0';
77
+}
78
+
79
/// Append the single byte @a c to the string
static void
str_append_c (struct str *self, char c)
{
	const char byte[1] = { c };
	str_append_data (self, byte, sizeof byte);
}
84
+
85
+// --- Application -------------------------------------------------------------
86
+
87
struct str data;                        ///< Data tape
// The generated machine code reads this variable through its address;
// presumably that is the reason for it being volatile
volatile size_t dataptr;                ///< Current location on the tape
FILE *input;                            ///< User input

// Operations of the intermediate representation.  SET, EAT, INCACC and DECACC
// have no Brainfuck character, they are only created by optimization passes.
enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END,
	EAT, INCACC, DECACC };
// Indexed by enum command: whether the decoder merges a run of the same
// command into one instruction, counting the repetitions in its argument
bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
// The argument starts out as a repeat count; loop pairing later overwrites it
// in BEGIN and END with the index of the instruction to jump to
struct instruction { enum command cmd; size_t arg; };
// Shorthand for an instruction compound literal
#define INSTRUCTION(c, a) (struct instruction) { (c), (a) }
96
+
97
+// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
98
+
99
+// Some things I just really don't want to write in assembly even though it
100
+// is effectively a big performance hit, eliminating the advantage of JIT
101
+
102
+static void
103
+right (size_t arg)
104
+{
105
+	assert (SIZE_MAX - dataptr > arg);
106
+	dataptr += arg;
107
+
108
+	while (dataptr >= data.len)
109
+		str_append_c (&data, 0);
110
+}
111
+
112
+static void
113
+left (size_t arg)
114
+{
115
+	assert (dataptr >= arg);
116
+	dataptr -= arg;
117
+}
118
+
119
+static void
120
+cin (void)
121
+{
122
+	int c;
123
+	data.str[dataptr] = c = fgetc (input);
124
+	assert (c != EOF);
125
+}
126
+
127
+// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
128
+
129
+#ifdef DEBUG
130
+static void
131
+debug_dump (const char *filename, struct instruction *in, size_t len)
132
+{
133
+	FILE *fp = fopen (filename, "w");
134
+	long indent = 0;
135
+	for (size_t i = 0; i < len; i++)
136
+	{
137
+		if (in[i].cmd == END)
138
+			indent--;
139
+		for (long k = 0; k < indent; k++)
140
+			fprintf (fp, "  ");
141
+
142
+		switch (in[i].cmd)
143
+		{
144
+		case RIGHT:  fprintf (fp, "RIGHT  %zu\n", in[i].arg); break;
145
+		case LEFT:   fprintf (fp, "LEFT   %zu\n", in[i].arg); break;
146
+		case INC:    fprintf (fp, "INC    %zu\n", in[i].arg); break;
147
+		case DEC:    fprintf (fp, "DEC    %zu\n", in[i].arg); break;
148
+		case OUT:    fprintf (fp, "OUT    %zu\n", in[i].arg); break;
149
+		case IN:     fprintf (fp, "IN     %zu\n", in[i].arg); break;
150
+		case BEGIN:  fprintf (fp, "BEGIN  %zu\n", in[i].arg); break;
151
+		case END:    fprintf (fp, "END    %zu\n", in[i].arg); break;
152
+		case SET:    fprintf (fp, "SET    %zu\n", in[i].arg); break;
153
+		case EAT:    fprintf (fp, "EAT    %zu\n", in[i].arg); break;
154
+		case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break;
155
+		case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break;
156
+		}
157
+		if (in[i].cmd == BEGIN)
158
+			indent++;
159
+	}
160
+	fclose (fp);
161
+}
162
+#else
163
+#define debug_dump(...)
164
+#endif
165
+
166
+int
167
+main (int argc, char *argv[])
168
+{
169
+	(void) argc;
170
+	(void) argv;
171
+
172
+	struct str program;
173
+	str_init (&program);
174
+
175
+	int c;
176
+	while ((c = fgetc (stdin)) != EOF)
177
+		str_append_c (&program, c);
178
+	if (ferror (stdin))
179
+		exit_fatal ("can't read program\n");
180
+	if (!(input = fopen ("/dev/tty", "rb")))
181
+		exit_fatal ("can't open terminal for reading\n");
182
+
183
+// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - -
184
+
185
+	struct instruction *parsed = xcalloc (sizeof *parsed, program.len);
186
+	size_t parsed_len = 0;
187
+
188
+	for (size_t i = 0; i < program.len; i++)
189
+	{
190
+		enum command cmd;
191
+		switch (program.str[i])
192
+		{
193
+		case '>': cmd = RIGHT; break;
194
+		case '<': cmd = LEFT;  break;
195
+		case '+': cmd = INC;   break;
196
+		case '-': cmd = DEC;   break;
197
+		case '.': cmd = OUT;   break;
198
+		case ',': cmd = IN;    break;
199
+		case '[': cmd = BEGIN; break;
200
+		case ']': cmd = END;   break;
201
+		default:  continue;
202
+		}
203
+
204
+		// The most basic optimization is to group identical commands together
205
+		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd)
206
+			parsed_len++;
207
+
208
+		parsed[parsed_len - 1].cmd = cmd;
209
+		parsed[parsed_len - 1].arg++;
210
+	}
211
+
212
+// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
213
+
214
+	debug_dump ("bf-no-opt.txt", parsed, parsed_len);
215
+
216
+	size_t in = 0, out = 0;
217
+	for (; in < parsed_len; in++, out++)
218
+	{
219
+		// This shows up in mandelbrot.bf a lot but actually helps hanoi.bf
220
+		if (in + 5 < parsed_len
221
+		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
222
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
223
+
224
+		 && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT
225
+		 && parsed[in + 2].arg == parsed[in + 4].arg
226
+
227
+		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
228
+		 && parsed[in + 3].arg == 1)
229
+		{
230
+			// This mustn't make the move when the cell is zero already
231
+			parsed[out] = parsed[in];
232
+			parsed[out + 1] = INSTRUCTION (EAT, 0);
233
+			parsed[out + 2] = parsed[in + 2];
234
+			parsed[out + 3] = INSTRUCTION
235
+				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
236
+			parsed[out + 4] = parsed[in + 4];
237
+			// This disables the looping further in the code;
238
+			// this doesn't have much of an effect in practice
239
+			parsed[out + 5] = INSTRUCTION (END, 0);
240
+			in += 5;
241
+			out += 5;
242
+		}
243
+		// The simpler case that cannot crash and thus can avoid the loop
244
+		else if (in + 5 < parsed_len
245
+		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
246
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
247
+
248
+		 && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT
249
+		 && parsed[in + 2].arg == parsed[in + 4].arg
250
+
251
+		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
252
+		 && parsed[in + 3].arg == 1)
253
+		{
254
+			parsed[out] = INSTRUCTION (EAT, 0);
255
+			parsed[out + 1] = parsed[in + 2];
256
+			parsed[out + 2] = INSTRUCTION
257
+				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
258
+			parsed[out + 3] = parsed[in + 4];
259
+			in += 5;
260
+			out += 3;
261
+		}
262
+		else if (in + 2 < parsed_len
263
+		 && parsed[in    ].cmd == BEGIN
264
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
265
+		 && parsed[in + 2].cmd == END)
266
+		{
267
+			parsed[out] = INSTRUCTION (SET, 0);
268
+			in += 2;
269
+		}
270
+		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC)
271
+			parsed[--out].arg += parsed[in].arg;
272
+		else if (out != in)
273
+			parsed[out] = parsed[in];
274
+	}
275
+	parsed_len = out;
276
+
277
+	for (in = 0, out = 0; in < parsed_len; in++, out++)
278
+	{
279
+		ssize_t dir = 0;
280
+		if (parsed[in].cmd == RIGHT)
281
+			dir = parsed[in].arg;
282
+		else if (parsed[in].cmd == LEFT)
283
+			dir = -(ssize_t) parsed[in].arg;
284
+		else
285
+		{
286
+			parsed[out] = parsed[in];
287
+			continue;
288
+		}
289
+
290
+		for (; in + 1 < parsed_len; in++)
291
+		{
292
+			if (parsed[in + 1].cmd == RIGHT)
293
+				dir += parsed[in + 1].arg;
294
+			else if (parsed[in + 1].cmd == LEFT)
295
+				dir -= (ssize_t) parsed[in + 1].arg;
296
+			else
297
+				break;
298
+		}
299
+
300
+		if (!dir)
301
+			out--;
302
+		else if (dir > 0)
303
+			parsed[out] = INSTRUCTION (RIGHT, dir);
304
+		else
305
+			parsed[out] = INSTRUCTION (LEFT, -dir);
306
+	}
307
+	parsed_len = out;
308
+
309
+	debug_dump ("bf-optimized.txt", parsed, parsed_len);
310
+
311
+// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
312
+
313
+	size_t nesting = 0;
314
+	size_t *stack = xcalloc (sizeof *stack, parsed_len);
315
+	for (size_t i = 0; i < parsed_len; i++)
316
+	{
317
+		switch (parsed[i].cmd)
318
+		{
319
+		case BEGIN:
320
+			stack[nesting++] = i;
321
+			break;
322
+		case END:
323
+			assert (nesting > 0);
324
+
325
+			--nesting;
326
+			parsed[stack[nesting]].arg = i + 1;
327
+
328
+			// Looping can be disabled by optimizations
329
+			if (parsed[i].arg)
330
+				parsed[i].arg = stack[nesting] + 1;
331
+		default:
332
+			break;
333
+		}
334
+	}
335
+	free (stack);
336
+	assert (nesting == 0);
337
+
338
+	debug_dump ("bf-final.txt", parsed, parsed_len);
339
+
340
+// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
341
+
342
+	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15;
343
+	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
344
+
345
+	str_init (&program);
346
+	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1);
347
+	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len);
348
+
349
+#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); }
350
+#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); }
351
+#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); }
352
+
353
+	CODE ("\x49\xBD") WORD (&dataptr)         // mov r13, qword "&dataptr"
354
+	CODE ("\x49\xBF") WORD (&data.str)        // mov r15, qword "&data.str"
355
+	CODE ("\x4D\x8B\x37")                     // mov r14, qword [r15]
356
+	CODE ("\x30\xDB")                         // xor bl, bl
357
+
358
+	for (size_t i = 0; i < parsed_len; i++)
359
+	{
360
+		offsets[i] = program.len;
361
+
362
+		size_t arg = parsed[i].arg;
363
+		assert (arg <= UINT32_MAX);
364
+		switch (parsed[i].cmd)
365
+		{
366
+		case RIGHT:
367
+			CODE ("\x41\x88\x1E")             // mov [r14], bl
368
+			CODE ("\xBF") DWRD (arg)          // mov edi, "arg"
369
+			CODE ("\x48\xB8") WORD (right)    // mov rax, "right"
370
+			CODE ("\xFF\xD0")                 // call rax
371
+
372
+			// The data could get reallocated, so reload the address
373
+			CODE ("\x4D\x8B\x37")             // mov r14, qword [r15]
374
+			CODE ("\x4D\x03\x75\x00")         // add r14, [r13]
375
+			break;
376
+		case LEFT:
377
+			CODE ("\x41\x88\x1E")             // mov [r14], bl
378
+			CODE ("\xBF") DWRD (arg)          // mov edi, "arg"
379
+			CODE ("\x49\x29\xFE")             // sub r14, rdi -- optimistic
380
+			CODE ("\x48\xB8") WORD (left)     // mov rax, "left"
381
+			CODE ("\xFF\xD0")                 // call rax
382
+			break;
383
+
384
+		case EAT:
385
+			CODE ("\x41\x88\xDC")             // mov r12b, bl
386
+			CODE ("\x30\xDB")                 // xor bl, bl
387
+			arith[i] = 1;
388
+			break;
389
+		case INCACC:
390
+			CODE ("\x44\x00\xE3")             // add bl, r12b
391
+			arith[i] = 1;
392
+			break;
393
+		case DECACC:
394
+			CODE ("\x44\x28\xE3")             // sub bl, r12b
395
+			arith[i] = 1;
396
+			break;
397
+
398
+		case INC:
399
+			CODE ("\x80\xC3")                 // add bl, "arg"
400
+			str_append_c (&program, arg);
401
+			arith[i] = 1;
402
+			break;
403
+		case DEC:
404
+			CODE ("\x80\xEB")                 // sub bl, "arg"
405
+			str_append_c (&program, arg);
406
+			arith[i] = 1;
407
+			break;
408
+		case SET:
409
+			CODE ("\xB3")                     // mov bl, "arg"
410
+			str_append_c (&program, arg);
411
+			break;
412
+
413
+		case OUT:
414
+			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl
415
+			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout"
416
+			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc"
417
+			CODE ("\xFF\xD0")                 // call rax
418
+			break;
419
+		case IN:
420
+			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin"
421
+			CODE ("\xFF\xD0")                 // call rax
422
+			CODE ("\x41\x8A\x1E")             // mov bl, [r14]
423
+			break;
424
+
425
+		case BEGIN:
426
+			// Don't test the register when the flag has been set already;
427
+			// this doesn't have much of an effect in practice
428
+			if (!i || !arith[i - 1])
429
+				CODE ("\x84\xDB")             // test bl, bl
430
+			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]"
431
+			break;
432
+		case END:
433
+			// We know that the cell is zero, make this an "if", not a "loop";
434
+			// this doesn't have much of an effect in practice
435
+			if (!arg)
436
+				break;
437
+
438
+			if (!i || !arith[i - 1])
439
+				CODE ("\x84\xDB")             // test bl, bl
440
+			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]"
441
+			break;
442
+		}
443
+
444
+		// No sense in reading it out when we overwrite it immediately;
445
+		// this doesn't have much of an effect in practice
446
+		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
447
+			if (i + 1 >= parsed_len
448
+			 || parsed[i + 1].cmd != SET)
449
+				CODE ("\x41\x8A\x1E")         // mov bl, [r14]
450
+	}
451
+	// When there is a loop at the end we need to be able to jump past it
452
+	offsets[parsed_len] = program.len;
453
+	str_append_c (&program, '\xC3');          // ret
454
+
455
+	// Now that we know where each instruction is, fill in relative jumps;
456
+	// this must accurately reflect code generators for BEGIN and END
457
+	for (size_t i = 0; i < parsed_len; i++)
458
+	{
459
+		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END)
460
+		 || !parsed[i].arg)
461
+			continue;
462
+
463
+		size_t fixup = offsets[i] + 2;
464
+		if (!i || !arith[i - 1])
465
+			fixup += 2;
466
+
467
+		*(int32_t *)(program.str + fixup) =
468
+			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4));
469
+	}
470
+	free (offsets);
471
+	free (arith);
472
+
473
+#ifdef DEBUG
474
+	FILE *bin = fopen ("bf-jit.bin", "w");
475
+	fwrite (program.str, program.len, 1, bin);
476
+	fclose (bin);
477
+#endif
478
+
479
+// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
480
+
481
+	// Some systems may have W^X
482
+	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE,
483
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
484
+	if (!executable)
485
+		exit_fatal ("mmap: %s\n", strerror (errno));
486
+
487
+	memcpy (executable, program.str, program.len);
488
+	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC))
489
+		exit_fatal ("mprotect: %s\n", strerror (errno));
490
+
491
+	str_init (&data);
492
+	str_append_c (&data, 0);
493
+	((void (*) (void)) executable)();
494
+	return 0;
495
+}

+ 617
- 0
interpreters/bf-jit-unsafe-opt.c View File

@@ -0,0 +1,617 @@
1
+// This is an exercise in futility more than anything else
2
+#define _GNU_SOURCE
3
+
4
+#include <stdio.h>
5
+#include <stdlib.h>
6
+#include <string.h>
7
+#include <stdint.h>
8
+#include <stdbool.h>
9
+#include <assert.h>
10
+#include <errno.h>
11
+
12
+#if (defined __x86_64__ || defined __amd64__) && defined __unix__
13
+	#include <unistd.h>
14
+	#include <sys/mman.h>
15
+#else
16
+	#error Platform not supported
17
+#endif
18
+
19
+#define exit_fatal(...)                                                        \
20
+	do {                                                                       \
21
+		fprintf (stderr, "fatal: " __VA_ARGS__);                               \
22
+		exit (EXIT_FAILURE);                                                   \
23
+	} while (0)
24
+
25
+// --- Safe memory management --------------------------------------------------
26
+
27
/// Allocate a zeroed array of @a m items, each @a n bytes long, or die trying.
/// Since calloc() may return NULL for zero-sized requests, a null result is
/// only fatal when a non-empty block was asked for, just like in xrealloc().
static void *
xcalloc (size_t m, size_t n)
{
	void *p = calloc (m, n);
	if (!p && m && n)
		exit_fatal ("calloc: %s\n", strerror (errno));
	return p;
}
35
+
36
/// Change the size of the block at @a o to @a n bytes or exit with an error;
/// getting NULL back when asking for zero bytes counts as success
static void *
xrealloc (void *o, size_t n)
{
	void *moved = realloc (o, n);
	if (n != 0 && moved == NULL)
		exit_fatal ("realloc: %s\n", strerror (errno));
	return moved;
}
44
+
45
+// --- Dynamically allocated strings -------------------------------------------
46
+
47
/// Growable byte buffer; the appending functions keep a null character right
/// behind the data, which is not counted in `len`
struct str
{
	char *str;                          ///< String data, null terminated
	size_t alloc;                       ///< How many bytes are allocated
	size_t len;                         ///< How long the string actually is
};
53
+
54
+static void
55
+str_init (struct str *self)
56
+{
57
+	self->len = 0;
58
+	self->str = xcalloc (1, (self->alloc = 16));
59
+}
60
+
61
+static void
62
+str_ensure_space (struct str *self, size_t n)
63
+{
64
+	// We allocate at least one more byte for the terminating null character
65
+	size_t new_alloc = self->alloc;
66
+	while (new_alloc <= self->len + n)
67
+		new_alloc <<= 1;
68
+	if (new_alloc != self->alloc)
69
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
70
+}
71
+
72
+static void
73
+str_append_data (struct str *self, const void *data, size_t n)
74
+{
75
+	str_ensure_space (self, n);
76
+	memcpy (self->str + self->len, data, n);
77
+	self->str[self->len += n] = '\0';
78
+}
79
+
80
/// Append the single byte @a c to the string
static void
str_append_c (struct str *self, char c)
{
	const char byte[1] = { c };
	str_append_data (self, byte, sizeof byte);
}
85
+
86
+// --- Application -------------------------------------------------------------
87
+
88
// Operations of the intermediate representation.  SET, EAT, INCACC and DECACC
// have no Brainfuck character, they are only created by optimization passes.
enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END,
	EAT, INCACC, DECACC };
// Indexed by enum command: whether the decoder merges a run of the same
// command into one instruction, counting the repetitions in its argument
bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
// `offset` is the distance in cells from the current position at which the
// operation applies (the code generator asserts it fits into a signed byte);
// `arg` starts out as a repeat count and loop pairing later overwrites it
// in BEGIN and END with the index of the instruction to jump to
struct instruction { enum command cmd; int offset; size_t arg; };
// Shorthand for an instruction compound literal
#define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) }
93
+
94
+// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
95
+
96
FILE *input;                            ///< User input

/// Callback for the generated code: fetch one byte of user input and hand it
/// back as the return value; running out of input trips an assertion
static int
cin (void)
{
	const int byte = fgetc (input);
	assert (byte != EOF);
	return byte;
}
105
+
106
+// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
107
+
108
+#ifdef DEBUG
109
+static void
110
+debug_dump (const char *filename, struct instruction *in, size_t len)
111
+{
112
+	FILE *fp = fopen (filename, "w");
113
+	long indent = 0;
114
+	for (size_t i = 0; i < len; i++)
115
+	{
116
+		if (in[i].cmd == END)
117
+			indent--;
118
+		for (long k = 0; k < indent; k++)
119
+			fprintf (fp, "  ");
120
+
121
+		switch (in[i].cmd)
122
+		{
123
+		case RIGHT:  fputs ("RIGHT ", fp); break;
124
+		case LEFT:   fputs ("LEFT  ", fp); break;
125
+		case INC:    fputs ("INC   ", fp); break;
126
+		case DEC:    fputs ("DEC   ", fp); break;
127
+		case OUT:    fputs ("OUT   ", fp); break;
128
+		case IN:     fputs ("IN    ", fp); break;
129
+		case BEGIN:  fputs ("BEGIN ", fp); break;
130
+		case END:    fputs ("END   ", fp); break;
131
+		case SET:    fputs ("SET   ", fp); break;
132
+		case EAT:    fputs ("EAT   ", fp); break;
133
+		case INCACC: fputs ("INCACC", fp); break;
134
+		case DECACC: fputs ("DECACC", fp); break;
135
+		}
136
+		fprintf (fp, " %zu [%d]\n", in[i].arg, in[i].offset);
137
+		if (in[i].cmd == BEGIN)
138
+			indent++;
139
+	}
140
+	fclose (fp);
141
+}
142
+#else
143
+#define debug_dump(...)
144
+#endif
145
+
146
+int
147
+main (int argc, char *argv[])
148
+{
149
+	(void) argc;
150
+	(void) argv;
151
+
152
+	struct str program;
153
+	str_init (&program);
154
+
155
+	int c;
156
+	while ((c = fgetc (stdin)) != EOF)
157
+		str_append_c (&program, c);
158
+	if (ferror (stdin))
159
+		exit_fatal ("can't read program\n");
160
+	if (!(input = fopen ("/dev/tty", "rb")))
161
+		exit_fatal ("can't open terminal for reading\n");
162
+
163
+// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - -
164
+
165
+	struct instruction *parsed = xcalloc (sizeof *parsed, program.len);
166
+	size_t parsed_len = 0;
167
+
168
+	for (size_t i = 0; i < program.len; i++)
169
+	{
170
+		enum command cmd;
171
+		switch (program.str[i])
172
+		{
173
+		case '>': cmd = RIGHT; break;
174
+		case '<': cmd = LEFT;  break;
175
+		case '+': cmd = INC;   break;
176
+		case '-': cmd = DEC;   break;
177
+		case '.': cmd = OUT;   break;
178
+		case ',': cmd = IN;    break;
179
+		case '[': cmd = BEGIN; break;
180
+		case ']': cmd = END;   break;
181
+		default:  continue;
182
+		}
183
+
184
+		// The most basic optimization is to group identical commands together
185
+		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd)
186
+			parsed_len++;
187
+
188
+		parsed[parsed_len - 1].cmd = cmd;
189
+		parsed[parsed_len - 1].arg++;
190
+	}
191
+
192
+// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
193
+
194
+	debug_dump ("bf-no-opt.txt", parsed, parsed_len);
195
+
196
+	size_t in = 0, out = 0;
197
+	for (; in < parsed_len; in++, out++)
198
+	{
199
+		if (in + 2 < parsed_len
200
+		 && parsed[in    ].cmd == BEGIN
201
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
202
+		 && parsed[in + 2].cmd == END)
203
+		{
204
+			parsed[out] = INSTRUCTION (SET, 0, 0);
205
+			in += 2;
206
+		}
207
+		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC)
208
+			parsed[--out].arg += parsed[in].arg;
209
+		else if (out != in)
210
+			parsed[out] = parsed[in];
211
+	}
212
+	parsed_len = out;
213
+
214
+	debug_dump ("bf-pre-offsets.txt", parsed, parsed_len);
215
+
216
+	// Add offsets to INC/DEC/SET stuck between LEFT/RIGHT
217
+	// and compress the LEFT/RIGHT sequences
218
+	for (in = 0, out = 0; in < parsed_len; in++, out++)
219
+	{
220
+		ssize_t dir = 0;
221
+		if (parsed[in].cmd == RIGHT)
222
+			dir = parsed[in].arg;
223
+		else if (parsed[in].cmd == LEFT)
224
+			dir = -(ssize_t) parsed[in].arg;
225
+		else
226
+		{
227
+			parsed[out] = parsed[in];
228
+			continue;
229
+		}
230
+
231
+		while (in + 2 < parsed_len)
232
+		{
233
+			// An immediate offset has its limits
234
+			if (dir < INT8_MIN || dir > INT8_MAX)
235
+				break;
236
+
237
+			ssize_t diff;
238
+			if (parsed[in + 2].cmd == RIGHT)
239
+				diff = parsed[in + 2].arg;
240
+			else if (parsed[in + 2].cmd == LEFT)
241
+				diff = -(ssize_t) parsed[in + 2].arg;
242
+			else
243
+				break;
244
+
245
+			int cmd = parsed[in + 1].cmd;
246
+			if (cmd != INC && cmd != DEC && cmd != SET)
247
+				break;
248
+
249
+			parsed[out] = parsed[in + 1];
250
+			parsed[out].offset = dir;
251
+
252
+			dir += diff;
253
+			out += 1;
254
+			in += 2;
255
+		}
256
+
257
+		for (; in + 1 < parsed_len; in++)
258
+		{
259
+			if (parsed[in + 1].cmd == RIGHT)
260
+				dir += parsed[in + 1].arg;
261
+			else if (parsed[in + 1].cmd == LEFT)
262
+				dir -= (ssize_t) parsed[in + 1].arg;
263
+			else
264
+				break;
265
+		}
266
+
267
+		if (!dir)
268
+			out--;
269
+		else if (dir > 0)
270
+			parsed[out] = INSTRUCTION (RIGHT, 0, dir);
271
+		else
272
+			parsed[out] = INSTRUCTION (LEFT, 0, -dir);
273
+	}
274
+	parsed_len = out;
275
+
276
+	debug_dump ("bf-pre-incdec-unloop.txt", parsed, parsed_len);
277
+
278
+	// Try to eliminate loops that eat a cell and add/subtract its value
279
+	// to/from some other cell
280
+	for (in = 0, out = 0; in < parsed_len; in++, out++)
281
+	{
282
+		parsed[out] = parsed[in];
283
+		if (parsed[in].cmd != BEGIN)
284
+			continue;
285
+
286
+		bool ok = false;
287
+		size_t count = 0;
288
+		for (size_t k = in + 1; k < parsed_len; k++)
289
+		{
290
+			if (parsed[k].cmd == END)
291
+			{
292
+				ok = true;
293
+				break;
294
+			}
295
+			if (parsed[k].cmd != INC
296
+			 && parsed[k].cmd != DEC)
297
+				break;
298
+			count++;
299
+		}
300
+		if (!ok)
301
+			continue;
302
+
303
+		// Stable sort operations by their offsets, put [0] first
304
+		bool sorted;
305
+		do
306
+		{
307
+			sorted = true;
308
+			for (size_t k = 1; k < count; k++)
309
+			{
310
+				if (parsed[in + k].offset == 0)
311
+					continue;
312
+				if (parsed[in + k + 1].offset != 0
313
+				 && parsed[in + k].offset <= parsed[in + k + 1].offset)
314
+					continue;
315
+
316
+				struct instruction tmp = parsed[in + k + 1];
317
+				parsed[in + k + 1] = parsed[in + k];
318
+				parsed[in + k] = tmp;
319
+				sorted = false;
320
+			}
321
+		}
322
+		while (!sorted);
323
+
324
+		// Abort the optimization on duplicate offsets (complication with [0])
325
+		for (size_t k = 1; k < count; k++)
326
+			if (parsed[in + k].offset == parsed[in + k + 1].offset)
327
+				ok = false;
328
+		// XXX: can't make the code longer either
329
+		for (size_t k = 1; k <= count; k++)
330
+			if (parsed[in + k].arg != 1)
331
+				ok = false;
332
+		if (!ok
333
+		 || parsed[in + 1].cmd != DEC
334
+		 || parsed[in + 1].offset != 0)
335
+			continue;
336
+
337
+		int min_safe_left_offset = 0;
338
+		if (in > 1 && parsed[in - 1].cmd == RIGHT)
339
+			min_safe_left_offset = -parsed[in - 1].arg;
340
+
341
+		bool cond_needed_for_safety = false;
342
+		for (size_t k = 0; k < count; k++)
343
+			if (parsed[in + k + 1].offset < min_safe_left_offset)
344
+			{
345
+				cond_needed_for_safety = true;
346
+				break;
347
+			}
348
+
349
+		in++;
350
+		if (cond_needed_for_safety)
351
+			out++;
352
+
353
+		parsed[out] = INSTRUCTION (EAT, 0, 0);
354
+		for (size_t k = 1; k < count; k++)
355
+			parsed[out + k] = INSTRUCTION (parsed[in + k].cmd == INC
356
+				? INCACC : DECACC, parsed[in + k].offset, 0);
357
+
358
+		in += count;
359
+		out += count;
360
+
361
+		if (cond_needed_for_safety)
362
+			parsed[out] = INSTRUCTION (END, 0, 0);
363
+		else
364
+			out--;
365
+	}
366
+	parsed_len = out;
367
+
368
+	debug_dump ("bf-optimized.txt", parsed, parsed_len);
369
+
370
+// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
371
+
372
+	size_t nesting = 0;
373
+	size_t *stack = xcalloc (sizeof *stack, parsed_len);
374
+	for (size_t i = 0; i < parsed_len; i++)
375
+	{
376
+		switch (parsed[i].cmd)
377
+		{
378
+		case BEGIN:
379
+			stack[nesting++] = i;
380
+			break;
381
+		case END:
382
+			assert (nesting > 0);
383
+
384
+			--nesting;
385
+			parsed[stack[nesting]].arg = i + 1;
386
+
387
+			// Looping can be disabled by optimizations
388
+			if (parsed[i].arg)
389
+				parsed[i].arg = stack[nesting] + 1;
390
+		default:
391
+			break;
392
+		}
393
+	}
394
+	free (stack);
395
+	assert (nesting == 0);
396
+
397
+	debug_dump ("bf-final.txt", parsed, parsed_len);
398
+
399
+// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
400
+
401
+	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15;
402
+	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
403
+
404
+	str_init (&program);
405
+	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1);
406
+	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len);
407
+
408
+#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); }
409
+#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); }
410
+#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); }
411
+
412
+	CODE ("\x48\x89\xF8")                     // mov rax, rdi
413
+	CODE ("\x30\xDB")                         // xor bl, bl
414
+
415
+	for (size_t i = 0; i < parsed_len; i++)
416
+	{
417
+		offsets[i] = program.len;
418
+
419
+		size_t arg = parsed[i].arg;
420
+		assert (arg <= UINT32_MAX);
421
+
422
+		int offset = parsed[i].offset;
423
+		assert (offset <= INT8_MAX && offset >= INT8_MIN);
424
+
425
+		// Don't save what we've just loaded
426
+		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
427
+			if (i < 2 || i + 1 >= parsed_len
428
+			 || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT)
429
+			 || parsed[i - 1].cmd != BEGIN
430
+			 || parsed[i + 1].cmd != END)
431
+				CODE ("\x88\x18")             // mov [rax], bl
432
+
433
+		switch (parsed[i].cmd)
434
+		{
435
+		case RIGHT:
436
+			// add rax, "arg" -- optimistic, no boundary checking
437
+			if (arg > INT8_MAX)
438
+				{ CODE ("\x48\x05")     DWRD (arg) }
439
+			else
440
+				{ CODE ("\x48\x83\xC0") str_append_c (&program, arg); }
441
+			break;
442
+		case LEFT:
443
+			// sub rax, "arg" -- optimistic, no boundary checking
444
+			if (arg > INT8_MAX)
445
+				{ CODE ("\x48\x2D")     DWRD (arg) }
446
+			else
447
+				{ CODE ("\x48\x83\xE8") str_append_c (&program, arg); }
448
+			break;
449
+
450
+		case EAT:
451
+			CODE ("\x41\x88\xDC")             // mov r12b, bl
452
+			CODE ("\x30\xDB")                 // xor bl, bl
453
+			arith[i] = 1;
454
+			break;
455
+		case INCACC:
456
+			if (offset)
457
+			{
458
+				CODE ("\x44\x00\x60")         // add [rax+"offset"], r12b
459
+				str_append_c (&program, offset);
460
+			}
461
+			else
462
+			{
463
+				CODE ("\x44\x00\xE3")         // add bl, r12b
464
+				arith[i] = 1;
465
+			}
466
+			break;
467
+		case DECACC:
468
+			if (offset)
469
+			{
470
+				CODE ("\x44\x28\x60")         // sub [rax+"offset"], r12b
471
+				str_append_c (&program, offset);
472
+			}
473
+			else
474
+			{
475
+				CODE ("\x44\x28\xE3")         // sub bl, r12b
476
+				arith[i] = 1;
477
+			}
478
+			break;
479
+
480
+		case INC:
481
+			if (offset)
482
+			{
483
+				CODE ("\x80\x40")             // add byte [rax+"offset"], "arg"
484
+				str_append_c (&program, offset);
485
+			}
486
+			else
487
+			{
488
+				arith[i] = 1;
489
+				CODE ("\x80\xC3")             // add bl, "arg"
490
+			}
491
+			str_append_c (&program, arg);
492
+			break;
493
+		case DEC:
494
+			if (offset)
495
+			{
496
+				CODE ("\x80\x68")             // sub byte [rax+"offset"], "arg"
497
+				str_append_c (&program, offset);
498
+			}
499
+			else
500
+			{
501
+				arith[i] = 1;
502
+				CODE ("\x80\xEB")             // sub bl, "arg"
503
+			}
504
+			str_append_c (&program, arg);
505
+			break;
506
+		case SET:
507
+			if (offset)
508
+			{
509
+				CODE ("\xC6\x40")             // mov byte [rax+"offset"], "arg"
510
+				str_append_c (&program, offset);
511
+			}
512
+			else
513
+				CODE ("\xB3")                 // mov bl, "arg"
514
+			str_append_c (&program, arg);
515
+			break;
516
+
517
+		case OUT:
518
+			CODE ("\x50\x53")                 // push rax, push rbx
519
+			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl
520
+			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout"
521
+			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc"
522
+			CODE ("\xFF\xD0")                 // call rax
523
+			CODE ("\x5B\x58")                 // pop rbx, pop rax
524
+			break;
525
+		case IN:
526
+			CODE ("\x50")                     // push rax
527
+			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin"
528
+			CODE ("\xFF\xD0")                 // call rax
529
+			CODE ("\x88\xC3")                 // mov bl, al
530
+			CODE ("\x58")                     // pop rax
531
+			break;
532
+
533
+		case BEGIN:
534
+			// Don't test the register when the flag has been set already;
535
+			// this doesn't have much of an effect in practice
536
+			if (!i || !arith[i - 1])
537
+				CODE ("\x84\xDB")             // test bl, bl
538
+			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]"
539
+			break;
540
+		case END:
541
+			// We know that the cell is zero, make this an "if", not a "loop";
542
+			// this doesn't have much of an effect in practice
543
+			if (!arg)
544
+				break;
545
+
546
+			if (!i || !arith[i - 1])
547
+				CODE ("\x84\xDB")             // test bl, bl
548
+			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]"
549
+			break;
550
+		}
551
+
552
+		// No sense in reading it out when we overwrite it immediately;
553
+		// this doesn't have much of an effect in practice
554
+		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
555
+			if (i + 1 >= parsed_len
556
+			 || parsed[i + 1].cmd != SET
557
+			 || parsed[i + 1].offset != 0)
558
+				CODE ("\x8A\x18")             // mov bl, [rax]
559
+	}
560
+	// When there is a loop at the end we need to be able to jump past it
561
+	offsets[parsed_len] = program.len;
562
+	str_append_c (&program, '\xC3');          // ret
563
+
564
+	// Now that we know where each instruction is, fill in relative jumps;
565
+	// this must accurately reflect code generators for BEGIN and END
566
+	for (size_t i = 0; i < parsed_len; i++)
567
+	{
568
+		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END)
569
+		 || !parsed[i].arg)
570
+			continue;
571
+
572
+		size_t fixup = offsets[i] + 2;
573
+		if (!i || !arith[i - 1])
574
+			fixup += 2;
575
+
576
+		*(int32_t *)(program.str + fixup) =
577
+			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4));
578
+	}
579
+	free (offsets);
580
+	free (arith);
581
+
582
+#ifdef DEBUG
583
+	FILE *bin = fopen ("bf-jit.bin", "w");
584
+	fwrite (program.str, program.len, 1, bin);
585
+	fclose (bin);
586
+#endif
587
+
588
+// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
589
+
590
+	// Some systems may have W^X
591
+	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE,
592
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
593
+	if (!executable)
594
+		exit_fatal ("mmap: %s\n", strerror (errno));
595
+
596
+	memcpy (executable, program.str, program.len);
597
+	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC))
598
+		exit_fatal ("mprotect: %s\n", strerror (errno));
599
+
600
+	// We create crash zones on both ends of the tape for some minimum safety
601
+	long pagesz = sysconf (_SC_PAGESIZE);
602
+	assert (pagesz > 0);
603
+
604
+	const size_t tape_len = (1 << 20) + 2 * pagesz;
605
+	char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE,
606
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
607
+	if (!tape)
608
+		exit_fatal ("mmap: %s\n", strerror (errno));
609
+
610
+	memset (tape, 0, tape_len);
611
+	if (mprotect (tape,                     pagesz, PROT_NONE)
612
+	 || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE))
613
+		exit_fatal ("mprotect: %s\n", strerror (errno));
614
+
615
+	((void (*) (char *)) executable)(tape + pagesz);
616
+	return 0;
617
+}

+ 495
- 0
interpreters/bf-jit-unsafe.c View File

@@ -0,0 +1,495 @@
1
+// This is an exercise in futility more than anything else
2
+#define _GNU_SOURCE
3
+
4
+#include <stdio.h>
5
+#include <stdlib.h>
6
+#include <string.h>
7
+#include <stdint.h>
8
+#include <stdbool.h>
9
+#include <assert.h>
10
+#include <errno.h>
11
+
12
+#if (defined __x86_64__ || defined __amd64__) && defined __unix__
13
+	#include <unistd.h>
14
+	#include <sys/mman.h>
15
+#else
16
+	#error Platform not supported
17
+#endif
18
+
19
// Print a printf-style message to stderr, prefixed with "fatal: ", and end
// the process with a failure status.  The format must be a string literal
// since it is concatenated with the prefix at compile time.
#define exit_fatal(...) \
	do \
	{ \
		fprintf (stderr, "fatal: " __VA_ARGS__); \
		exit (EXIT_FAILURE); \
	} \
	while (0)
24
+
25
+// --- Safe memory management --------------------------------------------------
26
+
27
// Allocate a zero-filled array of "m" members, "n" bytes each, and terminate
// the program when the allocation fails.
//
// A zero-sized request is not a failure: calloc() may legitimately return
// NULL for it, in which case NULL is passed on to the caller.  This mirrors
// the "!p && n" check in xrealloc().
static void *
xcalloc (size_t m, size_t n)
{
	void *p = calloc (m, n);
	if (!p && m && n)
		exit_fatal ("calloc: %s\n", strerror (errno));
	return p;
}
35
+
36
// Resize the allocation "o" to "n" bytes, terminating the program on failure.
// A NULL result is only considered a failure for non-empty requests.
static void *
xrealloc (void *o, size_t n)
{
	void *resized = realloc (o, n);
	if (n != 0 && resized == NULL)
		exit_fatal ("realloc: %s\n", strerror (errno));
	return resized;
}
44
+
45
+// --- Dynamically allocated strings -------------------------------------------
46
+
47
// A growable byte buffer; the append functions keep it null-terminated
struct str
{
	char *str;                          ///< Heap buffer, null terminated
	size_t alloc;                       ///< Capacity of the buffer in bytes
	size_t len;                         ///< Bytes in use, without the null
};
53
+
54
+static void
55
+str_init (struct str *self)
56
+{
57
+	self->len = 0;
58
+	self->str = xcalloc (1, (self->alloc = 16));
59
+}
60
+
61
+static void
62
+str_ensure_space (struct str *self, size_t n)
63
+{
64
+	// We allocate at least one more byte for the terminating null character
65
+	size_t new_alloc = self->alloc;
66
+	while (new_alloc <= self->len + n)
67
+		new_alloc <<= 1;
68
+	if (new_alloc != self->alloc)
69
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
70
+}
71
+
72
+static void
73
+str_append_data (struct str *self, const void *data, size_t n)
74
+{
75
+	str_ensure_space (self, n);
76
+	memcpy (self->str + self->len, data, n);
77
+	self->str[self->len += n] = '\0';
78
+}
79
+
80
// Append the single byte "c"
static void
str_append_c (struct str *self, char c)
{
	const char byte[1] = { c };
	str_append_data (self, byte, sizeof byte);
}
85
+
86
+// --- Application -------------------------------------------------------------
87
+
88
// Commands of the intermediate representation; the first eight map directly
// to Brainfuck source characters or are produced by simple rewrites, the
// rest (EAT, INCACC, DECACC) only come out of the optimization passes
enum command
{
	RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END,
	EAT, INCACC, DECACC
};

// Indexed by enum command: whether a run of the same command is merged into
// a single instruction while decoding, with the run length kept in "arg"
bool grouped[] =
{
	[RIGHT]  = true,
	[LEFT]   = true,
	[INC]    = true,
	[DEC]    = true,
	[SET]    = true,
	[IN]     = false,
	[OUT]    = false,
	[BEGIN]  = false,
	[END]    = false,
	[EAT]    = false,
	[INCACC] = false,
	[DECACC] = false,
};

// One decoded instruction; the meaning of "arg" depends on the command
struct instruction
{
	enum command cmd;
	size_t arg;
};

#define INSTRUCTION(c, a) (struct instruction) { (c), (a) }
93
+
94
+// - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
95
+
96
FILE *input;                            ///< User input

// Callback for the IN command: return the next byte of user input.
//
// Running out of input or failing to read it terminates the program.  This
// is checked explicitly rather than with assert() so that builds with NDEBUG
// don't silently hand EOF over to the generated code as a cell value.
static int
cin (void)
{
	int c = fgetc (input);
	if (c == EOF)
		exit_fatal ("%s\n", ferror (input)
			? "can't read user input"
			: "unexpected end of user input");
	return c;
}
105
+
106
+// - - Main  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
107
+
108
#ifdef DEBUG
// Write a listing of "len" instructions starting at "in" to "filename", one
// instruction per line as "NAME arg", indented by loop nesting depth.
// Only available in DEBUG builds, otherwise calls expand to nothing.
static void
debug_dump (const char *filename, struct instruction *in, size_t len)
{
	static const char *const names[] =
	{
		[RIGHT]  = "RIGHT",
		[LEFT]   = "LEFT",
		[INC]    = "INC",
		[DEC]    = "DEC",
		[SET]    = "SET",
		[IN]     = "IN",
		[OUT]    = "OUT",
		[BEGIN]  = "BEGIN",
		[END]    = "END",
		[EAT]    = "EAT",
		[INCACC] = "INCACC",
		[DECACC] = "DECACC",
	};

	FILE *fp = fopen (filename, "w");
	if (!fp)
		exit_fatal ("%s: %s\n", filename, strerror (errno));

	long indent = 0;
	for (size_t i = 0; i < len; i++)
	{
		// The closing command lines up with its opening counterpart
		if (in[i].cmd == END)
			indent--;
		for (long k = 0; k < indent; k++)
			fputs ("  ", fp);

		// Names are padded to the width of the longest one
		fprintf (fp, "%-6s %zu\n", names[in[i].cmd], in[i].arg);

		if (in[i].cmd == BEGIN)
			indent++;
	}
	fclose (fp);
}
#else
#define debug_dump(...)
#endif
144
+
145
+int
146
+main (int argc, char *argv[])
147
+{
148
+	(void) argc;
149
+	(void) argv;
150
+
151
+	struct str program;
152
+	str_init (&program);
153
+
154
+	int c;
155
+	while ((c = fgetc (stdin)) != EOF)
156
+		str_append_c (&program, c);
157
+	if (ferror (stdin))
158
+		exit_fatal ("can't read program\n");
159
+	if (!(input = fopen ("/dev/tty", "rb")))
160
+		exit_fatal ("can't open terminal for reading\n");
161
+
162
+// - - Decode and group  - - - - - - - - - - - - - - - - - - - - - - - - - - - -
163
+
164
+	struct instruction *parsed = xcalloc (sizeof *parsed, program.len);
165
+	size_t parsed_len = 0;
166
+
167
+	for (size_t i = 0; i < program.len; i++)
168
+	{
169
+		enum command cmd;
170
+		switch (program.str[i])
171
+		{
172
+		case '>': cmd = RIGHT; break;
173
+		case '<': cmd = LEFT;  break;
174
+		case '+': cmd = INC;   break;
175
+		case '-': cmd = DEC;   break;
176
+		case '.': cmd = OUT;   break;
177
+		case ',': cmd = IN;    break;
178
+		case '[': cmd = BEGIN; break;
179
+		case ']': cmd = END;   break;
180
+		default:  continue;
181
+		}
182
+
183
+		// The most basic optimization is to group identical commands together
184
+		if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd)
185
+			parsed_len++;
186
+
187
+		parsed[parsed_len - 1].cmd = cmd;
188
+		parsed[parsed_len - 1].arg++;
189
+	}
190
+
191
+// - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
192
+
193
+	debug_dump ("bf-no-opt.txt", parsed, parsed_len);
194
+
195
+	size_t in = 0, out = 0;
196
+	for (; in < parsed_len; in++, out++)
197
+	{
198
+		// This shows up in mandelbrot.bf a lot but actually helps hanoi.bf
199
+		if (in + 5 < parsed_len
200
+		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
201
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
202
+
203
+		 && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT
204
+		 && parsed[in + 2].arg == parsed[in + 4].arg
205
+
206
+		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
207
+		 && parsed[in + 3].arg == 1)
208
+		{
209
+			// This mustn't make the move when the cell is zero already
210
+			parsed[out] = parsed[in];
211
+			parsed[out + 1] = INSTRUCTION (EAT, 0);
212
+			parsed[out + 2] = parsed[in + 2];
213
+			parsed[out + 3] = INSTRUCTION
214
+				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
215
+			parsed[out + 4] = parsed[in + 4];
216
+			// This disables the looping further in the code;
217
+			// this doesn't have much of an effect in practice
218
+			parsed[out + 5] = INSTRUCTION (END, 0);
219
+			in += 5;
220
+			out += 5;
221
+		}
222
+		// The simpler case that cannot crash and thus can avoid the loop
223
+		else if (in + 5 < parsed_len
224
+		 && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
225
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
226
+
227
+		 && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT
228
+		 && parsed[in + 2].arg == parsed[in + 4].arg
229
+
230
+		 && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
231
+		 && parsed[in + 3].arg == 1)
232
+		{
233
+			parsed[out] = INSTRUCTION (EAT, 0);
234
+			parsed[out + 1] = parsed[in + 2];
235
+			parsed[out + 2] = INSTRUCTION
236
+				(parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
237
+			parsed[out + 3] = parsed[in + 4];
238
+			in += 5;
239
+			out += 3;
240
+		}
241
+		else if (in + 2 < parsed_len
242
+		 && parsed[in    ].cmd == BEGIN
243
+		 && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
244
+		 && parsed[in + 2].cmd == END)
245
+		{
246
+			parsed[out] = INSTRUCTION (SET, 0);
247
+			in += 2;
248
+		}
249
+		else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC)
250
+			parsed[--out].arg += parsed[in].arg;
251
+		else if (out != in)
252
+			parsed[out] = parsed[in];
253
+	}
254
+	parsed_len = out;
255
+
256
+	for (in = 0, out = 0; in < parsed_len; in++, out++)
257
+	{
258
+		ssize_t dir = 0;
259
+		if (parsed[in].cmd == RIGHT)
260
+			dir = parsed[in].arg;
261
+		else if (parsed[in].cmd == LEFT)
262
+			dir = -(ssize_t) parsed[in].arg;
263
+		else
264
+		{
265
+			parsed[out] = parsed[in];
266
+			continue;
267
+		}
268
+
269
+		for (; in + 1 < parsed_len; in++)
270
+		{
271
+			if (parsed[in + 1].cmd == RIGHT)
272
+				dir += parsed[in + 1].arg;
273
+			else if (parsed[in + 1].cmd == LEFT)
274
+				dir -= (ssize_t) parsed[in + 1].arg;
275
+			else
276
+				break;
277
+		}
278
+
279
+		if (!dir)
280
+			out--;
281
+		else if (dir > 0)
282
+			parsed[out] = INSTRUCTION (RIGHT, dir);
283
+		else
284
+			parsed[out] = INSTRUCTION (LEFT, -dir);
285
+	}
286
+	parsed_len = out;
287
+
288
+	debug_dump ("bf-optimized.txt", parsed, parsed_len);
289
+
290
+// - - Loop pairing  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
291
+
292
+	size_t nesting = 0;
293
+	size_t *stack = xcalloc (sizeof *stack, parsed_len);
294
+	for (size_t i = 0; i < parsed_len; i++)
295
+	{
296
+		switch (parsed[i].cmd)
297
+		{
298
+		case BEGIN:
299
+			stack[nesting++] = i;
300
+			break;
301
+		case END:
302
+			assert (nesting > 0);
303
+
304
+			--nesting;
305
+			parsed[stack[nesting]].arg = i + 1;
306
+
307
+			// Looping can be disabled by optimizations
308
+			if (parsed[i].arg)
309
+				parsed[i].arg = stack[nesting] + 1;
310
+		default:
311
+			break;
312
+		}
313
+	}
314
+	free (stack);
315
+	assert (nesting == 0);
316
+
317
+	debug_dump ("bf-final.txt", parsed, parsed_len);
318
+
319
+// - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
320
+
321
+	// Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15;
322
+	// while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
323
+
324
+	str_init (&program);
325
+	size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1);
326
+	uint8_t *arith  = xcalloc (sizeof *arith,   parsed_len);
327
+
328
+#define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); }
329
+#define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); }
330
+#define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); }
331
+
332
+	CODE ("\x48\x89\xF8")                     // mov rax, rdi
333
+	CODE ("\x30\xDB")                         // xor bl, bl
334
+
335
+	for (size_t i = 0; i < parsed_len; i++)
336
+	{
337
+		offsets[i] = program.len;
338
+
339
+		size_t arg = parsed[i].arg;
340
+		assert (arg <= UINT32_MAX);
341
+
342
+		// Don't save what we've just loaded
343
+		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
344
+			if (i < 2 || i + 1 >= parsed_len
345
+			 || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT)
346
+			 || parsed[i - 1].cmd != BEGIN
347
+			 || parsed[i + 1].cmd != END)
348
+				CODE ("\x88\x18")             // mov [rax], bl
349
+
350
+		switch (parsed[i].cmd)
351
+		{
352
+		case RIGHT:
353
+			// add rax, "arg" -- optimistic, no boundary checking
354
+			if (arg > INT8_MAX)
355
+				{ CODE ("\x48\x05") DWRD (arg) }
356
+			else
357
+				{ CODE ("\x48\x83\xC0") str_append_c (&program, arg); }
358
+			break;
359
+		case LEFT:
360
+			// sub rax, "arg" -- optimistic, no boundary checking
361
+			if (arg > INT8_MAX)
362
+				{ CODE ("\x48\x2D") DWRD (arg) }
363
+			else
364
+				{ CODE ("\x48\x83\xE8") str_append_c (&program, arg); }
365
+			break;
366
+
367
+		case EAT:
368
+			CODE ("\x41\x88\xDC")             // mov r12b, bl
369
+			CODE ("\x30\xDB")                 // xor bl, bl
370
+			arith[i] = 1;
371
+			break;
372
+		case INCACC:
373
+			CODE ("\x44\x00\xE3")             // add bl, r12b
374
+			arith[i] = 1;
375
+			break;
376
+		case DECACC:
377
+			CODE ("\x44\x28\xE3")             // sub bl, r12b
378
+			arith[i] = 1;
379
+			break;
380
+
381
+		case INC:
382
+			CODE ("\x80\xC3")                 // add bl, "arg"
383
+			str_append_c (&program, arg);
384
+			arith[i] = 1;
385
+			break;
386
+		case DEC:
387
+			CODE ("\x80\xEB")                 // sub bl, "arg"
388
+			str_append_c (&program, arg);
389
+			arith[i] = 1;
390
+			break;
391
+		case SET:
392
+			CODE ("\xB3")                     // mov bl, "arg"
393
+			str_append_c (&program, arg);
394
+			break;
395
+
396
+		case OUT:
397
+			CODE ("\x50\x53")                 // push rax, push rbx
398
+			CODE ("\x48\x0F\xB6\xFB")         // movzx rdi, bl
399
+			CODE ("\x48\xBE") WORD (stdout)   // mov rsi, "stdout"
400
+			CODE ("\x48\xB8") WORD (fputc)    // mov rax, "fputc"
401
+			CODE ("\xFF\xD0")                 // call rax
402
+			CODE ("\x5B\x58")                 // pop rbx, pop rax
403
+			break;
404
+		case IN:
405
+			CODE ("\x50")                     // push rax
406
+			CODE ("\x48\xB8") WORD (cin)      // mov rax, "cin"
407
+			CODE ("\xFF\xD0")                 // call rax
408
+			CODE ("\x88\xC3")                 // mov bl, al
409
+			CODE ("\x58")                     // pop rax
410
+			break;
411
+
412
+		case BEGIN:
413
+			// Don't test the register when the flag has been set already;
414
+			// this doesn't have much of an effect in practice
415
+			if (!i || !arith[i - 1])
416
+				CODE ("\x84\xDB")             // test bl, bl
417
+			CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]"
418
+			break;
419
+		case END:
420
+			// We know that the cell is zero, make this an "if", not a "loop";
421
+			// this doesn't have much of an effect in practice
422
+			if (!arg)
423
+				break;
424
+
425
+			if (!i || !arith[i - 1])
426
+				CODE ("\x84\xDB")             // test bl, bl
427
+			CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]"
428
+			break;
429
+		}
430
+
431
+		// No sense in reading it out when we overwrite it immediately;
432
+		// this doesn't have much of an effect in practice
433
+		if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
434
+			if (i + 1 >= parsed_len
435
+			 || parsed[i + 1].cmd != SET)
436
+				CODE ("\x8A\x18")             // mov bl, [rax]
437
+	}
438
+	// When there is a loop at the end we need to be able to jump past it
439
+	offsets[parsed_len] = program.len;
440
+	str_append_c (&program, '\xC3');          // ret
441
+
442
+	// Now that we know where each instruction is, fill in relative jumps;
443
+	// this must accurately reflect code generators for BEGIN and END
444
+	for (size_t i = 0; i < parsed_len; i++)
445
+	{
446
+		if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END)
447
+		 || !parsed[i].arg)
448
+			continue;
449
+
450
+		size_t fixup = offsets[i] + 2;
451
+		if (!i || !arith[i - 1])
452
+			fixup += 2;
453
+
454
+		*(int32_t *)(program.str + fixup) =
455
+			((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4));
456
+	}
457
+	free (offsets);
458
+	free (arith);
459
+
460
+#ifdef DEBUG
461
+	FILE *bin = fopen ("bf-jit.bin", "w");
462
+	fwrite (program.str, program.len, 1, bin);
463
+	fclose (bin);
464
+#endif
465
+
466
+// - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
467
+
468
+	// Some systems may have W^X
469
+	void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE,
470
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
471
+	if (!executable)
472
+		exit_fatal ("mmap: %s\n", strerror (errno));
473
+
474
+	memcpy (executable, program.str, program.len);
475
+	if (mprotect (executable, program.len, PROT_READ | PROT_EXEC))
476
+		exit_fatal ("mprotect: %s\n", strerror (errno));
477
+
478
+	// We create crash zones on both ends of the tape for some minimum safety
479
+	long pagesz = sysconf (_SC_PAGESIZE);
480
+	assert (pagesz > 0);
481
+
482
+	const size_t tape_len = (1 << 20) + 2 * pagesz;
483
+	char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE,
484
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
485
+	if (!tape)
486
+		exit_fatal ("mmap: %s\n", strerror (errno));
487
+
488
+	memset (tape, 0, tape_len);
489
+	if (mprotect (tape,                     pagesz, PROT_NONE)
490
+	 || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE))
491
+		exit_fatal ("mprotect: %s\n", strerror (errno));
492
+
493
+	((void (*) (char *)) executable)(tape + pagesz);
494
+	return 0;
495
+}

+ 327
- 0
interpreters/bf-jit.c View File

@@ -0,0 +1,327 @@
1
+// This is an exercise in futility more than anything else
2
+#define _GNU_SOURCE
3
+
4
+#include <stdio.h>
5
+#include <stdlib.h>
6
+#include <string.h>
7
+#include <stdint.h>
8
+#include <stdbool.h>
9
+#include <assert.h>
10
+#include <errno.h>
11
+
12
+#if (defined __x86_64__ || defined __amd64__) && defined __unix__
13
+	#include <sys/mman.h>
14
+#else
15
+	#error Platform not supported
16
+#endif
17
+
18
// Report a printf-style message on stderr behind a "fatal: " prefix, then
// exit with a failure status.  The format is pasted onto the prefix by the
// compiler and therefore has to be a string literal.
#define exit_fatal(...) \
	do \
	{ \
		fprintf (stderr, "fatal: " __VA_ARGS__); \
		exit (EXIT_FAILURE); \
	} \
	while (0)
23
+
24
+// --- Safe memory management --------------------------------------------------
25
+
26
// Allocate a zero-filled array of "m" members, "n" bytes each, and terminate
// the program when the allocation fails.
//
// A zero-sized request is not a failure: calloc() may legitimately return
// NULL for it, in which case NULL is passed on to the caller.  This mirrors
// the "!p && n" check in xrealloc().
static void *
xcalloc (size_t m, size_t n)
{
	void *p = calloc (m, n);
	if (!p && m && n)
		exit_fatal ("calloc: %s\n", strerror (errno));
	return p;
}
34
+
35
// Change the size of the allocation "o" to "n" bytes; a failure to do so
// terminates the program.  NULL is a valid result when "n" is zero.
static void *
xrealloc (void *o, size_t n)
{
	void *resized = realloc (o, n);
	if (n != 0 && resized == NULL)
		exit_fatal ("realloc: %s\n", strerror (errno));
	return resized;
}
43
+
44
+// --- Dynamically allocated strings -------------------------------------------
45
+
46
// A growable byte buffer with an explicit length and capacity
struct str
{
	char *str;                          ///< Heap buffer, null terminated
	size_t alloc;                       ///< Capacity of the buffer in bytes
	size_t len;                         ///< Number of bytes currently in use
};
52
+
53
+static void
54
+str_init (struct str *self)
55
+{
56
+	self->len = 0;
57
+	self->str = xcalloc (1, (self->alloc = 16));
58
+}
59
+
60
+static void
61
+str_ensure_space (struct str *self, size_t n)
62
+{
63
+	// We allocate at least one more byte for the terminating null character
64
+	size_t new_alloc = self->alloc;
65
+	while (new_alloc <= self->len + n)
66
+		new_alloc <<= 1;
67
+	if (new_alloc != self->alloc)
68
+		self->str = xrealloc (self->str, (self->alloc = new_alloc));
69
+}
70
+
71
+static void
72
+str_append_data (struct str *self, const void *data, size_t n)
73
+{
74
+	str_ensure_space (self, n);
75
+	memcpy (self->str + self->len, data, n);
76
+	self->str[self->len +=