Brainfuck compiler
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bf-jit-unsafe.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. // This is an exercise in futility more than anything else
  2. #define _GNU_SOURCE
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include <stdint.h>
  7. #include <stdbool.h>
  8. #include <assert.h>
  9. #include <errno.h>
  10. #if (defined __x86_64__ || defined __amd64__) && defined __unix__
  11. #include <unistd.h>
  12. #include <sys/mman.h>
  13. #else
  14. #error Platform not supported
  15. #endif
  16. #define exit_fatal(...) \
  17. do { \
  18. fprintf (stderr, "fatal: " __VA_ARGS__); \
  19. exit (EXIT_FAILURE); \
  20. } while (0)
  21. // --- Safe memory management --------------------------------------------------
  22. static void *
  23. xcalloc (size_t m, size_t n)
  24. {
  25. void *p = calloc (m, n);
  26. if (!p)
  27. exit_fatal ("calloc: %s\n", strerror (errno));
  28. return p;
  29. }
  30. static void *
  31. xrealloc (void *o, size_t n)
  32. {
  33. void *p = realloc (o, n);
  34. if (!p && n)
  35. exit_fatal ("realloc: %s\n", strerror (errno));
  36. return p;
  37. }
  38. // --- Dynamically allocated strings -------------------------------------------
  39. struct str
  40. {
  41. char *str; ///< String data, null terminated
  42. size_t alloc; ///< How many bytes are allocated
  43. size_t len; ///< How long the string actually is
  44. };
  45. static void
  46. str_init (struct str *self)
  47. {
  48. self->len = 0;
  49. self->str = xcalloc (1, (self->alloc = 16));
  50. }
  51. static void
  52. str_ensure_space (struct str *self, size_t n)
  53. {
  54. // We allocate at least one more byte for the terminating null character
  55. size_t new_alloc = self->alloc;
  56. while (new_alloc <= self->len + n)
  57. new_alloc <<= 1;
  58. if (new_alloc != self->alloc)
  59. self->str = xrealloc (self->str, (self->alloc = new_alloc));
  60. }
  61. static void
  62. str_append_data (struct str *self, const void *data, size_t n)
  63. {
  64. str_ensure_space (self, n);
  65. memcpy (self->str + self->len, data, n);
  66. self->str[self->len += n] = '\0';
  67. }
  68. static void
  69. str_append_c (struct str *self, char c)
  70. {
  71. str_append_data (self, &c, 1);
  72. }
  73. // --- Application -------------------------------------------------------------
  74. enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END,
  75. EAT, INCACC, DECACC };
  76. bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
  77. struct instruction { enum command cmd; size_t arg; };
  78. #define INSTRUCTION(c, a) (struct instruction) { (c), (a) }
  79. // - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  80. FILE *input; ///< User input
  81. static int
  82. cin (void)
  83. {
  84. int c = fgetc (input);
  85. assert (c != EOF);
  86. return c;
  87. }
  88. // - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  89. #ifdef DEBUG
  90. static void
  91. debug_dump (const char *filename, struct instruction *in, size_t len)
  92. {
  93. FILE *fp = fopen (filename, "w");
  94. long indent = 0;
  95. for (size_t i = 0; i < len; i++)
  96. {
  97. if (in[i].cmd == END)
  98. indent--;
  99. for (long k = 0; k < indent; k++)
  100. fprintf (fp, " ");
  101. switch (in[i].cmd)
  102. {
  103. case RIGHT: fprintf (fp, "RIGHT %zu\n", in[i].arg); break;
  104. case LEFT: fprintf (fp, "LEFT %zu\n", in[i].arg); break;
  105. case INC: fprintf (fp, "INC %zu\n", in[i].arg); break;
  106. case DEC: fprintf (fp, "DEC %zu\n", in[i].arg); break;
  107. case OUT: fprintf (fp, "OUT %zu\n", in[i].arg); break;
  108. case IN: fprintf (fp, "IN %zu\n", in[i].arg); break;
  109. case BEGIN: fprintf (fp, "BEGIN %zu\n", in[i].arg); break;
  110. case END: fprintf (fp, "END %zu\n", in[i].arg); break;
  111. case SET: fprintf (fp, "SET %zu\n", in[i].arg); break;
  112. case EAT: fprintf (fp, "EAT %zu\n", in[i].arg); break;
  113. case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break;
  114. case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break;
  115. }
  116. if (in[i].cmd == BEGIN)
  117. indent++;
  118. }
  119. fclose (fp);
  120. }
  121. #else
  122. #define debug_dump(...)
  123. #endif
  124. int
  125. main (int argc, char *argv[])
  126. {
  127. (void) argc;
  128. (void) argv;
  129. struct str program;
  130. str_init (&program);
  131. int c;
  132. while ((c = fgetc (stdin)) != EOF)
  133. str_append_c (&program, c);
  134. if (ferror (stdin))
  135. exit_fatal ("can't read program\n");
  136. if (!(input = fopen ("/dev/tty", "rb")))
  137. exit_fatal ("can't open terminal for reading\n");
  138. // - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  139. struct instruction *parsed = xcalloc (sizeof *parsed, program.len);
  140. size_t parsed_len = 0;
  141. for (size_t i = 0; i < program.len; i++)
  142. {
  143. enum command cmd;
  144. switch (program.str[i])
  145. {
  146. case '>': cmd = RIGHT; break;
  147. case '<': cmd = LEFT; break;
  148. case '+': cmd = INC; break;
  149. case '-': cmd = DEC; break;
  150. case '.': cmd = OUT; break;
  151. case ',': cmd = IN; break;
  152. case '[': cmd = BEGIN; break;
  153. case ']': cmd = END; break;
  154. default: continue;
  155. }
  156. // The most basic optimization is to group identical commands together
  157. if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd)
  158. parsed_len++;
  159. parsed[parsed_len - 1].cmd = cmd;
  160. parsed[parsed_len - 1].arg++;
  161. }
  162. // - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
  163. debug_dump ("bf-no-opt.txt", parsed, parsed_len);
  164. size_t in = 0, out = 0;
  165. for (; in < parsed_len; in++, out++)
  166. {
  167. // This shows up in mandelbrot.bf a lot but actually helps hanoi.bf
  168. if (in + 5 < parsed_len
  169. && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
  170. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  171. && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT
  172. && parsed[in + 2].arg == parsed[in + 4].arg
  173. && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
  174. && parsed[in + 3].arg == 1)
  175. {
  176. // This mustn't make the move when the cell is zero already
  177. parsed[out] = parsed[in];
  178. parsed[out + 1] = INSTRUCTION (EAT, 0);
  179. parsed[out + 2] = parsed[in + 2];
  180. parsed[out + 3] = INSTRUCTION
  181. (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
  182. parsed[out + 4] = parsed[in + 4];
  183. // This disables the looping further in the code;
  184. // this doesn't have much of an effect in practice
  185. parsed[out + 5] = INSTRUCTION (END, 0);
  186. in += 5;
  187. out += 5;
  188. }
  189. // The simpler case that cannot crash and thus can avoid the loop
  190. else if (in + 5 < parsed_len
  191. && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
  192. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  193. && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT
  194. && parsed[in + 2].arg == parsed[in + 4].arg
  195. && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
  196. && parsed[in + 3].arg == 1)
  197. {
  198. parsed[out] = INSTRUCTION (EAT, 0);
  199. parsed[out + 1] = parsed[in + 2];
  200. parsed[out + 2] = INSTRUCTION
  201. (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
  202. parsed[out + 3] = parsed[in + 4];
  203. in += 5;
  204. out += 3;
  205. }
  206. else if (in + 2 < parsed_len
  207. && parsed[in ].cmd == BEGIN
  208. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  209. && parsed[in + 2].cmd == END)
  210. {
  211. parsed[out] = INSTRUCTION (SET, 0);
  212. in += 2;
  213. }
  214. else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC)
  215. parsed[--out].arg += parsed[in].arg;
  216. else if (out != in)
  217. parsed[out] = parsed[in];
  218. }
  219. parsed_len = out;
  220. for (in = 0, out = 0; in < parsed_len; in++, out++)
  221. {
  222. ssize_t dir = 0;
  223. if (parsed[in].cmd == RIGHT)
  224. dir = parsed[in].arg;
  225. else if (parsed[in].cmd == LEFT)
  226. dir = -(ssize_t) parsed[in].arg;
  227. else
  228. {
  229. parsed[out] = parsed[in];
  230. continue;
  231. }
  232. for (; in + 1 < parsed_len; in++)
  233. {
  234. if (parsed[in + 1].cmd == RIGHT)
  235. dir += parsed[in + 1].arg;
  236. else if (parsed[in + 1].cmd == LEFT)
  237. dir -= (ssize_t) parsed[in + 1].arg;
  238. else
  239. break;
  240. }
  241. if (!dir)
  242. out--;
  243. else if (dir > 0)
  244. parsed[out] = INSTRUCTION (RIGHT, dir);
  245. else
  246. parsed[out] = INSTRUCTION (LEFT, -dir);
  247. }
  248. parsed_len = out;
  249. debug_dump ("bf-optimized.txt", parsed, parsed_len);
  250. // - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  251. size_t nesting = 0;
  252. size_t *stack = xcalloc (sizeof *stack, parsed_len);
  253. for (size_t i = 0; i < parsed_len; i++)
  254. {
  255. switch (parsed[i].cmd)
  256. {
  257. case BEGIN:
  258. stack[nesting++] = i;
  259. break;
  260. case END:
  261. assert (nesting > 0);
  262. --nesting;
  263. parsed[stack[nesting]].arg = i + 1;
  264. // Looping can be disabled by optimizations
  265. if (parsed[i].arg)
  266. parsed[i].arg = stack[nesting] + 1;
  267. default:
  268. break;
  269. }
  270. }
  271. free (stack);
  272. assert (nesting == 0);
  273. debug_dump ("bf-final.txt", parsed, parsed_len);
  274. // - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  275. // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15;
  276. // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
  277. str_init (&program);
  278. size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1);
  279. uint8_t *arith = xcalloc (sizeof *arith, parsed_len);
  280. #define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); }
  281. #define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); }
  282. #define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); }
  283. CODE ("\x48\x89\xF8") // mov rax, rdi
  284. CODE ("\x30\xDB") // xor bl, bl
  285. for (size_t i = 0; i < parsed_len; i++)
  286. {
  287. offsets[i] = program.len;
  288. size_t arg = parsed[i].arg;
  289. assert (arg <= UINT32_MAX);
  290. // Don't save what we've just loaded
  291. if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
  292. if (i < 2 || i + 1 >= parsed_len
  293. || (parsed[i - 2].cmd != LEFT && parsed[i - 2].cmd != RIGHT)
  294. || parsed[i - 1].cmd != BEGIN
  295. || parsed[i + 1].cmd != END)
  296. CODE ("\x88\x18") // mov [rax], bl
  297. switch (parsed[i].cmd)
  298. {
  299. case RIGHT:
  300. // add rax, "arg" -- optimistic, no boundary checking
  301. if (arg > INT8_MAX)
  302. { CODE ("\x48\x05") DWRD (arg) }
  303. else
  304. { CODE ("\x48\x83\xC0") str_append_c (&program, arg); }
  305. break;
  306. case LEFT:
  307. // sub rax, "arg" -- optimistic, no boundary checking
  308. if (arg > INT8_MAX)
  309. { CODE ("\x48\x2D") DWRD (arg) }
  310. else
  311. { CODE ("\x48\x83\xE8") str_append_c (&program, arg); }
  312. break;
  313. case EAT:
  314. CODE ("\x41\x88\xDC") // mov r12b, bl
  315. CODE ("\x30\xDB") // xor bl, bl
  316. arith[i] = 1;
  317. break;
  318. case INCACC:
  319. CODE ("\x44\x00\xE3") // add bl, r12b
  320. arith[i] = 1;
  321. break;
  322. case DECACC:
  323. CODE ("\x44\x28\xE3") // sub bl, r12b
  324. arith[i] = 1;
  325. break;
  326. case INC:
  327. CODE ("\x80\xC3") // add bl, "arg"
  328. str_append_c (&program, arg);
  329. arith[i] = 1;
  330. break;
  331. case DEC:
  332. CODE ("\x80\xEB") // sub bl, "arg"
  333. str_append_c (&program, arg);
  334. arith[i] = 1;
  335. break;
  336. case SET:
  337. CODE ("\xB3") // mov bl, "arg"
  338. str_append_c (&program, arg);
  339. break;
  340. case OUT:
  341. CODE ("\x50\x53") // push rax, push rbx
  342. CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl
  343. CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout"
  344. CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc"
  345. CODE ("\xFF\xD0") // call rax
  346. CODE ("\x5B\x58") // pop rbx, pop rax
  347. break;
  348. case IN:
  349. CODE ("\x50") // push rax
  350. CODE ("\x48\xB8") WORD (cin) // mov rax, "cin"
  351. CODE ("\xFF\xD0") // call rax
  352. CODE ("\x88\xC3") // mov bl, al
  353. CODE ("\x58") // pop rax
  354. break;
  355. case BEGIN:
  356. // Don't test the register when the flag has been set already;
  357. // this doesn't have much of an effect in practice
  358. if (!i || !arith[i - 1])
  359. CODE ("\x84\xDB") // test bl, bl
  360. CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]"
  361. break;
  362. case END:
  363. // We know that the cell is zero, make this an "if", not a "loop";
  364. // this doesn't have much of an effect in practice
  365. if (!arg)
  366. break;
  367. if (!i || !arith[i - 1])
  368. CODE ("\x84\xDB") // test bl, bl
  369. CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]"
  370. break;
  371. }
  372. // No sense in reading it out when we overwrite it immediately;
  373. // this doesn't have much of an effect in practice
  374. if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
  375. if (i + 1 >= parsed_len
  376. || parsed[i + 1].cmd != SET)
  377. CODE ("\x8A\x18") // mov bl, [rax]
  378. }
  379. // When there is a loop at the end we need to be able to jump past it
  380. offsets[parsed_len] = program.len;
  381. str_append_c (&program, '\xC3'); // ret
  382. // Now that we know where each instruction is, fill in relative jumps;
  383. // this must accurately reflect code generators for BEGIN and END
  384. for (size_t i = 0; i < parsed_len; i++)
  385. {
  386. if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END)
  387. || !parsed[i].arg)
  388. continue;
  389. size_t fixup = offsets[i] + 2;
  390. if (!i || !arith[i - 1])
  391. fixup += 2;
  392. *(int32_t *)(program.str + fixup) =
  393. ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4));
  394. }
  395. free (offsets);
  396. free (arith);
  397. #ifdef DEBUG
  398. FILE *bin = fopen ("bf-jit.bin", "w");
  399. fwrite (program.str, program.len, 1, bin);
  400. fclose (bin);
  401. #endif
  402. // - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  403. // Some systems may have W^X
  404. void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE,
  405. MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  406. if (!executable)
  407. exit_fatal ("mmap: %s\n", strerror (errno));
  408. memcpy (executable, program.str, program.len);
  409. if (mprotect (executable, program.len, PROT_READ | PROT_EXEC))
  410. exit_fatal ("mprotect: %s\n", strerror (errno));
  411. // We create crash zones on both ends of the tape for some minimum safety
  412. long pagesz = sysconf (_SC_PAGESIZE);
  413. assert (pagesz > 0);
  414. const size_t tape_len = (1 << 20) + 2 * pagesz;
  415. char *tape = mmap (NULL, tape_len, PROT_READ | PROT_WRITE,
  416. MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  417. if (!tape)
  418. exit_fatal ("mmap: %s\n", strerror (errno));
  419. memset (tape, 0, tape_len);
  420. if (mprotect (tape, pagesz, PROT_NONE)
  421. || mprotect (tape + tape_len - pagesz, pagesz, PROT_NONE))
  422. exit_fatal ("mprotect: %s\n", strerror (errno));
  423. ((void (*) (char *)) executable)(tape + pagesz);
  424. return 0;
  425. }