Brainfuck compiler
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bf-jit-opt.c 14KB

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. // This is an exercise in futility more than anything else
  2. #define _GNU_SOURCE
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include <stdint.h>
  7. #include <stdbool.h>
  8. #include <assert.h>
  9. #include <errno.h>
  10. #if (defined __x86_64__ || defined __amd64__) && defined __unix__
  11. #include <sys/mman.h>
  12. #else
  13. #error Platform not supported
  14. #endif
  15. #define exit_fatal(...) \
  16. do { \
  17. fprintf (stderr, "fatal: " __VA_ARGS__); \
  18. exit (EXIT_FAILURE); \
  19. } while (0)
  20. // --- Safe memory management --------------------------------------------------
  21. static void *
  22. xcalloc (size_t m, size_t n)
  23. {
  24. void *p = calloc (m, n);
  25. if (!p)
  26. exit_fatal ("calloc: %s\n", strerror (errno));
  27. return p;
  28. }
  29. static void *
  30. xrealloc (void *o, size_t n)
  31. {
  32. void *p = realloc (o, n);
  33. if (!p && n)
  34. exit_fatal ("realloc: %s\n", strerror (errno));
  35. return p;
  36. }
  37. // --- Dynamically allocated strings -------------------------------------------
  38. struct str
  39. {
  40. char *str; ///< String data, null terminated
  41. size_t alloc; ///< How many bytes are allocated
  42. size_t len; ///< How long the string actually is
  43. };
  44. static void
  45. str_init (struct str *self)
  46. {
  47. self->len = 0;
  48. self->str = xcalloc (1, (self->alloc = 16));
  49. }
  50. static void
  51. str_ensure_space (struct str *self, size_t n)
  52. {
  53. // We allocate at least one more byte for the terminating null character
  54. size_t new_alloc = self->alloc;
  55. while (new_alloc <= self->len + n)
  56. new_alloc <<= 1;
  57. if (new_alloc != self->alloc)
  58. self->str = xrealloc (self->str, (self->alloc = new_alloc));
  59. }
  60. static void
  61. str_append_data (struct str *self, const void *data, size_t n)
  62. {
  63. str_ensure_space (self, n);
  64. memcpy (self->str + self->len, data, n);
  65. self->str[self->len += n] = '\0';
  66. }
  67. static void
  68. str_append_c (struct str *self, char c)
  69. {
  70. str_append_data (self, &c, 1);
  71. }
  72. // --- Application -------------------------------------------------------------
  73. struct str data; ///< Data tape
  74. volatile size_t dataptr; ///< Current location on the tape
  75. FILE *input; ///< User input
  76. enum command { RIGHT, LEFT, INC, DEC, SET, IN, OUT, BEGIN, END,
  77. EAT, INCACC, DECACC };
  78. bool grouped[] = { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
  79. struct instruction { enum command cmd; size_t arg; };
  80. #define INSTRUCTION(c, a) (struct instruction) { (c), (a) }
  81. // - - Callbacks - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  82. // Some things I just really don't want to write in assembly even though it
  83. // is effectively a big performance hit, eliminating the advantage of JIT
  84. static void
  85. right (size_t arg)
  86. {
  87. assert (SIZE_MAX - dataptr > arg);
  88. dataptr += arg;
  89. while (dataptr >= data.len)
  90. str_append_c (&data, 0);
  91. }
  92. static void
  93. left (size_t arg)
  94. {
  95. assert (dataptr >= arg);
  96. dataptr -= arg;
  97. }
  98. static void
  99. cin (void)
  100. {
  101. int c;
  102. data.str[dataptr] = c = fgetc (input);
  103. assert (c != EOF);
  104. }
  105. // - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  106. #ifdef DEBUG
  107. static void
  108. debug_dump (const char *filename, struct instruction *in, size_t len)
  109. {
  110. FILE *fp = fopen (filename, "w");
  111. long indent = 0;
  112. for (size_t i = 0; i < len; i++)
  113. {
  114. if (in[i].cmd == END)
  115. indent--;
  116. for (long k = 0; k < indent; k++)
  117. fprintf (fp, " ");
  118. switch (in[i].cmd)
  119. {
  120. case RIGHT: fprintf (fp, "RIGHT %zu\n", in[i].arg); break;
  121. case LEFT: fprintf (fp, "LEFT %zu\n", in[i].arg); break;
  122. case INC: fprintf (fp, "INC %zu\n", in[i].arg); break;
  123. case DEC: fprintf (fp, "DEC %zu\n", in[i].arg); break;
  124. case OUT: fprintf (fp, "OUT %zu\n", in[i].arg); break;
  125. case IN: fprintf (fp, "IN %zu\n", in[i].arg); break;
  126. case BEGIN: fprintf (fp, "BEGIN %zu\n", in[i].arg); break;
  127. case END: fprintf (fp, "END %zu\n", in[i].arg); break;
  128. case SET: fprintf (fp, "SET %zu\n", in[i].arg); break;
  129. case EAT: fprintf (fp, "EAT %zu\n", in[i].arg); break;
  130. case INCACC: fprintf (fp, "INCACC %zu\n", in[i].arg); break;
  131. case DECACC: fprintf (fp, "DECACC %zu\n", in[i].arg); break;
  132. }
  133. if (in[i].cmd == BEGIN)
  134. indent++;
  135. }
  136. fclose (fp);
  137. }
  138. #else
  139. #define debug_dump(...)
  140. #endif
  141. int
  142. main (int argc, char *argv[])
  143. {
  144. (void) argc;
  145. (void) argv;
  146. struct str program;
  147. str_init (&program);
  148. int c;
  149. while ((c = fgetc (stdin)) != EOF)
  150. str_append_c (&program, c);
  151. if (ferror (stdin))
  152. exit_fatal ("can't read program\n");
  153. if (!(input = fopen ("/dev/tty", "rb")))
  154. exit_fatal ("can't open terminal for reading\n");
  155. // - - Decode and group - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  156. struct instruction *parsed = xcalloc (sizeof *parsed, program.len);
  157. size_t parsed_len = 0;
  158. for (size_t i = 0; i < program.len; i++)
  159. {
  160. enum command cmd;
  161. switch (program.str[i])
  162. {
  163. case '>': cmd = RIGHT; break;
  164. case '<': cmd = LEFT; break;
  165. case '+': cmd = INC; break;
  166. case '-': cmd = DEC; break;
  167. case '.': cmd = OUT; break;
  168. case ',': cmd = IN; break;
  169. case '[': cmd = BEGIN; break;
  170. case ']': cmd = END; break;
  171. default: continue;
  172. }
  173. // The most basic optimization is to group identical commands together
  174. if (!parsed_len || !grouped[cmd] || parsed[parsed_len - 1].cmd != cmd)
  175. parsed_len++;
  176. parsed[parsed_len - 1].cmd = cmd;
  177. parsed[parsed_len - 1].arg++;
  178. }
  179. // - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
  180. debug_dump ("bf-no-opt.txt", parsed, parsed_len);
  181. size_t in = 0, out = 0;
  182. for (; in < parsed_len; in++, out++)
  183. {
  184. // This shows up in mandelbrot.bf a lot but actually helps hanoi.bf
  185. if (in + 5 < parsed_len
  186. && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
  187. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  188. && parsed[in + 2].cmd == LEFT && parsed[in + 4].cmd == RIGHT
  189. && parsed[in + 2].arg == parsed[in + 4].arg
  190. && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
  191. && parsed[in + 3].arg == 1)
  192. {
  193. // This mustn't make the move when the cell is zero already
  194. parsed[out] = parsed[in];
  195. parsed[out + 1] = INSTRUCTION (EAT, 0);
  196. parsed[out + 2] = parsed[in + 2];
  197. parsed[out + 3] = INSTRUCTION
  198. (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
  199. parsed[out + 4] = parsed[in + 4];
  200. // This disables the looping further in the code;
  201. // this doesn't have much of an effect in practice
  202. parsed[out + 5] = INSTRUCTION (END, 0);
  203. in += 5;
  204. out += 5;
  205. }
  206. // The simpler case that cannot crash and thus can avoid the loop
  207. else if (in + 5 < parsed_len
  208. && parsed[in].cmd == BEGIN && parsed[in + 5].cmd == END
  209. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  210. && parsed[in + 2].cmd == RIGHT && parsed[in + 4].cmd == LEFT
  211. && parsed[in + 2].arg == parsed[in + 4].arg
  212. && (parsed[in + 3].cmd == INC || parsed[in + 3].cmd == DEC)
  213. && parsed[in + 3].arg == 1)
  214. {
  215. parsed[out] = INSTRUCTION (EAT, 0);
  216. parsed[out + 1] = parsed[in + 2];
  217. parsed[out + 2] = INSTRUCTION
  218. (parsed[in + 3].cmd == INC ? INCACC : DECACC, 0);
  219. parsed[out + 3] = parsed[in + 4];
  220. in += 5;
  221. out += 3;
  222. }
  223. else if (in + 2 < parsed_len
  224. && parsed[in ].cmd == BEGIN
  225. && parsed[in + 1].cmd == DEC && parsed[in + 1].arg == 1
  226. && parsed[in + 2].cmd == END)
  227. {
  228. parsed[out] = INSTRUCTION (SET, 0);
  229. in += 2;
  230. }
  231. else if (out && parsed[out - 1].cmd == SET && parsed[in].cmd == INC)
  232. parsed[--out].arg += parsed[in].arg;
  233. else if (out != in)
  234. parsed[out] = parsed[in];
  235. }
  236. parsed_len = out;
  237. for (in = 0, out = 0; in < parsed_len; in++, out++)
  238. {
  239. ssize_t dir = 0;
  240. if (parsed[in].cmd == RIGHT)
  241. dir = parsed[in].arg;
  242. else if (parsed[in].cmd == LEFT)
  243. dir = -(ssize_t) parsed[in].arg;
  244. else
  245. {
  246. parsed[out] = parsed[in];
  247. continue;
  248. }
  249. for (; in + 1 < parsed_len; in++)
  250. {
  251. if (parsed[in + 1].cmd == RIGHT)
  252. dir += parsed[in + 1].arg;
  253. else if (parsed[in + 1].cmd == LEFT)
  254. dir -= (ssize_t) parsed[in + 1].arg;
  255. else
  256. break;
  257. }
  258. if (!dir)
  259. out--;
  260. else if (dir > 0)
  261. parsed[out] = INSTRUCTION (RIGHT, dir);
  262. else
  263. parsed[out] = INSTRUCTION (LEFT, -dir);
  264. }
  265. parsed_len = out;
  266. debug_dump ("bf-optimized.txt", parsed, parsed_len);
  267. // - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  268. size_t nesting = 0;
  269. size_t *stack = xcalloc (sizeof *stack, parsed_len);
  270. for (size_t i = 0; i < parsed_len; i++)
  271. {
  272. switch (parsed[i].cmd)
  273. {
  274. case BEGIN:
  275. stack[nesting++] = i;
  276. break;
  277. case END:
  278. assert (nesting > 0);
  279. --nesting;
  280. parsed[stack[nesting]].arg = i + 1;
  281. // Looping can be disabled by optimizations
  282. if (parsed[i].arg)
  283. parsed[i].arg = stack[nesting] + 1;
  284. default:
  285. break;
  286. }
  287. }
  288. free (stack);
  289. assert (nesting == 0);
  290. debug_dump ("bf-final.txt", parsed, parsed_len);
  291. // - - JIT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  292. // Functions preserve the registers rbx, rsp, rbp, r12, r13, r14, and r15;
  293. // while rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
  294. str_init (&program);
  295. size_t *offsets = xcalloc (sizeof *offsets, parsed_len + 1);
  296. uint8_t *arith = xcalloc (sizeof *arith, parsed_len);
  297. #define CODE(x) { char t[] = x; str_append_data (&program, t, sizeof t - 1); }
  298. #define WORD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 8); }
  299. #define DWRD(x) { size_t t = (size_t)(x); str_append_data (&program, &t, 4); }
  300. CODE ("\x49\xBD") WORD (&dataptr) // mov r13, qword "&dataptr"
  301. CODE ("\x49\xBF") WORD (&data.str) // mov r15, qword "&data.str"
  302. CODE ("\x4D\x8B\x37") // mov r14, qword [r15]
  303. CODE ("\x30\xDB") // xor bl, bl
  304. for (size_t i = 0; i < parsed_len; i++)
  305. {
  306. offsets[i] = program.len;
  307. size_t arg = parsed[i].arg;
  308. assert (arg <= UINT32_MAX);
  309. switch (parsed[i].cmd)
  310. {
  311. case RIGHT:
  312. CODE ("\x41\x88\x1E") // mov [r14], bl
  313. CODE ("\xBF") DWRD (arg) // mov edi, "arg"
  314. CODE ("\x48\xB8") WORD (right) // mov rax, "right"
  315. CODE ("\xFF\xD0") // call rax
  316. // The data could get reallocated, so reload the address
  317. CODE ("\x4D\x8B\x37") // mov r14, qword [r15]
  318. CODE ("\x4D\x03\x75\x00") // add r14, [r13]
  319. break;
  320. case LEFT:
  321. CODE ("\x41\x88\x1E") // mov [r14], bl
  322. CODE ("\xBF") DWRD (arg) // mov edi, "arg"
  323. CODE ("\x49\x29\xFE") // sub r14, rdi -- optimistic
  324. CODE ("\x48\xB8") WORD (left) // mov rax, "left"
  325. CODE ("\xFF\xD0") // call rax
  326. break;
  327. case EAT:
  328. CODE ("\x41\x88\xDC") // mov r12b, bl
  329. CODE ("\x30\xDB") // xor bl, bl
  330. arith[i] = 1;
  331. break;
  332. case INCACC:
  333. CODE ("\x44\x00\xE3") // add bl, r12b
  334. arith[i] = 1;
  335. break;
  336. case DECACC:
  337. CODE ("\x44\x28\xE3") // sub bl, r12b
  338. arith[i] = 1;
  339. break;
  340. case INC:
  341. CODE ("\x80\xC3") // add bl, "arg"
  342. str_append_c (&program, arg);
  343. arith[i] = 1;
  344. break;
  345. case DEC:
  346. CODE ("\x80\xEB") // sub bl, "arg"
  347. str_append_c (&program, arg);
  348. arith[i] = 1;
  349. break;
  350. case SET:
  351. CODE ("\xB3") // mov bl, "arg"
  352. str_append_c (&program, arg);
  353. break;
  354. case OUT:
  355. CODE ("\x48\x0F\xB6\xFB") // movzx rdi, bl
  356. CODE ("\x48\xBE") WORD (stdout) // mov rsi, "stdout"
  357. CODE ("\x48\xB8") WORD (fputc) // mov rax, "fputc"
  358. CODE ("\xFF\xD0") // call rax
  359. break;
  360. case IN:
  361. CODE ("\x48\xB8") WORD (cin) // mov rax, "cin"
  362. CODE ("\xFF\xD0") // call rax
  363. CODE ("\x41\x8A\x1E") // mov bl, [r14]
  364. break;
  365. case BEGIN:
  366. // Don't test the register when the flag has been set already;
  367. // this doesn't have much of an effect in practice
  368. if (!i || !arith[i - 1])
  369. CODE ("\x84\xDB") // test bl, bl
  370. CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[i]"
  371. break;
  372. case END:
  373. // We know that the cell is zero, make this an "if", not a "loop";
  374. // this doesn't have much of an effect in practice
  375. if (!arg)
  376. break;
  377. if (!i || !arith[i - 1])
  378. CODE ("\x84\xDB") // test bl, bl
  379. CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[i]"
  380. break;
  381. }
  382. // No sense in reading it out when we overwrite it immediately;
  383. // this doesn't have much of an effect in practice
  384. if (parsed[i].cmd == LEFT || parsed[i].cmd == RIGHT)
  385. if (i + 1 >= parsed_len
  386. || parsed[i + 1].cmd != SET)
  387. CODE ("\x41\x8A\x1E") // mov bl, [r14]
  388. }
  389. // When there is a loop at the end we need to be able to jump past it
  390. offsets[parsed_len] = program.len;
  391. str_append_c (&program, '\xC3'); // ret
  392. // Now that we know where each instruction is, fill in relative jumps;
  393. // this must accurately reflect code generators for BEGIN and END
  394. for (size_t i = 0; i < parsed_len; i++)
  395. {
  396. if ((parsed[i].cmd != BEGIN && parsed[i].cmd != END)
  397. || !parsed[i].arg)
  398. continue;
  399. size_t fixup = offsets[i] + 2;
  400. if (!i || !arith[i - 1])
  401. fixup += 2;
  402. *(int32_t *)(program.str + fixup) =
  403. ((intptr_t)(offsets[parsed[i].arg]) - (intptr_t)(fixup + 4));
  404. }
  405. free (offsets);
  406. free (arith);
  407. #ifdef DEBUG
  408. FILE *bin = fopen ("bf-jit.bin", "w");
  409. fwrite (program.str, program.len, 1, bin);
  410. fclose (bin);
  411. #endif
  412. // - - Runtime - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  413. // Some systems may have W^X
  414. void *executable = mmap (NULL, program.len, PROT_READ | PROT_WRITE,
  415. MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  416. if (!executable)
  417. exit_fatal ("mmap: %s\n", strerror (errno));
  418. memcpy (executable, program.str, program.len);
  419. if (mprotect (executable, program.len, PROT_READ | PROT_EXEC))
  420. exit_fatal ("mprotect: %s\n", strerror (errno));
  421. str_init (&data);
  422. str_append_c (&data, 0);
  423. ((void (*) (void)) executable)();
  424. return 0;
  425. }