Brainfuck compiler
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bfc-amd64.c 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741
  1. // This is an exercise in futility more than anything else
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <stdint.h>
  6. #include <stdbool.h>
  7. #include <assert.h>
  8. #include <errno.h>
  9. #ifdef __unix__
  10. #include <fcntl.h>
  11. #endif
  12. #define exit_fatal(...) \
  13. do { \
  14. fprintf (stderr, "fatal: " __VA_ARGS__); \
  15. exit (EXIT_FAILURE); \
  16. } while (0)
  17. // --- Safe memory management --------------------------------------------------
  18. static void *
  19. xcalloc (size_t m, size_t n)
  20. {
  21. void *p = calloc (m, n);
  22. if (!p)
  23. exit_fatal ("calloc: %s\n", strerror (errno));
  24. return p;
  25. }
  26. static void *
  27. xrealloc (void *o, size_t n)
  28. {
  29. void *p = realloc (o, n);
  30. if (!p && n)
  31. exit_fatal ("realloc: %s\n", strerror (errno));
  32. return p;
  33. }
  34. // --- Dynamically allocated strings -------------------------------------------
  35. struct str
  36. {
  37. char *str; ///< String data, null terminated
  38. size_t alloc; ///< How many bytes are allocated
  39. size_t len; ///< How long the string actually is
  40. };
  41. static void
  42. str_init (struct str *self)
  43. {
  44. self->len = 0;
  45. self->str = xcalloc (1, (self->alloc = 16));
  46. }
  47. static void
  48. str_ensure_space (struct str *self, size_t n)
  49. {
  50. // We allocate at least one more byte for the terminating null character
  51. size_t new_alloc = self->alloc;
  52. while (new_alloc <= self->len + n)
  53. new_alloc <<= 1;
  54. if (new_alloc != self->alloc)
  55. self->str = xrealloc (self->str, (self->alloc = new_alloc));
  56. }
  57. static void
  58. str_append_data (struct str *self, const void *data, size_t n)
  59. {
  60. str_ensure_space (self, n);
  61. memcpy (self->str + self->len, data, n);
  62. self->str[self->len += n] = '\0';
  63. }
  64. static void
  65. str_append_c (struct str *self, char c)
  66. {
  67. str_append_data (self, &c, 1);
  68. }
  69. // --- Application -------------------------------------------------------------
  70. enum command
  71. {
  72. RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END,
  73. SET, EAT, INCACC, DECACC
  74. };
  75. bool grouped[] = { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
  76. struct instruction { enum command cmd; int offset; size_t arg; };
  77. #define INSTRUCTION(c, o, a) (struct instruction) { (c), (o), (a) }
  78. // - - Debugging - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  79. #ifdef DEBUG
  80. static void
  81. debug_dump_instruction (FILE *fp, const struct instruction *in)
  82. {
  83. const char *name;
  84. switch (in->cmd)
  85. {
  86. case RIGHT: name = "RIGHT "; break;
  87. case LEFT: name = "LEFT "; break;
  88. case INC: name = "INC "; break;
  89. case DEC: name = "DEC "; break;
  90. case OUT: name = "OUT "; break;
  91. case IN: name = "IN "; break;
  92. case BEGIN: name = "BEGIN "; break;
  93. case END: name = "END "; break;
  94. case SET: name = "SET "; break;
  95. case EAT: name = "EAT "; break;
  96. case INCACC: name = "INCACC"; break;
  97. case DECACC: name = "DECACC"; break;
  98. }
  99. fprintf (fp, "%s %zu", name, in->arg);
  100. if (in->offset != 0)
  101. fprintf (fp, " [%d]", in->offset);
  102. fprintf (fp, "\n");
  103. }
  104. static void
  105. debug_dump (const char *filename, struct instruction *in, size_t len)
  106. {
  107. FILE *fp = fopen (filename, "w");
  108. long indent = 0;
  109. for (size_t i = 0; i < len; i++)
  110. {
  111. if (in[i].cmd == END)
  112. indent--;
  113. for (long k = 0; k < indent; k++)
  114. fputs (" ", fp);
  115. debug_dump_instruction (fp, &in[i]);
  116. if (in[i].cmd == BEGIN)
  117. indent++;
  118. }
  119. fclose (fp);
  120. }
  121. #else
  122. #define debug_dump(...)
  123. #endif
  124. // - - Optimization passes - - - - - - - - - - - - - - - - - - - - - - - - - - -
  125. static size_t
  126. optimize_assignment (struct instruction *irb, size_t irb_len)
  127. {
  128. size_t in = 0, out = 0;
  129. for (; in < irb_len; in++, out++)
  130. {
  131. if (in + 2 < irb_len
  132. && irb[in ].cmd == BEGIN
  133. && irb[in + 1].cmd == DEC && irb[in + 1].arg == 1
  134. && irb[in + 2].cmd == END)
  135. {
  136. irb[out] = INSTRUCTION (SET, 0, 0);
  137. in += 2;
  138. }
  139. else if (out && irb[out - 1].cmd == SET && irb[in].cmd == INC)
  140. irb[--out].arg += irb[in].arg;
  141. else if (out != in)
  142. irb[out] = irb[in];
  143. }
  144. return out;
  145. }
  146. // Add the offset of the LEFT/RIGHT instruction to the accumulator
  147. static bool
  148. add_direction_offset (struct instruction *irb, intptr_t *acc)
  149. {
  150. if (irb->cmd == RIGHT)
  151. *acc += irb->arg;
  152. else if (irb->cmd == LEFT)
  153. *acc -= (intptr_t) irb->arg;
  154. else
  155. return false;
  156. return true;
  157. }
  158. // Add offsets to INC/DEC/SET stuck between LEFT/RIGHT
  159. // and compress the LEFT/RIGHT sequences
  160. static size_t
  161. optimize_offseted_inc_dec (struct instruction *irb, size_t irb_len)
  162. {
  163. size_t in = 0, out = 0;
  164. for (in = 0, out = 0; in < irb_len; in++, out++)
  165. {
  166. intptr_t dir = 0;
  167. if (!add_direction_offset (&irb[in], &dir))
  168. {
  169. irb[out] = irb[in];
  170. continue;
  171. }
  172. while (in + 2 < irb_len)
  173. {
  174. // An immediate offset has its limits on x86-64
  175. if (dir < INT8_MIN || dir > INT8_MAX)
  176. break;
  177. intptr_t diff = 0;
  178. if (!add_direction_offset (&irb[in + 2], &diff))
  179. break;
  180. int cmd = irb[in + 1].cmd;
  181. if (cmd != INC && cmd != DEC && cmd != SET)
  182. break;
  183. irb[out] = irb[in + 1];
  184. irb[out].offset = dir;
  185. dir += diff;
  186. out += 1;
  187. in += 2;
  188. }
  189. for (; in + 1 < irb_len; in++)
  190. if (!add_direction_offset (&irb[in + 1], &dir))
  191. break;
  192. if (!dir)
  193. out--;
  194. else if (dir > 0)
  195. irb[out] = INSTRUCTION (RIGHT, 0, dir);
  196. else
  197. irb[out] = INSTRUCTION (LEFT, 0, -dir);
  198. }
  199. return out;
  200. }
  201. // Try to eliminate loops that eat a cell and add/subtract its value
  202. // to/from some other cell
  203. static size_t
  204. optimize_inc_dec_loops (struct instruction *irb, size_t irb_len)
  205. {
  206. size_t in = 0, out = 0;
  207. for (in = 0, out = 0; in < irb_len; in++, out++)
  208. {
  209. irb[out] = irb[in];
  210. if (irb[in].cmd != BEGIN)
  211. continue;
  212. bool ok = false;
  213. size_t count = 0;
  214. for (size_t k = in + 1; k < irb_len; k++)
  215. {
  216. if (irb[k].cmd == END)
  217. {
  218. ok = true;
  219. break;
  220. }
  221. if (irb[k].cmd != INC
  222. && irb[k].cmd != DEC)
  223. break;
  224. count++;
  225. }
  226. if (!ok)
  227. continue;
  228. // Stable sort operations by their offsets, put [0] first
  229. bool sorted;
  230. do
  231. {
  232. sorted = true;
  233. for (size_t k = 1; k < count; k++)
  234. {
  235. if (irb[in + k].offset == 0)
  236. continue;
  237. if (irb[in + k + 1].offset != 0
  238. && irb[in + k].offset <= irb[in + k + 1].offset)
  239. continue;
  240. struct instruction tmp = irb[in + k + 1];
  241. irb[in + k + 1] = irb[in + k];
  242. irb[in + k] = tmp;
  243. sorted = false;
  244. }
  245. }
  246. while (!sorted);
  247. // Abort the optimization on duplicate offsets (complication with [0])
  248. for (size_t k = 1; k < count; k++)
  249. if (irb[in + k].offset == irb[in + k + 1].offset)
  250. ok = false;
  251. // XXX: can't make the code longer either
  252. for (size_t k = 1; k <= count; k++)
  253. if (irb[in + k].arg != 1)
  254. ok = false;
  255. if (!ok
  256. || irb[in + 1].cmd != DEC
  257. || irb[in + 1].offset != 0)
  258. continue;
  259. int min_safe_left_offset = 0;
  260. if (in > 1 && irb[in - 1].cmd == RIGHT)
  261. min_safe_left_offset = -irb[in - 1].arg;
  262. bool cond_needed_for_safety = false;
  263. for (size_t k = 0; k < count; k++)
  264. if (irb[in + k + 1].offset < min_safe_left_offset)
  265. {
  266. cond_needed_for_safety = true;
  267. break;
  268. }
  269. in++;
  270. if (cond_needed_for_safety)
  271. out++;
  272. irb[out] = INSTRUCTION (EAT, 0, 0);
  273. for (size_t k = 1; k < count; k++)
  274. irb[out + k] = INSTRUCTION (irb[in + k].cmd == INC
  275. ? INCACC : DECACC, irb[in + k].offset, 0);
  276. in += count;
  277. out += count;
  278. if (cond_needed_for_safety)
  279. irb[out] = INSTRUCTION (END, 0, 0);
  280. else
  281. out--;
  282. }
  283. return out;
  284. }
  285. // - - Loop pairing - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  286. static void
  287. pair_loops (struct instruction *irb, size_t irb_len)
  288. {
  289. size_t nesting = 0;
  290. size_t *stack = xcalloc (sizeof *stack, irb_len);
  291. for (size_t i = 0; i < irb_len; i++)
  292. {
  293. switch (irb[i].cmd)
  294. {
  295. case BEGIN:
  296. stack[nesting++] = i;
  297. break;
  298. case END:
  299. if (nesting <= 0)
  300. exit_fatal ("unbalanced loops\n");
  301. --nesting;
  302. irb[stack[nesting]].arg = i + 1;
  303. // Looping can be disabled by optimizations
  304. if (irb[i].arg)
  305. irb[i].arg = stack[nesting] + 1;
  306. default:
  307. break;
  308. }
  309. }
  310. free (stack);
  311. if (nesting != 0)
  312. exit_fatal ("unbalanced loops\n");
  313. }
  314. // - - Main - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  315. int
  316. main (int argc, char *argv[])
  317. {
  318. if (argc > 3)
  319. exit_fatal ("usage: %s [INPUT-FILE] [OUTPUT-FILE]\n", argv[0]);
  320. FILE *input_file = stdin;
  321. if (argc > 1 && !(input_file = fopen (argv[1], "r")))
  322. exit_fatal ("fopen: %s: %s\n", argv[1], strerror (errno));
  323. const char *output_path = "a.out";
  324. if (argc > 2)
  325. output_path = argv[2];
  326. struct str buffer;
  327. str_init (&buffer);
  328. int c;
  329. while ((c = fgetc (input_file)) != EOF)
  330. str_append_c (&buffer, c);
  331. if (ferror (input_file))
  332. exit_fatal ("can't read program\n");
  333. fclose (input_file);
  334. // - - Decode, group and optimize - - - - - - - - - - - - - - - - - - - - - - -
  335. // This is our Intermediate Representation Buffer
  336. struct instruction *irb = xcalloc (sizeof *irb, buffer.len);
  337. size_t irb_len = 0;
  338. for (size_t i = 0; i < buffer.len; i++)
  339. {
  340. enum command cmd;
  341. switch (buffer.str[i])
  342. {
  343. case '>': cmd = RIGHT; break;
  344. case '<': cmd = LEFT; break;
  345. case '+': cmd = INC; break;
  346. case '-': cmd = DEC; break;
  347. case '.': cmd = OUT; break;
  348. case ',': cmd = IN; break;
  349. case '[': cmd = BEGIN; break;
  350. case ']': cmd = END; break;
  351. default: continue;
  352. }
  353. // The most basic optimization is to group identical commands together
  354. if (!irb_len || !grouped[cmd] || irb[irb_len - 1].cmd != cmd)
  355. irb_len++;
  356. irb[irb_len - 1].cmd = cmd;
  357. irb[irb_len - 1].arg++;
  358. }
  359. debug_dump ("bf-no-opt.txt", irb, irb_len);
  360. irb_len = optimize_assignment (irb, irb_len);
  361. debug_dump ("bf-pre-offsets.txt", irb, irb_len);
  362. irb_len = optimize_offseted_inc_dec (irb, irb_len);
  363. debug_dump ("bf-pre-incdec-unloop.txt", irb, irb_len);
  364. irb_len = optimize_inc_dec_loops (irb, irb_len);
  365. debug_dump ("bf-optimized.txt", irb, irb_len);
  366. pair_loops (irb, irb_len);
  367. debug_dump ("bf-final.txt", irb, irb_len);
  368. // - - Code generation - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  369. str_init (&buffer);
  370. size_t *offsets = xcalloc (sizeof *offsets, irb_len + 1);
  371. bool *sets_flags = xcalloc (sizeof *sets_flags, irb_len);
  372. #define CODE(x) { char t[] = x; str_append_data (&buffer, t, sizeof t - 1); }
  373. #define LE(v) (uint8_t[]) { v, v>>8, v>>16, v>>24, v>>32, v>>40, v>>48, v>>56 }
  374. #define DB(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 1); }
  375. #define DW(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 2); }
  376. #define DD(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 4); }
  377. #define DQ(x) { uint64_t v = (x); str_append_data (&buffer, LE (v), 8); }
  378. enum
  379. {
  380. ELF_LOAD_CODE = 0x400000, // where code is loaded (usual)
  381. ELF_LOAD_DATA = 0x800000 // where the tape is placed
  382. };
  383. CODE ("\xB8") DD (ELF_LOAD_DATA) // mov rax, "ELF_LOAD_DATA"
  384. CODE ("\x30\xDB") // xor bl, bl
  385. for (size_t i = 0; i < irb_len; i++)
  386. {
  387. offsets[i] = buffer.len;
  388. size_t arg = irb[i].arg;
  389. assert (arg <= UINT32_MAX);
  390. int offset = irb[i].offset;
  391. assert (offset <= INT8_MAX && offset >= INT8_MIN);
  392. // Don't save what we've just loaded
  393. if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
  394. if (i < 2 || i + 1 >= irb_len
  395. || (irb[i - 2].cmd != LEFT && irb[i - 2].cmd != RIGHT)
  396. || irb[i - 1].cmd != BEGIN
  397. || irb[i + 1].cmd != END)
  398. CODE ("\x88\x18") // mov [rax], bl
  399. switch (irb[i].cmd)
  400. {
  401. case RIGHT:
  402. // add rax, "arg" -- optimistic, no boundary checking
  403. if (arg > INT8_MAX) { CODE ("\x48\x05") DD (arg) }
  404. else { CODE ("\x48\x83\xC0") DB (arg) }
  405. break;
  406. case LEFT:
  407. // sub rax, "arg" -- optimistic, no boundary checking
  408. if (arg > INT8_MAX) { CODE ("\x48\x2D") DD (arg) }
  409. else { CODE ("\x48\x83\xE8") DB (arg) }
  410. break;
  411. case EAT:
  412. // NOTE: the kernel destroys rcx and r11 on syscalls,
  413. // there must be no OUT or IN between EAT and INCACC/DECACC
  414. CODE ("\x88\xD9" "\x30\xDB") // mov cl, bl; xor bl, bl
  415. sets_flags[i] = true;
  416. break;
  417. case INCACC:
  418. if (offset)
  419. {
  420. CODE ("\x00\x48") DB (offset) // add [rax+"offset"], cl
  421. }
  422. else
  423. {
  424. CODE ("\x00\xCB") // add bl, cl
  425. sets_flags[i] = true;
  426. }
  427. break;
  428. case DECACC:
  429. if (offset)
  430. {
  431. CODE ("\x28\x48") DB (offset) // sub [rax+"offset"], cl
  432. }
  433. else
  434. {
  435. CODE ("\x28\xCB") // sub bl, cl
  436. sets_flags[i] = true;
  437. }
  438. break;
  439. case INC:
  440. if (offset)
  441. {
  442. CODE ("\x80\x40") DB (offset) // add byte [rax+"offset"], "arg"
  443. }
  444. else
  445. {
  446. CODE ("\x80\xC3") // add bl, "arg"
  447. sets_flags[i] = true;
  448. }
  449. DB (arg)
  450. break;
  451. case DEC:
  452. if (offset)
  453. {
  454. CODE ("\x80\x68") DB (offset) // sub byte [rax+"offset"], "arg"
  455. }
  456. else
  457. {
  458. CODE ("\x80\xEB") // sub bl, "arg"
  459. sets_flags[i] = true;
  460. }
  461. DB (arg)
  462. break;
  463. case SET:
  464. if (offset)
  465. {
  466. CODE ("\xC6\x40") DB (offset) // mov byte [rax+"offset"], "arg"
  467. }
  468. else
  469. CODE ("\xB3") // mov bl, "arg"
  470. DB (arg)
  471. break;
  472. case OUT:
  473. CODE ("\xE8") DD (0) // call "write"
  474. break;
  475. case IN:
  476. CODE ("\xE8") DD (0) // call "read"
  477. break;
  478. case BEGIN:
  479. // Don't test the register when the flag has been set already;
  480. // this doesn't have much of an effect in practice
  481. if (!i || !sets_flags[i - 1])
  482. CODE ("\x84\xDB") // test bl, bl
  483. CODE ("\x0F\x84\x00\x00\x00\x00") // jz "offsets[arg]"
  484. break;
  485. case END:
  486. // We know that the cell is zero, make this an "if", not a "loop";
  487. // this doesn't have much of an effect in practice
  488. if (!arg)
  489. break;
  490. if (!i || !sets_flags[i - 1])
  491. CODE ("\x84\xDB") // test bl, bl
  492. CODE ("\x0F\x85\x00\x00\x00\x00") // jnz "offsets[arg]"
  493. break;
  494. }
  495. // No sense in reading it out when we overwrite it immediately;
  496. // this doesn't have much of an effect in practice
  497. if (irb[i].cmd == LEFT || irb[i].cmd == RIGHT)
  498. if (i + 1 >= irb_len
  499. || irb[i + 1].cmd != SET
  500. || irb[i + 1].offset != 0)
  501. CODE ("\x8A\x18") // mov bl, [rax]
  502. }
  503. // When there is a loop at the end we need to be able to jump past it
  504. offsets[irb_len] = buffer.len;
  505. // Write an epilog which handles all the OS interfacing
  506. //
  507. // System V x86-64 ABI:
  508. // rax <-> both syscall number and return value
  509. // args -> rdi, rsi, rdx, r10, r8, r9
  510. // trashed <- rcx, r11
  511. #ifdef TARGET_OPENBSD
  512. enum { SYS_READ = 3, SYS_WRITE = 4, SYS_EXIT = 1 };
  513. #elif defined TARGET_LINUX
  514. enum { SYS_READ = 0, SYS_WRITE = 1, SYS_EXIT = 60 };
  515. #else
  516. #error Target not supported
  517. #endif
  518. CODE ("\xB8") DD (SYS_EXIT) // mov eax, "SYS_EXIT"
  519. CODE ("\x48\x31\xFF") // xor rdi, rdi
  520. CODE ("\x0F\x05") // syscall
  521. size_t fatal_offset = buffer.len;
  522. CODE ("\x48\x89\xF7") // mov rdi, rsi -- use the string in rsi
  523. CODE ("\x30\xC0") // xor al, al -- look for the nil byte
  524. CODE ("\x48\x31\xC9") // xor rcx, rcx
  525. CODE ("\x48\xF7\xD1") // not rcx -- start from -1
  526. CODE ("\xFC" "\xF2\xAE") // cld; repne scasb -- decrement until found
  527. CODE ("\x48\xF7\xD1") // not rcx
  528. CODE ("\x48\x8D\x51\xFF") // lea rdx, [rcx-1] -- save length in rdx
  529. CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
  530. CODE ("\xBF") DD (2) // mov edi, "STDERR_FILENO"
  531. CODE ("\x0F\x05") // syscall
  532. CODE ("\xB8") DD (SYS_EXIT) // mov eax, "SYS_EXIT"
  533. CODE ("\xBF") DD (1) // mov edi, "EXIT_FAILURE"
  534. CODE ("\x0F\x05") // syscall
  535. size_t io_offset = buffer.len;
  536. CODE ("\x48\x89\xE6") // mov rsi, rsp -- the char starts at rsp
  537. CODE ("\xBA") DD (1) // mov edx, 1 -- count
  538. CODE ("\x0F\x05") // syscall
  539. CODE ("\x48\x83\xF8\x00") // cmp rax, 0
  540. CODE ("\x4C\x89\xE6") // mov rsi, r12
  541. CODE ("\x7C") // jl "fatal_offset" -- write failure message
  542. DB ((intptr_t) fatal_offset - (intptr_t) (buffer.len + 1))
  543. CODE ("\x66\x5B") // pop bx
  544. CODE ("\x58") // pop rax -- restore tape position
  545. CODE ("\xC3") // ret
  546. size_t read_offset = buffer.len;
  547. CODE ("\x50") // push rax -- save tape position
  548. CODE ("\xB8") DD (SYS_READ) // mov eax, "SYS_READ"
  549. CODE ("\xBF") DD (0) // mov edi, "STDIN_FILENO"
  550. CODE ("\x66\x6A\x00") // push word 0 -- the default value for EOF
  551. CODE ("\x4C\x8D\x25") DD (2) // lea r12, [rel read_message]
  552. CODE ("\xEB") // jmp "io_offset"
  553. DB ((intptr_t) io_offset - (intptr_t) (buffer.len + 1))
  554. CODE ("fatal: read failed\n\0")
  555. size_t write_offset = buffer.len;
  556. CODE ("\x50") // push rax -- save tape position
  557. CODE ("\xB8") DD (SYS_WRITE) // mov eax, "SYS_WRITE"
  558. CODE ("\xBF") DD (1) // mov edi, "STDOUT_FILENO"
  559. CODE ("\x66\x53") // push bx
  560. CODE ("\x4C\x8D\x25") DD (2) // lea r12, [rel write_message]
  561. CODE ("\xEB") // jmp "io_offset"
  562. DB ((intptr_t) io_offset - (intptr_t) (buffer.len + 1))
  563. CODE ("fatal: write failed\n\0")
  564. // Now that we know where each instruction is, fill in relative jumps
  565. for (size_t i = 0; i < irb_len; i++)
  566. {
  567. if (!irb[i].arg)
  568. continue;
  569. // This must accurately reflect the code generators
  570. intptr_t target, fixup = offsets[i];
  571. if (irb[i].cmd == BEGIN || irb[i].cmd == END)
  572. {
  573. fixup += (i && sets_flags[i - 1]) ? 2 : 4;
  574. target = offsets[irb[i].arg];
  575. }
  576. else if (irb[i].cmd == IN) { fixup++; target = read_offset; }
  577. else if (irb[i].cmd == OUT) { fixup++; target = write_offset; }
  578. else continue;
  579. uint64_t v = target - (fixup + 4);
  580. memcpy (buffer.str + fixup, LE (v), 4);
  581. }
  582. free (offsets);
  583. free (sets_flags);
  584. // - - Output - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  585. // Now that we know how long the machine code is, we can write the header.
  586. // Note that for PIE we would need to depend on the dynamic linker, so no.
  587. //
  588. // Recommended reading:
  589. // http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
  590. // man 5 elf
  591. struct str code = buffer;
  592. str_init (&buffer);
  593. enum
  594. {
  595. ELF_HEADER_SIZE = 64, // size of the ELF header
  596. ELF_PROGRAM_ENTRY_SIZE = 56, // size of a program header
  597. ELF_SECTION_ENTRY_SIZE = 64, // size of a section header
  598. ELF_META_SIZE = ELF_HEADER_SIZE + 2 * ELF_PROGRAM_ENTRY_SIZE
  599. };
  600. // ELF header
  601. CODE ("\x7F" "ELF\x02\x01\x01") // ELF, 64-bit, little endian, v1
  602. #ifdef TARGET_OPENBSD
  603. // OpenBSD either requires its ABI or a PT_NOTE with "OpenBSD" in it
  604. CODE ("\x0C\x00" "\0\0\0\0\0\0\0") // OpenBSD ABI, v0, padding
  605. #else
  606. CODE ("\x00\x00" "\0\0\0\0\0\0\0") // Unix System V ABI, v0, padding
  607. #endif
  608. DW (2) DW (62) DD (1) // executable, x86-64, v1
  609. DQ (ELF_LOAD_CODE + ELF_META_SIZE) // entry point address
  610. DQ (ELF_HEADER_SIZE) DQ (0) // program, section header offset
  611. DD (0) // no processor-specific flags
  612. DW (ELF_HEADER_SIZE) // ELF header size
  613. DW (ELF_PROGRAM_ENTRY_SIZE) DW (2) // program hdr tbl entry size, count
  614. DW (ELF_SECTION_ENTRY_SIZE) DW (0) // section hdr tbl entry size, count
  615. DW (0) // no section index for strings
  616. // Program header for code
  617. // The entry point address seems to require alignment, so map start of file
  618. DD (1) DD (5) // PT_LOAD, PF_R | PF_X
  619. DQ (0) // offset within the file
  620. DQ (ELF_LOAD_CODE) // address in virtual memory
  621. DQ (ELF_LOAD_CODE) // address in physical memory
  622. DQ (ELF_META_SIZE + code.len) // length within the file
  623. DQ (ELF_META_SIZE + code.len) // length within memory
  624. DQ (4096) // segment alignment
  625. // Program header for the tape
  626. DD (1) DD (6) // PT_LOAD, PF_R | PF_W
  627. DQ (0) // offset within the file
  628. DQ (ELF_LOAD_DATA) // address in virtual memory
  629. DQ (ELF_LOAD_DATA) // address in physical memory
  630. DQ (0) // length within the file
  631. DQ (1 << 20) // one megabyte of memory
  632. DQ (4096) // segment alignment
  633. // The section header table is optional and we don't need it for anything
  634. FILE *output_file;
  635. #ifdef __unix__
  636. int output_fd;
  637. if ((output_fd = open (output_path, O_CREAT|O_WRONLY|O_TRUNC, 0777)) < 0)
  638. exit_fatal ("open: %s: %s\n", output_path, strerror (errno));
  639. if (!(output_file = fdopen (output_fd, "w")))
  640. exit_fatal ("fdopen: %s\n", strerror (errno));
  641. #else
  642. if (!(output_file = fopen (output_path, "w")))
  643. exit_fatal ("fopen: %s: %s\n", output_path, strerror (errno));
  644. #endif
  645. fwrite (buffer.str, buffer.len, 1, output_file);
  646. fwrite (code.str, code.len, 1, output_file);
  647. fclose (output_file);
  648. return 0;
  649. }