Browse Source

Initial commit of gdb-experiment.go

Přemysl Janouch 2 years ago
parent
commit
4efc032827
Signed by: Přemysl Janouch <p.janouch@gmail.com> GPG Key ID: B715679E3A361BE6
1 changed file with 355 additions and 0 deletions
  1. 355
    0
      gdb-experiment.go

+ 355
- 0
gdb-experiment.go View File

@@ -0,0 +1,355 @@
1
+// Non-optimizing Brainfuck compiler generating binaries for Linux on x86-64;
2
+// gofmt has been tried, with disappointing results
3
+package main
4
+
5
+import (
6
+	"errors"
7
+	"fmt"
8
+	"io/ioutil"
9
+	"log"
10
+	"os"
11
+	"strconv"
12
+)
13
+
14
// Brainfuck commands as they appear in the internal representation.
// The values are used as indexes into the "info" table, so their order
// must be kept in sync with it.
const (
	RIGHT = iota // '>'
	LEFT         // '<'
	INC          // '+'
	DEC          // '-'
	IN           // ','
	OUT          // '.'
	BEGIN        // '['
	END          // ']'
)
15
+
16
// info describes each command, indexed by the command's numeric value:
// "grouped" tells whether the command carries a repeat count as its argument
// (consecutive repetitions are coalesced while decoding), and "name" is
// the mnemonic used when dumping the internal representation.
var info = []struct {
	grouped bool
	name    string
}{
	{grouped: true, name: "RIGHT"},
	{grouped: true, name: "LEFT"},
	{grouped: true, name: "INC"},
	{grouped: true, name: "DEC"},
	{grouped: false, name: "IN"},
	{grouped: false, name: "OUT"},
	{grouped: false, name: "BEGIN"},
	{grouped: false, name: "END"},
}
29
+
30
// instruction is a single element of the internal representation.
type instruction struct {
	command int // one of RIGHT, LEFT, INC, DEC, IN, OUT, BEGIN, END
	arg     int // repeat count for grouped commands, jump target for loops
}
34
+
35
+// Dump internal representation to a file for debugging purposes
36
+func dump(filename string, irb []instruction) error {
37
+	out, err := os.Create(filename)
38
+	if err != nil {
39
+		return err
40
+	}
41
+
42
+	indent := 0
43
+	for _, x := range irb {
44
+		if x.command == END {
45
+			indent--
46
+		}
47
+		for i := 0; i < indent; i++ {
48
+			out.WriteString("  ")
49
+		}
50
+		out.WriteString(info[x.command].name)
51
+		if info[x.command].grouped {
52
+			fmt.Fprintf(out, " %d", x.arg)
53
+		}
54
+		out.WriteString("\n")
55
+		if x.command == BEGIN {
56
+			indent++
57
+		}
58
+	}
59
+	if err = out.Close(); err != nil {
60
+		return err
61
+	}
62
+	return nil
63
+}
64
+
65
+// Decode a Brainfuck program into internal representation,
66
+// coalescing identical commands together as the most basic optimization
67
+func decode(program []byte) (irb []instruction) {
68
+	for _, c := range program {
69
+		var command int
70
+		switch c {
71
+		case '>': command = RIGHT
72
+		case '<': command = LEFT
73
+		case '+': command = INC
74
+		case '-': command = DEC
75
+		case '.': command = OUT
76
+		case ',': command = IN
77
+		case '[': command = BEGIN
78
+		case ']': command = END
79
+		default:  continue
80
+		}
81
+
82
+		if len(irb) == 0 || !info[command].grouped ||
83
+			irb[len(irb)-1].command != command {
84
+			irb = append(irb, instruction{command, 1})
85
+		} else {
86
+			irb[len(irb)-1].arg++
87
+		}
88
+	}
89
+	return
90
+}
91
+
92
+// Match loop commands so that we know where to jump
93
+func pairLoops(irb []instruction) error {
94
+	nesting := 0
95
+	stack := make([]int, len(irb))
96
+	for i, x := range irb {
97
+		switch x.command {
98
+		case BEGIN:
99
+			stack[nesting] = i
100
+			nesting++
101
+		case END:
102
+			if nesting <= 0 {
103
+				return errors.New("unbalanced loops")
104
+			}
105
+			nesting--
106
+			irb[stack[nesting]].arg = i + 1
107
+			irb[i].arg = stack[nesting] + 1
108
+		}
109
+	}
110
+	if nesting != 0 {
111
+		return errors.New("unbalanced loops")
112
+	}
113
+	return nil
114
+}
115
+
116
+// --- Code generation ---------------------------------------------------------
117
+
118
// codegen accumulates generated output; its methods append to the buffer.
type codegen struct {
	buf []byte // everything emitted so far
}
121
+
122
// Convert an arbitrary integral value up to 8 bytes long to little endian.
//
// The result always has 8 bytes, least significant first; callers slice off
// as many as they need. Negative values come out in two's complement.
// Panics when the value does not format as a decimal integer.
func le(unknown interface{}) []byte {
	// Trying hard to avoid reflect.Value.Int/Uint
	formatted := fmt.Sprintf("%d", unknown)

	v, err := strconv.ParseUint(formatted, 10, 64)
	if err != nil {
		// Not a non-negative number, so give signed integers a try
		signed, err := strconv.ParseInt(formatted, 10, 64)
		if err != nil {
			panic("cannot convert to number")
		}
		v = uint64(signed)
	}

	out := make([]byte, 8)
	for i := range out {
		out[i] = byte(v >> (8 * uint(i)))
	}
	return out
}
138
+
139
+func (a *codegen) append(v []byte)           { a.buf = append(a.buf, v...) }
140
+func (a *codegen) code(v string) *codegen    { a.append([]byte(v)); return a }
141
+func (a *codegen) db(v interface{}) *codegen { a.append(le(v)[:1]); return a }
142
+func (a *codegen) dw(v interface{}) *codegen { a.append(le(v)[:2]); return a }
143
+func (a *codegen) dd(v interface{}) *codegen { a.append(le(v)[:4]); return a }
144
+func (a *codegen) dq(v interface{}) *codegen { a.append(le(v)[:8]); return a }
145
+
146
// Fixed virtual addresses used by the produced executables
const (
	ElfCodeAddr = 0x400000 // Where the code is loaded in memory
	ElfDataAddr = 0x800000 // Where the tape is placed in memory
)
150
+
151
// System call numbers loaded into eax by the generated code
const (
	SYS_READ  = 0
	SYS_WRITE = 1
	SYS_EXIT  = 60
)
156
+
157
+func codegenAmd64(irb []instruction) []byte {
158
+	offsets := make([]int, len(irb)+1)
159
+	a := codegen{}
160
+
161
+	a.code("\xB8").dd(ElfDataAddr)                // mov rax, "ElfCodeAddr"
162
+	a.code("\x30\xDB")                            // xor bl, bl
163
+
164
+	for i, x := range irb {
165
+		offsets[i] = len(a.buf)
166
+		if x.command == LEFT || x.command == RIGHT {
167
+			a.code("\x88\x18")                    // mov [rax], bl
168
+		}
169
+		switch x.command {
170
+		case RIGHT: a.code("\x48\x05").dd(x.arg)  // add rax, "arg"
171
+		case LEFT:  a.code("\x48\x2D").dd(x.arg)  // sub rax, "arg"
172
+		case INC:   a.code("\x80\xC3").db(x.arg)  // add bl, "arg"
173
+		case DEC:   a.code("\x80\xEB").db(x.arg)  // sub bl, "arg"
174
+		case OUT:   a.code("\xE8").dd(0)          // call "write"
175
+		case IN:    a.code("\xE8").dd(0)          // call "read"
176
+		case BEGIN:
177
+			// test bl, bl; jz "offsets[arg]"
178
+			a.code("\x84\xDB" + "\x0F\x84").dd(0)
179
+		case END:
180
+			// test bl, bl; jnz "offsets[arg]"
181
+			a.code("\x84\xDB" + "\x0F\x85").dd(0)
182
+		}
183
+		if x.command == LEFT || x.command == RIGHT {
184
+			a.code("\x8A\x18")                    // mov bl, [rax]
185
+		}
186
+	}
187
+	// When there is a loop at the end we need to be able to jump past it
188
+	offsets[len(irb)] = len(a.buf)
189
+
190
+	// Write an epilog which handles all the OS interfacing
191
+	//
192
+	// System V x86-64 ABI:
193
+	//   rax <-> both syscall number and return value
194
+	//   args -> rdi, rsi, rdx, r10, r8, r9
195
+	//   trashed <- rcx, r11
196
+
197
+	a.code("\xB8").dd(SYS_EXIT)  // mov eax, 0x3c
198
+	a.code("\x48\x31\xFF")       // xor rdi, rdi
199
+	a.code("\x0F\x05")           // syscall
200
+
201
+	fatal := len(a.buf)
202
+	a.code("\x48\x89\xF7")       // mov rdi, rsi -- use the string in rsi
203
+	a.code("\x30\xC0")           // xor al, al -- look for the nil byte
204
+	a.code("\x48\x31\xC9")       // xor rcx, rcx
205
+	a.code("\x48\xF7\xD1")       // not rcx -- start from -1
206
+	a.code("\xFC" + "\xF2\xAE")  // cld; repne scasb -- decrement until found
207
+	a.code("\x48\xF7\xD1")       // not rcx
208
+	a.code("\x48\x8D\x51\xFF")   // lea rdx, [rcx-1] -- save length in rdx
209
+	a.code("\xB8").dd(SYS_WRITE) // mov eax, "SYS_WRITE"
210
+	a.code("\xBF").dd(2)         // mov edi, "STDERR_FILENO"
211
+	a.code("\x0F\x05")           // syscall
212
+
213
+	a.code("\xB8").dd(SYS_EXIT)  // mov eax, "SYS_EXIT"
214
+	a.code("\xBF").dd(1)         // mov edi, "EXIT_FAILURE"
215
+	a.code("\x0F\x05")           // syscall
216
+
217
+	read := len(a.buf)
218
+	a.code("\x50")               // push rax -- save tape position
219
+	a.code("\xB8").dd(SYS_READ)  // mov eax, "SYS_READ"
220
+	a.code("\x48\x89\xC7")       // mov rdi, rax -- STDIN_FILENO
221
+	a.code("\x66\x6A\x00")       // push word 0 -- the default value for EOF
222
+	a.code("\x48\x89\xE6")       // mov rsi, rsp -- the char starts at rsp
223
+	a.code("\xBA").dd(1)         // mov edx, 1 -- count
224
+	a.code("\x0F\x05")           // syscall
225
+	a.code("\x66\x5B")           // pop bx
226
+
227
+	a.code("\x48\x83\xF8\x00")   // cmp rax, 0
228
+	a.code("\x48\x8D\x35").dd(4) // lea rsi, [rel read_message]
229
+	a.code("\x7C")               // jl "fatal_offset" -- write failure message
230
+	a.db(fatal - len(a.buf) - 1)
231
+	a.code("\x58")               // pop rax -- restore tape position
232
+	a.code("\xC3")               // ret
233
+	a.code("fatal: read failed\n\x00")
234
+
235
+	write := len(a.buf)
236
+	a.code("\x50")               // push rax -- save tape position
237
+	a.code("\xB8").dd(SYS_WRITE) // mov eax, "SYS_WRITE"
238
+	a.code("\x48\x89\xC7")       // mov rdi, rax -- STDOUT_FILENO
239
+	a.code("\x66\x53")           // push bx
240
+	a.code("\x48\x89\xE6")       // mov rsi, rsp -- the char starts at rsp
241
+	a.code("\xBA").dd(1)         // mov edx, 1 -- count
242
+	a.code("\x0F\x05")           // syscall
243
+	a.code("\x66\x5B")           // pop bx
244
+
245
+	a.code("\x48\x83\xF8\x00")   // cmp rax, 0
246
+	a.code("\x48\x8D\x35").dd(4) // lea rsi, [rel write_message]
247
+	a.code("\x7C")               // jl "fatal_offset" -- write failure message
248
+	a.db(fatal - len(a.buf) - 1)
249
+	a.code("\x58")               // pop rax -- restore tape position
250
+	a.code("\xC3")               // ret
251
+	a.code("fatal: write failed\n\x00")
252
+
253
+	// Now that we know where each instruction is, fill in relative jumps
254
+	for i, x := range irb {
255
+		// This must accurately reflect the code generators
256
+		target, fixup := 0, offsets[i]
257
+		if x.command == BEGIN || x.command == END {
258
+			fixup += 4
259
+			target = offsets[x.arg]
260
+		} else if x.command == IN {
261
+			fixup += 1
262
+			target = read
263
+		} else if x.command == OUT {
264
+			fixup += 1
265
+			target = write
266
+		} else {
267
+			continue
268
+		}
269
+		copy(a.buf[fixup:], le(target - fixup - 4)[:4])
270
+	}
271
+	return a.buf
272
+}
273
+
274
+// --- Main --------------------------------------------------------------------
275
+
276
+func main() {
277
+	var err error
278
+	if len(os.Args) > 3 {
279
+		log.Fatalf("usage: %s [INPUT-FILE] [OUTPUT-FILE]", os.Args[0])
280
+	}
281
+
282
+	input := os.Stdin
283
+	if len(os.Args) > 1 {
284
+		if input, err = os.Open(os.Args[1]); err != nil {
285
+			log.Fatalf("%s", err)
286
+		}
287
+	}
288
+
289
+	outputPath := "a.out"
290
+	if len(os.Args) > 2 {
291
+		outputPath = os.Args[2]
292
+	}
293
+
294
+	program, err := ioutil.ReadAll(input)
295
+	input.Close()
296
+	if err != nil {
297
+		log.Fatalf("can't read program: %s", err)
298
+	}
299
+
300
+	irb := decode(program)
301
+	// ... various optimizations could be performed here if we give up brevity
302
+	pairLoops(irb)
303
+	dump("ir-dump.txt", irb)
304
+
305
+	code := codegenAmd64(irb)
306
+	a := codegen{}
307
+
308
+	// TODO: also use the constants in package "debug/elf"
309
+
310
+	const (
311
+		ElfHeaderSize       = 64        // size of the ELF header
312
+		ElfProgramEntrySize = 56        // size of a program header
313
+		ElfSectionEntrySize = 64        // size of a section header
314
+		ElfPrologSize       = ElfHeaderSize + 2*ElfProgramEntrySize
315
+	)
316
+
317
+	// ELF header
318
+	a.code("\x7FELF\x02\x01\x01")       // ELF, 64-bit, little endian, v1
319
+	// Unix System V ABI, v0, padding
320
+	a.code("\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00")
321
+	a.dw(2).dw(62).dd(1)                // executable, x86-64, v1
322
+	a.dq(ElfCodeAddr + ElfPrologSize)   // entry point address
323
+
324
+	// We only append section headers with debugging info with DEBUG
325
+	a.dq(ElfHeaderSize).dq(0)           // program, section header offset
326
+	a.dd(0)                             // no processor-specific flags
327
+	a.dw(ElfHeaderSize)                 // ELF header size
328
+	a.dw(ElfProgramEntrySize).dw(2)     // program hdr tbl entry size, count
329
+	a.dw(ElfSectionEntrySize).dw(0)     // section hdr tbl entry size, count
330
+	a.dw(0)                             // no section index for strings
331
+
332
+	// Program header for code
333
+	// The entry point address seems to require alignment, so map start of file
334
+	a.dd(1).dd(5)                       // PT_LOAD, PF_R | PF_X
335
+	a.dq(0)                             // offset within the file
336
+	a.dq(ElfCodeAddr)                   // address in virtual memory
337
+	a.dq(ElfCodeAddr)                   // address in physical memory
338
+	a.dq(ElfPrologSize + len(code))     // length within the file
339
+	a.dq(ElfPrologSize + len(code))     // length within memory
340
+	a.dq(4096)                          // segment alignment
341
+
342
+	// Program header for the tape
343
+	a.dd(1).dd(6)                       // PT_LOAD, PF_R | PF_W
344
+	a.dq(0)                             // offset within the file
345
+	a.dq(ElfDataAddr)                   // address in virtual memory
346
+	a.dq(ElfDataAddr)                   // address in physical memory
347
+	a.dq(0)                             // length within the file
348
+	a.dq(1 << 20)                       // one megabyte of memory
349
+	a.dq(4096)                          // segment alignment
350
+
351
+	a.buf = append(a.buf, code...)
352
+	if err = ioutil.WriteFile(outputPath, a.buf, 0777); err != nil {
353
+		log.Fatalf("%s", err)
354
+	}
355
+}

Loading…
Cancel
Save