Browse Source

gdb-experiment.go: add DWARF debugging information

Now we can debug compiled Brainfuck!
Přemysl Janouch 1 year ago
parent
commit
def3218714
Signed by: Přemysl Janouch <p.janouch@gmail.com> GPG Key ID: B715679E3A361BE6
3 changed files with 258 additions and 48 deletions
  1. 3
    0
      .gitignore
  2. 12
    0
      README.adoc
  3. 243
    48
      gdb-experiment.go

+ 3
- 0
.gitignore View File

@@ -1,6 +1,9 @@
1 1
 a.out
2
+gdb-experiment
2 3
 bfc-amd64-*
3 4
 
5
+ir-dump.txt
6
+
4 7
 bfc.creator*
5 8
 bfc.config
6 9
 bfc.files

+ 12
- 0
README.adoc View File

@@ -35,6 +35,18 @@ When no input file is specified, standard input is used.  Similarly, the default
35 35
 output filename is `a.out`.  After the compilation, the resulting file can be
36 36
 run on the target platform.
37 37
 
38
+gdb
39
+---
40
+You may have noticed the `gdb-experiment.go` file.  It is a non-optimizing
41
+version of the compiler targeting Linux only that adds DWARF debugging
42
+information mapping code locations onto lines in the `ir-dump.txt` byproduct
43
+output file.  It's been rewritten in Go since managing all those binary buffers
44
+required to build the symbol table proved to be too painful in C.
45
+
46
+ $ go run gdb-experiment.go [INPUT-FILE] [OUTPUT-FILE]
47
+
48
+Use `break *0x4000b7` to get a breakpoint at the first Brainfuck instruction.
49
+
38 50
 Contributing and Support
39 51
 ------------------------
40 52
 Use this project's GitHub to report any bugs, request features, or submit pull

+ 243
- 48
gdb-experiment.go View File

@@ -1,14 +1,21 @@
1
-// Non-optimizing Brainfuck compiler generating binaries for Linux on x86-64;
2
-// gofmt has been tried, with disappointing results
1
+// Non-optimizing Brainfuck compiler generating binaries for Linux on x86-64
2
+// with debugging information mapping instructions onto an IR dump.
3
+// gofmt has been tried, with disappointing results.
4
+// codegen{} is also pretty ugly in the way it works but damn convenient.
3 5
 package main
4 6
 
5 7
 import (
8
+	"encoding/binary"
6 9
 	"errors"
7 10
 	"fmt"
8 11
 	"io/ioutil"
9 12
 	"log"
10 13
 	"os"
11 14
 	"strconv"
15
+
16
+	// Let's not repeat all those constants here
17
+	"debug/dwarf"
18
+	"debug/elf"
12 19
 )
13 20
 
14 21
 const ( RIGHT = iota; LEFT; INC; DEC; IN; OUT; BEGIN; END )
@@ -124,16 +131,15 @@ func le(unknown interface{}) []byte {
124 131
 	// Trying hard to avoid reflect.Value.Int/Uint
125 132
 	formatted := fmt.Sprintf("%d", unknown)
126 133
 
127
-	var v uint64
134
+	b := make([]byte, 8)
128 135
 	if unsigned, err := strconv.ParseUint(formatted, 10, 64); err == nil {
129
-		v = unsigned
136
+		binary.LittleEndian.PutUint64(b, unsigned)
130 137
 	} else if signed, err := strconv.ParseInt(formatted, 10, 64); err == nil {
131
-		v = uint64(signed)
138
+		binary.LittleEndian.PutUint64(b, uint64(signed))
132 139
 	} else {
133 140
 		panic("cannot convert to number")
134 141
 	}
135
-	return []byte{byte(v), byte(v >> 8), byte(v >> 16), byte(v >> 24),
136
-		byte(v >> 32), byte(v >> 40), byte(v >> 48), byte(v >> 56)}
142
+	return b
137 143
 }
138 144
 
139 145
 func (a *codegen) append(v []byte)           { a.buf = append(a.buf, v...) }
@@ -154,8 +160,8 @@ const (
154 160
 	SYS_EXIT  = 60
155 161
 )
156 162
 
157
-func codegenAmd64(irb []instruction) []byte {
158
-	offsets := make([]int, len(irb)+1)
163
+func codegenAmd64(irb []instruction) (code []byte, offsets []int) {
164
+	offsets = make([]int, len(irb)+1)
159 165
 	a := codegen{}
160 166
 
161 167
 	a.code("\xB8").dd(ElfDataAddr)                // mov rax, "ElfDataAddr"
@@ -268,7 +274,7 @@ func codegenAmd64(irb []instruction) []byte {
268 274
 		}
269 275
 		copy(a.buf[fixup:], le(target - fixup - 4)[:4])
270 276
 	}
271
-	return a.buf
277
+	return a.buf, offsets
272 278
 }
273 279
 
274 280
 // --- Main --------------------------------------------------------------------
@@ -301,55 +307,244 @@ func main() {
301 307
 	// ... various optimizations could be performed here if we give up brevity
302 308
 	pairLoops(irb)
303 309
 	dump("ir-dump.txt", irb)
310
+	code, offsets := codegenAmd64(irb)
304 311
 
305
-	code := codegenAmd64(irb)
306
-	a := codegen{}
312
+// - - ELF generation  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
307 313
 
308
-	// TODO: also use the constants in package "debug/elf"
314
+	// Now that we know how long the machine code is, we can write the header.
315
+	// Note that for PIE we would need to depend on the dynamic linker, so no.
316
+	//
317
+	// Recommended reading:
318
+	//   http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
319
+	//   man 5 elf
320
+	//
321
+	// In case of unexpected gdb problems, also see:
322
+	//   DWARF4.pdf
323
+	//   https://sourceware.org/elfutils/DwarfLint
324
+	//   http://wiki.osdev.org/DWARF
309 325
 
310 326
 	const (
311
-		ElfHeaderSize       = 64        // size of the ELF header
312
-		ElfProgramEntrySize = 56        // size of a program header
313
-		ElfSectionEntrySize = 64        // size of a section header
314
-		ElfPrologSize       = ElfHeaderSize + 2*ElfProgramEntrySize
327
+		ElfHeaderSize       = 64        // Size of the ELF header
328
+		ElfProgramEntrySize = 56        // Size of a program header
329
+		ElfSectionEntrySize = 64        // Size of a section header
315 330
 	)
316 331
 
317
-	// ELF header
318
-	a.code("\x7FELF\x02\x01\x01")       // ELF, 64-bit, little endian, v1
319
-	// Unix System V ABI, v0, padding
320
-	a.code("\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00")
321
-	a.dw(2).dw(62).dd(1)                // executable, x86-64, v1
322
-	a.dq(ElfCodeAddr + ElfPrologSize)   // entry point address
323
-
324
-	// We only append section headers with debugging info with DEBUG
325
-	a.dq(ElfHeaderSize).dq(0)           // program, section header offset
326
-	a.dd(0)                             // no processor-specific flags
327
-	a.dw(ElfHeaderSize)                 // ELF header size
328
-	a.dw(ElfProgramEntrySize).dw(2)     // program hdr tbl entry size, count
329
-	a.dw(ElfSectionEntrySize).dw(0)     // section hdr tbl entry size, count
330
-	a.dw(0)                             // no section index for strings
332
+// - - Program headers - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
333
+
334
+	ph := codegen{}
335
+	phCount := 2
336
+
337
+	codeOffset := ElfHeaderSize + phCount*ElfProgramEntrySize
338
+	codeEndOffset := codeOffset + len(code)
331 339
 
332 340
 	// Program header for code
333 341
 	// The entry point address seems to require alignment, so map start of file
334
-	a.dd(1).dd(5)                       // PT_LOAD, PF_R | PF_X
335
-	a.dq(0)                             // offset within the file
336
-	a.dq(ElfCodeAddr)                   // address in virtual memory
337
-	a.dq(ElfCodeAddr)                   // address in physical memory
338
-	a.dq(ElfPrologSize + len(code))     // length within the file
339
-	a.dq(ElfPrologSize + len(code))     // length within memory
340
-	a.dq(4096)                          // segment alignment
342
+	ph.dd(elf.PT_LOAD).dd(elf.PF_R | elf.PF_X)
343
+	ph.dq(0)                            // Offset within the file
344
+	ph.dq(ElfCodeAddr)                  // Address in virtual memory
345
+	ph.dq(ElfCodeAddr)                  // Address in physical memory
346
+	ph.dq(codeEndOffset)                // Length within the file
347
+	ph.dq(codeEndOffset)                // Length within memory
348
+	ph.dq(4096)                         // Segment alignment
341 349
 
342 350
 	// Program header for the tape
343
-	a.dd(1).dd(6)                       // PT_LOAD, PF_R | PF_W
344
-	a.dq(0)                             // offset within the file
345
-	a.dq(ElfDataAddr)                   // address in virtual memory
346
-	a.dq(ElfDataAddr)                   // address in physical memory
347
-	a.dq(0)                             // length within the file
348
-	a.dq(1 << 20)                       // one megabyte of memory
349
-	a.dq(4096)                          // segment alignment
350
-
351
-	a.buf = append(a.buf, code...)
352
-	if err = ioutil.WriteFile(outputPath, a.buf, 0777); err != nil {
351
+	ph.dd(elf.PT_LOAD).dd(elf.PF_R | elf.PF_W)
352
+	ph.dq(0)                            // Offset within the file
353
+	ph.dq(ElfDataAddr)                  // Address in virtual memory
354
+	ph.dq(ElfDataAddr)                  // Address in physical memory
355
+	ph.dq(0)                            // Length within the file
356
+	ph.dq(1 << 20)                      // One megabyte of memory
357
+	ph.dq(4096)                         // Segment alignment
358
+
359
+	// Now that the rigid part has been generated, we can append sections
360
+	pieces := [][]byte{ph.buf, code}
361
+	position := codeEndOffset
362
+
363
+// - - Sections  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
364
+
365
+	sh := codegen{}
366
+	shCount := 0
367
+
368
+	// This section is created on the go as we need to name other sections
369
+	stringTable := codegen{}
370
+
371
+// - - Text  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
372
+
373
+	sh.dd(len(stringTable.buf))         // Index for the name of the section
374
+	stringTable.code(".text\x00")
375
+	sh.dd(elf.SHT_PROGBITS)
376
+	sh.dq(elf.SHF_ALLOC | elf.SHF_EXECINSTR)
377
+	sh.dq(ElfCodeAddr + codeOffset)     // Memory address
378
+	sh.dq(codeOffset)                   // Byte offset
379
+	sh.dq(len(code) - codeOffset)       // Byte size
380
+	sh.dd(0).dd(0)                      // No link, no info
381
+	sh.dq(0).dq(0)                      // No alignment, no entry size
382
+	shCount++
383
+
384
+// - - Debug line  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
385
+
386
+	const (
387
+		opcodeBase = 13  // Offset by DWARF4 standard opcodes
388
+		lineBase   = 0   // We don't need negative line indexes
389
+		lineRange  = 2   // Either we advance a line or not (we always do)
390
+	)
391
+
392
+	// FIXME: we use db() a lot instead of a proper un/signed LEB128 encoder;
393
+	//   that means that values > 127/63 or < 0 would break it;
394
+	//   see Appendix C to DWARF4.pdf for an algorithm
395
+
396
+	lineProgram := codegen{}
397
+	// Extended opcode DW_LNE_set_address to reset the PC to the start of code
398
+	lineProgram.db(0).db(1 + 8).db(2).dq(ElfCodeAddr + codeOffset)
399
+	if len(irb) > 0 {
400
+		lineProgram.db(opcodeBase + offsets[0] * lineRange)
401
+	}
402
+	// The epilog, which is at the very end of the offset array, is included
403
+	for i := 1; i <= len(irb); i++ {
404
+		size := offsets[i] - offsets[i - 1]
405
+		lineProgram.db(opcodeBase + (1 - lineBase) + size * lineRange)
406
+	}
407
+	// Extended opcode DW_LNE_end_sequence is mandatory at the end
408
+	lineProgram.db(0).db(1).db(1)
409
+
410
+	lineHeader := codegen{}
411
+	lineHeader.db(1)                    // Minimum instruction length
412
+	lineHeader.db(1)                    // Maximum operations per instruction
413
+	lineHeader.db(1)                    // default_is_stmt
414
+	lineHeader.db(lineBase)
415
+	lineHeader.db(lineRange)
416
+
417
+	lineHeader.db(opcodeBase)
418
+	// Number of operands for all standard opcodes (1..opcodeBase-1)
419
+	opcodeLengths := []byte{0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1}
420
+	lineHeader.buf = append(lineHeader.buf, opcodeLengths...)
421
+
422
+	// include_directories []string \x00
423
+	lineHeader.db(0)
424
+	// file_names []struct{base string; dir u8; modified u8; length u8} \x00
425
+	lineHeader.code("ir-dump.txt\x00").db(0).db(0).db(0).db(0)
426
+
427
+	lineEntry := codegen{}
428
+	lineEntry.dw(4)                     // .debug_line version number
429
+	lineEntry.dd(len(lineHeader.buf))
430
+	lineEntry.buf = append(lineEntry.buf, lineHeader.buf...)
431
+	lineEntry.buf = append(lineEntry.buf, lineProgram.buf...)
432
+
433
+	debugLine := codegen{}
434
+	debugLine.dd(len(lineEntry.buf))
435
+	debugLine.buf = append(debugLine.buf, lineEntry.buf...)
436
+
437
+	sh.dd(len(stringTable.buf))         // Index for the name of the section
438
+	stringTable.code(".debug_line\x00")
439
+	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address
440
+	sh.dq(position)                     // Byte offset
441
+	sh.dq(len(debugLine.buf))           // Byte size
442
+	sh.dd(0).dd(0)                      // No link, no info
443
+	sh.dq(0).dq(0)                      // No alignment, no entry size
444
+	shCount++
445
+
446
+	pieces = append(pieces, debugLine.buf)
447
+	position += len(debugLine.buf)
448
+
449
+// - - Debug abbreviations - - - - - - - - - - - - - - - - - - - - - - - - - - -
450
+
451
+	const (
452
+		formAddr      = 0x01            // Pointer size
453
+		formSecOffset = 0x17            // DWARF size
454
+	)
455
+
456
+	debugAbbrev := codegen{}
457
+	debugAbbrev.db(1)                   // Our abbreviation code
458
+	debugAbbrev.db(dwarf.TagCompileUnit)
459
+	debugAbbrev.db(0)                   // DW_CHILDREN_no
460
+	debugAbbrev.db(dwarf.AttrLowpc).db(formAddr)
461
+	debugAbbrev.db(dwarf.AttrHighpc).db(formAddr)
462
+	debugAbbrev.db(dwarf.AttrStmtList).db(formSecOffset)
463
+	debugAbbrev.db(0).db(0)             // End of attributes
464
+	debugAbbrev.db(0)                   // End of abbreviations
465
+
466
+	sh.dd(len(stringTable.buf))         // Index for the name of the section
467
+	stringTable.code(".debug_abbrev\x00")
468
+	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address
469
+	sh.dq(position)                     // Byte offset
470
+	sh.dq(len(debugAbbrev.buf))         // Byte size
471
+	sh.dd(0).dd(0)                      // No link, no info
472
+	sh.dq(0).dq(0)                      // No alignment, no entry size
473
+	shCount++
474
+
475
+	pieces = append(pieces, debugAbbrev.buf)
476
+	position += len(debugAbbrev.buf)
477
+
478
+// - - Debug info  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
479
+
480
+	cuEntry := codegen{}
481
+	cuEntry.dw(4)                       // .debug_info version number
482
+	cuEntry.dd(0)                       // Offset into .debug_abbrev
483
+	cuEntry.db(8)                       // Pointer size
484
+
485
+	// Single compile unit as per .debug_abbrev
486
+	cuEntry.db(1)
487
+	cuEntry.dq(ElfCodeAddr + codeOffset)
488
+	cuEntry.dq(ElfCodeAddr + codeEndOffset)
489
+	cuEntry.dd(0)
490
+
491
+	debugInfo := codegen{}
492
+	debugInfo.dd(len(cuEntry.buf))
493
+	debugInfo.buf = append(debugInfo.buf, cuEntry.buf...)
494
+
495
+	sh.dd(len(stringTable.buf))         // Index for the name of the section
496
+	stringTable.code(".debug_info\x00")
497
+	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address
498
+	sh.dq(position)                     // Byte offset
499
+	sh.dq(len(debugInfo.buf))           // Byte size
500
+	sh.dd(0).dd(0)                      // No link, no info
501
+	sh.dq(0).dq(0)                      // No alignment, no entry size
502
+	shCount++
503
+
504
+	pieces = append(pieces, debugInfo.buf)
505
+	position += len(debugInfo.buf)
506
+
507
+// - - Section names and section table - - - - - - - - - - - - - - - - - - - - -
508
+
509
+	sh.dd(len(stringTable.buf))         // Index for the name of the section
510
+	stringTable.code(".shstrtab\x00")
511
+	sh.dd(elf.SHT_STRTAB).dq(0).dq(0)   // Type, no flags, no memory address
512
+	sh.dq(position)                     // Byte offset
513
+	sh.dq(len(stringTable.buf))         // Byte size
514
+	sh.dd(0).dd(0)                      // No link, no info
515
+	sh.dq(0).dq(0)                      // No alignment, no entry size
516
+	shCount++
517
+
518
+	pieces = append(pieces, stringTable.buf)
519
+	position += len(stringTable.buf)
520
+
521
+	pieces = append(pieces, sh.buf)
522
+	// Don't increment the position, we want to know where section headers start
523
+
524
+// - - Final assembly of parts - - - - - - - - - - - - - - - - - - - - - - - - -
525
+
526
+	bin := codegen{}
527
+
528
+	// ELF header
529
+	bin.code("\x7FELF\x02\x01\x01")     // ELF, 64-bit, little endian, v1
530
+	// Unix System V ABI, v0, padding
531
+	bin.code("\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00")
532
+	bin.dw(elf.ET_EXEC).dw(elf.EM_X86_64).dd(elf.EV_CURRENT)
533
+	bin.dq(ElfCodeAddr + codeOffset)    // Entry point address
534
+	bin.dq(ElfHeaderSize)               // Program header offset
535
+	bin.dq(position)                    // Section header offset
536
+	bin.dd(0)                           // No processor-specific flags
537
+	bin.dw(ElfHeaderSize)               // ELF header size
538
+	bin.dw(ElfProgramEntrySize)         // Program header table entry size
539
+	bin.dw(phCount)                     // Program header table entry count
540
+	bin.dw(ElfSectionEntrySize)         // Section header table entry size
541
+	bin.dw(shCount)                     // Section header table entry count
542
+	bin.dw(shCount - 1)                 // Section index for strings
543
+
544
+	for _, x := range pieces {
545
+		bin.buf = append(bin.buf, x...)
546
+	}
547
+	if err = ioutil.WriteFile(outputPath, bin.buf, 0777); err != nil {
353 548
 		log.Fatalf("%s", err)
354 549
 	}
355 550
 }

Loading…
Cancel
Save