gdb-experiment.go: add DWARF debugging information
Now we can debug compiled Brainfuck!
This commit is contained in:
		
							parent
							
								
									4efc032827
								
							
						
					
					
						commit
						def3218714
					
				
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,6 +1,9 @@ | ||||
| a.out | ||||
| gdb-experiment | ||||
| bfc-amd64-* | ||||
| 
 | ||||
| ir-dump.txt | ||||
| 
 | ||||
| bfc.creator* | ||||
| bfc.config | ||||
| bfc.files | ||||
|  | ||||
							
								
								
									
										12
									
								
								README.adoc
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								README.adoc
									
									
									
									
									
								
							| @ -35,6 +35,18 @@ When no input file is specified, standard input is used.  Similarly, the default | ||||
| output filename is `a.out`.  After the compilation, the resulting file can be | ||||
| run on the target platform. | ||||
| 
 | ||||
| gdb | ||||
| --- | ||||
| You may have noticed the `gdb-experiment.go` file.  It is a non-optimizing, | ||||
| Linux-only version of the compiler that adds DWARF debugging information, | ||||
| mapping code locations onto lines in the `ir-dump.txt` byproduct output file. | ||||
| It has been rewritten in Go because managing all the binary buffers required | ||||
| to build the symbol table proved to be too painful in C. | ||||
| 
 | ||||
|  $ go run gdb-experiment.go [INPUT-FILE] [OUTPUT-FILE] | ||||
| 
 | ||||
| Use `break *0x4000b7` to get a breakpoint at the first Brainfuck instruction. | ||||
| 
 | ||||
| Contributing and Support | ||||
| ------------------------ | ||||
| Use this project's GitHub to report any bugs, request features, or submit pull | ||||
|  | ||||
| @ -1,14 +1,21 @@ | ||||
| // Non-optimizing Brainfuck compiler generating binaries for Linux on x86-64; | ||||
| // gofmt has been tried, with disappointing results | ||||
| // Non-optimizing Brainfuck compiler generating binaries for Linux on x86-64 | ||||
| // with debugging information mapping instructions onto an IR dump. | ||||
| // gofmt has been tried, with disappointing results. | ||||
| // codegen{} is also pretty ugly in the way it works but damn convenient. | ||||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"encoding/binary" | ||||
| 	"errors" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"log" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 
 | ||||
| 	// Let's not repeat all those constants here | ||||
| 	"debug/dwarf" | ||||
| 	"debug/elf" | ||||
| ) | ||||
| 
 | ||||
| const ( RIGHT = iota; LEFT; INC; DEC; IN; OUT; BEGIN; END ) | ||||
| @ -124,16 +131,15 @@ func le(unknown interface{}) []byte { | ||||
| 	// Trying hard to avoid reflect.Value.Int/Uint | ||||
| 	formatted := fmt.Sprintf("%d", unknown) | ||||
| 
 | ||||
| 	var v uint64 | ||||
| 	b := make([]byte, 8) | ||||
| 	if unsigned, err := strconv.ParseUint(formatted, 10, 64); err == nil { | ||||
| 		v = unsigned | ||||
| 		binary.LittleEndian.PutUint64(b, unsigned) | ||||
| 	} else if signed, err := strconv.ParseInt(formatted, 10, 64); err == nil { | ||||
| 		v = uint64(signed) | ||||
| 		binary.LittleEndian.PutUint64(b, uint64(signed)) | ||||
| 	} else { | ||||
| 		panic("cannot convert to number") | ||||
| 	} | ||||
| 	return []byte{byte(v), byte(v >> 8), byte(v >> 16), byte(v >> 24), | ||||
| 		byte(v >> 32), byte(v >> 40), byte(v >> 48), byte(v >> 56)} | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
| func (a *codegen) append(v []byte)           { a.buf = append(a.buf, v...) } | ||||
| @ -154,8 +160,8 @@ const ( | ||||
| 	SYS_EXIT  = 60 | ||||
| ) | ||||
| 
 | ||||
| func codegenAmd64(irb []instruction) []byte { | ||||
| 	offsets := make([]int, len(irb)+1) | ||||
| func codegenAmd64(irb []instruction) (code []byte, offsets []int) { | ||||
| 	offsets = make([]int, len(irb)+1) | ||||
| 	a := codegen{} | ||||
| 
 | ||||
| 	a.code("\xB8").dd(ElfDataAddr)                // mov rax, "ElfDataAddr" | ||||
| @ -268,7 +274,7 @@ func codegenAmd64(irb []instruction) []byte { | ||||
| 		} | ||||
| 		copy(a.buf[fixup:], le(target - fixup - 4)[:4]) | ||||
| 	} | ||||
| 	return a.buf | ||||
| 	return a.buf, offsets | ||||
| } | ||||
| 
 | ||||
| // --- Main -------------------------------------------------------------------- | ||||
| @ -301,55 +307,244 @@ func main() { | ||||
| 	// ... various optimizations could be performed here if we give up brevity | ||||
| 	pairLoops(irb) | ||||
| 	dump("ir-dump.txt", irb) | ||||
| 	code, offsets := codegenAmd64(irb) | ||||
| 
 | ||||
| 	code := codegenAmd64(irb) | ||||
| 	a := codegen{} | ||||
| // - - ELF generation  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	// TODO: also use the constants in package "debug/elf" | ||||
| 	// Now that we know how long the machine code is, we can write the header. | ||||
| 	// Note that for PIE we would need to depend on the dynamic linker, so no. | ||||
| 	// | ||||
| 	// Recommended reading: | ||||
| 	//   http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html | ||||
| 	//   man 5 elf | ||||
| 	// | ||||
| 	// In case of unexpected gdb problems, also see: | ||||
| 	//   DWARF4.pdf | ||||
| 	//   https://sourceware.org/elfutils/DwarfLint | ||||
| 	//   http://wiki.osdev.org/DWARF | ||||
| 
 | ||||
| 	const ( | ||||
| 		ElfHeaderSize       = 64        // size of the ELF header | ||||
| 		ElfProgramEntrySize = 56        // size of a program header | ||||
| 		ElfSectionEntrySize = 64        // size of a section header | ||||
| 		ElfPrologSize       = ElfHeaderSize + 2*ElfProgramEntrySize | ||||
| 		ElfHeaderSize       = 64        // Size of the ELF header | ||||
| 		ElfProgramEntrySize = 56        // Size of a program header | ||||
| 		ElfSectionEntrySize = 64        // Size of a section header | ||||
| 	) | ||||
| 
 | ||||
| 	// ELF header | ||||
| 	a.code("\x7FELF\x02\x01\x01")       // ELF, 64-bit, little endian, v1 | ||||
| 	// Unix System V ABI, v0, padding | ||||
| 	a.code("\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00") | ||||
| 	a.dw(2).dw(62).dd(1)                // executable, x86-64, v1 | ||||
| 	a.dq(ElfCodeAddr + ElfPrologSize)   // entry point address | ||||
| // - - Program headers - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	// We only append section headers with debugging info with DEBUG | ||||
| 	a.dq(ElfHeaderSize).dq(0)           // program, section header offset | ||||
| 	a.dd(0)                             // no processor-specific flags | ||||
| 	a.dw(ElfHeaderSize)                 // ELF header size | ||||
| 	a.dw(ElfProgramEntrySize).dw(2)     // program hdr tbl entry size, count | ||||
| 	a.dw(ElfSectionEntrySize).dw(0)     // section hdr tbl entry size, count | ||||
| 	a.dw(0)                             // no section index for strings | ||||
| 	ph := codegen{} | ||||
| 	phCount := 2 | ||||
| 
 | ||||
| 	codeOffset := ElfHeaderSize + phCount*ElfProgramEntrySize | ||||
| 	codeEndOffset := codeOffset + len(code) | ||||
| 
 | ||||
| 	// Program header for code | ||||
| 	// The entry point address seems to require alignment, so map start of file | ||||
| 	a.dd(1).dd(5)                       // PT_LOAD, PF_R | PF_X | ||||
| 	a.dq(0)                             // offset within the file | ||||
| 	a.dq(ElfCodeAddr)                   // address in virtual memory | ||||
| 	a.dq(ElfCodeAddr)                   // address in physical memory | ||||
| 	a.dq(ElfPrologSize + len(code))     // length within the file | ||||
| 	a.dq(ElfPrologSize + len(code))     // length within memory | ||||
| 	a.dq(4096)                          // segment alignment | ||||
| 	ph.dd(elf.PT_LOAD).dd(elf.PF_R | elf.PF_X) | ||||
| 	ph.dq(0)                            // Offset within the file | ||||
| 	ph.dq(ElfCodeAddr)                  // Address in virtual memory | ||||
| 	ph.dq(ElfCodeAddr)                  // Address in physical memory | ||||
| 	ph.dq(codeEndOffset)                // Length within the file | ||||
| 	ph.dq(codeEndOffset)                // Length within memory | ||||
| 	ph.dq(4096)                         // Segment alignment | ||||
| 
 | ||||
| 	// Program header for the tape | ||||
| 	a.dd(1).dd(6)                       // PT_LOAD, PF_R | PF_W | ||||
| 	a.dq(0)                             // offset within the file | ||||
| 	a.dq(ElfDataAddr)                   // address in virtual memory | ||||
| 	a.dq(ElfDataAddr)                   // address in physical memory | ||||
| 	a.dq(0)                             // length within the file | ||||
| 	a.dq(1 << 20)                       // one megabyte of memory | ||||
| 	a.dq(4096)                          // segment alignment | ||||
| 	ph.dd(elf.PT_LOAD).dd(elf.PF_R | elf.PF_W) | ||||
| 	ph.dq(0)                            // Offset within the file | ||||
| 	ph.dq(ElfDataAddr)                  // Address in virtual memory | ||||
| 	ph.dq(ElfDataAddr)                  // Address in physical memory | ||||
| 	ph.dq(0)                            // Length within the file | ||||
| 	ph.dq(1 << 20)                      // One megabyte of memory | ||||
| 	ph.dq(4096)                         // Segment alignment | ||||
| 
 | ||||
| 	a.buf = append(a.buf, code...) | ||||
| 	if err = ioutil.WriteFile(outputPath, a.buf, 0777); err != nil { | ||||
| 	// Now that the rigid part has been generated, we can append sections | ||||
| 	pieces := [][]byte{ph.buf, code} | ||||
| 	position := codeEndOffset | ||||
| 
 | ||||
| // - - Sections  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	sh := codegen{} | ||||
| 	shCount := 0 | ||||
| 
 | ||||
| 	// This section is created on the go as we need to name other sections | ||||
| 	stringTable := codegen{} | ||||
| 
 | ||||
| // - - Text  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	sh.dd(len(stringTable.buf))         // Index for the name of the section | ||||
| 	stringTable.code(".text\x00") | ||||
| 	sh.dd(elf.SHT_PROGBITS) | ||||
| 	sh.dq(elf.SHF_ALLOC | elf.SHF_EXECINSTR) | ||||
| 	sh.dq(ElfCodeAddr + codeOffset)     // Memory address | ||||
| 	sh.dq(codeOffset)                   // Byte offset | ||||
| 	sh.dq(len(code) - codeOffset)       // Byte size | ||||
| 	sh.dd(0).dd(0)                      // No link, no info | ||||
| 	sh.dq(0).dq(0)                      // No alignment, no entry size | ||||
| 	shCount++ | ||||
| 
 | ||||
| // - - Debug line  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	const ( | ||||
| 		opcodeBase = 13  // Offset by DWARF4 standard opcodes | ||||
| 		lineBase   = 0   // We don't need negative line indexes | ||||
| 		lineRange  = 2   // Either we advance a line or not (we always do) | ||||
| 	) | ||||
| 
 | ||||
| 	// FIXME: we use db() a lot instead of a proper un/signed LEB128 encoder; | ||||
| 	//   that means that values > 127/63 or < 0 would break it; | ||||
| 	//   see Appendix C to DWARF4.pdf for an algorithm | ||||
| 
 | ||||
| 	lineProgram := codegen{} | ||||
| 	// Extended opcode DW_LNE_set_address to reset the PC to the start of code | ||||
| 	lineProgram.db(0).db(1 + 8).db(2).dq(ElfCodeAddr + codeOffset) | ||||
| 	if len(irb) > 0 { | ||||
| 		lineProgram.db(opcodeBase + offsets[0] * lineRange) | ||||
| 	} | ||||
| 	// The epilog, which is at the very end of the offset array, is included | ||||
| 	for i := 1; i <= len(irb); i++ { | ||||
| 		size := offsets[i] - offsets[i - 1] | ||||
| 		lineProgram.db(opcodeBase + (1 - lineBase) + size * lineRange) | ||||
| 	} | ||||
| 	// Extended opcode DW_LNE_end_sequence is mandatory at the end | ||||
| 	lineProgram.db(0).db(1).db(1) | ||||
| 
 | ||||
| 	lineHeader := codegen{} | ||||
| 	lineHeader.db(1)                    // Minimum instruction length | ||||
| 	lineHeader.db(1)                    // Maximum operations per instruction | ||||
| 	lineHeader.db(1)                    // default_is_stmt | ||||
| 	lineHeader.db(lineBase) | ||||
| 	lineHeader.db(lineRange) | ||||
| 
 | ||||
| 	lineHeader.db(opcodeBase) | ||||
| 	// Number of operands for all standard opcodes (1..opcodeBase-1) | ||||
| 	opcodeLengths := []byte{0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1} | ||||
| 	lineHeader.buf = append(lineHeader.buf, opcodeLengths...) | ||||
| 
 | ||||
| 	// include_directories []string \x00 | ||||
| 	lineHeader.db(0) | ||||
| 	// file_names []struct{base string; dir u8; modified u8; length u8} \x00 | ||||
| 	lineHeader.code("ir-dump.txt\x00").db(0).db(0).db(0).db(0) | ||||
| 
 | ||||
| 	lineEntry := codegen{} | ||||
| 	lineEntry.dw(4)                     // .debug_line version number | ||||
| 	lineEntry.dd(len(lineHeader.buf)) | ||||
| 	lineEntry.buf = append(lineEntry.buf, lineHeader.buf...) | ||||
| 	lineEntry.buf = append(lineEntry.buf, lineProgram.buf...) | ||||
| 
 | ||||
| 	debugLine := codegen{} | ||||
| 	debugLine.dd(len(lineEntry.buf)) | ||||
| 	debugLine.buf = append(debugLine.buf, lineEntry.buf...) | ||||
| 
 | ||||
| 	sh.dd(len(stringTable.buf))         // Index for the name of the section | ||||
| 	stringTable.code(".debug_line\x00") | ||||
| 	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address | ||||
| 	sh.dq(position)                     // Byte offset | ||||
| 	sh.dq(len(debugLine.buf))           // Byte size | ||||
| 	sh.dd(0).dd(0)                      // No link, no info | ||||
| 	sh.dq(0).dq(0)                      // No alignment, no entry size | ||||
| 	shCount++ | ||||
| 
 | ||||
| 	pieces = append(pieces, debugLine.buf) | ||||
| 	position += len(debugLine.buf) | ||||
| 
 | ||||
| // - - Debug abbreviations - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	const ( | ||||
| 		formAddr      = 0x01            // Pointer size | ||||
| 		formSecOffset = 0x17            // DWARF size | ||||
| 	) | ||||
| 
 | ||||
| 	debugAbbrev := codegen{} | ||||
| 	debugAbbrev.db(1)                   // Our abbreviation code | ||||
| 	debugAbbrev.db(dwarf.TagCompileUnit) | ||||
| 	debugAbbrev.db(0)                   // DW_CHILDREN_no | ||||
| 	debugAbbrev.db(dwarf.AttrLowpc).db(formAddr) | ||||
| 	debugAbbrev.db(dwarf.AttrHighpc).db(formAddr) | ||||
| 	debugAbbrev.db(dwarf.AttrStmtList).db(formSecOffset) | ||||
| 	debugAbbrev.db(0).db(0)             // End of attributes | ||||
| 	debugAbbrev.db(0)                   // End of abbreviations | ||||
| 
 | ||||
| 	sh.dd(len(stringTable.buf))         // Index for the name of the section | ||||
| 	stringTable.code(".debug_abbrev\x00") | ||||
| 	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address | ||||
| 	sh.dq(position)                     // Byte offset | ||||
| 	sh.dq(len(debugAbbrev.buf))         // Byte size | ||||
| 	sh.dd(0).dd(0)                      // No link, no info | ||||
| 	sh.dq(0).dq(0)                      // No alignment, no entry size | ||||
| 	shCount++ | ||||
| 
 | ||||
| 	pieces = append(pieces, debugAbbrev.buf) | ||||
| 	position += len(debugAbbrev.buf) | ||||
| 
 | ||||
| // - - Debug info  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	cuEntry := codegen{} | ||||
| 	cuEntry.dw(4)                       // .debug_info version number | ||||
| 	cuEntry.dd(0)                       // Offset into .debug_abbrev | ||||
| 	cuEntry.db(8)                       // Pointer size | ||||
| 
 | ||||
| 	// Single compile unit as per .debug_abbrev | ||||
| 	cuEntry.db(1) | ||||
| 	cuEntry.dq(ElfCodeAddr + codeOffset) | ||||
| 	cuEntry.dq(ElfCodeAddr + codeEndOffset) | ||||
| 	cuEntry.dd(0) | ||||
| 
 | ||||
| 	debugInfo := codegen{} | ||||
| 	debugInfo.dd(len(cuEntry.buf)) | ||||
| 	debugInfo.buf = append(debugInfo.buf, cuEntry.buf...) | ||||
| 
 | ||||
| 	sh.dd(len(stringTable.buf))         // Index for the name of the section | ||||
| 	stringTable.code(".debug_info\x00") | ||||
| 	sh.dd(elf.SHT_PROGBITS).dq(0).dq(0) // Type, no flags, no memory address | ||||
| 	sh.dq(position)                     // Byte offset | ||||
| 	sh.dq(len(debugInfo.buf))           // Byte size | ||||
| 	sh.dd(0).dd(0)                      // No link, no info | ||||
| 	sh.dq(0).dq(0)                      // No alignment, no entry size | ||||
| 	shCount++ | ||||
| 
 | ||||
| 	pieces = append(pieces, debugInfo.buf) | ||||
| 	position += len(debugInfo.buf) | ||||
| 
 | ||||
| // - - Section names and section table - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	sh.dd(len(stringTable.buf))         // Index for the name of the section | ||||
| 	stringTable.code(".shstrtab\x00") | ||||
| 	sh.dd(elf.SHT_STRTAB).dq(0).dq(0)   // Type, no flags, no memory address | ||||
| 	sh.dq(position)                     // Byte offset | ||||
| 	sh.dq(len(stringTable.buf))         // Byte size | ||||
| 	sh.dd(0).dd(0)                      // No link, no info | ||||
| 	sh.dq(0).dq(0)                      // No alignment, no entry size | ||||
| 	shCount++ | ||||
| 
 | ||||
| 	pieces = append(pieces, stringTable.buf) | ||||
| 	position += len(stringTable.buf) | ||||
| 
 | ||||
| 	pieces = append(pieces, sh.buf) | ||||
| 	// Don't increment the position, we want to know where section headers start | ||||
| 
 | ||||
| // - - Final assembly of parts - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 
 | ||||
| 	bin := codegen{} | ||||
| 
 | ||||
| 	// ELF header | ||||
| 	bin.code("\x7FELF\x02\x01\x01")     // ELF, 64-bit, little endian, v1 | ||||
| 	// Unix System V ABI, v0, padding | ||||
| 	bin.code("\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00") | ||||
| 	bin.dw(elf.ET_EXEC).dw(elf.EM_X86_64).dd(elf.EV_CURRENT) | ||||
| 	bin.dq(ElfCodeAddr + codeOffset)    // Entry point address | ||||
| 	bin.dq(ElfHeaderSize)               // Program header offset | ||||
| 	bin.dq(position)                    // Section header offset | ||||
| 	bin.dd(0)                           // No processor-specific flags | ||||
| 	bin.dw(ElfHeaderSize)               // ELF header size | ||||
| 	bin.dw(ElfProgramEntrySize)         // Program header table entry size | ||||
| 	bin.dw(phCount)                     // Program header table entry count | ||||
| 	bin.dw(ElfSectionEntrySize)         // Section header table entry size | ||||
| 	bin.dw(shCount)                     // Section header table entry count | ||||
| 	bin.dw(shCount - 1)                 // Section index for strings | ||||
| 
 | ||||
| 	for _, x := range pieces { | ||||
| 		bin.buf = append(bin.buf, x...) | ||||
| 	} | ||||
| 	if err = ioutil.WriteFile(outputPath, bin.buf, 0777); err != nil { | ||||
| 		log.Fatalf("%s", err) | ||||
| 	} | ||||
| } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user