#!/usr/bin/env python3
"""Disassemble a relocatable PowerPC module into assembler source on stdout.

Usage: script MODULE [SYMBOLS_ELF]

MODULE is the binary to dump.  Its header, section table, import table and
relocation lists (R_PPC_* / R_DOLPHIN_* entries) are parsed, then every
section is printed as code, data or bss.  SYMBOLS_ELF is optional; when
given, the names in its ``.symtab`` are used as labels.
"""
from capstone import *
from capstone.ppc import *
from elftools.elf.elffile import *
from elftools.elf.sections import *
import sys

# addr -> name
labels = {}

# fileOffset -> {addr, type}
relocations = {}

# index -> {offset, flags, length, is_bss, name}
sectionInfo = []

R_PPC_NONE = 0
R_PPC_ADDR32 = 1
R_PPC_ADDR24 = 2
R_PPC_ADDR16_LO = 4
R_PPC_ADDR16_HI = 5
R_PPC_ADDR16_HA = 6
R_PPC_REL24 = 10
R_PPC_REL14 = 11
R_DOLPHIN_SECTION = 202
R_DOLPHIN_END = 203

relocationTypeNames = {
    R_PPC_NONE: 'R_PPC_NONE',
    R_PPC_ADDR32: 'R_PPC_ADDR32',
    R_PPC_ADDR24: 'R_PPC_ADDR24',
    R_PPC_ADDR16_LO: 'R_PPC_ADDR16_LO',
    R_PPC_ADDR16_HI: 'R_PPC_ADDR16_HI',
    R_PPC_ADDR16_HA: 'R_PPC_ADDR16_HA',
    R_PPC_REL24: 'R_PPC_REL24',
    R_PPC_REL14: 'R_PPC_REL14',
    R_DOLPHIN_SECTION: 'R_DOLPHIN_SECTION',
    R_DOLPHIN_END: 'R_DOLPHIN_END'
}

# Names given to section indices 1..6 (applied only to indices that exist).
STANDARD_SECTION_NAMES = ('.text', '.ctors', '.dtors', '.rodata', '.data', '.bss')


def relocation_type_name(relocType):
    """Return the symbolic name of a relocation type, or a numeric fallback."""
    return relocationTypeNames.get(relocType, 'R_UNKNOWN_%i' % relocType)


def read_u8(offset):
    """Return the byte of the input file at `offset`."""
    return filecontent[offset]


def read_u16(offset):
    """Return the big-endian 16-bit value of the input file at `offset`."""
    return (filecontent[offset + 0] << 8) | filecontent[offset + 1]


def read_u32(offset):
    """Return the big-endian 32-bit value of the input file at `offset`."""
    return ((filecontent[offset + 0] << 24) | (filecontent[offset + 1] << 16)
            | (filecontent[offset + 2] << 8) | filecontent[offset + 3])


def add_label(addr, name=None):
    """Register a label for `addr` unless one exists; return the label in use.

    When `name` is omitted a generated ``lbl_XXXXXXXX`` name is used.
    """
    if addr in labels:
        return labels[addr]
    if name is None:
        name = 'lbl_%08X' % addr
    labels[addr] = name
    return name


if len(sys.argv) < 2:
    sys.exit('usage: %s MODULE [SYMBOLS_ELF]' % sys.argv[0])

with open(sys.argv[1], 'rb') as file:
    filecontent = bytearray(file.read())

if len(sys.argv) >= 3:
    # NOTE: the original author observed that loading the symbol table is slow.
    with open(sys.argv[2], 'rb') as f:
        elf = ELFFile(f)
        elfsymtab = elf.get_section_by_name('.symtab')
        # get_section_by_name returns None when the ELF has no .symtab.
        if elfsymtab is not None:
            for sym in elfsymtab.iter_symbols():
                if sym.name and sym.name[0] not in {'.', '@'}:
                    add_label(sym['st_value'], sym.name)

# Module header fields.
moduleId = read_u32(0)
numSections = read_u32(0x0C)
sectionInfoOffset = read_u32(0x10)
nameOffset = read_u32(0x14)
nameSize = read_u32(0x18)
version = read_u32(0x1C)
bssSize = read_u32(0x20)
relOffset = read_u32(0x24)
impOffset = read_u32(0x28)
impSize = read_u32(0x2C)
prologSection = read_u8(0x30)
epilogSection = read_u8(0x31)
unresolvedSection = read_u8(0x32)
prolog = read_u32(0x34)
epilog = read_u32(0x38)
unresolved = read_u32(0x3C)

print("# id: %i" % moduleId)
print("# version: %i" % version)
print("# nameoffset: 0x%X, size: 0x%X" % (nameOffset, nameSize))
print("# section table: 0x%X, size: 0x%X" % (sectionInfoOffset, numSections*8))
print("# imp table: 0x%X" % impOffset)
print("# relocs offset: 0x%X" % relOffset)
print("# _prolog: %i:0x%X" % (prologSection, prolog))
print("# _epilog: %i:0x%X" % (epilogSection, epilog))
print("# _unresolved: %i:0x%X" % (unresolvedSection, unresolved))
print("# num sections: %i" % numSections)
print('.include "macros.inc"')

# Read sections: each table entry is 8 bytes (offset|flags word, then length).
for i in range(0, numSections):
    o = sectionInfoOffset + i * 8
    section = {
        'offset': read_u32(o + 0) & ~3,
        'flags': read_u32(o + 0) & 3,
        'length': read_u32(o + 4)
    }
    # A section with a length but no file offset is treated as bss.
    section['is_bss'] = section['offset'] == 0 and section['length'] > 0
    # Hack: if bss, then set file offset to something unique as to not
    # clash with other symbols
    if section['is_bss']:
        section['offset'] = 0x10000000
    # Determine name
    if section['is_bss']:
        section['name'] = '.bss%i' % i
    elif section['flags'] & 1:
        section['name'] = '.text%i' % i
    else:
        section['name'] = '.data%i' % i
    sectionInfo.append(section)
    print("# offset: 0x%08X\tlength: 0x%08X\tflags: %i" %
          (section['offset'], section['length'], section['flags']))

# Give the conventional names to sections 1..6, skipping indices the module
# does not have (the unconditional assignments used to raise IndexError).
for sectionIndex, sectionName in enumerate(STANDARD_SECTION_NAMES, start=1):
    if sectionIndex < len(sectionInfo):
        sectionInfo[sectionIndex]['name'] = sectionName

# Add labels for prologue and epilogue
if prologSection != 0:
    labels[sectionInfo[prologSection]['offset'] + prolog] = '_prolog'
if epilogSection != 0:
    labels[sectionInfo[epilogSection]['offset'] + epilog] = '_epilog'
if unresolvedSection != 0:
    labels[sectionInfo[unresolvedSection]['offset'] + unresolved] = '_unresolved'


def read_relocation_info(module, o):
    """Parse one relocation list starting at file offset `o`.

    Entries are 8 bytes each (u16 offset, u8 type, u8 section, u32 addend)
    and the list ends at an R_DOLPHIN_END entry.  Every entry with a type
    below 200 is stored in `relocations`, keyed by the file offset it
    patches.  `module` 0 means the target address is the addend itself and
    must already have a label; any other value resolves the target relative
    to one of this file's sections.  Exits with status 1 if a module-0
    target has no label.
    """
    # Running file offset of the location being patched.  Starts at 0 so a
    # list that does not open with R_DOLPHIN_SECTION no longer crashes.
    relocOffset = 0
    missingSymbols = False
    while True:
        offset = read_u16(o + 0)
        relocType = read_u8(o + 2)
        section = read_u8(o + 3)
        addend = read_u32(o + 4)

        # Get address of symbol and add label
        symAddr = 0
        if relocType == R_DOLPHIN_SECTION:
            # Switch the patch position to the start of the named section.
            relocOffset = sectionInfo[section]['offset']
        if relocType < 200:
            if module == 0:  # dol
                symAddr = addend
                if symAddr not in labels:
                    print('error: symbol for 0x%08X not found' % symAddr)
                    missingSymbols = True
            else:  # rel
                symAddr = sectionInfo[section]['offset'] + addend
                # Unlike add_label, this replaces any label already
                # registered at this address.
                labels[symAddr] = 'lbl_%08X' % symAddr

        # Get file offset for relocation
        relocOffset += offset

        if relocType < 200:
            relocations[relocOffset] = {
                'addr': symAddr,
                'type': relocType,
            }
        if relocType == R_DOLPHIN_END:
            break
        o += 8
    if missingSymbols:
        sys.exit(1)


# The import table holds 8-byte entries: module id, relocation list offset.
numImpEntries = impSize // 8
for i in range(0, numImpEntries):
    o = impOffset + i * 8
    module = read_u32(o + 0)
    relocation = read_u32(o + 4)
    read_relocation_info(module, relocation)

cs = Cs(CS_ARCH_PPC, CS_MODE_32 | CS_MODE_BIG_ENDIAN)
cs.detail = True
cs.imm_unsigned = False

# Load/store instructions whose displacement may carry an @l relocation.
LOAD_STORE_INSNS = {
    PPC_INS_LWZ, PPC_INS_LHZ, PPC_INS_LHA, PPC_INS_LBZ,
    PPC_INS_LWZU, PPC_INS_LHZU, PPC_INS_LHAU, PPC_INS_LBZU,
    PPC_INS_LFS, PPC_INS_LFD,
    PPC_INS_LFSU, PPC_INS_LFDU,
    PPC_INS_STW, PPC_INS_STH, PPC_INS_STB,
    PPC_INS_STWU, PPC_INS_STHU, PPC_INS_STBU,
    PPC_INS_STFS, PPC_INS_STFD,
    PPC_INS_STFSU, PPC_INS_STFDU,
}

# Branch instructions whose target is printed as a label.
BRANCH_INSNS = {PPC_INS_B, PPC_INS_BL, PPC_INS_BDZ, PPC_INS_BDNZ, PPC_INS_BC}


def get_relocation_for_offset(o):
    """Return the relocation recorded in the 4 bytes at `o`, or None."""
    for i in range(o, o + 4):
        if i in relocations:
            return relocations[i]
    return None


def get_label(addr):
    """Return the label for `addr`, or the address as a hex literal."""
    if addr in labels:
        return labels[addr]
    return '0x%08X' % addr


def print_label(label):
    """Print a label definition; the three entry points are made global."""
    if label in ['_prolog', '_epilog', '_unresolved']:
        label = '.global %s\n%s' % (label, label)
    print('%s:' % label)


def sign_extend_16(value):
    """Interpret a positive value with bit 15 set as a negative 16-bit int."""
    if value > 0 and (value & 0x8000):
        value -= 0x10000
    return value


def disasm_fcmp(inst):
    """Format the instruction word `inst` as an fcmpo instruction."""
    crd = (inst & 0x03800000) >> 23
    a = (inst & 0x001f0000) >> 16
    b = (inst & 0x0000f800) >> 11
    return 'fcmpo cr%i, f%i, f%i' % (crd, a, b)


def disasm_mspr(inst, mode):
    """Format `inst` as mtspr (`mode` true) or mfspr (`mode` false).

    Returns None when bit 0 of the word is set.
    """
    if (inst & 1):
        return None
    d = (inst & 0x03e00000) >> 21
    a = (inst & 0x001f0000) >> 16
    b = (inst & 0x0000f800) >> 11
    # The two 5-bit fields are combined with `b` as the high half.
    spr = (b << 5) + a
    if mode:
        return 'mtspr 0x%X, r%i' % (spr, d)
    else:
        return 'mfspr r%i, 0x%X' % (d, spr)


def disasm_mcrxr(inst):
    """Format `inst` as mcrxr, or return None if other field bits are set."""
    if (inst & 0x007ff801):
        return None
    crd = (inst & 0x03800000) >> 23
    return 'mcrxr cr%i' % crd


def disassemble_insn_that_capstone_cant_handle(o, reloc):
    """Decode by hand the word at `o` that capstone rejected.

    Handles mtspr, mfspr, mcrxr and fcmpo.  Anything else is emitted as a
    raw ``.4byte`` with an error comment (plus the relocation, if any).
    """
    if reloc:
        relocComment = '\t;# %s:%s' % (get_label(reloc['addr']),
                                       relocation_type_name(reloc['type']))
    else:
        relocComment = ''
    raw = read_u32(o)
    asm = None
    idx = (raw & 0xfc000000) >> 26
    idx2 = (raw & 0x000007fe) >> 1
    # mtspr
    if idx == 31 and idx2 == 467:
        asm = disasm_mspr(raw, 1)
    # mfspr
    elif idx == 31 and idx2 == 339:
        asm = disasm_mspr(raw, 0)
    # mcrxr
    elif idx == 31 and idx2 == 512:
        asm = disasm_mcrxr(raw)
    # fcmpo
    elif idx == 63 and idx2 == 32:
        asm = disasm_fcmp(raw)
    # Paired singles (idx 4, 56, 57, 60, 61): this file defines no decoder
    # for them, so they fall through to the raw .4byte output below instead
    # of raising NameError on an undefined helper.
    if asm:
        return asm
    return '.4byte 0x%08X ;# (error: unknown instruction) %s' % (raw, relocComment)


def disassemble_insn(o, reloc):
    """Return the assembler text for the instruction word at `o`.

    `reloc` is the relocation covering that word (or None); when present it
    replaces immediates and branch targets with label expressions, or is
    appended as a comment when no special form applies.
    """
    if reloc:
        relocComment = '\t;# %s:%s' % (get_label(reloc['addr']),
                                       relocation_type_name(reloc['type']))
        relocType = reloc['type']
    else:
        relocComment = ''
        relocType = -1

    try:
        insn = next(cs.disasm(filecontent[o : o+4], o))
    except StopIteration:
        return disassemble_insn_that_capstone_cant_handle(o, reloc)

    # handle relocs label
    if insn.id in {PPC_INS_BL, PPC_INS_BC} and relocType in {R_PPC_REL24, R_PPC_REL14}:
        return '%s %s' % (insn.mnemonic, get_label(reloc['addr']))
    if insn.id == PPC_INS_LIS and relocType == R_PPC_ADDR16_HA:
        return '%s %s, %s@ha' % (insn.mnemonic,
                                 insn.reg_name(insn.operands[0].reg),
                                 get_label(reloc['addr']))
    if insn.id == PPC_INS_LIS and relocType == R_PPC_ADDR16_HI:
        return '%s %s, %s@h' % (insn.mnemonic,
                                insn.reg_name(insn.operands[0].reg),
                                get_label(reloc['addr']))
    if insn.id in {PPC_INS_ADDI, PPC_INS_ORI} and relocType == R_PPC_ADDR16_LO:
        return '%s %s, %s, %s@l' % (insn.mnemonic,
                                    insn.reg_name(insn.operands[0].reg),
                                    insn.reg_name(insn.operands[1].reg),
                                    get_label(reloc['addr']))
    if insn.id in LOAD_STORE_INSNS and relocType == R_PPC_ADDR16_LO:
        return '%s %s, %s@l(%s)' % (insn.mnemonic,
                                    insn.reg_name(insn.operands[0].reg),
                                    get_label(reloc['addr']),
                                    insn.reg_name(insn.operands[1].mem.base))

    # branch target labels
    if insn.id in BRANCH_INSNS:
        if reloc:
            return '%s %s' % (insn.mnemonic, get_label(reloc['addr']))
        # HACK: hard-coded override for one specific offset, kept from the
        # original (marked there as a capstone quirk).
        # NOTE(review): the target is taken from operands[0] only; confirm
        # that is always the immediate for every branch form.
        if o == 0xAD8C:
            return '%s lbl_0000ADB0' % insn.mnemonic
        return '%s %s' % (insn.mnemonic, get_label(insn.operands[0].imm))

    # misc. fixes

    # Sign-extend 16-bit immediates that capstone reports as unsigned.
    if insn.id in {PPC_INS_ADDI, PPC_INS_ADDIC, PPC_INS_SUBFIC, PPC_INS_MULLI} \
            and (insn.operands[2].imm & 0x8000):
        return "%s %s, %s, %i ;# fixed addi" % (insn.mnemonic,
                                                insn.reg_name(insn.operands[0].reg),
                                                insn.reg_name(insn.operands[1].value.reg),
                                                insn.operands[2].imm - 0x10000)
    if (insn.id == PPC_INS_LI or insn.id == PPC_INS_CMPWI) \
            and (insn.operands[1].imm & 0x8000):
        return "%s %s, %i" % (insn.mnemonic,
                              insn.reg_name(insn.operands[0].reg),
                              insn.operands[1].imm - 0x10000)

    # cntlz -> cntlzw
    if insn.id == PPC_INS_CNTLZW:
        return "cntlzw %s" % insn.op_str

    return '%s %s%s' % (insn.mnemonic, insn.op_str, relocComment)


def scan_local_labels(o, size):
    """Register labels for branch targets found in `size` bytes at `o`.

    Words covered by a relocation are skipped, as are words capstone cannot
    decode.
    """
    end = o + size
    while o < end:
        if not get_relocation_for_offset(o):
            try:
                insn = next(cs.disasm(filecontent[o:o+4], o))
            except StopIteration:
                insn = None
            if insn is not None and insn.id in BRANCH_INSNS:
                for op in insn.operands:
                    if op.type == PPC_OP_IMM:
                        add_label(op.imm)
        o += 4


def dump_code(o, size):
    """Print `size` bytes at file offset `o` as labelled instructions."""
    scan_local_labels(o, size)
    end = o + size
    while o < end:
        if o in labels:
            print_label(labels[o])
        asm = disassemble_insn(o, get_relocation_for_offset(o))
        print('/* %08X %08X */ %s' % (o, read_u32(o), asm))
        o += 4


# returns True if value is 4-byte aligned
def is_aligned(num):
    return num % 4 == 0


# rounds num up to the next multiple of 4
def align(num):
    return (num + 3) & ~3


# returns True for printable ASCII, tab and newline
def is_ascii(code):
    if code >= 0x20 and code <= 0x7E:  # normal characters
        return True
    if code in [0x09, 0x0A]:  # tab, newline
        return True
    return False


# returns True if all elements are zero
def is_all_zero(arr):
    return all(val == 0 for val in arr)


# returns string of comma-separated hex bytes
def hex_bytes(data):
    return ', '.join('0x%02X' % n for n in data)


# reads a string starting at pos; returns '' unless it is NUL-terminated
def read_string(data, pos):
    start = pos
    while pos < len(data) and is_ascii(data[pos]):
        pos += 1
    if pos < len(data) and data[pos] == 0:
        return ''.join(chr(code) for code in data[start:pos])
    return ''


# escapes special characters in the string for use in a C string literal
def escape_string(text):
    return text.replace('\\','\\\\').replace('"','\\"').replace('\n','\\n').replace('\t','\\t')


def output_data_range(secNum, o, end):
    """Print the data bytes in file range [o, end) as assembler directives.

    Emits `.byte` runs for the unaligned head and tail, `.asciz` for
    NUL-terminated strings (section 5 only) and `.4byte` for everything
    else, using a label for words covered by an R_PPC_ADDR32 relocation.
    """
    print(' # 0x%X' % o)
    if not is_aligned(o):
        # Clamp to `end` so a short unaligned range does not spill into the
        # bytes that belong to the next range.
        head = min(align(o), end)
        print(' .byte ' + hex_bytes(filecontent[o:head]))
        o = head
    while o < (end & ~3):
        # Try to see if this is a string.
        string = read_string(filecontent, o)
        if len(string) >= 4 and secNum == 5:  # strings are only in .data
            strEnd = o + len(string) + 1
            paddedEnd = align(strEnd)
            # The string and its padding must stay inside this range, and
            # the padding must be zero because `.balign 4` regenerates it.
            if paddedEnd <= end and is_all_zero(filecontent[strEnd:paddedEnd]):
                print(' .asciz \"%s"' % escape_string(string))
                if not is_aligned(strEnd):
                    print(' .balign 4')
                o = paddedEnd
                continue
        # Not a string
        value = '0x%08X' % read_u32(o)
        reloc = get_relocation_for_offset(o)
        if reloc:
            relocType = reloc['type']
            if relocType == R_PPC_ADDR32:
                value = get_label(reloc['addr'])
            else:
                # Unsupported relocation in data: report it and fall back to
                # the raw word (previously `value` was unbound or stale).
                print('dunno what to do about %s here' % relocation_type_name(relocType))
        print(' .4byte %s' % value)
        o += 4
    if o < end:
        print(' .byte ' + hex_bytes(filecontent[o:end]))
    return


def dump_data(secNum, o, size):
    """Print a data section, splitting it into ranges at every label."""
    end = o + size
    lastPos = o
    while o < end:
        if o in labels:
            if o - lastPos > 0:
                output_data_range(secNum, lastPos, o)
            print_label(labels[o])
            lastPos = o
        o += 1
    if o - lastPos > 0:
        output_data_range(secNum, lastPos, o)
    return


def output_bss_range(start, end):
    """Print a `.skip` directive reserving the bytes in [start, end)."""
    print(' .skip 0x%X' % (end - start))


def dump_bss(o, size):
    """Print a bss section, splitting it into ranges at every label."""
    end = o + size
    lastPos = o
    while o < end:
        if o in labels:
            if o - lastPos > 0:
                output_bss_range(lastPos, o)
            print_label(labels[o])
            lastPos = o
        o += 1
    if o - lastPos > 0:
        output_bss_range(lastPos, o)
    return


# Dump every non-empty section according to its kind.
for i, section in enumerate(sectionInfo):
    if section['offset'] == 0 and section['length'] == 0:
        continue
    print('# %i' % i)
    print('.section %s' % section['name'])
    if section['is_bss']:
        # bss section
        dump_bss(section['offset'], section['length'])
    elif section['flags'] & 1:
        # code section
        dump_code(section['offset'], section['length'])
    elif section['offset'] != 0:
        # data section
        dump_data(i, section['offset'], section['length'])
    print('')