Very experimental analyzer, DOL & REL support, function signatures

This commit is contained in:
2023-01-18 12:52:38 -08:00
parent d76c554d31
commit d864b0e395
164 changed files with 97543 additions and 705 deletions

View File

@@ -5,7 +5,7 @@ use std::{
path::PathBuf,
};
use anyhow::{Context, Error, Result};
use anyhow::{anyhow, bail, Context, Result};
use argh::FromArgs;
use object::{Object, ObjectSymbol, SymbolScope};
@@ -45,9 +45,9 @@ fn create(args: CreateArgs) -> Result<()> {
// Process response files (starting with '@')
let mut files = Vec::with_capacity(args.files.len());
for path in args.files {
let path_str = path.to_str().ok_or_else(|| {
Error::msg(format!("'{}' is not valid UTF-8", path.to_string_lossy()))
})?;
let path_str = path
.to_str()
.ok_or_else(|| anyhow!("'{}' is not valid UTF-8", path.display()))?;
match path_str.strip_prefix('@') {
Some(rsp_file) => {
let reader = BufReader::new(
@@ -71,25 +71,23 @@ fn create(args: CreateArgs) -> Result<()> {
let mut identifiers = Vec::with_capacity(files.len());
let mut symbol_table = BTreeMap::new();
for path in &files {
let file_name = path.file_name().ok_or_else(|| {
Error::msg(format!("'{}' is not a file path", path.to_string_lossy()))
})?;
let file_name = file_name.to_str().ok_or_else(|| {
Error::msg(format!("'{}' is not valid UTF-8", file_name.to_string_lossy()))
})?;
let file_name = path
.file_name()
.ok_or_else(|| anyhow!("'{}' is not a file path", path.display()))?;
let file_name = file_name
.to_str()
.ok_or_else(|| anyhow!("'{}' is not valid UTF-8", file_name.to_string_lossy()))?;
let identifier = file_name.as_bytes().to_vec();
identifiers.push(identifier.clone());
let entries = match symbol_table.entry(identifier) {
Entry::Vacant(e) => e.insert(Vec::new()),
Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate file name '{file_name}'")))
}
Entry::Occupied(_) => bail!("Duplicate file name '{file_name}'"),
};
let object_file = File::open(path)
.with_context(|| format!("Failed to open object file '{}'", path.to_string_lossy()))?;
.with_context(|| format!("Failed to open object file '{}'", path.display()))?;
let map = unsafe { memmap2::MmapOptions::new().map(&object_file) }
.with_context(|| format!("Failed to mmap object file: '{}'", path.to_string_lossy()))?;
.with_context(|| format!("Failed to mmap object file: '{}'", path.display()))?;
let obj = object::File::parse(map.as_ref())?;
for symbol in obj.symbols() {
if symbol.scope() == SymbolScope::Dynamic {

View File

@@ -1,4 +1,4 @@
use anyhow::{Error, Result};
use anyhow::{anyhow, Result};
use argh::FromArgs;
use cwdemangle::{demangle, DemangleOptions};
@@ -21,6 +21,6 @@ pub fn run(args: Args) -> Result<()> {
println!("{symbol}");
Ok(())
}
None => Err(Error::msg("Failed to demangle symbol")),
None => Err(anyhow!("Failed to demangle symbol")),
}
}

582
src/cmd/dol.rs Normal file
View File

@@ -0,0 +1,582 @@
use std::{
collections::BTreeMap,
fs::File,
io::{BufRead, BufReader, BufWriter},
path::{Path, PathBuf},
};
use anyhow::{anyhow, bail, Context, Result};
use argh::FromArgs;
use crate::util::{
cfa::{
locate_sda_bases, AnalysisPass, AnalyzerState, FindSaveRestSleds,
FindTRKInterruptVectorTable,
},
config::{parse_symbol_line, write_symbols},
dol::process_dol,
elf::process_elf,
executor::read_u32,
map::process_map,
obj::{
ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags,
ObjSymbolKind,
},
sigs::check_signatures,
tracker::Tracker,
};
// Top-level arguments of the `dol` command. The only payload is the selected
// subcommand, which `run` dispatches on.
// NOTE(review): the `///` line below is presumably consumed by the argh derive as
// help text, so it is intentionally left untouched -- confirm before rewording it.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing DOL files.
#[argh(subcommand, name = "dol")]
pub struct Args {
// The `dol` subcommand chosen on the command line.
#[argh(subcommand)]
command: SubCommand,
}
// The subcommands available under `dol`. Each variant wraps the parsed arguments
// of one subcommand; `run` forwards them to the function of the same name.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
// Handled by `disasm`.
Disasm(DisasmArgs),
// Handled by `info`.
Info(InfoArgs),
}
// Arguments of `dol disasm`.
// NOTE(review): the `///` lines are presumably used by argh as help text, so they
// are left byte-identical; extra notes are written as plain `//` comments.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Disassembles a DOL file.
#[argh(subcommand, name = "disasm")]
pub struct DisasmArgs {
// Optional (`-m`). When given, `disasm` parses it with `process_map`; the parsed
// entries are currently not used for anything else.
#[argh(option, short = 'm')]
/// path to input map
map_file: Option<PathBuf>,
// Optional (`-s`). When given, `disasm` reads symbols from it before analysis and
// writes the resulting symbol list back to the same path afterwards.
#[argh(option, short = 's')]
/// path to symbols file
symbols_file: Option<PathBuf>,
// Optional (`-e`). When given, `disasm` passes it to `validate` after analysis.
#[argh(option, short = 'e')]
/// ELF file to validate against (debugging only)
elf_file: Option<PathBuf>,
// Required positional argument; loaded with `process_dol`.
#[argh(positional)]
/// DOL file
dol_file: PathBuf,
}
// Arguments of `dol info`.
// NOTE(review): the `///` lines are presumably used by argh as help text, so they
// are left byte-identical.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Views DOL file information.
#[argh(subcommand, name = "info")]
pub struct InfoArgs {
// Required positional argument; loaded with `process_dol`.
#[argh(positional)]
/// DOL file
dol_file: PathBuf,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Disasm(c_args) => disasm(c_args),
SubCommand::Info(c_args) => info(c_args),
}
}
// Table of `(symbol name, embedded signature file contents)` pairs.
//
// `apply_signatures` walks this table in order: for every name that is already
// present in the object's symbol list it runs `check_signatures` at that symbol's
// address with the paired signature text. Names that are not present are skipped.
//
// Each signature file is embedded at compile time via `include_str!`. The asset is
// normally named after the symbol; the `__OSEVStart`, `__OSDBINTSTART` and
// `__OSDBJUMPSTART` entries are the exceptions and point at differently named files.
const SIGNATURES: &[(&str, &str)] = &[
("__init_registers", include_str!("../../assets/__init_registers.yml")),
("__init_hardware", include_str!("../../assets/__init_hardware.yml")),
("__init_data", include_str!("../../assets/__init_data.yml")),
("__set_debug_bba", include_str!("../../assets/__set_debug_bba.yml")),
("__OSPSInit", include_str!("../../assets/__OSPSInit.yml")),
("__OSFPRInit", include_str!("../../assets/__OSFPRInit.yml")),
("__OSCacheInit", include_str!("../../assets/__OSCacheInit.yml")),
("DMAErrorHandler", include_str!("../../assets/DMAErrorHandler.yml")),
("DBInit", include_str!("../../assets/DBInit.yml")),
("OSInit", include_str!("../../assets/OSInit.yml")),
("__OSThreadInit", include_str!("../../assets/__OSThreadInit.yml")),
("__OSInitIPCBuffer", include_str!("../../assets/__OSInitIPCBuffer.yml")),
("EXIInit", include_str!("../../assets/EXIInit.yml")),
("EXIGetID", include_str!("../../assets/EXIGetID.yml")),
("exit", include_str!("../../assets/exit.yml")),
("_ExitProcess", include_str!("../../assets/_ExitProcess.yml")),
("__fini_cpp", include_str!("../../assets/__fini_cpp.yml")),
("__destroy_global_chain", include_str!("../../assets/__destroy_global_chain.yml")),
("InitMetroTRK", include_str!("../../assets/InitMetroTRK.yml")),
("InitMetroTRKCommTable", include_str!("../../assets/InitMetroTRKCommTable.yml")),
("OSExceptionInit", include_str!("../../assets/OSExceptionInit.yml")),
("OSDefaultExceptionHandler", include_str!("../../assets/OSDefaultExceptionHandler.yml")),
("__OSUnhandledException", include_str!("../../assets/__OSUnhandledException.yml")),
("OSDisableScheduler", include_str!("../../assets/OSDisableScheduler.yml")),
("__OSReschedule", include_str!("../../assets/__OSReschedule.yml")),
("__OSInitSystemCall", include_str!("../../assets/__OSInitSystemCall.yml")),
("OSInitAlarm", include_str!("../../assets/OSInitAlarm.yml")),
("__OSInitAlarm", include_str!("../../assets/__OSInitAlarm.yml")),
("__OSEVStart", include_str!("../../assets/OSExceptionVector.yml")),
("__OSDBINTSTART", include_str!("../../assets/__OSDBIntegrator.yml")),
("__OSDBJUMPSTART", include_str!("../../assets/__OSDBJump.yml")),
("SIInit", include_str!("../../assets/SIInit.yml")),
("SIGetType", include_str!("../../assets/SIGetType.yml")),
("SISetSamplingRate", include_str!("../../assets/SISetSamplingRate.yml")),
("SISetXY", include_str!("../../assets/SISetXY.yml")),
("VIGetTvFormat", include_str!("../../assets/VIGetTvFormat.yml")),
("DVDInit", include_str!("../../assets/DVDInit.yml")),
("DVDSetAutoFatalMessaging", include_str!("../../assets/DVDSetAutoFatalMessaging.yml")),
("OSSetArenaLo", include_str!("../../assets/OSSetArenaLo.yml")),
("OSSetArenaHi", include_str!("../../assets/OSSetArenaHi.yml")),
("OSSetMEM1ArenaLo", include_str!("../../assets/OSSetMEM1ArenaLo.yml")),
("OSSetMEM1ArenaHi", include_str!("../../assets/OSSetMEM1ArenaHi.yml")),
("OSSetMEM2ArenaLo", include_str!("../../assets/OSSetMEM2ArenaLo.yml")),
("OSSetMEM2ArenaHi", include_str!("../../assets/OSSetMEM2ArenaHi.yml")),
("__OSInitAudioSystem", include_str!("../../assets/__OSInitAudioSystem.yml")),
("__OSInitMemoryProtection", include_str!("../../assets/__OSInitMemoryProtection.yml")),
// ("BATConfig", include_str!("../../assets/BATConfig.yml")), TODO
("ReportOSInfo", include_str!("../../assets/ReportOSInfo.yml")),
("__check_pad3", include_str!("../../assets/__check_pad3.yml")),
("OSResetSystem", include_str!("../../assets/OSResetSystem.yml")),
("OSReturnToMenu", include_str!("../../assets/OSReturnToMenu.yml")),
("__OSReturnToMenu", include_str!("../../assets/__OSReturnToMenu.yml")),
("__OSShutdownDevices", include_str!("../../assets/__OSShutdownDevices.yml")),
("__OSInitSram", include_str!("../../assets/__OSInitSram.yml")),
("__OSSyncSram", include_str!("../../assets/__OSSyncSram.yml")),
("__OSGetExceptionHandler", include_str!("../../assets/__OSGetExceptionHandler.yml")),
("OSRegisterResetFunction", include_str!("../../assets/OSRegisterResetFunction.yml")),
("OSRegisterShutdownFunction", include_str!("../../assets/OSRegisterShutdownFunction.yml")),
("DecrementerExceptionHandler", include_str!("../../assets/DecrementerExceptionHandler.yml")),
("DecrementerExceptionCallback", include_str!("../../assets/DecrementerExceptionCallback.yml")),
("__OSInterruptInit", include_str!("../../assets/__OSInterruptInit.yml")),
("__OSContextInit", include_str!("../../assets/__OSContextInit.yml")),
("OSSwitchFPUContext", include_str!("../../assets/OSSwitchFPUContext.yml")),
("OSReport", include_str!("../../assets/OSReport.yml")),
("TRK_main", include_str!("../../assets/TRK_main.yml")),
("TRKNubWelcome", include_str!("../../assets/TRKNubWelcome.yml")),
("TRKInitializeNub", include_str!("../../assets/TRKInitializeNub.yml")),
("TRKInitializeIntDrivenUART", include_str!("../../assets/TRKInitializeIntDrivenUART.yml")),
("TRKEXICallBack", include_str!("../../assets/TRKEXICallBack.yml")),
("TRKLoadContext", include_str!("../../assets/TRKLoadContext.yml")),
("TRKInterruptHandler", include_str!("../../assets/TRKInterruptHandler.yml")),
("TRKExceptionHandler", include_str!("../../assets/TRKExceptionHandler.yml")),
("TRKSaveExtended1Block", include_str!("../../assets/TRKSaveExtended1Block.yml")),
("TRKNubMainLoop", include_str!("../../assets/TRKNubMainLoop.yml")),
("TRKTargetContinue", include_str!("../../assets/TRKTargetContinue.yml")),
("TRKSwapAndGo", include_str!("../../assets/TRKSwapAndGo.yml")),
("TRKRestoreExtended1Block", include_str!("../../assets/TRKRestoreExtended1Block.yml")),
(
"TRKInterruptHandlerEnableInterrupts",
include_str!("../../assets/TRKInterruptHandlerEnableInterrupts.yml"),
),
("memset", include_str!("../../assets/memset.yml")),
(
"__msl_runtime_constraint_violation_s",
include_str!("../../assets/__msl_runtime_constraint_violation_s.yml"),
),
("ClearArena", include_str!("../../assets/ClearArena.yml")),
("IPCCltInit", include_str!("../../assets/IPCCltInit.yml")),
("__OSInitSTM", include_str!("../../assets/__OSInitSTM.yml")),
("IOS_Open", include_str!("../../assets/IOS_Open.yml")),
("__ios_Ipc2", include_str!("../../assets/__ios_Ipc2.yml")),
("IPCiProfQueueReq", include_str!("../../assets/IPCiProfQueueReq.yml")),
("SCInit", include_str!("../../assets/SCInit.yml")),
("SCReloadConfFileAsync", include_str!("../../assets/SCReloadConfFileAsync.yml")),
("NANDPrivateOpenAsync", include_str!("../../assets/NANDPrivateOpenAsync.yml")),
("nandIsInitialized", include_str!("../../assets/nandIsInitialized.yml")),
("nandOpen", include_str!("../../assets/nandOpen.yml")),
("nandGenerateAbsPath", include_str!("../../assets/nandGenerateAbsPath.yml")),
("nandGetHeadToken", include_str!("../../assets/nandGetHeadToken.yml")),
("ISFS_OpenAsync", include_str!("../../assets/ISFS_OpenAsync.yml")),
("nandConvertErrorCode", include_str!("../../assets/nandConvertErrorCode.yml")),
("NANDLoggingAddMessageAsync", include_str!("../../assets/NANDLoggingAddMessageAsync.yml")),
("__NANDPrintErrorMessage", include_str!("../../assets/__NANDPrintErrorMessage.yml")),
("__OSInitNet", include_str!("../../assets/__OSInitNet.yml")),
("__DVDCheckDevice", include_str!("../../assets/__DVDCheckDevice.yml")),
("__OSInitPlayTime", include_str!("../../assets/__OSInitPlayTime.yml")),
("__OSStartPlayRecord", include_str!("../../assets/__OSStartPlayRecord.yml")),
("NANDInit", include_str!("../../assets/NANDInit.yml")),
("ISFS_OpenLib", include_str!("../../assets/ISFS_OpenLib.yml")),
("ESP_GetTitleId", include_str!("../../assets/ESP_GetTitleId.yml")),
("NANDSetAutoErrorMessaging", include_str!("../../assets/NANDSetAutoErrorMessaging.yml")),
("__DVDFSInit", include_str!("../../assets/__DVDFSInit.yml")),
("__DVDClearWaitingQueue", include_str!("../../assets/__DVDClearWaitingQueue.yml")),
("__DVDInitWA", include_str!("../../assets/__DVDInitWA.yml")),
("__DVDLowSetWAType", include_str!("../../assets/__DVDLowSetWAType.yml")),
("__fstLoad", include_str!("../../assets/__fstLoad.yml")),
("DVDReset", include_str!("../../assets/DVDReset.yml")),
("DVDLowReset", include_str!("../../assets/DVDLowReset.yml")),
("DVDReadDiskID", include_str!("../../assets/DVDReadDiskID.yml")),
("stateReady", include_str!("../../assets/stateReady.yml")),
("DVDLowWaitCoverClose", include_str!("../../assets/DVDLowWaitCoverClose.yml")),
("__DVDStoreErrorCode", include_str!("../../assets/__DVDStoreErrorCode.yml")),
("DVDLowStopMotor", include_str!("../../assets/DVDLowStopMotor.yml")),
("DVDGetDriveStatus", include_str!("../../assets/DVDGetDriveStatus.yml")),
("printf", include_str!("../../assets/printf.yml")),
("sprintf", include_str!("../../assets/sprintf.yml")),
("vprintf", include_str!("../../assets/vprintf.yml")),
("vsprintf", include_str!("../../assets/vsprintf.yml")),
("vsnprintf", include_str!("../../assets/vsnprintf.yml")),
("__pformatter", include_str!("../../assets/__pformatter.yml")),
("longlong2str", include_str!("../../assets/longlong2str.yml")),
("__mod2u", include_str!("../../assets/__mod2u.yml")),
("__FileWrite", include_str!("../../assets/__FileWrite.yml")),
("fwrite", include_str!("../../assets/fwrite.yml")),
("__fwrite", include_str!("../../assets/__fwrite.yml")),
("__stdio_atexit", include_str!("../../assets/__stdio_atexit.yml")),
("__StringWrite", include_str!("../../assets/__StringWrite.yml")),
];
/// Returns the address (truncated to 32 bits) of the first symbol in `obj` whose
/// name is exactly `name`, or `None` when no such symbol exists.
fn find_symbol_address(obj: &ObjInfo, name: &str) -> Option<u32> {
    obj.symbols.iter().find(|sym| sym.name == name).map(|sym| sym.address as u32)
}

/// Runs the bundled function signatures against `obj`.
///
/// The steps, in order:
/// 1. the entry point is checked against the `__start` signature;
/// 2. every `SIGNATURES` entry whose symbol name is currently present in `obj` is
///    checked at that symbol's address;
/// 3. if `__init_user` exists, the function entries found by analyzing it are tried
///    against the `__init_cpp` signature until one matches;
/// 4. if `_ctors` / `_dtors` exist, the first `_ctors` word and the second `_dtors`
///    word are read and, when non-zero, checked against the
///    `__init_cpp_exceptions` / `__fini_cpp_exceptions` signatures.
///
/// # Errors
///
/// Propagates any error from `check_signatures`, `process_function_at` or
/// `section_at`, and fails when the `_ctors` / `_dtors` words cannot be read.
pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> {
    let entry_addr = obj.entry as u32;
    check_signatures(obj, entry_addr, include_str!("../../assets/__start.yml"))?;

    // The symbol list is searched again for every table entry, right before it is
    // needed: `check_signatures` receives `obj` mutably between lookups.
    for &(sig_name, sig_yaml) in SIGNATURES {
        if let Some(addr) = find_symbol_address(obj, sig_name) {
            check_signatures(obj, addr, sig_yaml)?;
        }
    }

    if let Some(init_user) = find_symbol_address(obj, "__init_user") {
        // __init_user can be overridden, but we can still look for __init_cpp from it
        let mut analyzer = AnalyzerState::default();
        analyzer.process_function_at(&*obj, init_user)?;
        for candidate in analyzer.function_entries {
            let matched =
                check_signatures(obj, candidate, include_str!("../../assets/__init_cpp.yml"))?;
            if matched {
                break;
            }
        }
    }

    if let Some(ctors) = find_symbol_address(obj, "_ctors") {
        // First entry of ctors is __init_cpp_exceptions
        let section = obj.section_at(ctors)?;
        let first_entry = read_u32(&section.data, ctors, section.address as u32)
            .ok_or_else(|| anyhow!("Failed to read _ctors data"))?;
        if first_entry != 0 {
            check_signatures(
                obj,
                first_entry,
                include_str!("../../assets/__init_cpp_exceptions.yml"),
            )?;
        }
    }

    if let Some(dtors) = find_symbol_address(obj, "_dtors") {
        // Second entry of dtors is __fini_cpp_exceptions
        let section = obj.section_at(dtors)?;
        let second_entry = read_u32(&section.data, dtors + 4, section.address as u32)
            .ok_or_else(|| anyhow!("Failed to read _dtors data"))?;
        if second_entry != 0 {
            check_signatures(
                obj,
                second_entry,
                include_str!("../../assets/__fini_cpp_exceptions.yml"),
            )?;
        }
    }

    Ok(())
}
fn info(args: InfoArgs) -> Result<()> {
let mut obj = process_dol(&args.dol_file)?;
apply_signatures(&mut obj)?;
// Apply known functions from extab
let mut state = AnalyzerState::default();
for (&addr, &size) in &obj.known_functions {
state.function_entries.insert(addr);
state.function_bounds.insert(addr, addr + size);
}
for symbol in &obj.symbols {
if symbol.kind != ObjSymbolKind::Function {
continue;
}
state.function_entries.insert(symbol.address as u32);
if !symbol.size_known {
continue;
}
state.function_bounds.insert(symbol.address as u32, (symbol.address + symbol.size) as u32);
}
// Also check the start of each code section
for section in &obj.sections {
if section.kind == ObjSectionKind::Code {
state.function_entries.insert(section.address as u32);
}
}
state.detect_functions(&obj)?;
log::info!("Discovered {} functions", state.function_slices.len());
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
println!("{}:", obj.name);
println!("Entry point: {:#010X}", obj.entry);
println!("\nSections:");
println!("\t{: >10} | {: <10} | {: <10} | {: <10}", "Name", "Address", "Size", "File Off");
for section in &obj.sections {
println!(
"\t{: >10} | {:#010X} | {: <#10X} | {: <#10X}",
section.name, section.address, section.size, section.file_offset
);
}
println!("\nDiscovered symbols:");
println!("\t{: >23} | {: <10} | {: <10}", "Name", "Address", "Size");
let mut symbols = obj.symbols.clone();
symbols.sort_by_key(|sym| sym.address);
for symbol in symbols {
if symbol.name.starts_with('@') || symbol.name.starts_with("fn_") {
continue;
}
if symbol.size_known {
println!("\t{: >23} | {:#010X} | {: <#10X}", symbol.name, symbol.address, symbol.size);
} else {
let size_str = if symbol.section.is_none() { "ABS" } else { "?" };
println!("\t{: >23} | {:#010X} | {: <10}", symbol.name, symbol.address, size_str);
}
}
println!("\n{} discovered functions from exception table", obj.known_functions.len());
Ok(())
}
fn disasm(args: DisasmArgs) -> Result<()> {
let mut obj = process_dol(&args.dol_file)?;
log::info!("Performing initial control flow analysis");
// if detect_sda_bases(&mut obj).context("Failed to locate SDA bases")? {
// let (sda2_base, sda_base) = obj.sda_bases.unwrap();
// log::info!("Found _SDA2_BASE_ @ {:#010X}, _SDA_BASE_ @ {:#010X}", sda2_base, sda_base);
// } else {
// bail!("Unable to locate SDA bases");
// }
if let Some(map) = &args.map_file {
let mut reader = BufReader::new(
File::open(map)
.with_context(|| format!("Failed to open map file '{}'", map.display()))?,
);
let _entries = process_map(&mut reader)?;
}
let mut state = AnalyzerState::default();
if let Some(symbols_path) = &args.symbols_file {
let mut reader = BufReader::new(File::open(symbols_path).with_context(|| {
format!("Failed to open symbols file '{}'", symbols_path.display())
})?);
for result in reader.lines() {
let line = match result {
Ok(line) => line,
Err(e) => bail!("Failed to process symbols file: {e:?}"),
};
if let Some(symbol) = parse_symbol_line(&line, &obj)? {
// if symbol.kind == ObjSymbolKind::Function {
// state.function_entries.insert(symbol.address as u32);
// if symbol.size_known {
// state
// .function_bounds
// .insert(symbol.address as u32, (symbol.address + symbol.size) as u32);
// }
// }
if let Some(existing_symbol) = obj
.symbols
.iter_mut()
.find(|e| e.address == symbol.address && e.kind == symbol.kind)
{
*existing_symbol = symbol;
} else {
obj.symbols.push(symbol);
}
}
}
}
// TODO move before symbols?
apply_signatures(&mut obj)?;
// Apply known functions from extab
for (&addr, &size) in &obj.known_functions {
state.function_entries.insert(addr);
state.function_bounds.insert(addr, addr + size);
}
for symbol in &obj.symbols {
if symbol.kind != ObjSymbolKind::Function {
continue;
}
state.function_entries.insert(symbol.address as u32);
if !symbol.size_known {
continue;
}
state.function_bounds.insert(symbol.address as u32, (symbol.address + symbol.size) as u32);
}
// Also check the start of each code section
for section in &obj.sections {
if section.kind == ObjSectionKind::Code {
state.function_entries.insert(section.address as u32);
}
}
state.detect_functions(&obj)?;
log::info!("Discovered {} functions", state.function_slices.len());
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
log::info!("Performing relocation analysis");
let mut tracker = Tracker::new(&obj);
tracker.process(&obj)?;
log::info!("Applying relocations");
tracker.apply(&mut obj, false)?;
//
// log::info!("Writing disassembly");
// let mut w = BufWriter::new(File::create("out.s")?);
// write_asm(&mut w, &obj)?;
if let Some(symbols_path) = &args.symbols_file {
let mut symbols_writer = BufWriter::new(
File::create(&symbols_path)
.with_context(|| format!("Failed to create '{}'", symbols_path.display()))?,
);
write_symbols(&mut symbols_writer, &obj)?;
}
// (debugging) validate against ELF
if let Some(file) = args.elf_file {
validate(&obj, &file, &state)?;
}
Ok(())
}
/// (Debugging aid) Compares the analysis results for a DOL against a reference ELF
/// and logs every discrepancy. Differences are only reported, never returned as
/// errors.
///
/// Checks performed:
/// 1. each ELF section has a DOL section at the same index with the same name/kind;
/// 2. each symbol in an ELF code section was discovered as a function start, with
///    the same end address when the ELF symbol has a non-zero size;
/// 3. each discovered function (with a non-zero end) starts at an ELF symbol;
/// 4. relocations agree in both directions (presence, kind, and target + addend).
///
/// # Parameters
///
/// - `obj`: the analyzed DOL object.
/// - `elf_file`: path to the reference ELF, loaded with `process_elf`.
/// - `state`: analyzer state holding the discovered function bounds.
///
/// # Errors
///
/// Fails only when the ELF cannot be loaded or a relocation map cannot be built.
fn validate<P: AsRef<Path>>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> {
    let real_obj = process_elf(elf_file)?;

    // 1. Section layout.
    for real_section in &real_obj.sections {
        let obj_section = match obj.sections.get(real_section.index) {
            Some(v) => v,
            None => {
                log::error!(
                    "Section {} {} doesn't exist in DOL",
                    real_section.index,
                    real_section.name
                );
                continue;
            }
        };
        if obj_section.kind != real_section.kind || obj_section.name != real_section.name {
            log::warn!(
                "Section mismatch: {} {:?} ({}) should be {} {:?}",
                obj_section.name,
                obj_section.kind,
                obj_section.index,
                real_section.name,
                real_section.kind
            );
        }
    }

    // 2. Every symbol in a code section should have been discovered.
    // NOTE: all symbols in code sections are treated as functions here; filtering by
    // symbol kind (or skipping `switch_` labels) is deliberately not applied.
    let mut real_functions = BTreeMap::<u32, String>::new();
    for section in &real_obj.sections {
        if section.kind != ObjSectionKind::Code {
            continue;
        }
        for (_symbol_idx, symbol) in real_obj.symbols_for_section(section.index) {
            real_functions.insert(symbol.address as u32, symbol.name.clone());
            match state.function_bounds.get(&(symbol.address as u32)) {
                Some(&end) => {
                    if symbol.size > 0 && end != (symbol.address + symbol.size) as u32 {
                        log::warn!(
                            "Function {:#010X} ({}) ends at {:#010X}, expected {:#010X}",
                            symbol.address,
                            symbol.name,
                            end,
                            symbol.address + symbol.size
                        );
                    }
                }
                None => {
                    log::warn!(
                        "Function {:#010X} ({}) not discovered!",
                        symbol.address,
                        symbol.name
                    );
                }
            }
        }
    }

    // 3. Every discovered function should start at a real symbol.
    for (&start, &end) in &state.function_bounds {
        if end == 0 || real_functions.contains_key(&start) {
            continue;
        }
        // Report the closest real function below `start`, if any. There may be none
        // (for example when the ELF has no code symbols), so this must not unwrap.
        match real_functions.range(..start).next_back() {
            Some((real_addr, real_name)) => {
                log::warn!(
                    "Function {:#010X} not real (actually a part of {} @ {:#010X})",
                    start,
                    real_name,
                    real_addr
                );
            }
            None => {
                log::warn!("Function {:#010X} not real (no real function precedes it)", start);
            }
        }
    }

    // 4. Relocations, compared section by section.
    for real_section in &real_obj.sections {
        let obj_section = match obj.sections.get(real_section.index) {
            Some(v) => v,
            None => continue,
        };
        let real_map = real_section.build_relocation_map()?;
        let obj_map = obj_section.build_relocation_map()?;

        // 4a. Relocations in the ELF that are missing or different in the DOL.
        for (&real_addr, real_reloc) in &real_map {
            let real_symbol = &real_obj.symbols[real_reloc.target_symbol];
            let obj_reloc = match obj_map.get(&real_addr) {
                Some(v) => v,
                None => {
                    // Ignore GCC local jump branches
                    if real_symbol.kind == ObjSymbolKind::Section
                        && real_section.kind == ObjSectionKind::Code
                        && real_reloc.addend != 0
                        && matches!(
                            real_reloc.kind,
                            ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24
                        )
                    {
                        continue;
                    }
                    log::warn!(
                        "Relocation not found @ {:#010X} {:?} to {:#010X}+{:X} ({})",
                        real_addr,
                        real_reloc.kind,
                        real_symbol.address,
                        real_reloc.addend,
                        real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name)
                    );
                    continue;
                }
            };
            let obj_symbol = &obj.symbols[obj_reloc.target_symbol];
            if real_reloc.kind != obj_reloc.kind {
                log::warn!(
                    "Relocation type mismatch @ {:#010X}: {:?} != {:?}",
                    real_addr,
                    obj_reloc.kind,
                    real_reloc.kind
                );
                continue;
            }
            if real_symbol.address as i64 + real_reloc.addend
                != obj_symbol.address as i64 + obj_reloc.addend
            {
                log::warn!(
                    "Relocation target mismatch @ {:#010X} {:?}: {:#010X}+{:X} != {:#010X}+{:X} ({})",
                    real_addr,
                    real_reloc.kind,
                    obj_symbol.address,
                    obj_reloc.addend,
                    real_symbol.address,
                    real_reloc.addend,
                    real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name)
                );
                continue;
            }
        }

        // 4b. Relocations the analysis produced that do not exist in the ELF.
        for (&obj_addr, obj_reloc) in &obj_map {
            if real_map.contains_key(&obj_addr) {
                continue;
            }
            let obj_symbol = &obj.symbols[obj_reloc.target_symbol];
            log::warn!(
                "Relocation not real @ {:#010X} {:?} to {:#010X}+{:X} ({})",
                obj_addr,
                obj_reloc.kind,
                obj_symbol.address,
                obj_reloc.addend,
                obj_symbol.demangled_name.as_ref().unwrap_or(&obj_symbol.name)
            );
        }
    }
    Ok(())
}

View File

@@ -1,24 +1,30 @@
use std::{
collections::{btree_map, hash_map, BTreeMap, HashMap},
collections::{btree_map, btree_map::Entry, hash_map, BTreeMap, HashMap, HashSet},
fs,
fs::{DirBuilder, File},
io::{BufWriter, Write},
path::PathBuf,
io::{BufRead, BufReader, BufWriter, Write},
path::{Path, PathBuf},
};
use anyhow::{Context, Error, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use argh::FromArgs;
use object::{
write::{SectionId, SymbolId},
write::{Mangling, SectionId, SymbolId},
Object, ObjectSection, ObjectSymbol, RelocationKind, RelocationTarget, SectionFlags,
SectionIndex, SectionKind, SymbolFlags, SymbolKind, SymbolScope, SymbolSection,
};
use ppc750cl::Ins;
use serde::{Deserialize, Serialize};
use sha1::{Digest, Sha1};
use crate::util::{
asm::write_asm,
config::write_symbols,
elf::{process_elf, write_elf},
obj::ObjKind,
obj::{ObjKind, ObjReloc, ObjRelocKind, ObjSymbolFlagSet, ObjSymbolKind},
sigs::{check_signature, compare_signature, generate_signature, FunctionSignature},
split::split_obj,
tracker::Tracker,
};
#[derive(FromArgs, PartialEq, Debug)]
@@ -32,8 +38,10 @@ pub struct Args {
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
Config(ConfigArgs),
Disasm(DisasmArgs),
Fixup(FixupArgs),
Signatures(SignaturesArgs),
Split(SplitArgs),
}
@@ -73,18 +81,64 @@ pub struct SplitArgs {
out_dir: PathBuf,
}
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Generates configuration files from an executable ELF.
#[argh(subcommand, name = "config")]
pub struct ConfigArgs {
#[argh(positional)]
/// input file
in_file: PathBuf,
#[argh(positional)]
/// output directory
out_dir: PathBuf,
}
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Builds function signatures from an ELF file.
#[argh(subcommand, name = "sigs")]
pub struct SignaturesArgs {
#[argh(positional)]
/// input file(s)
files: Vec<PathBuf>,
#[argh(option, short = 's')]
/// symbol name
symbol: String,
#[argh(option, short = 'o')]
/// output yml
out_file: PathBuf,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Config(c_args) => config(c_args),
SubCommand::Disasm(c_args) => disasm(c_args),
SubCommand::Fixup(c_args) => fixup(c_args),
SubCommand::Split(c_args) => split(c_args),
SubCommand::Signatures(c_args) => signatures(c_args),
}
}
fn config(args: ConfigArgs) -> Result<()> {
log::info!("Loading {}", args.in_file.display());
let mut obj = process_elf(&args.in_file)?;
DirBuilder::new().recursive(true).create(&args.out_dir)?;
let symbols_path = args.out_dir.join("symbols.txt");
let mut symbols_writer = BufWriter::new(
File::create(&symbols_path)
.with_context(|| format!("Failed to create '{}'", symbols_path.display()))?,
);
write_symbols(&mut symbols_writer, &obj)?;
Ok(())
}
fn disasm(args: DisasmArgs) -> Result<()> {
log::info!("Loading {}", args.elf_file.display());
let obj = process_elf(&args.elf_file)?;
match obj.kind {
ObjKind::Executable => {
log::info!("Splitting {} objects", obj.link_order.len());
let split_objs = split_obj(&obj)?;
let asm_dir = args.out.join("asm");
@@ -92,17 +146,21 @@ fn disasm(args: DisasmArgs) -> Result<()> {
DirBuilder::new().recursive(true).create(&include_dir)?;
fs::write(&include_dir.join("macros.inc"), include_bytes!("../../assets/macros.inc"))?;
let mut files_out = File::create(args.out.join("link_order.txt"))?;
for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) {
let out_path = asm_dir.join(file_name_from_unit(unit, ".s"));
log::info!("Writing {}", out_path.display());
if let Some(parent) = out_path.parent() {
DirBuilder::new().recursive(true).create(parent)?;
}
let mut w = BufWriter::new(File::create(out_path)?);
write_asm(&mut w, split_obj)?;
w.flush()?;
let name = format!("$(OBJ_DIR)/asm/{}", file_name_from_unit(unit, ".o"));
println!(" {name: <70}\\");
writeln!(files_out, "{}", file_name_from_unit(unit, ".o"))?;
}
files_out.flush()?;
}
ObjKind::Relocatable => {
if let Some(parent) = args.out.parent() {
@@ -117,17 +175,16 @@ fn disasm(args: DisasmArgs) -> Result<()> {
fn split(args: SplitArgs) -> Result<()> {
let obj = process_elf(&args.in_file)?;
ensure!(obj.kind == ObjKind::Executable, "Can only split executable objects");
let mut file_map = HashMap::<String, object::write::Object>::new();
let mut file_map = HashMap::<String, Vec<u8>>::new();
let split_objs = split_obj(&obj)?;
for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) {
let out_obj = write_elf(split_obj)?;
match file_map.entry(unit.clone()) {
hash_map::Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate file {unit}")));
}
hash_map::Entry::Vacant(e) => e.insert(out_obj),
hash_map::Entry::Occupied(_) => bail!("Duplicate file {unit}"),
};
}
@@ -135,14 +192,15 @@ fn split(args: SplitArgs) -> Result<()> {
for unit in &obj.link_order {
let object = file_map
.get(unit)
.ok_or_else(|| Error::msg(format!("Failed to find object file for unit '{unit}'")))?;
.ok_or_else(|| anyhow!("Failed to find object file for unit '{unit}'"))?;
let out_path = args.out_dir.join(file_name_from_unit(unit, ".o"));
writeln!(rsp_file, "{}", out_path.to_string_lossy())?;
writeln!(rsp_file, "{}", out_path.display())?;
if let Some(parent) = out_path.parent() {
DirBuilder::new().recursive(true).create(parent)?;
}
let mut file = BufWriter::new(File::create(out_path)?);
object.write_stream(&mut file).map_err(|e| Error::msg(format!("{e:?}")))?;
let mut file = File::create(&out_path)
.with_context(|| format!("Failed to create '{}'", out_path.display()))?;
file.write_all(object)?;
file.flush()?;
}
rsp_file.flush()?;
@@ -152,6 +210,7 @@ fn split(args: SplitArgs) -> Result<()> {
fn file_name_from_unit(str: &str, suffix: &str) -> String {
let str = str.strip_suffix(ASM_SUFFIX).unwrap_or(str);
let str = str.strip_prefix("C:").unwrap_or(str);
let str = str.strip_prefix("D:").unwrap_or(str);
let str = str
.strip_suffix(".c")
.or_else(|| str.strip_suffix(".cp"))
@@ -167,12 +226,12 @@ fn file_name_from_unit(str: &str, suffix: &str) -> String {
const ASM_SUFFIX: &str = " (asm)";
fn fixup(args: FixupArgs) -> Result<()> {
let in_buf = fs::read(&args.in_file).with_context(|| {
format!("Failed to open input file: '{}'", args.in_file.to_string_lossy())
})?;
let in_buf = fs::read(&args.in_file)
.with_context(|| format!("Failed to open input file: '{}'", args.in_file.display()))?;
let in_file = object::read::File::parse(&*in_buf).context("Failed to parse input ELF")?;
let mut out_file =
object::write::Object::new(in_file.format(), in_file.architecture(), in_file.endianness());
out_file.set_mangling(Mangling::None);
// Write file symbol first
let mut file_symbol_found = false;
@@ -188,12 +247,13 @@ fn fixup(args: FixupArgs) -> Result<()> {
}
// Create a file symbol if not found
if !file_symbol_found {
let file_name = args.in_file.file_name().ok_or_else(|| {
Error::msg(format!("'{}' is not a file path", args.in_file.to_string_lossy()))
})?;
let file_name = file_name.to_str().ok_or_else(|| {
Error::msg(format!("'{}' is not valid UTF-8", file_name.to_string_lossy()))
})?;
let file_name = args
.in_file
.file_name()
.ok_or_else(|| anyhow!("'{}' is not a file path", args.in_file.display()))?;
let file_name = file_name
.to_str()
.ok_or_else(|| anyhow!("'{}' is not valid UTF-8", file_name.to_string_lossy()))?;
let mut name_bytes = file_name.as_bytes().to_vec();
name_bytes.append(&mut ASM_SUFFIX.as_bytes().to_vec());
out_file.add_symbol(object::write::Symbol {
@@ -270,21 +330,21 @@ fn fixup(args: FixupArgs) -> Result<()> {
match in_symbol.kind() {
SymbolKind::Section => in_symbol
.section_index()
.ok_or_else(|| Error::msg("Section symbol without section"))
.ok_or_else(|| anyhow!("Section symbol without section"))
.and_then(|section_idx| {
section_ids[section_idx.0].ok_or_else(|| {
Error::msg("Relocation against stripped section")
anyhow!("Relocation against stripped section")
})
})
.map(|section_idx| out_file.section_symbol(section_idx)),
_ => Err(Error::msg("Missing symbol for relocation")),
_ => Err(anyhow!("Missing symbol for relocation")),
}
}
},
RelocationTarget::Section(section_idx) => section_ids[section_idx.0]
.ok_or_else(|| Error::msg("Relocation against stripped section"))
.ok_or_else(|| anyhow!("Relocation against stripped section"))
.map(|section_id| out_file.section_symbol(section_id)),
target => Err(Error::msg(format!("Invalid relocation target '{target:?}'"))),
target => Err(anyhow!("Invalid relocation target '{target:?}'")),
}?;
// Attempt to replace section symbols with direct symbol references
@@ -305,7 +365,11 @@ fn fixup(args: FixupArgs) -> Result<()> {
let kind = match reloc.kind() {
// This is a hack to avoid replacement with a section symbol
// See [`object::write::elf::object::elf_fixup_relocation`]
RelocationKind::Absolute => RelocationKind::Elf(object::elf::R_PPC_ADDR32),
RelocationKind::Absolute => RelocationKind::Elf(if addr & 3 == 0 {
object::elf::R_PPC_ADDR32
} else {
object::elf::R_PPC_UADDR32
}),
other => other,
};
@@ -320,10 +384,11 @@ fn fixup(args: FixupArgs) -> Result<()> {
}
}
let mut out = BufWriter::new(File::create(&args.out_file).with_context(|| {
format!("Failed to create output file: '{}'", args.out_file.to_string_lossy())
})?);
out_file.write_stream(&mut out).map_err(|e| Error::msg(format!("{e:?}")))?;
let mut out =
BufWriter::new(File::create(&args.out_file).with_context(|| {
format!("Failed to create output file: '{}'", args.out_file.display())
})?);
out_file.write_stream(&mut out).map_err(|e| anyhow!("{e:?}"))?;
out.flush()?;
Ok(())
}
@@ -340,7 +405,7 @@ fn to_write_symbol_section(
.get(idx.0)
.and_then(|&opt| opt)
.map(object::write::SymbolSection::Section)
.ok_or_else(|| Error::msg("Missing symbol section")),
.ok_or_else(|| anyhow!("Missing symbol section")),
_ => Ok(object::write::SymbolSection::Undefined),
}
}
@@ -349,7 +414,7 @@ fn to_write_symbol_flags(flags: SymbolFlags<SectionIndex>) -> Result<SymbolFlags
match flags {
SymbolFlags::Elf { st_info, st_other } => Ok(SymbolFlags::Elf { st_info, st_other }),
SymbolFlags::None => Ok(SymbolFlags::None),
_ => Err(Error::msg("Unexpected symbol flags")),
_ => Err(anyhow!("Unexpected symbol flags")),
}
}
@@ -372,6 +437,60 @@ fn to_write_symbol(
fn has_section_flags(flags: SectionFlags, flag: u32) -> Result<bool> {
match flags {
SectionFlags::Elf { sh_flags } => Ok(sh_flags & flag as u64 == flag as u64),
_ => Err(Error::msg("Unexpected section flags")),
_ => Err(anyhow!("Unexpected section flags")),
}
}
/// Generates a function signature for `args.symbol` from every input object file and writes the
/// de-duplicated set to `args.out_file` as YAML.
///
/// Arguments beginning with `@` are treated as response files: each non-empty line of the
/// referenced file is added as an input path.
///
/// Files for which signature generation fails are reported on stderr and skipped; files that
/// yield no signature are skipped silently. Signatures whose raw data matches an already-seen
/// entry are reconciled through `compare_signature` instead of being stored twice.
///
/// # Errors
/// Fails if a path is not valid UTF-8, a response file cannot be opened or read, two matching
/// signatures fail `compare_signature`, or the output file cannot be created or serialized.
fn signatures(args: SignaturesArgs) -> Result<()> {
    // Process response files (starting with '@')
    let mut files = Vec::with_capacity(args.files.len());
    for path in args.files {
        let path_str =
            path.to_str().ok_or_else(|| anyhow!("'{}' is not valid UTF-8", path.display()))?;
        match path_str.strip_prefix('@') {
            Some(rsp_file) => {
                let reader = BufReader::new(
                    File::open(rsp_file)
                        .with_context(|| format!("Failed to open file '{rsp_file}'"))?,
                );
                for result in reader.lines() {
                    let line = result?;
                    if !line.is_empty() {
                        files.push(PathBuf::from(line));
                    }
                }
            }
            None => {
                files.push(path);
            }
        }
    }

    let mut signatures: HashMap<Vec<u8>, FunctionSignature> = HashMap::new();
    for path in files {
        log::info!("Processing {}", path.display());
        let (data, signature) = match generate_signature(&path, &args.symbol) {
            Ok(Some(signature)) => signature,
            Ok(None) => continue,
            Err(e) => {
                // Best effort: one bad object file must not abort the whole run.
                eprintln!("Failed: {:?}", e);
                continue;
            }
        };
        log::info!("Comparing hash {}", signature.hash);
        // Keyed lookup rather than scanning every entry of the map.
        if let Some(existing) = signatures.get_mut(&data) {
            compare_signature(existing, &signature)?;
        } else {
            signatures.insert(data, signature);
        }
    }

    let mut signatures = signatures.into_values().collect::<Vec<FunctionSignature>>();
    log::info!("{} unique signatures", signatures.len());
    signatures.sort_by_key(|s| s.signature.len());
    let out =
        BufWriter::new(File::create(&args.out_file).with_context(|| {
            format!("Failed to create output file '{}'", args.out_file.display())
        })?);
    serde_yaml::to_writer(out, &signatures)?;
    Ok(())
}

View File

@@ -3,10 +3,10 @@ use std::{
io::{BufWriter, Seek, SeekFrom, Write},
};
use anyhow::{Context, Error, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use argh::FromArgs;
use memmap2::MmapOptions;
use object::{Architecture, Object, ObjectKind, ObjectSection, SectionKind};
use object::{Architecture, Endianness, Object, ObjectKind, ObjectSection, SectionKind};
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Converts an ELF file to a DOL file.
@@ -49,14 +49,12 @@ pub fn run(args: Args) -> Result<()> {
let obj_file = object::read::File::parse(&*map)?;
match obj_file.architecture() {
Architecture::PowerPc => {}
arch => return Err(Error::msg(format!("Unexpected architecture: {arch:?}"))),
arch => bail!("Unexpected architecture: {arch:?}"),
};
if obj_file.is_little_endian() {
return Err(Error::msg("Expected big endian"));
}
ensure!(obj_file.endianness() == Endianness::Big, "Expected big endian");
match obj_file.kind() {
ObjectKind::Executable => {}
kind => return Err(Error::msg(format!("Unexpected ELF type: {kind:?}"))),
kind => bail!("Unexpected ELF type: {kind:?}"),
}
let mut header = DolHeader { entry_point: obj_file.entry() as u32, ..Default::default() };
@@ -75,10 +73,10 @@ pub fn run(args: Args) -> Result<()> {
let address = section.address() as u32;
let size = align32(section.size() as u32);
*header.text_sections.get_mut(header.text_section_count).ok_or_else(|| {
Error::msg(format!(
anyhow!(
"Too many text sections (while processing '{}')",
section.name().unwrap_or("[error]")
))
)
})? = DolSection { offset, address, size };
header.text_section_count += 1;
write_aligned(&mut out, section.data()?, size)?;
@@ -93,10 +91,10 @@ pub fn run(args: Args) -> Result<()> {
let address = section.address() as u32;
let size = align32(section.size() as u32);
*header.data_sections.get_mut(header.data_section_count).ok_or_else(|| {
Error::msg(format!(
anyhow!(
"Too many data sections (while processing '{}')",
section.name().unwrap_or("[error]")
))
)
})? = DolSection { offset, address, size };
header.data_section_count += 1;
write_aligned(&mut out, section.data()?, size)?;

View File

@@ -1,6 +1,6 @@
use std::{fs::File, io::BufReader, ops::Range};
use std::{fs::File, io::BufReader};
use anyhow::{Context, Error, Result};
use anyhow::{bail, ensure, Context, Result};
use argh::FromArgs;
use crate::util::map::{process_map, resolve_link_order, SymbolEntry, SymbolRef};
@@ -103,12 +103,7 @@ fn entries(args: EntriesArgs) -> Result<()> {
}
}
}
None => {
return Err(Error::msg(format!(
"Failed to find entries for TU '{}' in map",
args.unit
)));
}
None => bail!("Failed to find entries for TU '{}' in map", args.unit),
}
Ok(())
}
@@ -122,9 +117,7 @@ fn symbol(args: SymbolArgs) -> Result<()> {
let mut opt_ref: Option<(SymbolRef, SymbolEntry)> = None;
for (symbol_ref, entry) in &entries.symbols {
if symbol_ref.name == args.symbol {
if opt_ref.is_some() {
return Err(Error::msg(format!("Symbol '{}' found in multiple TUs", args.symbol)));
}
ensure!(opt_ref.is_none(), "Symbol '{}' found in multiple TUs", args.symbol);
opt_ref = Some((symbol_ref.clone(), entry.clone()));
}
}
@@ -140,7 +133,7 @@ fn symbol(args: SymbolArgs) -> Result<()> {
reference.demangled.as_ref().unwrap_or(&reference.name),
reference.kind,
reference.visibility,
reference.unit
reference.unit.as_deref().unwrap_or("[generated]")
);
} else {
println!(">>> {} (NOT FOUND)", x.name);
@@ -156,7 +149,7 @@ fn symbol(args: SymbolArgs) -> Result<()> {
reference.demangled.as_ref().unwrap_or(&reference.name),
reference.kind,
reference.visibility,
reference.unit
reference.unit.as_deref().unwrap_or("[generated]")
);
} else {
println!(">>> {} (NOT FOUND)", x.name);
@@ -165,9 +158,7 @@ fn symbol(args: SymbolArgs) -> Result<()> {
}
println!("\n");
}
None => {
return Err(Error::msg(format!("Failed to find symbol '{}' in map", args.symbol)));
}
None => bail!("Failed to find symbol '{}' in map", args.symbol),
}
Ok(())
}
@@ -201,19 +192,19 @@ fn slices(args: SlicesArgs) -> Result<()> {
unit.clone()
};
println!("{unit_path}:");
let mut ranges = Vec::<(String, Range<u32>)>::new();
match entries.unit_section_ranges.get(&unit) {
Some(sections) => {
for (name, range) in sections {
ranges.push((name.clone(), range.clone()));
}
}
None => return Err(Error::msg(format!("Failed to locate sections for unit '{unit}'"))),
}
ranges.sort_by(|(_, a), (_, b)| a.start.cmp(&b.start));
for (name, range) in ranges {
println!("\t{}: [{:#010x}, {:#010x}]", name, range.start, range.end);
}
// let mut ranges = Vec::<(String, Range<u32>)>::new();
// match entries.unit_section_ranges.get(&unit) {
// Some(sections) => {
// for (name, range) in sections {
// ranges.push((name.clone(), range.clone()));
// }
// }
// None => bail!("Failed to locate sections for unit '{unit}'"),
// }
// ranges.sort_by(|(_, a), (_, b)| a.start.cmp(&b.start));
// for (name, range) in ranges {
// println!("\t{}: [{:#010x}, {:#010x}]", name, range.start, range.end);
// }
}
Ok(())
}
@@ -223,12 +214,12 @@ fn symbols(args: SymbolsArgs) -> Result<()> {
File::open(&args.map_file)
.with_context(|| format!("Failed to open file '{}'", args.map_file))?,
);
let entries = process_map(reader)?;
for (address, symbol) in entries.address_to_symbol {
if symbol.name.starts_with('@') {
continue;
}
println!("{:#010x}: {}", address, symbol.name);
}
let _entries = process_map(reader)?;
// for (address, symbol) in entries.address_to_symbol {
// if symbol.name.starts_with('@') {
// continue;
// }
// println!("{:#010x}: {}", address, symbol.name);
// }
Ok(())
}

View File

@@ -1,4 +1,4 @@
use anyhow::{Context, Error, Result};
use anyhow::{bail, ensure, Context, Result};
use argh::FromArgs;
use memchr::memmem;
use memmap2::MmapOptions;
@@ -23,11 +23,10 @@ pub fn run(args: Args) -> Result<()> {
.with_context(|| format!("Failed to read build info string from '{}'", args.build_info))?;
let build_string_trim = build_string.trim_end();
let build_string_bytes = build_string_trim.as_bytes();
if build_string_bytes.len() > BUILD_STRING_MAX {
return Err(Error::msg(format!(
"Build string '{build_string_trim}' is greater than maximum size of {BUILD_STRING_MAX}"
)));
}
ensure!(
build_string_bytes.len() <= BUILD_STRING_MAX,
"Build string '{build_string_trim}' is greater than maximum size of {BUILD_STRING_MAX}"
);
let binary_file = std::fs::File::options()
.read(true)
@@ -38,7 +37,7 @@ pub fn run(args: Args) -> Result<()> {
.with_context(|| format!("Failed to mmap binary: '{}'", args.binary))?;
let start = match memmem::find(&map, BUILD_STRING_TAG.as_bytes()) {
Some(idx) => idx + BUILD_STRING_TAG.as_bytes().len(),
None => return Err(Error::msg("Failed to find build string tag in binary")),
None => bail!("Failed to find build string tag in binary"),
};
let end = start + build_string_bytes.len();
map[start..end].copy_from_slice(build_string_bytes);

View File

@@ -1,7 +1,9 @@
pub(crate) mod ar;
pub(crate) mod demangle;
pub(crate) mod dol;
pub(crate) mod elf;
pub(crate) mod elf2dol;
pub(crate) mod map;
pub(crate) mod metroidbuildinfo;
pub(crate) mod rel;
pub(crate) mod shasum;

312
src/cmd/rel.rs Normal file
View File

@@ -0,0 +1,312 @@
use std::{
collections::{btree_map, BTreeMap},
fs::File,
io::{BufWriter, Write},
path::PathBuf,
};
use anyhow::{anyhow, bail, ensure, Context, Result};
use argh::FromArgs;
use crate::{
cmd::dol::apply_signatures,
util::{
dol::process_dol,
elf::write_elf,
obj::{ObjInfo, ObjSection, ObjSymbol},
rel::process_rel,
},
};
use crate::util::cfa::{AnalysisPass, AnalyzerState, FindSaveRestSleds, FindTRKInterruptVectorTable};
use crate::util::obj::{nested_push, ObjReloc, ObjRelocKind, ObjSectionKind, ObjSymbolKind};
use crate::util::tracker::Tracker;
// Top-level arguments of the `rel` command. It carries nothing but the selected
// subcommand, which `run` dispatches on.
// NOTE: `///` comments on argh types become CLI help text, so maintainer notes here use `//`.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing REL files.
#[argh(subcommand, name = "rel")]
pub struct Args {
#[argh(subcommand)]
// The subcommand chosen on the command line.
command: SubCommand,
}
// Subcommands available under `rel`; each variant wraps that subcommand's argument struct.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
// `rel info`: handled by `info`.
Info(InfoArgs),
// `rel merge`: handled by `merge`.
Merge(MergeArgs),
}
// Arguments for `rel info`. The `///` lines below are the CLI help text emitted by argh.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Views REL file information.
#[argh(subcommand, name = "info")]
pub struct InfoArgs {
#[argh(positional)]
/// REL file
rel_file: PathBuf,
}
// Arguments for `rel merge`. The `///` lines below are the CLI help text emitted by argh.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Merges a DOL + REL(s) into an ELF.
#[argh(subcommand, name = "merge")]
pub struct MergeArgs {
#[argh(positional)]
/// DOL file
dol_file: PathBuf,
#[argh(positional)]
/// REL file(s)
// Each REL must have a distinct module ID; `merge` rejects duplicates.
rel_files: Vec<PathBuf>,
#[argh(option, short = 'o')]
/// output ELF
out_file: PathBuf,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Info(c_args) => info(c_args),
SubCommand::Merge(c_args) => merge(c_args),
}
}
/// Parses the REL file at `args.rel_file` and prints its module ID to stdout.
fn info(args: InfoArgs) -> Result<()> {
    let module = process_rel(&args.rel_file)?;
    let module_id = module.module_id;
    println!("Read REL module ID {}", module_id);
    Ok(())
}
/// Rounds `x` up to the nearest multiple of 32 (values already aligned are returned unchanged).
#[inline]
const fn align32(x: u32) -> u32 {
    const MASK: u32 = 32 - 1;
    (x + MASK) & !MASK
}
fn merge(args: MergeArgs) -> Result<()> {
let mut module_map = BTreeMap::<u32, ObjInfo>::new();
log::info!("Loading {}", args.dol_file.display());
let mut obj = process_dol(&args.dol_file)?;
apply_signatures(&mut obj)?;
for path in &args.rel_files {
log::info!("Loading {}", path.display());
let obj = process_rel(path)?;
match module_map.entry(obj.module_id) {
btree_map::Entry::Vacant(e) => e.insert(obj),
btree_map::Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id),
};
}
let mut section_map: BTreeMap<u32, BTreeMap<u32, u32>> = BTreeMap::new();
let mut offset = align32(obj.arena_lo.unwrap() + 0x2000);
for (_, module) in &module_map {
for mod_section in &module.sections {
let section_idx = obj.sections.len();
ensure!(mod_section.relocations.is_empty(), "Unsupported relocations during merge");
obj.sections.push(ObjSection {
name: format!("{}:{}", mod_section.name, module.module_id),
kind: mod_section.kind,
address: offset as u64,
size: mod_section.size,
data: mod_section.data.clone(),
align: mod_section.align,
index: section_idx,
elf_index: mod_section.elf_index,
relocations: vec![],
original_address: mod_section.original_address,
file_offset: mod_section.file_offset,
section_known: mod_section.section_known,
});
nested_try_insert(
&mut section_map,
module.module_id,
mod_section.elf_index as u32,
offset,
)?;
let symbols = module.symbols_for_section(mod_section.index);
for (_, mod_symbol) in symbols {
obj.symbols.push(ObjSymbol {
name: mod_symbol.name.clone(),
demangled_name: mod_symbol.demangled_name.clone(),
address: mod_symbol.address + offset as u64,
section: Some(section_idx),
size: mod_symbol.size,
size_known: mod_symbol.size_known,
flags: mod_symbol.flags,
kind: mod_symbol.kind,
});
}
offset += align32(mod_section.size as u32);
}
}
let mut symbol_maps = Vec::new();
for section in &obj.sections {
symbol_maps.push(obj.build_symbol_map(section.index)?);
}
// Apply relocations
for (_, module) in &module_map {
for rel_reloc in &module.unresolved_relocations {
let source_addr =
section_map[&module.module_id][&(rel_reloc.section as u32)] + rel_reloc.address;
let target_addr = if rel_reloc.module_id == 0 {
rel_reloc.addend
} else {
let base = section_map[&rel_reloc.module_id][&(rel_reloc.target_section as u32)];
let addend = rel_reloc.addend;
base + addend
};
let source_section = obj.section_at(source_addr)?;
let target_section = obj.section_at(target_addr)?;
let target_section_index = target_section.index;
// Try to find a previous sized symbol that encompasses the target
let sym_map = &mut symbol_maps[target_section_index];
let target_symbol = {
let mut result = None;
for (&addr, symbol_idxs) in sym_map.range(..=target_addr).rev() {
let symbol_idx = if symbol_idxs.len() == 1 {
symbol_idxs.first().cloned().unwrap()
} else {
let mut symbol_idxs = symbol_idxs.clone();
symbol_idxs.sort_by_key(|&symbol_idx| {
let symbol = &obj.symbols[symbol_idx];
let mut rank = match symbol.kind {
ObjSymbolKind::Function | ObjSymbolKind::Object => {
match rel_reloc.kind {
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo => 1,
ObjRelocKind::Absolute
| ObjRelocKind::PpcRel24
| ObjRelocKind::PpcRel14
| ObjRelocKind::PpcEmbSda21 => 2,
}
}
// Label
ObjSymbolKind::Unknown => match rel_reloc.kind {
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo
if !symbol.name.starts_with("..") =>
{
3
}
_ => 1,
},
ObjSymbolKind::Section => -1,
};
if symbol.size > 0 {
rank += 1;
}
-rank
});
match symbol_idxs.first().cloned() {
Some(v) => v,
None => continue,
}
};
let symbol = &obj.symbols[symbol_idx];
if symbol.address == target_addr as u64 {
result = Some(symbol_idx);
break;
}
if symbol.size > 0 {
if symbol.address + symbol.size > target_addr as u64 {
result = Some(symbol_idx);
}
break;
}
}
result
};
let (symbol_idx, addend) = if let Some(symbol_idx) = target_symbol {
let symbol = &obj.symbols[symbol_idx];
(symbol_idx, target_addr as i64 - symbol.address as i64)
} else {
// Create a new label
let symbol_idx = obj.symbols.len();
obj.symbols.push(ObjSymbol {
name: String::new(),
demangled_name: None,
address: target_addr as u64,
section: Some(target_section_index),
size: 0,
size_known: false,
flags: Default::default(),
kind: Default::default(),
});
nested_push(sym_map, target_addr, symbol_idx);
(symbol_idx, 0)
};
obj.sections[target_section_index].relocations.push(ObjReloc {
kind: rel_reloc.kind,
address: source_addr as u64,
target_symbol: symbol_idx,
addend,
});
}
}
// Apply known functions from extab
let mut state = AnalyzerState::default();
for (&addr, &size) in &obj.known_functions {
state.function_entries.insert(addr);
state.function_bounds.insert(addr, addr + size);
}
for symbol in &obj.symbols {
if symbol.kind != ObjSymbolKind::Function {
continue;
}
state.function_entries.insert(symbol.address as u32);
if !symbol.size_known {
continue;
}
state.function_bounds.insert(symbol.address as u32, (symbol.address + symbol.size) as u32);
}
// Also check the start of each code section
for section in &obj.sections {
if section.kind == ObjSectionKind::Code {
state.function_entries.insert(section.address as u32);
}
}
state.detect_functions(&obj)?;
log::info!("Discovered {} functions", state.function_slices.len());
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
log::info!("Performing relocation analysis");
let mut tracker = Tracker::new(&obj);
tracker.process(&obj)?;
log::info!("Applying relocations");
tracker.apply(&mut obj, false)?;
// Write ELF
let mut file = File::create(&args.out_file)
.with_context(|| format!("Failed to create '{}'", args.out_file.display()))?;
let out_object = write_elf(&obj)?;
file.write_all(&out_object)?;
file.flush()?;
Ok(())
}
/// Inserts `v3` under key `v2` in the inner map stored at `map[v1]`, creating the inner map on
/// demand.
///
/// # Errors
/// Returns an error, leaving the existing value untouched, if `(v1, v2)` is already present.
#[inline]
fn nested_try_insert<T1, T2, T3>(
    map: &mut BTreeMap<T1, BTreeMap<T2, T3>>,
    v1: T1,
    v2: T2,
    v3: T3,
) -> Result<()>
where
    T1: Eq + Ord,
    T2: Eq + Ord,
{
    let inner = map.entry(v1).or_default();
    if let btree_map::Entry::Vacant(slot) = inner.entry(v2) {
        slot.insert(v3);
        return Ok(());
    }
    bail!("Entry already exists")
}

View File

@@ -4,7 +4,7 @@ use std::{
path::Path,
};
use anyhow::{Context, Error, Result};
use anyhow::{anyhow, bail, Context, Result};
use argh::FromArgs;
use filetime::{set_file_mtime, FileTime};
use sha1::{Digest, Sha1};
@@ -42,13 +42,13 @@ fn check(args: Args, file: File) -> Result<()> {
for line in reader.lines() {
let line = match line {
Ok(line) => line,
Err(e) => return Err(Error::msg(format!("File read failed: {e}"))),
Err(e) => bail!("File read failed: {e}"),
};
let (hash, file_name) =
line.split_once(' ').ok_or_else(|| Error::msg(format!("Invalid line: {line}")))?;
line.split_once(' ').ok_or_else(|| anyhow!("Invalid line: {line}"))?;
let file_name = match file_name.chars().next() {
Some(' ') | Some('*') => &file_name[1..],
_ => return Err(Error::msg(format!("Invalid line: {line}"))),
_ => bail!("Invalid line: {line}"),
};
let mut hash_bytes = [0u8; 20];
hex::decode_to_slice(hash, &mut hash_bytes)
@@ -78,7 +78,7 @@ fn hash(args: Args, file: File) -> Result<()> {
let hash = file_sha1(file)?;
let mut hash_buf = [0u8; 40];
let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf)
.map_err(|e| Error::msg(format!("Failed to encode hash: {e}")))?;
.map_err(|e| anyhow!("Failed to encode hash: {e}"))?;
println!("{} {}", hash_str, args.file);
Ok(())
}

View File

@@ -1,5 +1,3 @@
extern crate core;
use argh::FromArgs;
mod argh_version;
@@ -18,28 +16,32 @@ struct TopLevel {
enum SubCommand {
Ar(cmd::ar::Args),
Demangle(cmd::demangle::Args),
Dol(cmd::dol::Args),
Elf(cmd::elf::Args),
Elf2Dol(cmd::elf2dol::Args),
Map(cmd::map::Args),
MetroidBuildInfo(cmd::metroidbuildinfo::Args),
Rel(cmd::rel::Args),
Shasum(cmd::shasum::Args),
}
fn main() {
pretty_env_logger::init();
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
let args: TopLevel = argh_version::from_env();
let result = match args.command {
SubCommand::Ar(c_args) => cmd::ar::run(c_args),
SubCommand::Demangle(c_args) => cmd::demangle::run(c_args),
SubCommand::Dol(c_args) => cmd::dol::run(c_args),
SubCommand::Elf(c_args) => cmd::elf::run(c_args),
SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args),
SubCommand::Map(c_args) => cmd::map::run(c_args),
SubCommand::MetroidBuildInfo(c_args) => cmd::metroidbuildinfo::run(c_args),
SubCommand::Rel(c_args) => cmd::rel::run(c_args),
SubCommand::Shasum(c_args) => cmd::shasum::run(c_args),
};
if let Err(e) = result {
eprintln!("{e:?}");
eprintln!("Failed: {e:?}");
std::process::exit(1);
}
}

View File

@@ -4,7 +4,7 @@ use std::{
io::Write,
};
use anyhow::{Error, Result};
use anyhow::{anyhow, bail, ensure, Result};
use ppc750cl::{disasm_iter, Argument, Ins, Opcode};
use crate::util::obj::{
@@ -97,7 +97,11 @@ pub fn write_asm<W: Write>(w: &mut W, obj: &ObjInfo) -> Result<()> {
}
if let Some(symbol_idx) = target_symbol_idx {
relocations.insert(ins.addr, ObjReloc {
kind: ObjRelocKind::PpcRel24,
kind: match ins.op {
Opcode::B => ObjRelocKind::PpcRel24,
Opcode::Bc => ObjRelocKind::PpcRel14,
_ => unreachable!(),
},
address: ins.addr as u64,
target_symbol: symbol_idx,
addend: 0,
@@ -325,9 +329,7 @@ fn write_symbol_entry<W: Write>(
ObjSymbolKind::Function => "fn",
ObjSymbolKind::Object => "obj",
ObjSymbolKind::Unknown => "sym",
ObjSymbolKind::Section => {
return Err(Error::msg(format!("Attempted to write section symbol: {symbol:?}")))
}
ObjSymbolKind::Section => bail!("Attempted to write section symbol: {symbol:?}"),
};
let scope = if symbol.flags.0.contains(ObjSymbolFlags::Weak) {
"weak"
@@ -447,14 +449,13 @@ fn write_data<W: Write>(
let data = &section.data[(current_address - section.address as u32) as usize
..(until - section.address as u32) as usize];
if symbol_kind == ObjSymbolKind::Function {
if current_address & 3 != 0 || data.len() & 3 != 0 {
return Err(Error::msg(format!(
"Unaligned code write @ {} {:#010X} size {:#X}",
section.name,
current_address,
data.len()
)));
}
ensure!(
current_address & 3 == 0 && data.len() & 3 == 0,
"Unaligned code write @ {} {:#010X} size {:#X}",
section.name,
current_address,
data.len()
);
write_code_chunk(w, symbols, entries, relocations, section, current_address, data)?;
} else {
write_data_chunk(w, data)?;
@@ -476,11 +477,10 @@ fn find_symbol_kind(
SymbolEntryKind::Start => {
let new_kind = symbols[entry.index].kind;
if !matches!(new_kind, ObjSymbolKind::Unknown | ObjSymbolKind::Section) {
if found && new_kind != kind {
return Err(Error::msg(format!(
"Conflicting symbol kinds found: {kind:?} and {new_kind:?}"
)));
}
ensure!(
!found || new_kind == kind,
"Conflicting symbol kinds found: {kind:?} and {new_kind:?}"
);
kind = new_kind;
found = true;
}
@@ -546,7 +546,11 @@ fn write_data_reloc<W: Write>(
writeln!(w)?;
Ok((reloc.address + 4) as u32)
}
_ => Err(Error::msg(format!("Unsupported data relocation type {:?}", reloc.kind))),
_ => Err(anyhow!(
"Unsupported data relocation type {:?} @ {:#010X}",
reloc.kind,
reloc.address
)),
}
}
@@ -634,7 +638,13 @@ fn write_section_header<W: Write>(
write!(w, ".section {}", section.name)?;
write!(w, ", \"a\"")?;
}
name => return Err(Error::msg(format!("Unknown section {name}"))),
name => {
log::warn!("Unknown section {name}");
write!(w, ".section {}", section.name)?;
if section.kind == ObjSectionKind::Bss {
write!(w, ", \"\", @nobits")?;
}
}
};
if subsection != 0 {
write!(w, ", unique, {subsection}")?;

511
src/util/cfa.rs Normal file
View File

@@ -0,0 +1,511 @@
use std::{
collections::{btree_map::Entry, BTreeMap, BTreeSet},
num::NonZeroU32,
ops::Range,
};
use anyhow::{anyhow, bail, ensure, Context, Result};
use fixedbitset::FixedBitSet;
use flagset::FlagSet;
use ppc750cl::{Argument, Ins, Opcode, GPR};
use crate::util::{
executor::{disassemble, ExecCbData, ExecCbResult, Executor},
obj::{
ObjInfo, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags,
ObjSymbolKind,
},
slices::{FunctionSlices, TailCallResult},
vm::{BranchTarget, GprValue, StepResult, VM},
};
/// Accumulated results of control-flow analysis over an object.
///
/// All maps and sets are keyed by 32-bit addresses.
#[derive(Debug, Default)]
pub struct AnalyzerState {
/// NOTE(review): not read or written in the visible part of this file; presumably the
/// small-data-area base addresses. Confirm against the code that sets it.
pub sda_bases: Option<(u32, u32)>,
/// Addresses that are known or suspected function entry points.
pub function_entries: BTreeSet<u32>,
/// Function start address -> end address. An end of `0` marks an address that was tried and
/// rejected (or could not be analyzed); such entries are skipped by `apply`.
pub function_bounds: BTreeMap<u32, u32>,
/// Finalized per-function analysis results, keyed by function start address.
pub function_slices: BTreeMap<u32, FunctionSlices>,
/// Jump table address -> size, collected from analyzed functions.
pub jump_tables: BTreeMap<u32, u32>,
/// Symbols discovered by analysis passes, keyed by address; written into the object by `apply`.
pub known_symbols: BTreeMap<u32, ObjSymbol>,
/// Functions whose analysis could not be finalized yet, keyed by function start address.
pub non_finalized_functions: BTreeMap<u32, FunctionSlices>,
}
impl AnalyzerState {
pub fn apply(&self, obj: &mut ObjInfo) -> Result<()> {
for (&start, &end) in &self.function_bounds {
if end == 0 {
continue;
}
if let Some(existing_symbol) = obj
.symbols
.iter_mut()
.find(|sym| sym.address == start as u64 && sym.kind == ObjSymbolKind::Function)
{
let new_size = (end - start) as u64;
if !existing_symbol.size_known || existing_symbol.size == 0 {
existing_symbol.size = new_size;
existing_symbol.size_known = true;
} else if existing_symbol.size != new_size {
log::warn!(
"Conflicting size for {}: was {:#X}, now {:#X}",
existing_symbol.name,
existing_symbol.size,
new_size
);
}
continue;
}
let section = obj
.sections
.iter()
.find(|section| {
(start as u64) >= section.address
&& (end as u64) <= section.address + section.size
})
.ok_or_else(|| {
anyhow!("Failed to locate section for function {:#010X}-{:#010X}", start, end)
})?;
obj.symbols.push(ObjSymbol {
name: format!("fn_{:08X}", start),
demangled_name: None,
address: start as u64,
section: Some(section.index),
size: (end - start) as u64,
size_known: true,
flags: Default::default(),
kind: ObjSymbolKind::Function,
});
}
for (&addr, &size) in &self.jump_tables {
let section = obj
.sections
.iter()
.find(|section| {
(addr as u64) >= section.address
&& ((addr + size) as u64) <= section.address + section.size
})
.ok_or_else(|| anyhow!("Failed to locate section for jump table"))?;
if let Some(existing_symbol) = obj
.symbols
.iter_mut()
.find(|sym| sym.address == addr as u64 && sym.kind == ObjSymbolKind::Object)
{
let new_size = size as u64;
if !existing_symbol.size_known || existing_symbol.size == 0 {
existing_symbol.size = new_size;
existing_symbol.size_known = true;
// existing_symbol.flags.0 &= ObjSymbolFlags::Global;
// existing_symbol.flags.0 |= ObjSymbolFlags::Local;
} else if existing_symbol.size != new_size {
log::warn!(
"Conflicting size for {}: was {:#X}, now {:#X}",
existing_symbol.name,
existing_symbol.size,
new_size
);
}
continue;
}
obj.symbols.push(ObjSymbol {
name: format!("jumptable_{:08X}", addr),
demangled_name: None,
address: addr as u64,
section: Some(section.index),
size: size as u64,
size_known: true,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Local.into()),
kind: ObjSymbolKind::Object,
});
}
for (&_addr, symbol) in &self.known_symbols {
if let Some(existing_symbol) = obj
.symbols
.iter_mut()
.find(|e| symbol.address == e.address && symbol.kind == e.kind)
{
*existing_symbol = symbol.clone();
continue;
}
obj.symbols.push(symbol.clone());
}
Ok(())
}
/// Runs function detection to a fixed point.
///
/// Analyzes every already-known entry and the object's entry point, then keeps processing
/// newly referenced functions. Afterwards, functions that still cannot be finalized are
/// force-finalized, repeating the processing step for as long as that finalizes anything.
pub fn detect_functions(&mut self, obj: &ObjInfo) -> Result<()> {
    // Iterate over a snapshot: processing a function adds new entries to the set.
    for entry in self.function_entries.clone() {
        self.process_function_at(obj, entry)?;
    }
    // The object's entry point is always a candidate.
    self.process_function_at(obj, obj.entry as u32)?;
    // Follow references until no unprocessed function remains.
    self.process_functions(obj)?;
    // Forced finalization may uncover further functions, so alternate until it stops helping.
    loop {
        let finalized_any = self.finalize_functions(obj, true)?;
        if !finalized_any {
            break;
        }
        self.process_functions(obj)?;
    }
    Ok(())
}
/// Tries to resolve the undecided blocks of every non-finalized function and finalizes the
/// functions that become complete.
///
/// Each undecided block is classified with `check_tail_call`: definite non-tail-calls are
/// analyzed as part of the function, definite tail calls are recorded as function references,
/// and undecided ones are left alone unless `finalize` is `true`, in which case they are
/// assumed to be part of the function.
///
/// Returns `Ok(true)` if at least one function was finalized by this call.
fn finalize_functions(&mut self, obj: &ObjInfo, finalize: bool) -> Result<bool> {
let mut finalized = Vec::new();
for (&addr, slices) in &mut self.non_finalized_functions {
// log::info!("Trying to finalize {:#010X}", addr);
let function_start = slices.start();
let function_end = slices.end();
// Walk the possible blocks in ascending address order. The set is mutated inside the loop,
// so the next candidate is re-queried each iteration instead of holding an iterator;
// blocks that stay undecided remain in the set and are stepped over via `current`.
let mut current = 0;
while let Some(&block) = slices.possible_blocks.range(current + 4..).next() {
current = block;
match slices.check_tail_call(
obj,
block,
function_start,
function_end,
&self.function_entries,
) {
TailCallResult::Not => {
log::trace!("Finalized block @ {:#010X}", block);
slices.possible_blocks.remove(&block);
slices.analyze(
obj,
block,
function_start,
Some(function_end),
&self.function_entries,
)?;
}
TailCallResult::Is => {
log::trace!("Finalized tail call @ {:#010X}", block);
slices.possible_blocks.remove(&block);
slices.function_references.insert(block);
}
TailCallResult::Possible => {
// Only resolved when the caller forces finalization.
if finalize {
log::trace!(
"Still couldn't determine {:#010X}, assuming non-tail-call",
block
);
slices.possible_blocks.remove(&block);
slices.analyze(
obj,
block,
function_start,
Some(function_end),
&self.function_entries,
)?;
}
}
}
}
if slices.can_finalize() {
log::trace!("Finalizing {:#010X}", addr);
slices.finalize(obj, &self.function_entries)?;
// Publish everything this function references so it gets processed later.
self.function_entries.append(&mut slices.function_references.clone());
self.jump_tables.append(&mut slices.jump_table_references.clone());
let end = slices.end();
self.function_bounds.insert(addr, end);
self.function_slices.insert(addr, slices.clone());
finalized.push(addr);
}
}
// Removal is deferred because the map is mutably borrowed by the loop above.
let finalized_new = !finalized.is_empty();
for addr in finalized {
self.non_finalized_functions.remove(&addr);
}
Ok(finalized_new)
}
/// Returns the lowest function entry address that has neither an entry in `function_bounds`
/// nor a pending entry in `non_finalized_functions`, or `None` if every entry is accounted for.
fn first_unbounded_function(&self) -> Option<u32> {
    // Both collections iterate in ascending order, so a single merge-style pass suffices.
    let mut bounded = self.function_bounds.keys().copied().peekable();
    for candidate in self.function_entries.iter().copied() {
        // Discard bounds that lie below the current candidate.
        while bounded.next_if(|&start| start < candidate).is_some() {}
        if bounded.peek() == Some(&candidate) {
            // Already has bounds.
            continue;
        }
        if !self.non_finalized_functions.contains_key(&candidate) {
            return Some(candidate);
        }
    }
    None
}
/// Processes functions until nothing is left to do.
///
/// Each round handles the first unprocessed entry if there is one. Otherwise it tries a
/// non-forced finalization pass and, only if that finalizes nothing, probes the gaps between
/// known functions for new candidates. The loop ends when none of these makes progress.
fn process_functions(&mut self, obj: &ObjInfo) -> Result<()> {
    loop {
        if let Some(addr) = self.first_unbounded_function() {
            log::trace!("Processing {:#010X}", addr);
            self.process_function_at(obj, addr)?;
            continue;
        }
        if self.finalize_functions(obj, false)? {
            continue;
        }
        if !self.detect_new_functions(obj)? {
            break;
        }
    }
    Ok(())
}
/// Analyzes the code at `addr` as a function and records the outcome.
///
/// Returns `Ok(true)` if `addr` was accepted as a function: it is added to the entries along
/// with everything it references, then either finalized (bounds and slices stored) or parked
/// in `non_finalized_functions`. Returns `Ok(false)` if `addr` is `0`/`0xFFFFFFFF` or analysis
/// rejects it; in both cases a bound of `0` is stored so the address is not tried again.
pub fn process_function_at(&mut self, obj: &ObjInfo, addr: u32) -> Result<bool> {
    if matches!(addr, 0 | 0xFFFFFFFF) {
        log::warn!("Tried to detect @ {:#010X}", addr);
        self.function_bounds.insert(addr, 0);
        return Ok(false);
    }

    let mut slices = match self.process_function(obj, addr)? {
        Some(slices) => slices,
        None => {
            log::debug!("Not a function @ {:#010X}", addr);
            self.function_bounds.insert(addr, 0);
            return Ok(false);
        }
    };

    self.function_entries.insert(addr);
    self.function_entries.extend(slices.function_references.iter().copied());
    self.jump_tables.extend(slices.jump_table_references.iter().map(|(&k, &v)| (k, v)));

    if !slices.can_finalize() {
        self.non_finalized_functions.insert(addr, slices);
        return Ok(true);
    }
    slices.finalize(obj, &self.function_entries)?;
    let end = slices.end();
    self.function_bounds.insert(addr, end);
    self.function_slices.insert(addr, slices);
    Ok(true)
}
/// Runs slice analysis for a function starting at `start`.
///
/// If a bound is already recorded for `start` it is passed to the analysis as the expected
/// function end. Returns `Ok(Some(slices))` when the analysis accepts the address as a function
/// and `Ok(None)` when it does not.
fn process_function(&mut self, obj: &ObjInfo, start: u32) -> Result<Option<FunctionSlices>> {
    let mut slices = FunctionSlices::default();
    let function_end = self.function_bounds.get(&start).cloned();
    let is_function =
        slices.analyze(obj, start, start, function_end, &self.function_entries)?;
    Ok(if is_function { Some(slices) } else { None })
}
/// Looks for undiscovered functions in the gaps between consecutive known function bounds.
///
/// For each adjacent pair of bounds where the first function has a real end that does not run
/// past the start of the second, `skip_alignment` is asked for the first candidate address in
/// the gap. A candidate strictly before the second function that was not yet a known entry is
/// added to `function_entries`.
///
/// Returns `Ok(true)` if at least one new entry was added.
fn detect_new_functions(&mut self, obj: &ObjInfo) -> Result<bool> {
    let mut found_new = false;
    let mut bounds = self.function_bounds.iter().peekable();
    while let Some((&prev_start, &prev_end)) = bounds.next() {
        let (next_start, next_end) = match bounds.peek() {
            Some(&(&start, &end)) => (start, end),
            None => break,
        };
        // Skip rejected entries (end == 0) and overlapping neighbours.
        if prev_end == 0 || prev_end > next_start {
            continue;
        }
        if let Some(addr) = skip_alignment(obj, prev_end, next_start) {
            if addr < next_start && self.function_entries.insert(addr) {
                log::trace!(
                    "Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X}-{:#010X})",
                    addr,
                    prev_start,
                    prev_end,
                    next_start,
                    next_end,
                );
                found_new = true;
            }
        }
    }
    Ok(found_new)
}
}
/// A single analysis step that inspects an object and updates the analyzer state.
pub trait AnalysisPass {
/// Runs the pass against `obj`, recording any findings in `state`.
fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()>;
}
/// Analysis pass that searches for `gTRKInterruptVectorTable` by its header string.
pub struct FindTRKInterruptVectorTable {}
/// Text expected at the very start of the table, followed by a zero byte.
pub const TRK_TABLE_HEADER: &str = "Metrowerks Target Resident Kernel for PowerPC";
/// Distance from the table start at which `gTRKInterruptVectorTableEnd` is placed.
pub const TRK_TABLE_SIZE: u32 = 0x1F34; // always?
// TRK_MINNOW_DOLPHIN.a __exception.s
impl AnalysisPass for FindTRKInterruptVectorTable {
    /// Scans every address whose recorded bound is `0` for the TRK table header.
    ///
    /// On the first match, `gTRKInterruptVectorTable` is registered at that
    /// address and `gTRKInterruptVectorTableEnd` at `start + TRK_TABLE_SIZE`,
    /// both as global symbols, and the pass returns. Addresses whose section
    /// data cannot be fetched are skipped.
    fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()> {
        let header = TRK_TABLE_HEADER.as_bytes();
        for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end == 0) {
            let (section, data) = match obj.section_data(start, 0) {
                Ok((section, data)) => (section, data),
                Err(_) => continue,
            };
            // Checked access: the data may end right after the header text, in which
            // case there is no terminator byte to inspect and this is not a match.
            if data.starts_with(header) && data.get(header.len()) == Some(&0) {
                log::info!("Found gTRKInterruptVectorTable @ {:#010X}", start);
                state.known_symbols.insert(start, ObjSymbol {
                    name: "gTRKInterruptVectorTable".to_string(),
                    demangled_name: None,
                    address: start as u64,
                    section: Some(section.index),
                    size: 0,
                    size_known: true,
                    flags: ObjSymbolFlagSet(FlagSet::from(ObjSymbolFlags::Global)),
                    kind: ObjSymbolKind::Unknown,
                });
                let end = start + TRK_TABLE_SIZE;
                state.known_symbols.insert(end, ObjSymbol {
                    name: "gTRKInterruptVectorTableEnd".to_string(),
                    demangled_name: None,
                    address: end as u64,
                    section: Some(section.index),
                    size: 0,
                    size_known: true,
                    flags: ObjSymbolFlagSet(FlagSet::from(ObjSymbolFlags::Global)),
                    kind: ObjSymbolKind::Unknown,
                });
                return Ok(());
            }
        }
        log::info!("gTRKInterruptVectorTable not found");
        Ok(())
    }
}
/// Analysis pass that recognizes the register save/restore sleds listed in `SLEDS`.
pub struct FindSaveRestSleds {}
/// Known sleds as `(first 4 bytes, function name, per-register label prefix)`.
/// The byte pattern is compared against the start of each function's data.
const SLEDS: [([u8; 4], &'static str, &'static str); 4] = [
([0xd9, 0xcb, 0xff, 0x70], "__save_fpr", "_savefpr_"),
([0xc9, 0xcb, 0xff, 0x70], "__restore_fpr", "_restfpr_"),
([0x91, 0xcb, 0xff, 0xb8], "__save_gpr", "_savegpr_"),
([0x81, 0xcb, 0xff, 0xb8], "__restore_gpr", "_restgpr_"),
];
// Runtime.PPCEABI.H.a runtime.c
impl AnalysisPass for FindSaveRestSleds {
    /// Finds functions that begin with one of the `SLEDS` byte patterns.
    ///
    /// For each match, a function symbol spanning the whole sled is registered,
    /// followed by one label per register (14 through 31) at 4-byte steps.
    /// Afterwards every address inside the sled except its first word is
    /// removed from the function entries, bounds and slices.
    fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()> {
        const SLED_SIZE: usize = 19 * 4; // registers 14-31 + blr
        let mut clear_ranges: Vec<Range<u32>> = vec![];
        for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end != 0) {
            let (section, data) = obj.section_data(start, 0)?;
            let matching = SLEDS.iter().filter(|(needle, _, _)| data.starts_with(needle));
            for (_, func, label) in matching {
                log::info!("Found {} @ {:#010X}", func, start);
                clear_ranges.push(start + 4..start + SLED_SIZE as u32);
                state.known_symbols.insert(start, ObjSymbol {
                    name: func.to_string(),
                    demangled_name: None,
                    address: start as u64,
                    section: Some(section.index),
                    size: SLED_SIZE as u64,
                    size_known: true,
                    flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
                    kind: ObjSymbolKind::Function,
                });
                // NOTE(review): for register 14 the label address equals `start`, the
                // same key as the function symbol inserted above. If `known_symbols`
                // keeps a single symbol per address, that insert replaces the function
                // symbol - confirm this is intended.
                for reg in 14..=31 {
                    let label_addr = start + (reg - 14) * 4;
                    state.known_symbols.insert(label_addr, ObjSymbol {
                        name: format!("{}{}", label, reg),
                        demangled_name: None,
                        address: label_addr as u64,
                        section: Some(section.index),
                        size: 0,
                        size_known: true,
                        flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
                        kind: ObjSymbolKind::Unknown,
                    });
                }
            }
        }
        // Interior sled addresses are not functions of their own
        for addr in clear_ranges.into_iter().flat_map(|range| range.step_by(4)) {
            state.function_entries.remove(&addr);
            state.function_bounds.remove(&addr);
            state.function_slices.remove(&addr);
        }
        Ok(())
    }
}
/// Skips zero-filled 4-byte words starting at `addr`.
///
/// Operates on the data returned by `obj.section_data(addr, end)` and returns
/// the address of the first 4-byte word that is not all zeroes.
///
/// Returns `None` when the data cannot be fetched, when every complete word is
/// zero, or when only a partial word (fewer than 4 bytes) remains. The partial
/// word case previously panicked on an out-of-range slice.
fn skip_alignment(obj: &ObjInfo, addr: u32, end: u32) -> Option<u32> {
    let (_, data) = obj.section_data(addr, end).ok()?;
    // `chunks_exact` yields only complete words, so a short tail is never indexed
    let word_index = data.chunks_exact(4).position(|word| word.iter().any(|&byte| byte != 0))?;
    Some(addr + word_index as u32 * 4)
}
/// Execute VM from entry point following branches and function calls
/// until SDA bases are initialized (__init_registers)
///
/// Execution stops as soon as both r2 and r13 hold constant values; these are
/// stored as `obj.sda2_base` and `obj.sda_base` respectively. Returns
/// `Ok(true)` when both were found and `Ok(false)` when execution ended
/// without them. An illegal instruction aborts with an error.
pub fn locate_sda_bases(obj: &mut ObjInfo) -> Result<bool> {
    let mut executor = Executor::new(obj);
    executor.push(obj.entry as u32, VM::new(), false);
    let bases =
        executor.run(obj, |ExecCbData { executor, vm, result, section, ins, block_start }| {
            match result {
                StepResult::Continue | StepResult::LoadStore { .. } => {
                    return Ok(ExecCbResult::Continue);
                }
                StepResult::Illegal => bail!("Illegal instruction @ {:#010X}", ins.addr),
                StepResult::Jump(target) => {
                    // Only jumps to a known address are followed
                    if let BranchTarget::Address(addr) = target {
                        return Ok(ExecCbResult::Jump(addr));
                    }
                }
                StepResult::Branch(branches) => {
                    // Queue every branch with a known address for later execution
                    for branch in branches {
                        if let BranchTarget::Address(addr) = branch.target {
                            executor.push(addr, branch.vm, false);
                        }
                    }
                }
            }
            if let (GprValue::Constant(sda2_base), GprValue::Constant(sda_base)) =
                (vm.gpr_value(2), vm.gpr_value(13))
            {
                return Ok(ExecCbResult::End((sda2_base, sda_base)));
            }
            Ok(ExecCbResult::EndBlock)
        })?;
    if let Some((sda2_base, sda_base)) = bases {
        obj.sda2_base = Some(sda2_base);
        obj.sda_base = Some(sda_base);
        Ok(true)
    } else {
        Ok(false)
    }
}

179
src/util/config.rs Normal file
View File

@@ -0,0 +1,179 @@
use std::{io::Write, num::ParseIntError, ops::BitAndAssign};
use anyhow::{anyhow, bail, Result};
use cwdemangle::{demangle, DemangleOptions};
use flagset::FlagSet;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::util::obj::{
ObjInfo, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
};
/// Parses a hexadecimal `u32`, with or without a single `0x` / `0X` prefix.
///
/// The symbol line pattern accepts both `x` and `X` in addresses, so both
/// prefix spellings are handled here. Only one prefix is removed; input such
/// as `0x0x10` is rejected rather than silently accepted.
///
/// # Errors
/// Returns the `ParseIntError` from `u32::from_str_radix` when the remaining
/// text is empty, not valid hexadecimal, or out of range.
fn parse_hex(s: &str) -> Result<u32, ParseIntError> {
    let digits = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")).unwrap_or(s);
    u32::from_str_radix(digits, 16)
}
/// Parses one line of a symbols file.
///
/// A symbol line has the form `name = [section:]address; [// attributes]`.
/// Attributes are whitespace-separated and are either `key:value` pairs
/// (`type`, `size`, `scope`) or the bare word `hidden`. The symbol's section
/// is looked up from its address via `obj.section_at`.
///
/// Returns `Ok(Some(symbol))` for a symbol line and `Ok(None)` for a line
/// containing only a `//` comment.
///
/// # Errors
/// Fails when the line matches neither form, when the address or size is not
/// valid hexadecimal, or when an attribute is unknown.
pub fn parse_symbol_line(line: &str, obj: &ObjInfo) -> Result<Option<ObjSymbol>> {
    static SYMBOL_LINE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            "^\\s*(?P<name>[^\\s=]+)\\s*=\\s*(?:(?P<section>[A-Za-z0-9.]+):)?(?P<addr>[0-9A-Fa-fXx]+);(?:\\s*//\\s*(?P<attrs>.*))?$",
        )
        .unwrap()
    });
    static COMMENT_LINE: Lazy<Regex> = Lazy::new(|| Regex::new("^\\s*//.*$").unwrap());

    if let Some(captures) = SYMBOL_LINE.captures(line) {
        let name = captures["name"].to_string();
        let addr = parse_hex(&captures["addr"])?;
        let demangled_name = demangle(&name, &DemangleOptions::default());
        let mut symbol = ObjSymbol {
            name,
            demangled_name,
            address: addr as u64,
            section: obj.section_at(addr).ok().map(|section| section.index),
            size: 0,
            size_known: false,
            flags: Default::default(),
            kind: ObjSymbolKind::Unknown,
        };
        // The attribute comment is optional. Indexing `captures["attrs"]` panics when
        // the group did not participate in the match, so look it up explicitly.
        let attrs = captures.name("attrs").map_or("", |m| m.as_str());
        // `split_whitespace` yields no empty tokens for an empty list or repeated spaces
        for attr in attrs.split_whitespace() {
            if let Some((name, value)) = attr.split_once(':') {
                match name {
                    "type" => {
                        symbol.kind = symbol_kind_from_str(value)
                            .ok_or_else(|| anyhow!("Unknown symbol type '{}'", value))?;
                    }
                    "size" => {
                        symbol.size = parse_hex(value)? as u64;
                        symbol.size_known = true;
                    }
                    "scope" => {
                        symbol.flags.0 |= symbol_flags_from_str(value)
                            .ok_or_else(|| anyhow!("Unknown symbol scope '{}'", value))?;
                    }
                    _ => bail!("Unknown attribute '{name}'"),
                }
            } else {
                match attr {
                    "hidden" => {
                        symbol.flags.0 |= ObjSymbolFlags::Hidden;
                    }
                    _ => bail!("Unknown attribute '{attr}'"),
                }
            }
        }
        Ok(Some(symbol))
    } else if COMMENT_LINE.is_match(line) {
        Ok(None)
    } else {
        Err(anyhow!("Failed to parse line '{line}'"))
    }
}
/// Reports whether `symbol` should be left out when writing the symbols file.
///
/// Currently always returns `false`; the name-prefix filter below is disabled.
fn is_skip_symbol(symbol: &ObjSymbol) -> bool {
// symbol.name.starts_with("lbl_")
// || symbol.name.starts_with("func_")
// || symbol.name.starts_with("switch_")
// || symbol.name.starts_with("float_")
// || symbol.name.starts_with("double_")
false
}
/// Writes all symbols of `obj` to `w`, ordered by address.
///
/// Section symbols, symbols without a section, and symbols rejected by
/// `is_skip_symbol` are omitted. Symbols sharing an address keep their
/// original relative order.
pub fn write_symbols<W: Write>(w: &mut W, obj: &ObjInfo) -> Result<()> {
    let mut ordered: Vec<&ObjSymbol> = obj.symbols.iter().collect();
    ordered.sort_by_key(|symbol| symbol.address);
    let writable = ordered.into_iter().filter(|&symbol| {
        symbol.kind != ObjSymbolKind::Section
            // Ignore absolute symbols for now (usually linker-generated)
            && symbol.section.is_some()
            && !is_skip_symbol(symbol)
    });
    for symbol in writable {
        write_symbol(w, obj, symbol)?;
    }
    Ok(())
}
/// Writes a single symbol line: `name = [section:]address; // attributes`.
///
/// The section name is included when the symbol's section index resolves in
/// `obj.sections`. The attribute comment always carries `type:`; `size:` is
/// added for a known non-zero size, `scope:` when a scope flag is set, and
/// `hidden` when the hidden flag is set.
fn write_symbol<W: Write>(w: &mut W, obj: &ObjInfo, symbol: &ObjSymbol) -> Result<()> {
    write!(w, "{} = ", symbol.name)?;
    if let Some(section) = symbol.section.and_then(|idx| obj.sections.get(idx)) {
        write!(w, "{}:", section.name)?;
    }
    write!(w, "{:#010X}; //", symbol.address)?;
    write!(w, " type:{}", symbol_kind_to_str(symbol.kind))?;
    if symbol.size_known && symbol.size != 0 {
        write!(w, " size:{:#X}", symbol.size)?;
    }
    match symbol_flags_to_str(symbol.flags) {
        Some(scope) => write!(w, " scope:{scope}")?,
        None => {}
    }
    if symbol.flags.0.contains(ObjSymbolFlags::Hidden) {
        write!(w, " hidden")?;
    }
    Ok(writeln!(w)?)
}
#[inline]
fn symbol_kind_to_str(kind: ObjSymbolKind) -> &'static str {
match kind {
ObjSymbolKind::Unknown => "label",
ObjSymbolKind::Function => "function",
ObjSymbolKind::Object => "object",
ObjSymbolKind::Section => "section",
}
}
/// Parses a `type:` attribute value; returns `None` for unrecognized text.
#[inline]
fn symbol_kind_from_str(s: &str) -> Option<ObjSymbolKind> {
    let kind = match s {
        "label" => ObjSymbolKind::Unknown,
        "function" => ObjSymbolKind::Function,
        "object" => ObjSymbolKind::Object,
        "section" => ObjSymbolKind::Section,
        _ => return None,
    };
    Some(kind)
}
#[inline]
fn symbol_flags_to_str(flags: ObjSymbolFlagSet) -> Option<&'static str> {
if flags.0.contains(ObjSymbolFlags::Common) {
Some("common")
} else if flags.0.contains(ObjSymbolFlags::Weak) {
Some("weak")
} else if flags.0.contains(ObjSymbolFlags::Global) {
Some("global")
} else if flags.0.contains(ObjSymbolFlags::Local) {
Some("local")
} else {
None
}
}
/// Parses a `scope:` attribute value; returns `None` for unrecognized text.
#[inline]
fn symbol_flags_from_str(s: &str) -> Option<ObjSymbolFlags> {
    let scope = match s {
        "common" => ObjSymbolFlags::Common,
        "weak" => ObjSymbolFlags::Weak,
        "global" => ObjSymbolFlags::Global,
        "local" => ObjSymbolFlags::Local,
        _ => return None,
    };
    Some(scope)
}

554
src/util/dol.rs Normal file
View File

@@ -0,0 +1,554 @@
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
use anyhow::{anyhow, bail, ensure, Result};
use dol::{Dol, DolSection, DolSectionType};
use crate::util::{
cfa::{locate_sda_bases, AnalyzerState},
obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind,
},
};
/// Number of text sections assumed when sizing the `_rom_copy_info` table.
const MAX_TEXT_SECTIONS: usize = 7;
/// Number of data sections assumed when sizing the info tables.
const MAX_DATA_SECTIONS: usize = 11;
/// Upper bound, in bytes, for `_rom_copy_info`; used to pick where the search starts.
const MAX_ROM_COPY_INFO_SIZE: usize = (MAX_TEXT_SECTIONS + MAX_DATA_SECTIONS + 1) * 3 * 4; // num sections * 3 entries * u32
/// Upper bound, in bytes, for `_bss_init_info`; used to pick where the search starts.
const MAX_BSS_INIT_INFO_SIZE: usize = (MAX_DATA_SECTIONS + 1) * 2 * 4; // num sections * 2 entries * u32
/// Size, in bytes, of one `_eti_init_info` entry (four `u32` fields).
const ETI_INIT_INFO_SIZE: usize = 16; // eti_start, eti_end, code_start, code_size
/// Reads a big-endian `u32` from the DOL at virtual address `addr`.
///
/// # Errors
/// Fails when `virtual_data_at` cannot supply the 4 bytes.
fn read_u32(dol: &Dol, addr: u32) -> Result<u32> {
    let raw = dol.virtual_data_at(addr, 4)?;
    let bytes: [u8; 4] = raw.try_into()?;
    Ok(u32::from_be_bytes(bytes))
}
/// Loads a DOL executable from `path` and builds an [`ObjInfo`] describing it.
///
/// The steps are:
/// 1. Locate and read `_rom_copy_info` and `_bss_init_info` in the section
///    containing the entry point (treated as `.init`).
/// 2. Locate `_eti_init_info` in a data section and read the extabindex
///    entries it points to.
/// 3. Create text, data and BSS sections, sorted by address.
/// 4. Generate symbols for the info tables and for each extab / extabindex entry,
///    and record function sizes from extabindex as known functions.
/// 5. Try to determine `_SDA2_BASE_` / `_SDA_BASE_`; failure is only logged.
///
/// # Errors
/// Fails when the file cannot be opened or parsed, when required sections
/// cannot be located, or when the tables contain inconsistent data.
pub fn process_dol<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
    let name = path
        .as_ref()
        .file_name()
        .and_then(|filename| filename.to_str())
        .unwrap_or_default()
        .to_string();
    let dol = Dol::read_from(BufReader::new(File::open(path)?))?;
    let mut obj = ObjInfo {
        module_id: 0,
        kind: ObjKind::Executable,
        architecture: ObjArchitecture::PowerPc,
        name,
        symbols: vec![],
        sections: vec![],
        entry: dol.header.entry_point as u64,
        sda2_base: None,
        sda_base: None,
        stack_address: None,
        stack_end: None,
        db_stack_addr: None,
        arena_lo: None,
        arena_hi: None,
        splits: Default::default(),
        link_order: vec![],
        known_functions: Default::default(),
        unresolved_relocations: vec![],
    };

    // Locate _rom_copy_info
    let first_rom_section = dol
        .header
        .sections
        .iter()
        .find(|section| section.kind != DolSectionType::Bss)
        .ok_or_else(|| anyhow!("Failed to locate first rom section"))?;
    let init_section = section_by_address(&dol, dol.header.entry_point)
        .ok_or_else(|| anyhow!("Failed to locate .init section"))?;
    let rom_copy_info_addr = {
        // Start searching far enough from the end of .init to fit both tables
        let mut addr = init_section.target + init_section.size
            - MAX_ROM_COPY_INFO_SIZE as u32
            - MAX_BSS_INIT_INFO_SIZE as u32;
        loop {
            let value = read_u32(&dol, addr)?;
            if value == first_rom_section.target {
                log::debug!("Found _rom_copy_info @ {addr:#010X}");
                break Some(addr);
            }
            addr += 4;
            if addr >= init_section.target + init_section.size {
                log::warn!("Failed to locate _rom_copy_info");
                break None;
            }
        }
    };

    // Process _rom_copy_info
    let mut rom_sections = BTreeMap::<u32, u32>::new();
    let rom_copy_info_end = match rom_copy_info_addr {
        Some(mut addr) => loop {
            let rom = read_u32(&dol, addr)?;
            let copy = read_u32(&dol, addr + 4)?;
            ensure!(
                rom == copy,
                "Unsupported section: ROM address {rom:#010X} != copy address {copy:#010X}",
            );
            let size = read_u32(&dol, addr + 8)?;
            addr += 12;
            // A zero size terminates the table
            if size == 0 {
                log::debug!("Found _rom_copy_info end @ {addr:#010X}");
                break Some(addr);
            }
            if addr >= init_section.target + init_section.size {
                log::warn!("Failed to locate _rom_copy_info end");
                break None;
            }
            rom_sections.insert(rom, size);
        },
        None => None,
    };

    // Locate _bss_init_info
    let bss_section = dol
        .header
        .sections
        .iter()
        .find(|section| section.kind == DolSectionType::Bss)
        .ok_or_else(|| anyhow!("Failed to locate BSS section"))?;
    let bss_init_info_addr = match rom_copy_info_end {
        Some(mut addr) => loop {
            let value = read_u32(&dol, addr)?;
            if value == bss_section.target {
                log::debug!("Found _bss_init_info @ {addr:#010X}");
                break Some(addr);
            }
            addr += 4;
            if addr >= init_section.target + init_section.size {
                log::warn!("Failed to locate _bss_init_info");
                break None;
            }
        },
        None => None,
    };

    // Process _bss_init_info
    let mut bss_sections = BTreeMap::<u32, u32>::new();
    let bss_init_info_end = match bss_init_info_addr {
        Some(mut addr) => loop {
            let rom = read_u32(&dol, addr)?;
            let size = read_u32(&dol, addr + 4)?;
            addr += 8;
            // A zero size terminates the table
            if size == 0 {
                log::debug!("Found _bss_init_info end @ {addr:#010X}");
                break Some(addr);
            }
            if addr >= init_section.target + init_section.size {
                log::warn!("Failed to locate _bss_init_info end");
                break None;
            }
            bss_sections.insert(rom, size);
        },
        None => None,
    };

    // Locate _eti_init_info
    let num_text_sections =
        dol.header.sections.iter().filter(|section| section.kind == DolSectionType::Text).count();
    let mut eti_entries: Vec<EtiEntry> = Vec::new();
    let mut eti_init_info_range: Option<(u32, u32)> = None;
    let mut extab_section: Option<usize> = None;
    let mut extabindex_section: Option<usize> = None;
    'outer: for dol_section in
        dol.header.sections.iter().filter(|section| section.kind == DolSectionType::Data)
    {
        // Use section size from _rom_copy_info
        let dol_section_size = match rom_sections.get(&dol_section.target) {
            Some(&size) => size,
            None => dol_section.size,
        };
        let dol_section_end = dol_section.target + dol_section_size;

        let eti_init_info_addr = {
            let mut addr = dol_section_end - (ETI_INIT_INFO_SIZE * (num_text_sections + 1)) as u32;
            loop {
                let eti_init_info = read_eti_init_info(&dol, addr)?;
                if validate_eti_init_info(
                    &dol,
                    &eti_init_info,
                    dol_section,
                    dol_section_end,
                    &rom_sections,
                )? {
                    log::debug!("Found _eti_init_info @ {addr:#010X}");
                    break addr;
                }
                addr += 4;
                if addr > dol_section_end - ETI_INIT_INFO_SIZE as u32 {
                    // Not in this section, try the next data section
                    continue 'outer;
                }
            }
        };

        let eti_init_info_end = {
            let mut addr = eti_init_info_addr;
            loop {
                let eti_init_info = read_eti_init_info(&dol, addr)?;
                addr += 16;
                // An all-zero entry terminates the table
                if eti_init_info.is_zero() {
                    break;
                }
                if addr > dol_section_end - ETI_INIT_INFO_SIZE as u32 {
                    bail!(
                        "Failed to locate _eti_init_info end (start @ {:#010X})",
                        eti_init_info_addr
                    );
                }
                if !validate_eti_init_info(
                    &dol,
                    &eti_init_info,
                    dol_section,
                    dol_section_end,
                    &rom_sections,
                )? {
                    bail!("Invalid _eti_init_info entry: {:#010X?}", eti_init_info);
                }
                for addr in (eti_init_info.eti_start..eti_init_info.eti_end).step_by(12) {
                    let eti_entry = read_eti_entry(&dol, addr)?;
                    let entry_section =
                        section_by_address(&dol, eti_entry.extab_addr).ok_or_else(|| {
                            anyhow!(
                                "Failed to locate section for extab address {:#010X}",
                                eti_entry.extab_addr
                            )
                        })?;
                    // All entries must point into the same (extab) section
                    if let Some(extab_section) = extab_section {
                        ensure!(
                            entry_section.index == extab_section,
                            "Mismatched sections for extabindex entries: {} != {}",
                            entry_section.index,
                            extab_section
                        );
                    } else {
                        extab_section = Some(entry_section.index);
                    }
                    eti_entries.push(eti_entry);
                }
            }
            log::debug!("Found _eti_init_info end @ {addr:#010X}");
            addr
        };

        eti_init_info_range = Some((eti_init_info_addr, eti_init_info_end));
        extabindex_section = Some(dol_section.index);
        break;
    }
    if eti_init_info_range.is_none() {
        log::debug!("Failed to locate _eti_init_info");
    }

    // Add text and data sections
    for dol_section in
        dol.header.sections.iter().filter(|section| section.kind != DolSectionType::Bss)
    {
        let (name, kind, known) = match dol_section.index {
            idx if idx == init_section.index => (".init".to_string(), ObjSectionKind::Code, true),
            idx if Some(idx) == extab_section => {
                ("extab".to_string(), ObjSectionKind::ReadOnlyData, true)
            }
            idx if Some(idx) == extabindex_section => {
                ("extabindex".to_string(), ObjSectionKind::ReadOnlyData, true)
            }
            _ if num_text_sections == 2 && dol_section.kind == DolSectionType::Text => {
                (".text".to_string(), ObjSectionKind::Code, true)
            }
            idx => match dol_section.kind {
                DolSectionType::Text => (format!(".text{idx}"), ObjSectionKind::Code, false),
                DolSectionType::Data => (format!(".data{idx}"), ObjSectionKind::Data, false),
                DolSectionType::Bss => unreachable!(),
            },
        };

        // Use section size from _rom_copy_info
        let size = match rom_sections.get(&dol_section.target) {
            Some(&size) => size,
            None => {
                if !rom_sections.is_empty() {
                    log::warn!(
                        "Section {} ({:#010X}) doesn't exist in _rom_copy_info",
                        dol_section.index,
                        dol_section.target
                    );
                }
                dol_section.size
            }
        };

        obj.sections.push(ObjSection {
            name,
            kind,
            address: dol_section.target as u64,
            size: size as u64,
            data: dol.virtual_data_at(dol_section.target, size)?.to_vec(),
            align: 0,
            index: 0,
            elf_index: 0,
            relocations: vec![],
            original_address: 0,
            file_offset: dol_section.offset as u64,
            section_known: known,
        });
    }

    // Add BSS sections from _bss_init_info
    for (idx, (&addr, &size)) in bss_sections.iter().enumerate() {
        ensure!(
            addr >= bss_section.target
                && addr < bss_section.target + bss_section.size
                && addr + size <= bss_section.target + bss_section.size,
            "Invalid BSS range {:#010X}-{:#010X} (DOL BSS: {:#010X}-{:#010X})",
            addr,
            addr + size,
            bss_section.target,
            bss_section.target + bss_section.size
        );

        obj.sections.push(ObjSection {
            name: format!(".bss{}", idx),
            kind: ObjSectionKind::Bss,
            address: addr as u64,
            size: size as u64,
            data: vec![],
            align: 0,
            index: 0,
            elf_index: 0,
            relocations: vec![],
            original_address: 0,
            file_offset: 0,
            section_known: false,
        });
    }

    // Sort sections by address ascending
    obj.sections.sort_by_key(|s| s.address);

    // Apply section indices
    // (extab / extabindex indices are remapped from DOL indices to sorted indices)
    let mut init_section_index = None;
    for (idx, section) in obj.sections.iter_mut().enumerate() {
        match section.name.as_str() {
            ".init" => {
                init_section_index = Some(idx);
            }
            "extab" => {
                extab_section = Some(idx);
            }
            "extabindex" => {
                extabindex_section = Some(idx);
            }
            _ => {}
        }
        section.index = idx;
        // Assume the original ELF section index is +1
        // ELF files start with a NULL section
        section.elf_index = idx + 1;
    }

    // Generate _rom_copy_info symbol
    if let (Some(rom_copy_info_addr), Some(rom_copy_info_end)) =
        (rom_copy_info_addr, rom_copy_info_end)
    {
        obj.symbols.push(ObjSymbol {
            name: "_rom_copy_info".to_string(),
            demangled_name: None,
            address: rom_copy_info_addr as u64,
            section: init_section_index,
            size: (rom_copy_info_end - rom_copy_info_addr) as u64,
            size_known: true,
            flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
            kind: ObjSymbolKind::Object,
        });
    }

    // Generate _bss_init_info symbol
    if let (Some(bss_init_info_addr), Some(bss_init_info_end)) =
        (bss_init_info_addr, bss_init_info_end)
    {
        obj.symbols.push(ObjSymbol {
            name: "_bss_init_info".to_string(),
            demangled_name: None,
            address: bss_init_info_addr as u64,
            section: init_section_index,
            size: (bss_init_info_end - bss_init_info_addr) as u64,
            size_known: true,
            flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
            kind: ObjSymbolKind::Object,
        });
    }

    // Generate _eti_init_info symbol
    if let Some((eti_init_info_addr, eti_init_info_end)) = eti_init_info_range {
        obj.symbols.push(ObjSymbol {
            name: "_eti_init_info".to_string(),
            demangled_name: None,
            address: eti_init_info_addr as u64,
            section: extabindex_section,
            size: (eti_init_info_end - eti_init_info_addr) as u64,
            size_known: true,
            flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
            kind: ObjSymbolKind::Object,
        });
    }

    // Generate symbols for extab & extabindex entries
    if let (Some(extabindex_section_idx), Some(extab_section_idx)) =
        (extabindex_section, extab_section)
    {
        let extabindex_section = &obj.sections[extabindex_section_idx];
        let extab_section = &obj.sections[extab_section_idx];

        for entry in &eti_entries {
            // Add functions from extabindex entries as known function bounds
            if let Some(old_value) = obj.known_functions.insert(entry.function, entry.function_size)
            {
                if old_value != entry.function_size {
                    log::warn!(
                        "Conflicting sizes for {:#010X}: {:#X} != {:#X}",
                        entry.function,
                        entry.function_size,
                        old_value
                    );
                }
            }
            obj.symbols.push(ObjSymbol {
                name: format!("@eti_{:08X}", entry.address),
                demangled_name: None,
                address: entry.address as u64,
                section: Some(extabindex_section.index),
                size: 12,
                size_known: true,
                flags: ObjSymbolFlagSet(ObjSymbolFlags::Local | ObjSymbolFlags::Hidden),
                kind: ObjSymbolKind::Object,
            });
        }

        let mut entry_iter = eti_entries.iter().peekable();
        loop {
            let (addr, size) = match (entry_iter.next(), entry_iter.peek()) {
                // Each extab entry extends up to the start of the next one
                (Some(a), Some(&b)) => (a.extab_addr, b.extab_addr - a.extab_addr),
                // The last extab entry extends to the end of the extab section,
                // which is the section these symbols are placed in
                (Some(a), None) => (
                    a.extab_addr,
                    (extab_section.address + extab_section.size) as u32 - a.extab_addr,
                ),
                _ => break,
            };
            obj.symbols.push(ObjSymbol {
                name: format!("@etb_{:08X}", addr),
                demangled_name: None,
                address: addr as u64,
                section: Some(extab_section.index),
                size: size as u64,
                size_known: true,
                flags: ObjSymbolFlagSet(ObjSymbolFlags::Local | ObjSymbolFlags::Hidden),
                kind: ObjSymbolKind::Object,
            });
        }
    }

    // Locate _SDA2_BASE_ & _SDA_BASE_
    match locate_sda_bases(&mut obj) {
        Ok(true) => {
            let sda2_base = obj.sda2_base.unwrap();
            let sda_base = obj.sda_base.unwrap();
            obj.symbols.push(ObjSymbol {
                name: "_SDA2_BASE_".to_string(),
                demangled_name: None,
                address: sda2_base as u64,
                section: None,
                size: 0,
                size_known: false,
                flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
                kind: ObjSymbolKind::Unknown,
            });
            obj.symbols.push(ObjSymbol {
                name: "_SDA_BASE_".to_string(),
                demangled_name: None,
                address: sda_base as u64,
                section: None,
                size: 0,
                size_known: false,
                flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
                kind: ObjSymbolKind::Unknown,
            });
        }
        Ok(false) => {
            log::warn!("Unable to locate SDA bases");
        }
        Err(e) => {
            log::warn!("Failed to locate SDA bases: {:?}", e);
        }
    }

    Ok(obj)
}
/// One 16-byte `_eti_init_info` entry: an extabindex range and the code range it covers.
#[derive(Debug, Clone)]
struct EtiInitInfo {
    /// Start address of the extabindex entries
    eti_start: u32,
    /// End address (exclusive) of the extabindex entries
    eti_end: u32,
    /// Start address of the covered code
    code_start: u32,
    /// Size of the covered code
    code_size: u32,
}

impl EtiInitInfo {
    /// Returns `true` when every field is zero.
    #[inline]
    fn is_zero(&self) -> bool {
        [self.eti_start, self.eti_end, self.code_start, self.code_size]
            .iter()
            .all(|&field| field == 0)
    }
}
/// One 12-byte extabindex entry, as read by `read_eti_entry`.
#[derive(Debug, Clone)]
struct EtiEntry {
/// Address the entry itself was read from
address: u32,
/// Function address (word at offset 0)
function: u32,
/// Function size (word at offset 4)
function_size: u32,
/// Address of the function's extab data (word at offset 8)
extab_addr: u32,
}
/// Reads the four consecutive `u32` fields of an `_eti_init_info` entry at `addr`.
fn read_eti_init_info(dol: &Dol, addr: u32) -> Result<EtiInitInfo> {
    // Struct fields are evaluated in the order written, matching the memory layout
    Ok(EtiInitInfo {
        eti_start: read_u32(dol, addr)?,
        eti_end: read_u32(dol, addr + 4)?,
        code_start: read_u32(dol, addr + 8)?,
        code_size: read_u32(dol, addr + 12)?,
    })
}
/// Reads the three consecutive `u32` fields of an extabindex entry at `address`.
fn read_eti_entry(dol: &Dol, address: u32) -> Result<EtiEntry> {
    // Struct fields are evaluated in the order written, matching the memory layout
    Ok(EtiEntry {
        address,
        function: read_u32(dol, address)?,
        function_size: read_u32(dol, address + 4)?,
        extab_addr: read_u32(dol, address + 8)?,
    })
}
/// Checks whether `eti_init_info` looks like a plausible `_eti_init_info` entry.
///
/// The entry is accepted when both `eti_start` and `eti_end` lie inside
/// `[eti_section.target, eti_section_end)`, `code_start` falls inside some DOL
/// section, and `code_size` does not exceed that section's size (taken from
/// `rom_sections` when present, otherwise from the DOL header).
fn validate_eti_init_info(
    dol: &Dol,
    eti_init_info: &EtiInitInfo,
    eti_section: &DolSection,
    eti_section_end: u32,
    rom_sections: &BTreeMap<u32, u32>,
) -> Result<bool> {
    let eti_range = eti_section.target..eti_section_end;
    if !eti_range.contains(&eti_init_info.eti_start) || !eti_range.contains(&eti_init_info.eti_end)
    {
        return Ok(false);
    }
    let code_section = match section_by_address(dol, eti_init_info.code_start) {
        Some(section) => section,
        None => return Ok(false),
    };
    let code_section_size =
        rom_sections.get(&code_section.target).copied().unwrap_or(code_section.size);
    Ok(eti_init_info.code_size <= code_section_size)
}
/// Returns the first DOL section whose `[target, target + size)` range contains `addr`.
fn section_by_address(dol: &Dol, addr: u32) -> Option<&DolSection> {
    let sections = dol.header.sections.iter();
    for section in sections {
        // The end is only computed once the start check has passed
        if section.target <= addr && section.target + section.size > addr {
            return Some(section);
        }
    }
    None
}

1118
src/util/dwarf.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,57 +1,83 @@
use std::{
collections::{hash_map, BTreeMap, HashMap},
collections::{btree_map::Entry, hash_map, BTreeMap, HashMap},
fs::File,
io::Cursor,
path::Path,
};
use anyhow::{Context, Error, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use cwdemangle::demangle;
use flagset::Flags;
use indexmap::IndexMap;
use memmap2::MmapOptions;
use object::{
elf::{
R_PPC_ADDR16_HA, R_PPC_ADDR16_HI, R_PPC_ADDR16_LO, R_PPC_ADDR32, R_PPC_EMB_SDA21,
R_PPC_REL14, R_PPC_REL24,
elf,
elf::{SHF_ALLOC, SHF_EXECINSTR, SHF_WRITE, SHT_NOBITS, SHT_PROGBITS},
write::{
elf::{ProgramHeader, Rel, SectionHeader, SectionIndex, SymbolIndex},
Mangling, SectionId, StringId, SymbolId,
},
write::{Mangling, SectionId, SymbolId},
Architecture, BinaryFormat, Endianness, Object, ObjectKind, ObjectSection, ObjectSymbol,
Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SectionKind, Symbol,
Relocation, RelocationEncoding, RelocationKind, RelocationTarget, Section, SectionKind, Symbol,
SymbolFlags, SymbolKind, SymbolScope, SymbolSection,
};
use crate::util::obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind,
ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
use crate::util::{
dwarf::{
process_address, process_type, read_debug_section, type_string, ud_type, ud_type_string,
AttributeKind, TagKind, TypeKind,
},
obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind,
ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
},
sigs::OutSymbol,
};
/// Tracks progress through the symbol table while matching section symbols to file symbols.
enum BoundaryState {
/// Looking for a file symbol, any section symbols are queued
/// (as `(symbol address, section name)` pairs)
LookForFile(Vec<(u64, String)>),
/// Looking for section symbols
/// (belonging to the file with the contained name)
LookForSections(String),
/// Done with files and sections
FilesEnded,
}
const ENABLE_DWARF: bool = false;
pub fn process_elf<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
let elf_file = File::open(&path).with_context(|| {
format!("Failed to open ELF file '{}'", path.as_ref().to_string_lossy())
})?;
let map = unsafe { MmapOptions::new().map(&elf_file) }.with_context(|| {
format!("Failed to mmap ELF file: '{}'", path.as_ref().to_string_lossy())
})?;
let elf_file = File::open(&path)
.with_context(|| format!("Failed to open ELF file '{}'", path.as_ref().display()))?;
let map = unsafe { MmapOptions::new().map(&elf_file) }
.with_context(|| format!("Failed to mmap ELF file: '{}'", path.as_ref().display()))?;
let obj_file = object::read::File::parse(&*map)?;
let architecture = match obj_file.architecture() {
Architecture::PowerPc => ObjArchitecture::PowerPc,
arch => return Err(Error::msg(format!("Unexpected architecture: {arch:?}"))),
arch => bail!("Unexpected architecture: {arch:?}"),
};
if obj_file.endianness() != Endianness::Big {
return Err(Error::msg("Expected big endian"));
}
ensure!(obj_file.endianness() == Endianness::Big, "Expected big endian");
let kind = match obj_file.kind() {
ObjectKind::Executable => ObjKind::Executable,
ObjectKind::Relocatable => ObjKind::Relocatable,
kind => return Err(Error::msg(format!("Unexpected ELF type: {kind:?}"))),
kind => bail!("Unexpected ELF type: {kind:?}"),
};
let mut obj_name = String::new();
if ENABLE_DWARF {
if let Some(debug_section) = obj_file.section_by_name(".debug") {
if debug_section.size() > 0 {
load_debug_section(&obj_file, debug_section)?;
}
}
}
let mut obj_name = String::new();
let mut stack_address: Option<u32> = None;
let mut stack_end: Option<u32> = None;
let mut db_stack_addr: Option<u32> = None;
let mut arena_lo: Option<u32> = None;
let mut arena_hi: Option<u32> = None;
let mut sda_base: Option<u32> = None;
let mut sda2_base: Option<u32> = None;
let mut sections: Vec<ObjSection> = vec![];
let mut section_indexes: Vec<Option<usize>> = vec![];
@@ -75,6 +101,7 @@ pub fn process_elf<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
data: section.uncompressed_data()?.to_vec(),
align: section.align(),
index: sections.len(),
elf_index: section.index().0,
relocations: vec![],
original_address: 0, // TODO load from abs symbol
file_offset: section.file_range().map(|(v, _)| v).unwrap_or_default(),
@@ -87,100 +114,144 @@ pub fn process_elf<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
let mut current_file: Option<String> = None;
let mut section_starts = IndexMap::<String, Vec<(u64, String)>>::new();
let mut name_to_index = HashMap::<String, usize>::new(); // for resolving duplicate names
let mut boundary_state = BoundaryState::LookForFile(Default::default());
for symbol in obj_file.symbols() {
// Locate linker-generated symbols
let symbol_name = symbol.name()?;
match symbol_name {
"_stack_addr" => {
stack_address = Some(symbol.address() as u32);
}
"_stack_end" => {
stack_end = Some(symbol.address() as u32);
}
"_db_stack_addr" => {
db_stack_addr = Some(symbol.address() as u32);
}
"__ArenaLo" => {
arena_lo = Some(symbol.address() as u32);
}
"__ArenaHi" => {
arena_hi = Some(symbol.address() as u32);
}
"_stack_addr" => stack_address = Some(symbol.address() as u32),
"_stack_end" => stack_end = Some(symbol.address() as u32),
"_db_stack_addr" => db_stack_addr = Some(symbol.address() as u32),
"__ArenaLo" => arena_lo = Some(symbol.address() as u32),
"__ArenaHi" => arena_hi = Some(symbol.address() as u32),
"_SDA_BASE_" => sda_base = Some(symbol.address() as u32),
"_SDA2_BASE_" => sda2_base = Some(symbol.address() as u32),
_ => {}
}
};
// MWCC has file symbol first, then sections
// GCC has section symbols first, then file
match symbol.kind() {
// Detect file boundaries
SymbolKind::File => {
let mut file_name = symbol_name.to_string();
// Try to exclude precompiled header symbols
// Make configurable eventually
if file_name == "Precompiled.cpp"
|| file_name == "stdafx.cpp"
|| file_name.ends_with(".h")
|| file_name.starts_with("Pch.")
|| file_name.contains("precompiled_")
|| file_name.contains("Precompiled")
|| file_name.contains(".pch")
|| file_name.contains("_PCH.")
{
symbol_indexes.push(None);
continue;
}
if kind == ObjKind::Relocatable {
obj_name = file_name.clone();
}
match section_starts.entry(file_name.clone()) {
let sections = match section_starts.entry(file_name.clone()) {
indexmap::map::Entry::Occupied(_) => {
let index = match name_to_index.entry(file_name.clone()) {
hash_map::Entry::Occupied(mut e) => e.into_mut(),
hash_map::Entry::Occupied(e) => e.into_mut(),
hash_map::Entry::Vacant(e) => e.insert(0),
};
*index += 1;
let new_name = format!("{}_{}", file_name, index);
log::info!("Renaming {} to {}", file_name, new_name);
section_starts.insert(new_name.clone(), Default::default());
file_name = new_name;
}
indexmap::map::Entry::Vacant(e) => {
e.insert(Default::default());
}
};
current_file = Some(file_name);
}
// Detect sections within a file
SymbolKind::Section => {
if let Some(file_name) = &current_file {
let sections = section_starts
.get_mut(file_name)
.ok_or_else(|| Error::msg("Failed to create entry"))?;
let section_index = symbol
.section_index()
.ok_or_else(|| Error::msg("Section symbol without section"))?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?.to_string();
sections.push((symbol.address(), section_name));
};
}
// Sometimes, the section symbol address is 0,
// so attempt to detect it from first symbol within section
SymbolKind::Data | SymbolKind::Text => {
if let Some(file_name) = &current_file {
let sections = section_starts
.get_mut(file_name)
.ok_or_else(|| Error::msg("Failed to create entry"))?;
let section_index = symbol.section_index().ok_or_else(|| {
Error::msg(format!("Section symbol without section: {symbol:?}"))
})?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?;
if let Some((addr, _)) =
sections.iter_mut().find(|(_, name)| name == section_name)
{
if *addr == 0 {
*addr = symbol.address();
// log::info!("Renaming {} to {}", file_name, new_name);
file_name = new_name.clone();
match section_starts.entry(new_name.clone()) {
indexmap::map::Entry::Occupied(_) => {
bail!("Duplicate filename '{}'", new_name)
}
indexmap::map::Entry::Vacant(e) => e.insert(Default::default()),
}
};
}
indexmap::map::Entry::Vacant(e) => e.insert(Default::default()),
};
current_file = Some(file_name.clone());
match &mut boundary_state {
BoundaryState::LookForFile(queue) => {
if queue.is_empty() {
boundary_state = BoundaryState::LookForSections(file_name);
} else {
// Clears queue
sections.append(queue);
}
}
BoundaryState::LookForSections(_) => {
boundary_state = BoundaryState::LookForSections(file_name);
}
BoundaryState::FilesEnded => {
log::warn!("File symbol after files ended: '{}'", file_name);
}
}
}
SymbolKind::Section => {
let section_index = symbol
.section_index()
.ok_or_else(|| anyhow!("Section symbol without section"))?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?.to_string();
match &mut boundary_state {
BoundaryState::LookForFile(queue) => {
queue.push((symbol.address(), section_name));
}
BoundaryState::LookForSections(file_name) => {
let sections = section_starts
.get_mut(file_name)
.ok_or_else(|| anyhow!("Failed to create entry"))?;
sections.push((symbol.address(), section_name));
}
BoundaryState::FilesEnded => {
log::warn!(
"Section symbol after files ended: {} @ {:#010X}",
section_name,
symbol.address()
);
}
}
}
_ => match symbol.section() {
// Linker generated symbols indicate the end
SymbolSection::Absolute => {
current_file = None;
boundary_state = BoundaryState::FilesEnded;
}
SymbolSection::Section(_) | SymbolSection::Undefined => {}
_ => return Err(Error::msg(format!("Unsupported symbol section type {symbol:?}"))),
SymbolSection::Section(section_index) => match &mut boundary_state {
BoundaryState::LookForFile(_) => {}
BoundaryState::LookForSections(file_name) => {
let sections = section_starts
.get_mut(file_name)
.ok_or_else(|| anyhow!("Failed to create entry"))?;
let section = obj_file.section_by_index(section_index)?;
let section_name = section.name()?;
if let Some((addr, _)) = sections
.iter_mut()
.find(|(addr, name)| *addr == 0 && name == section_name)
{
// If the section symbol had address 0, determine address
// from first symbol within that section.
*addr = symbol.address();
} else if !sections.iter().any(|(_, name)| name == section_name) {
// Otherwise, if there was no section symbol, assume this
// symbol indicates the section address.
sections.push((symbol.address(), section_name.to_string()));
}
}
BoundaryState::FilesEnded => {}
},
SymbolSection::Undefined => {}
_ => bail!("Unsupported symbol section type {symbol:?}"),
},
}
// Generate symbols
if matches!(symbol.kind(), SymbolKind::Null | SymbolKind::File) {
if matches!(symbol.kind(), SymbolKind::Null | SymbolKind::File)
|| matches!(symbol.section_index(), Some(idx) if section_indexes[idx.0] == None)
{
symbol_indexes.push(None);
continue;
}
@@ -224,12 +295,15 @@ pub fn process_elf<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
}
Ok(ObjInfo {
module_id: 0,
kind,
architecture,
name: obj_name,
symbols,
sections,
entry: obj_file.entry() as u32,
entry: obj_file.entry(),
sda2_base,
sda_base,
stack_address,
stack_end,
db_stack_addr,
@@ -237,104 +311,310 @@ pub fn process_elf<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
arena_hi,
splits,
link_order,
known_functions: Default::default(),
unresolved_relocations: vec![],
})
}
pub fn write_elf(obj: &ObjInfo) -> Result<object::write::Object> {
let mut out_obj =
object::write::Object::new(BinaryFormat::Elf, Architecture::PowerPc, Endianness::Big);
out_obj.set_mangling(Mangling::None);
if !obj.name.is_empty() {
out_obj.add_file_symbol(obj.name.as_bytes().to_vec());
pub fn write_elf(obj: &ObjInfo) -> Result<Vec<u8>> {
let mut out_data = Vec::new();
let mut writer = object::write::elf::Writer::new(Endianness::Big, false, &mut out_data);
struct OutSection {
index: SectionIndex,
rela_index: Option<SectionIndex>,
offset: usize,
rela_offset: usize,
name: StringId,
rela_name: Option<StringId>,
}
struct OutSymbol {
index: SymbolIndex,
sym: object::write::elf::Sym,
}
let mut section_idxs: Vec<SectionId> = Vec::with_capacity(obj.sections.len());
let mut out_sections: Vec<OutSection> = Vec::with_capacity(obj.sections.len());
for section in &obj.sections {
let section_id =
out_obj.add_section(vec![], section.name.as_bytes().to_vec(), match section.kind {
ObjSectionKind::Code => SectionKind::Text,
ObjSectionKind::Data => SectionKind::Data,
ObjSectionKind::ReadOnlyData => SectionKind::ReadOnlyData,
ObjSectionKind::Bss => SectionKind::UninitializedData,
});
section_idxs.push(section_id);
let out_section = out_obj.section_mut(section_id);
match section.kind {
ObjSectionKind::Bss => {
out_section.append_bss(section.size, section.align);
}
_ => {
out_section.set_data(section.data.clone(), section.align);
}
}
// Generate section symbol
out_obj.section_symbol(section_id);
// Add original addresses
if section.original_address != 0 {
// TODO write to metadata?
}
if section.file_offset != 0 {
// TODO write to metadata?
let name = writer.add_section_name(section.name.as_bytes());
let index = writer.reserve_section_index();
out_sections.push(OutSection {
index,
rela_index: None,
offset: 0,
rela_offset: 0,
name,
rela_name: None,
});
}
let mut rela_names: Vec<String> = vec![Default::default(); obj.sections.len()];
for ((section, out_section), rela_name) in
obj.sections.iter().zip(&mut out_sections).zip(&mut rela_names)
{
if !section.relocations.is_empty() {
*rela_name = format!(".rela{}", section.name);
out_section.rela_name = Some(writer.add_section_name(rela_name.as_bytes()));
out_section.rela_index = Some(writer.reserve_section_index());
}
}
let symtab = writer.reserve_symtab_section_index();
let shstrtab = writer.reserve_shstrtab_section_index();
let strtab = writer.reserve_strtab_section_index();
// Add symbols
let mut symbol_idxs: Vec<SymbolId> = Vec::with_capacity(obj.symbols.len());
for symbol in &obj.symbols {
let symbol_id = out_obj.add_symbol(object::write::Symbol {
name: symbol.name.as_bytes().to_vec(),
value: symbol.address,
size: symbol.size,
kind: match symbol.kind {
ObjSymbolKind::Unknown => SymbolKind::Null,
ObjSymbolKind::Function => SymbolKind::Text,
ObjSymbolKind::Object => SymbolKind::Data,
ObjSymbolKind::Section => SymbolKind::Section,
let mut out_symbols: Vec<OutSymbol> = Vec::with_capacity(obj.symbols.len());
let mut symbol_offset = 0;
let mut num_local = 0;
if !obj.name.is_empty() {
let name_index = writer.add_string(obj.name.as_bytes());
let index = writer.reserve_symbol_index(None);
out_symbols.push(OutSymbol {
index,
sym: object::write::elf::Sym {
name: Some(name_index),
section: None,
st_info: {
let st_type = elf::STT_FILE;
let st_bind = elf::STB_GLOBAL;
(st_bind << 4) + st_type
},
st_other: elf::STV_DEFAULT,
st_shndx: elf::SHN_ABS,
st_value: 0,
st_size: 0,
},
scope: if symbol.flags.0.contains(ObjSymbolFlags::Hidden) {
SymbolScope::Linkage
} else if symbol.flags.0.contains(ObjSymbolFlags::Local) {
SymbolScope::Compilation
} else {
SymbolScope::Dynamic
},
weak: symbol.flags.0.contains(ObjSymbolFlags::Weak),
section: match symbol.section {
None => object::write::SymbolSection::Undefined,
Some(idx) => object::write::SymbolSection::Section(section_idxs[idx]),
},
flags: SymbolFlags::None,
});
symbol_idxs.push(symbol_id);
symbol_offset += 1;
}
for symbol in &obj.symbols {
let section_index = symbol.section.and_then(|idx| out_sections.get(idx)).map(|s| s.index);
let index = writer.reserve_symbol_index(section_index);
let name_index = if symbol.name.is_empty() {
None
} else {
Some(writer.add_string(symbol.name.as_bytes()))
};
let sym = object::write::elf::Sym {
name: name_index,
section: section_index,
st_info: {
let st_type = match symbol.kind {
ObjSymbolKind::Unknown => elf::STT_NOTYPE,
ObjSymbolKind::Function => elf::STT_FUNC,
ObjSymbolKind::Object => {
if symbol.flags.0.contains(ObjSymbolFlags::Common) {
elf::STT_COMMON
} else {
elf::STT_OBJECT
}
}
ObjSymbolKind::Section => elf::STT_SECTION,
};
let st_bind = if symbol.flags.0.contains(ObjSymbolFlags::Weak) {
elf::STB_WEAK
} else if symbol.flags.0.contains(ObjSymbolFlags::Local) {
elf::STB_LOCAL
} else {
elf::STB_GLOBAL
};
(st_bind << 4) + st_type
},
st_other: if symbol.flags.0.contains(ObjSymbolFlags::Hidden) {
elf::STV_HIDDEN
} else {
elf::STV_DEFAULT
},
st_shndx: if section_index.is_some() {
0
} else if symbol.address != 0 {
elf::SHN_ABS
} else {
elf::SHN_UNDEF
},
st_value: symbol.address,
st_size: symbol.size,
};
if sym.st_info >> 4 == elf::STB_LOCAL {
num_local = writer.symbol_count();
}
out_symbols.push(OutSymbol {
index,
sym,
});
}
// Add relocations
for section in &obj.sections {
let section_id = section_idxs[section.index];
for reloc in &section.relocations {
let symbol_id = symbol_idxs[reloc.target_symbol];
out_obj.add_relocation(section_id, object::write::Relocation {
offset: reloc.address,
size: 0,
kind: RelocationKind::Elf(match reloc.kind {
ObjRelocKind::Absolute => R_PPC_ADDR32,
ObjRelocKind::PpcAddr16Hi => R_PPC_ADDR16_HI,
ObjRelocKind::PpcAddr16Ha => R_PPC_ADDR16_HA,
ObjRelocKind::PpcAddr16Lo => R_PPC_ADDR16_LO,
ObjRelocKind::PpcRel24 => R_PPC_REL24,
ObjRelocKind::PpcRel14 => R_PPC_REL14,
ObjRelocKind::PpcEmbSda21 => R_PPC_EMB_SDA21,
}),
encoding: RelocationEncoding::Generic,
symbol: symbol_id,
addend: reloc.addend,
})?;
writer.reserve_file_header();
if obj.kind == ObjKind::Executable {
writer.reserve_program_headers(obj.sections.len() as u32);
}
for (section, out_section) in obj.sections.iter().zip(&mut out_sections) {
match section.kind {
ObjSectionKind::Code | ObjSectionKind::Data | ObjSectionKind::ReadOnlyData => {}
ObjSectionKind::Bss => continue,
}
ensure!(section.data.len() as u64 == section.size, "Mismatched section size");
out_section.offset = writer.reserve(section.data.len(), 32);
}
writer.reserve_shstrtab();
writer.reserve_strtab();
writer.reserve_symtab();
for (section, out_section) in obj.sections.iter().zip(&mut out_sections) {
if section.relocations.is_empty() {
continue;
}
out_section.rela_offset = writer.reserve_relocations(section.relocations.len(), true);
}
writer.reserve_section_headers();
writer.write_file_header(&object::write::elf::FileHeader {
os_abi: elf::ELFOSABI_SYSV,
abi_version: 0,
e_type: match obj.kind {
ObjKind::Executable => elf::ET_EXEC,
ObjKind::Relocatable => elf::ET_REL,
},
e_machine: elf::EM_PPC,
e_entry: obj.entry,
e_flags: elf::EF_PPC_EMB,
})?;
if obj.kind == ObjKind::Executable {
writer.write_align_program_headers();
for (section, out_section) in obj.sections.iter().zip(&out_sections) {
writer.write_program_header(&ProgramHeader {
p_type: elf::PT_LOAD,
p_flags: match section.kind {
ObjSectionKind::Code => elf::PF_R | elf::PF_X,
ObjSectionKind::Data | ObjSectionKind::Bss => elf::PF_R | elf::PF_W,
ObjSectionKind::ReadOnlyData => elf::PF_R,
},
p_offset: out_section.offset as u64,
p_vaddr: section.address,
p_paddr: 0,
p_filesz: match section.kind {
ObjSectionKind::Bss => 0,
_ => section.size,
},
p_memsz: section.size,
p_align: 32,
});
}
}
Ok(out_obj)
for (section, out_section) in obj.sections.iter().zip(&out_sections) {
if section.kind == ObjSectionKind::Bss {
continue;
}
writer.write_align(32);
debug_assert_eq!(writer.len(), out_section.offset);
writer.write(&section.data);
}
writer.write_shstrtab();
writer.write_strtab();
writer.write_null_symbol();
for out_symbol in &out_symbols {
writer.write_symbol(&out_symbol.sym);
}
for (section, out_section) in obj.sections.iter().zip(&out_sections) {
if section.relocations.is_empty() {
continue;
}
writer.write_align_relocation();
debug_assert_eq!(writer.len(), out_section.rela_offset);
for reloc in &section.relocations {
let mut r_offset = reloc.address;
let r_type = match reloc.kind {
ObjRelocKind::Absolute => {
if r_offset & 3 == 0 {
elf::R_PPC_ADDR32
} else {
elf::R_PPC_UADDR32
}
}
ObjRelocKind::PpcAddr16Hi => {
r_offset = (r_offset & !3) + 2;
elf::R_PPC_ADDR16_HI
},
ObjRelocKind::PpcAddr16Ha => {
r_offset = (r_offset & !3) + 2;
elf::R_PPC_ADDR16_HA
},
ObjRelocKind::PpcAddr16Lo => {
r_offset = (r_offset & !3) + 2;
elf::R_PPC_ADDR16_LO
},
ObjRelocKind::PpcRel24 => {
r_offset = (r_offset & !3);
elf::R_PPC_REL24
},
ObjRelocKind::PpcRel14 => {
r_offset = (r_offset & !3);
elf::R_PPC_REL14
},
ObjRelocKind::PpcEmbSda21 => {
r_offset = (r_offset & !3) + 2;
elf::R_PPC_EMB_SDA21
},
};
writer.write_relocation(true, &Rel {
r_offset,
r_sym: (reloc.target_symbol + symbol_offset + 1) as u32,
r_type,
r_addend: reloc.addend,
});
}
}
writer.write_null_section_header();
for (section, out_section) in obj.sections.iter().zip(&out_sections) {
writer.write_section_header(&SectionHeader {
name: Some(out_section.name),
sh_type: match section.kind {
ObjSectionKind::Code | ObjSectionKind::Data | ObjSectionKind::ReadOnlyData => {
SHT_PROGBITS
}
ObjSectionKind::Bss => SHT_NOBITS,
},
sh_flags: match section.kind {
ObjSectionKind::Code => SHF_ALLOC | SHF_EXECINSTR,
ObjSectionKind::Data | ObjSectionKind::Bss => SHF_ALLOC | SHF_WRITE,
ObjSectionKind::ReadOnlyData => SHF_ALLOC,
} as u64,
sh_addr: section.address,
sh_offset: out_section.offset as u64,
sh_size: section.size,
sh_link: 0,
sh_info: 0,
sh_addralign: section.align,
sh_entsize: 0, // TODO?
});
}
for (section, out_section) in obj.sections.iter().zip(&out_sections) {
let Some(rela_name) = out_section.rela_name else {
continue;
};
writer.write_relocation_section_header(
rela_name,
out_section.index,
symtab,
out_section.rela_offset,
section.relocations.len(),
true,
);
}
writer.write_symtab_section_header(num_local);
writer.write_shstrtab_section_header();
writer.write_strtab_section_header();
debug_assert_eq!(writer.reserved_len(), writer.len());
Ok(out_data)
}
fn to_obj_symbol(
@@ -347,18 +627,13 @@ fn to_obj_symbol(
None => None,
};
let name = match symbol.kind() {
SymbolKind::Section => {
if let Some(section) = &section {
section.name()?
} else {
return Err(Error::msg("Section symbol without section"));
}
}
SymbolKind::Section => match &section {
Some(section) => section.name()?,
_ => bail!("Section symbol without section"),
},
_ => symbol.name()?,
};
if name.is_empty() {
return Err(Error::msg("Empty symbol name"));
}
ensure!(!name.is_empty(), "Empty symbol name");
let mut flags = ObjSymbolFlagSet(ObjSymbolFlags::none());
if symbol.is_global() {
flags = ObjSymbolFlagSet(flags.0 | ObjSymbolFlags::Global);
@@ -389,7 +664,7 @@ fn to_obj_symbol(
SymbolKind::Data => ObjSymbolKind::Object,
SymbolKind::Unknown => ObjSymbolKind::Unknown,
SymbolKind::Section => ObjSymbolKind::Section,
_ => return Err(Error::msg(format!("Unsupported symbol kind: {:?}", symbol.kind()))),
_ => bail!("Unsupported symbol kind: {:?}", symbol.kind()),
},
})
}
@@ -404,26 +679,24 @@ fn to_obj_reloc(
let reloc_kind = match reloc.kind() {
RelocationKind::Absolute => ObjRelocKind::Absolute,
RelocationKind::Elf(kind) => match kind {
R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo,
R_PPC_ADDR16_HI => ObjRelocKind::PpcAddr16Hi,
R_PPC_ADDR16_HA => ObjRelocKind::PpcAddr16Ha,
R_PPC_REL24 => ObjRelocKind::PpcRel24,
R_PPC_REL14 => ObjRelocKind::PpcRel14,
R_PPC_EMB_SDA21 => ObjRelocKind::PpcEmbSda21,
_ => return Err(Error::msg(format!("Unhandled PPC relocation type: {kind}"))),
elf::R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo,
elf::R_PPC_ADDR16_HI => ObjRelocKind::PpcAddr16Hi,
elf::R_PPC_ADDR16_HA => ObjRelocKind::PpcAddr16Ha,
elf::R_PPC_REL24 => ObjRelocKind::PpcRel24,
elf::R_PPC_REL14 => ObjRelocKind::PpcRel14,
elf::R_PPC_EMB_SDA21 => ObjRelocKind::PpcEmbSda21,
_ => bail!("Unhandled PPC relocation type: {kind}"),
},
_ => return Err(Error::msg(format!("Unhandled relocation type: {:?}", reloc.kind()))),
_ => bail!("Unhandled relocation type: {:?}", reloc.kind()),
};
let symbol = match reloc.target() {
RelocationTarget::Symbol(idx) => {
obj_file.symbol_by_index(idx).context("Failed to locate relocation target symbol")?
}
_ => {
return Err(Error::msg(format!("Unhandled relocation target: {:?}", reloc.target())));
}
_ => bail!("Unhandled relocation target: {:?}", reloc.target()),
};
let target_symbol = symbol_indexes[symbol.index().0]
.ok_or_else(|| Error::msg(format!("Relocation against stripped symbol: {symbol:?}")))?;
.ok_or_else(|| anyhow!("Relocation against stripped symbol: {symbol:?}"))?;
let addend = match symbol.kind() {
SymbolKind::Text | SymbolKind::Data | SymbolKind::Unknown => Ok(reloc.addend()),
SymbolKind::Section => {
@@ -433,23 +706,189 @@ fn to_obj_reloc(
) as i64;
match reloc_kind {
ObjRelocKind::Absolute => addend,
_ => {
return Err(Error::msg(format!(
"Unsupported implicit relocation type {reloc_kind:?}"
)))
}
_ => bail!("Unsupported implicit relocation type {reloc_kind:?}"),
}
} else {
reloc.addend()
};
if addend < 0 {
return Err(Error::msg(format!("Negative addend in section reloc: {addend}")));
}
ensure!(addend >= 0, "Negative addend in section reloc: {addend}");
Ok(addend)
}
_ => Err(Error::msg(format!("Unhandled relocation symbol type {:?}", symbol.kind()))),
_ => Err(anyhow!("Unhandled relocation symbol type {:?}", symbol.kind())),
}?;
let address = address & !3; // TODO hack: round down for instruction
let reloc_data = ObjReloc { kind: reloc_kind, address, target_symbol, addend };
Ok(reloc_data)
}
/// Applies the relocations of `debug_section` to a private copy of its data,
/// parses that copy with `read_debug_section`, and walks the top-level chain
/// of `CompileUnit` tags, logging a C-like declaration for every variable that
/// carries a type attribute.
///
/// Subroutines, typedefs and user-defined types are also run through their
/// respective conversion helpers so that malformed entries surface as errors,
/// but their textual output is currently not logged.
///
/// # Errors
///
/// Fails when a relocation has an unsupported kind or target, when a
/// relocation offset does not leave four bytes inside the section data, when
/// the section cannot be parsed, or when a tag lacks an attribute this
/// function requires (for example a nameless compile unit or variable).
fn load_debug_section(obj_file: &object::File<'_>, debug_section: Section) -> Result<()> {
    let mut data = debug_section.uncompressed_data()?.into_owned();

    // Apply relocations to data
    for (addr, reloc) in debug_section.relocations() {
        match reloc.kind() {
            RelocationKind::Absolute | RelocationKind::Elf(elf::R_PPC_UADDR32) => {
                let target = match reloc.target() {
                    RelocationTarget::Symbol(symbol_idx) => {
                        let symbol = obj_file.symbol_by_index(symbol_idx)?;
                        (symbol.address() as i64 + reloc.addend()) as u32
                    }
                    // RelocationTarget::Section(section_idx) => {
                    //     let section = obj_file.section_by_index(section_idx)?;
                    //     (section.address() as i64 + reloc.addend()) as u32
                    // }
                    // RelocationTarget::Absolute => reloc.addend() as u32,
                    _ => bail!("Invalid .debug relocation target"),
                };
                // A relocation offset taken from the file may point outside the
                // section; report that instead of panicking on the slice index.
                let start = addr as usize;
                let patch = match start.checked_add(4) {
                    Some(end) => data.get_mut(start..end),
                    None => None,
                };
                let patch = patch.ok_or_else(|| {
                    anyhow!("Relocation offset {:#X} is outside of the .debug section data", addr)
                })?;
                patch.copy_from_slice(&target.to_be_bytes());
            }
            RelocationKind::Elf(elf::R_PPC_NONE) => {}
            _ => bail!("Unhandled .debug relocation type {:?}", reloc.kind()),
        }
    }

    let mut reader = Cursor::new(&*data);
    let tags = read_debug_section(&mut reader)?;

    // let mut w = BufWriter::new(File::create("dwarfdump2.txt")?);
    // for (&addr, tag) in &tags {
    //     writeln!(w, "{}: {:?}", addr, tag)?;
    // }
    // w.flush()?;

    // Names of the compile units seen so far, in encounter order.
    let mut units = Vec::<String>::new();
    if let Some((_, mut tag)) = tags.first_key_value() {
        loop {
            match tag.kind {
                TagKind::CompileUnit => {
                    let unit = tag
                        .string_attribute(AttributeKind::Name)
                        .ok_or_else(|| anyhow!("CompileUnit without name {:?}", tag))?;
                    if units.contains(unit) {
                        log::warn!("Duplicate unit '{}'", unit);
                    } else {
                        units.push(unit.clone());
                    }

                    let children = tag.children(&tags);
                    // Maps a user-defined type key to the keys of the typedef
                    // tags that reference it.
                    let mut typedefs = BTreeMap::<u32, Vec<u32>>::new();
                    for child in children {
                        match child.kind {
                            TagKind::GlobalSubroutine | TagKind::Subroutine => {
                                let _is_prototyped =
                                    child.string_attribute(AttributeKind::Prototyped).is_some();
                                if let (Some(_hi), Some(_lo)) = (
                                    child.address_attribute(AttributeKind::HighPc),
                                    child.address_attribute(AttributeKind::LowPc),
                                ) {}
                                let name = child
                                    .string_attribute(AttributeKind::Name)
                                    .ok_or_else(|| anyhow!("Subroutine without name"))?;
                                let udt = ud_type(&tags, child)?;
                                let ts = ud_type_string(&tags, &typedefs, &udt)?;
                                // log::info!("{} {}{};", ts.prefix, name, ts.suffix);
                            }
                            TagKind::Typedef => {
                                let name = child
                                    .string_attribute(AttributeKind::Name)
                                    .ok_or_else(|| anyhow!("Typedef without name"))?;
                                let attr = child
                                    .type_attribute()
                                    .ok_or_else(|| anyhow!("Typedef without type attribute"))?;
                                let t = process_type(attr)?;
                                let ts = type_string(&tags, &typedefs, &t)?;
                                // log::info!("typedef {} {}{};", ts.prefix, name, ts.suffix);

                                // TODO fundamental typedefs?
                                if let Some(ud_type_ref) =
                                    child.reference_attribute(AttributeKind::UserDefType)
                                {
                                    typedefs.entry(ud_type_ref).or_default().push(child.key);
                                }
                            }
                            TagKind::GlobalVariable | TagKind::LocalVariable => {
                                let name = child
                                    .string_attribute(AttributeKind::Name)
                                    .ok_or_else(|| anyhow!("Variable without name"))?;
                                let address = if let Some(location) =
                                    child.block_attribute(AttributeKind::Location)
                                {
                                    Some(process_address(location)?)
                                } else {
                                    None
                                };
                                if let Some(type_attr) = child.type_attribute() {
                                    let var_type = process_type(type_attr)?;
                                    // log::info!("{:?}", var_type);
                                    if let TypeKind::UserDefined(key) = var_type.kind {
                                        let ud_tag = tags
                                            .get(&key)
                                            .ok_or_else(|| anyhow!("Invalid UD type ref"))?;
                                        let ud_type = ud_type(&tags, ud_tag)?;
                                        // log::info!("{:?}", ud_type);
                                    }
                                    let ts = type_string(&tags, &typedefs, &var_type)?;
                                    let st = if child.kind == TagKind::LocalVariable {
                                        "static "
                                    } else {
                                        ""
                                    };
                                    let address_str = match address {
                                        Some(addr) => format!(" : {:#010X}", addr),
                                        None => String::new(),
                                    };
                                    let size = var_type.size(&tags)?;
                                    log::info!(
                                        "{}{} {}{}{}; // size: {:#X}",
                                        st,
                                        ts.prefix,
                                        name,
                                        ts.suffix,
                                        address_str,
                                        size,
                                    );
                                }
                            }
                            TagKind::StructureType
                            | TagKind::ArrayType
                            | TagKind::EnumerationType
                            | TagKind::UnionType
                            | TagKind::ClassType
                            | TagKind::SubroutineType => {
                                let udt = ud_type(&tags, child)?;
                                if child.string_attribute(AttributeKind::Name).is_some() {
                                    // log::info!("{}", ud_type_def(&tags, &typedefs, &udt)?);
                                }
                            }
                            _ => {
                                log::warn!("Unhandled CompileUnit child {:?}", child.kind);
                            }
                        }
                    }
                    // println!("Children: {:?}", children.iter().map(|c| c.kind).collect::<Vec<TagKind>>());
                }
                _ => {
                    log::warn!("Expected CompileUnit, got {:?}", tag.kind);
                    break;
                }
            }
            // Advance along the sibling chain; stop once it runs out.
            if let Some(next) = tag.next_sibling(&tags) {
                tag = next;
            } else {
                break;
            }
        }
    }
    // log::info!("Link order:");
    // for x in units {
    //     log::info!("{}", x);
    // }
    Ok(())
}

221
src/util/executor.rs Normal file
View File

@@ -0,0 +1,221 @@
use std::{collections::BTreeSet, num::NonZeroU32};
use anyhow::{Context, Result};
use fixedbitset::FixedBitSet;
use ppc750cl::Ins;
use crate::util::{
obj::{ObjInfo, ObjSection, ObjSectionKind},
vm::{StepResult, VM},
};
/// Decodes the instruction word stored at `address` inside `section`.
///
/// Returns `None` when `read_u32` cannot produce a full word for that address.
pub fn disassemble(section: &ObjSection, address: u32) -> Option<Ins> {
    let code = read_u32(&section.data, address, section.address as u32)?;
    Some(Ins::new(code, address))
}
/// Reads the big-endian `u32` located at `address`, where `data` holds the
/// bytes of a section that starts at `section_address`.
///
/// Returns `None` when `address` lies below `section_address` or when fewer
/// than four bytes of `data` are available at the resulting offset.
pub fn read_u32(data: &[u8], address: u32, section_address: u32) -> Option<u32> {
    // An address below the section start must yield `None`, not an underflow.
    let offset = address.checked_sub(section_address)? as usize;
    let end = offset.checked_add(4)?;
    let bytes: [u8; 4] = data.get(offset..end)?.try_into().ok()?;
    Some(u32::from_be_bytes(bytes))
}
/// Space-efficient implementation for tracking visited code addresses.
///
/// Keeps one bit set per section, looked up through `ObjSection::index`. In a
/// code section every bit stands for one 4-byte word; every other section gets
/// an empty bit set.
struct VisitedAddresses {
    inner: Vec<FixedBitSet>,
}

impl VisitedAddresses {
    /// Allocates the bit sets for all sections of `obj`, with no address
    /// marked as visited.
    pub fn new(obj: &ObjInfo) -> Self {
        let inner = obj
            .sections
            .iter()
            .map(|section| {
                if section.kind == ObjSectionKind::Code {
                    FixedBitSet::with_capacity((section.size / 4) as usize)
                } else {
                    // Empty
                    FixedBitSet::new()
                }
            })
            .collect();
        Self { inner }
    }

    /// Returns whether `address` was marked as visited in `section`.
    ///
    /// Addresses outside of the section's tracked range are reported as not
    /// visited.
    pub fn contains(&self, section: &ObjSection, address: u32) -> bool {
        match Self::bit_for(section, address) {
            Some(bit) => self.inner[section.index].contains(bit),
            None => false,
        }
    }

    /// Marks `address` as visited in `section`.
    ///
    /// Addresses outside of the section's tracked range are ignored:
    /// `FixedBitSet::insert` panics for a bit beyond its capacity, and such an
    /// address (for example the one directly after the section) cannot be
    /// revisited through this set anyway.
    pub fn insert(&mut self, section: &ObjSection, address: u32) {
        let Some(bit) = Self::bit_for(section, address) else {
            return;
        };
        let bits = &mut self.inner[section.index];
        if bit < bits.len() {
            bits.insert(bit);
        }
    }

    /// Index of the bit that represents `address` within `section`, or `None`
    /// when `address` lies below the start of the section.
    #[inline]
    fn bit_for(section: &ObjSection, address: u32) -> Option<usize> {
        let offset = (address as u64).checked_sub(section.address)?;
        Some((offset / 4) as usize)
    }
}
/// A pending unit of work for the `Executor`: a VM snapshot paired with the
/// address at which execution should continue.
pub struct VMState {
/// VM state that is stepped starting at `address`.
pub vm: Box<VM>,
/// Address of the next instruction to execute.
pub address: u32,
}
/// Helper for branched VM execution, only visiting addresses once.
///
/// Pending states are kept on a stack and popped by `run`; `push` adds new
/// ones.
pub struct Executor {
// States waiting to be executed; `run` pops from the end.
vm_stack: Vec<VMState>,
// Addresses that have already been stepped over.
visited: VisitedAddresses,
}
/// Arguments handed to the callback of `Executor::run` after each executed
/// instruction.
pub struct ExecCbData<'a> {
/// The executor driving the run, e.g. for pushing further states.
pub executor: &'a mut Executor,
/// The VM that just executed `ins`.
pub vm: &'a mut VM,
/// Outcome of stepping the VM over `ins`.
pub result: StepResult,
/// Section from which `ins` was decoded.
pub section: &'a ObjSection,
/// The instruction that was just executed.
pub ins: &'a Ins,
/// Address at which the current block began.
pub block_start: u32,
}
/// Decision returned by the callback of `Executor::run`.
pub enum ExecCbResult<T = ()> {
/// Proceed with the instruction 4 bytes after the current one.
Continue,
/// Proceed at the given address, which becomes the start of a new block;
/// the current state is abandoned if that address was already visited.
Jump(u32),
/// Abandon the current state and move on to the next pending one.
EndBlock,
/// Stop the whole run and hand the value back to the caller of `run`.
End(T),
}
impl Executor {
    /// Creates an executor for `obj` with no pending states and no visited
    /// addresses.
    pub fn new(obj: &ObjInfo) -> Self {
        Self { vm_stack: vec![], visited: VisitedAddresses::new(obj) }
    }

    /// Executes pending states until the stack is empty or the callback ends
    /// the run.
    ///
    /// Each popped state is stepped instruction by instruction. After every
    /// step `cb` decides how to proceed (see `ExecCbResult`). States whose
    /// address cannot be resolved to a section, is not inside a code section,
    /// or was already visited are dropped.
    ///
    /// Returns `Ok(Some(value))` when the callback answers `End(value)`, and
    /// `Ok(None)` when the stack runs empty or an instruction cannot be read.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `cb`.
    pub fn run<Cb, R>(&mut self, obj: &ObjInfo, mut cb: Cb) -> Result<Option<R>>
    where Cb: FnMut(ExecCbData) -> Result<ExecCbResult<R>> {
        while let Some(mut state) = self.vm_stack.pop() {
            let section = match obj.section_at(state.address) {
                Ok(section) => section,
                Err(e) => {
                    log::error!("{}", e);
                    // return Ok(None);
                    continue;
                }
            };
            if section.kind != ObjSectionKind::Code {
                log::warn!("Attempted to visit non-code address {:#010X}", state.address);
                continue;
            }

            // Already visited block
            if self.visited.contains(section, state.address) {
                continue;
            }

            let mut block_start = state.address;
            loop {
                // Decode first: only an address that holds a readable
                // instruction may be marked as visited. Running off the end of
                // the section then ends the run instead of indexing the
                // visited set out of bounds.
                let ins = match disassemble(section, state.address) {
                    Some(ins) => ins,
                    None => return Ok(None),
                };
                self.visited.insert(section, state.address);

                let result = state.vm.step(&ins);
                match cb(ExecCbData {
                    executor: self,
                    vm: &mut state.vm,
                    result,
                    section,
                    ins: &ins,
                    block_start,
                })? {
                    ExecCbResult::Continue => {
                        state.address += 4;
                    }
                    ExecCbResult::Jump(addr) => {
                        let target = addr as u64;
                        let in_section = target >= section.address
                            && target - section.address < section.size;
                        if !in_section {
                            // The target belongs to another section (or to
                            // none). Re-queue the state so that the outer loop
                            // resolves the section and repeats the code-kind
                            // and visited checks for it.
                            state.address = addr;
                            self.vm_stack.push(state);
                            break;
                        }
                        if self.visited.contains(section, addr) {
                            break;
                        }
                        block_start = addr;
                        state.address = addr;
                    }
                    ExecCbResult::EndBlock => break,
                    ExecCbResult::End(result) => return Ok(Some(result)),
                }
            }
        }
        Ok(None)
    }

    /// Queues `vm` for execution starting at `address`.
    ///
    /// With `sort` set, the whole stack is re-sorted by address afterwards.
    pub fn push(&mut self, address: u32, vm: Box<VM>, sort: bool) {
        self.vm_stack.push(VMState { address, vm });
        if sort {
            // Sort lowest to highest, so we always go highest address first
            self.vm_stack.sort_by_key(|state| state.address);
        }
    }

    /// Returns whether `address` inside `section` has already been executed.
    pub fn visited(&self, section: &ObjSection, address: u32) -> bool {
        self.visited.contains(section, address)
    }
}
/// Returns whether `addr` resolves to a section of `obj` that is not BSS.
fn is_valid_jump_table_addr(obj: &ObjInfo, addr: u32) -> bool {
    match obj.section_at(addr) {
        Ok(section) => section.kind != ObjSectionKind::Bss,
        Err(_) => false,
    }
}
fn get_jump_table_entries(
obj: &ObjInfo,
addr: u32,
size: Option<NonZeroU32>,
from: u32,
function_start: u32,
function_end: u32,
) -> Result<(Vec<u32>, u32)> {
let section = obj.section_at(addr).with_context(|| {
format!("Failed to get jump table entries @ {:#010X} size {:?}", addr, size)
})?;
let offset = (addr as u64 - section.address) as usize;
if let Some(size) = size.map(|n| n.get()) {
log::debug!(
"Located jump table @ {:#010X} with entry count {} (from {:#010X})",
addr,
size / 4,
from
);
let jt_data = &section.data[offset..offset + size as usize];
let entries =
jt_data.chunks_exact(4).map(|c| u32::from_be_bytes(c.try_into().unwrap())).collect();
Ok((entries, size))
} else {
let mut entries = Vec::new();
let mut cur_addr = addr;
while let Some(value) = read_u32(&section.data, cur_addr, section.address as u32) {
if value < function_start || value >= function_end {
break;
}
entries.push(value);
cur_addr += 4;
}
let size = cur_addr - addr;
log::debug!(
"Guessed jump table @ {:#010X} with entry count {} (from {:#010X})",
addr,
size / 4,
from
);
Ok((entries, size))
}
}
/// Collects the distinct, non-zero entries of the jump table at `addr`.
///
/// An address rejected by `is_valid_jump_table_addr` yields an empty set and a
/// size of 0. Otherwise the arguments are forwarded to
/// `get_jump_table_entries`, whose size result and errors are passed through.
pub fn uniq_jump_table_entries(
    obj: &ObjInfo,
    addr: u32,
    size: Option<NonZeroU32>,
    from: u32,
    function_start: u32,
    function_end: u32,
) -> Result<(BTreeSet<u32>, u32)> {
    if !is_valid_jump_table_addr(obj, addr) {
        return Ok((BTreeSet::new(), 0));
    }
    let (entries, size) =
        get_jump_table_entries(obj, addr, size, from, function_start, function_end)?;
    let unique: BTreeSet<u32> = entries.into_iter().filter(|&entry| entry != 0).collect();
    Ok((unique, size))
}

View File

@@ -2,10 +2,9 @@ use std::{
collections::{btree_map, hash_map, BTreeMap, HashMap},
hash::Hash,
io::BufRead,
ops::Range,
};
use anyhow::{Context, Error, Result};
use anyhow::{bail, ensure, Error, Result};
use cwdemangle::{demangle, DemangleOptions};
use lazy_static::lazy_static;
use multimap::MultiMap;
@@ -33,7 +32,7 @@ pub struct SymbolEntry {
pub demangled: Option<String>,
pub kind: SymbolKind,
pub visibility: SymbolVisibility,
pub unit: String,
pub unit: Option<String>,
pub address: u32,
pub size: u32,
pub section: String,
@@ -42,7 +41,7 @@ pub struct SymbolEntry {
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct SymbolRef {
pub name: String,
pub unit: String,
pub unit: Option<String>,
}
#[derive(Default)]
@@ -61,66 +60,66 @@ fn resolve_section_order(
) -> Result<SectionOrder> {
let mut ordering = SectionOrder::default();
let mut last_unit = String::new();
let mut last_section = String::new();
let mut section_unit_idx = 0usize;
for symbol_ref in address_to_symbol.values() {
if let Some(symbol) = symbol_entries.get_mut(symbol_ref) {
if last_unit != symbol.unit {
if last_section != symbol.section {
ordering.unit_order.push((symbol.section.clone(), vec![]));
section_unit_idx = ordering.unit_order.len() - 1;
last_section = symbol.section.clone();
}
let unit_order = &mut ordering.unit_order[section_unit_idx];
if unit_order.1.contains(&symbol.unit) {
// With -common on, .bss is split into two parts. The TU order repeats
// at the end with all globally-deduplicated BSS symbols. Once we detect
// a duplicate inside of .bss, we create a new section and start again.
// TODO the first entry in .comm *could* be a TU without regular .bss
if symbol.section == ".bss" {
log::debug!(".comm section detected, duplicate {}", symbol.unit);
ordering.unit_order.push((".comm".to_string(), vec![symbol.unit.clone()]));
section_unit_idx = ordering.unit_order.len() - 1;
} else {
return Err(Error::msg(format!(
"TU order conflict: {} exists multiple times in {}.",
symbol.unit, symbol.section,
)));
}
} else {
unit_order.1.push(symbol.unit.clone());
}
last_unit = symbol.unit.clone();
}
// For ASM-generated objects, notype,local symbols in .text
// are usually local jump labels, and should be ignored.
if is_code_section(&symbol.section)
&& symbol.size == 0
&& symbol.kind == SymbolKind::NoType
&& symbol.visibility == SymbolVisibility::Local
{
// Being named something other than lbl_* could indicate
// that it's actually a local function, but let's just
// make the user resolve that if necessary.
if !symbol.name.starts_with("lbl_") {
log::warn!("Skipping local text symbol {}", symbol.name);
}
continue;
}
// Guess the symbol type if necessary.
if symbol.kind == SymbolKind::NoType {
if is_code_section(&symbol.section) {
symbol.kind = SymbolKind::Function;
} else {
symbol.kind = SymbolKind::Object;
}
}
ordering.symbol_order.push(symbol_ref.clone());
} else {
return Err(Error::msg(format!("Symbol has address but no entry: {symbol_ref:?}")));
}
}
// let mut last_unit = String::new();
// let mut last_section = String::new();
// let mut section_unit_idx = 0usize;
// for symbol_ref in address_to_symbol.values() {
// if let Some(symbol) = symbol_entries.get_mut(symbol_ref) {
// if last_unit != symbol.unit {
// if last_section != symbol.section {
// ordering.unit_order.push((symbol.section.clone(), vec![]));
// section_unit_idx = ordering.unit_order.len() - 1;
// last_section = symbol.section.clone();
// }
// let unit_order = &mut ordering.unit_order[section_unit_idx];
// if unit_order.1.contains(&symbol.unit) {
// // With -common on, .bss is split into two parts. The TU order repeats
// // at the end with all globally-deduplicated BSS symbols. Once we detect
// // a duplicate inside of .bss, we create a new section and start again.
// // TODO the first entry in .comm *could* be a TU without regular .bss
// if symbol.section == ".bss" {
// log::debug!(".comm section detected, duplicate {}", symbol.unit);
// ordering.unit_order.push((".comm".to_string(), vec![symbol.unit.clone()]));
// section_unit_idx = ordering.unit_order.len() - 1;
// } else {
// bail!(
// "TU order conflict: {} exists multiple times in {}.",
// symbol.unit, symbol.section,
// );
// }
// } else {
// unit_order.1.push(symbol.unit.clone());
// }
// last_unit = symbol.unit.clone();
// }
// // For ASM-generated objects, notype,local symbols in .text
// // are usually local jump labels, and should be ignored.
// if is_code_section(&symbol.section)
// && symbol.size == 0
// && symbol.kind == SymbolKind::NoType
// && symbol.visibility == SymbolVisibility::Local
// {
// // Being named something other than lbl_* could indicate
// // that it's actually a local function, but let's just
// // make the user resolve that if necessary.
// if !symbol.name.starts_with("lbl_") {
// log::warn!("Skipping local text symbol {}", symbol.name);
// }
// continue;
// }
// // Guess the symbol type if necessary.
// if symbol.kind == SymbolKind::NoType {
// if is_code_section(&symbol.section) {
// symbol.kind = SymbolKind::Function;
// } else {
// symbol.kind = SymbolKind::Object;
// }
// }
// ordering.symbol_order.push(symbol_ref.clone());
// } else {
// bail!("Symbol has address but no entry: {symbol_ref:?}");
// }
// }
for iter in ordering.symbol_order.windows(2) {
let next_address = symbol_entries.get(&iter[1]).unwrap().address;
@@ -145,7 +144,7 @@ pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Resul
let mut t_sort = TopologicalSort::<String>::new();
for (section, order) in section_unit_order {
let mut order: &[String] = order;
if (section == ".ctors" || section == ".dtors") && order.len() > 1 {
if matches!(section.as_str(), ".ctors" | ".dtors") && order.len() > 1 {
// __init_cpp_exceptions.o has symbols that get ordered to the beginning of
// .ctors and .dtors, so our topological sort would fail if we added them.
// Always skip the first TU of .ctors and .dtors.
@@ -159,15 +158,11 @@ pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Resul
global_unit_order.push(unit);
}
// An incomplete topological sort indicates that a cyclic dependency was encountered.
if !t_sort.is_empty() {
return Err(Error::msg("Cyclic dependency encountered!"));
}
ensure!(t_sort.is_empty(), "Cyclic dependency encountered!");
// Sanity check, did we get all TUs in the final order?
for (_, order) in section_unit_order {
for unit in order {
if !global_unit_order.contains(unit) {
return Err(Error::msg(format!("Failed to find an order for {unit}")));
}
ensure!(global_unit_order.contains(unit), "Failed to find an order for {unit}");
}
}
Ok(global_unit_order)
@@ -204,8 +199,8 @@ pub struct MapEntries {
pub unit_entries: MultiMap<String, SymbolRef>,
pub entry_references: MultiMap<SymbolRef, SymbolRef>,
pub entry_referenced_from: MultiMap<SymbolRef, SymbolRef>,
pub address_to_symbol: BTreeMap<u32, SymbolRef>,
pub unit_section_ranges: HashMap<String, HashMap<String, Range<u32>>>,
// pub address_to_symbol: BTreeMap<u32, SymbolRef>,
// pub unit_section_ranges: HashMap<String, HashMap<String, Range<u32>>>,
pub symbol_order: Vec<SymbolRef>,
pub unit_order: Vec<(String, Vec<String>)>,
}
@@ -219,11 +214,13 @@ struct LinkMapState {
#[derive(Default)]
struct SectionLayoutState {
current_section: String,
section_units: Vec<String>,
unit_override: Option<String>,
relative_offset: u32,
last_unit_start: u32,
last_section_end: u32,
current_unit: Option<String>,
units: Vec<(u32, String)>,
symbols: BTreeMap<u32, Vec<SymbolEntry>>,
// unit_override: Option<String>,
// relative_offset: u32,
// last_unit_start: u32,
// last_section_end: u32,
has_link_map: bool,
}
@@ -262,9 +259,7 @@ impl StateMachine {
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
bail!("Unexpected line while processing map: '{line}'");
}
}
ProcessMapState::LinkMap(ref mut state) => {
@@ -286,20 +281,18 @@ impl StateMachine {
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
bail!("Unexpected line while processing map: '{line}'");
}
}
ProcessMapState::SectionLayout(ref mut state) => {
if let Some(captures) = SECTION_LAYOUT_SYMBOL.captures(&line) {
StateMachine::section_layout_entry(captures, state, &mut self.entries)?;
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
let last_section_end = state.last_section_end;
// let last_section_end = state.last_section_end;
self.switch_state(ProcessMapState::SectionLayout(SectionLayoutState {
current_section: captures["section"].to_string(),
has_link_map: self.has_link_map,
last_section_end,
// last_section_end,
..Default::default()
}))?;
} else if SECTION_LAYOUT_HEADER.is_match(&line) {
@@ -309,9 +302,7 @@ impl StateMachine {
} else if LINKER_SYMBOLS_HEADER.is_match(&line) {
self.switch_state(ProcessMapState::LinkerGeneratedSymbols)?;
} else {
return Err(Error::msg(format!(
"Unexpected line while processing map: '{line}'"
)));
bail!("Unexpected line while processing map: '{line}'");
}
}
ProcessMapState::MemoryMap => {
@@ -351,21 +342,19 @@ impl StateMachine {
state: &mut LinkMapState,
entries: &mut MapEntries,
) -> Result<()> {
if captures["sym"].starts_with('.') {
state.last_name.clear();
return Ok(());
}
// if captures["sym"].starts_with('.') {
// state.last_name.clear();
// return Ok(());
// }
let is_duplicate = &captures["sym"] == ">>>";
let unit = captures["tu"].trim().to_string();
let name = if is_duplicate {
if state.last_name.is_empty() {
return Err(Error::msg("Last name empty?"));
}
ensure!(!state.last_name.is_empty(), "Last name empty?");
state.last_name.clone()
} else {
captures["sym"].to_string()
};
let symbol_ref = SymbolRef { name: name.clone(), unit: unit.clone() };
let symbol_ref = SymbolRef { name: name.clone(), unit: Some(unit.clone()) };
let depth: usize = captures["depth"].parse()?;
if depth > state.symbol_stack.len() {
state.symbol_stack.push(symbol_ref.clone());
@@ -379,19 +368,13 @@ impl StateMachine {
"object" => SymbolKind::Object,
"section" => SymbolKind::Section,
"notype" => SymbolKind::NoType,
_ => {
return Err(Error::msg(format!("Unknown symbol type: {}", &captures["type"],)));
}
kind => bail!("Unknown symbol type: {kind}"),
};
let visibility = match &captures["vis"] {
"global" => SymbolVisibility::Global,
"local" => SymbolVisibility::Local,
"weak" => SymbolVisibility::Weak,
_ => {
return Err(Error::msg(
format!("Unknown symbol visibility: {}", &captures["vis"],),
));
}
visibility => bail!("Unknown symbol visibility: {visibility}"),
};
if !is_duplicate && state.symbol_stack.len() > 1 {
let from = &state.symbol_stack[state.symbol_stack.len() - 2];
@@ -420,13 +403,13 @@ impl StateMachine {
should_insert = false;
}
if should_insert {
let demangled = demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let demangled = demangle(&name, &DemangleOptions::default());
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
name: name.clone(),
demangled,
kind,
visibility,
unit: unit.clone(),
unit: Some(unit.clone()),
address: 0,
size: 0,
section: String::new(),
@@ -443,14 +426,14 @@ impl StateMachine {
entries: &mut MapEntries,
) -> Result<()> {
let name = captures["sym"].to_string();
let demangled = demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let symbol_ref = SymbolRef { name: name.clone(), unit: "[generated]".to_string() };
let demangled = demangle(&name, &DemangleOptions::default());
let symbol_ref = SymbolRef { name: name.clone(), unit: None };
entries.symbols.insert(symbol_ref, SymbolEntry {
name,
demangled,
kind: SymbolKind::NoType,
visibility: SymbolVisibility::Global,
unit: "[generated]".to_string(),
unit: None,
address: 0,
size: 0,
section: String::new(),
@@ -460,18 +443,18 @@ impl StateMachine {
fn end_section_layout(state: &mut SectionLayoutState, entries: &mut MapEntries) -> Result<()> {
// Set last section size
if let Some(last_unit) = state.section_units.last() {
let last_unit = state.unit_override.as_ref().unwrap_or(last_unit);
nested_try_insert(
&mut entries.unit_section_ranges,
last_unit.clone(),
state.current_section.clone(),
state.last_unit_start..state.last_section_end,
)
.with_context(|| {
format!("TU '{}' already exists in section '{}'", last_unit, state.current_section)
})?;
}
// if let Some(last_unit) = state.section_units.last() {
// let last_unit = state.unit_override.as_ref().unwrap_or(last_unit);
// nested_try_insert(
// &mut entries.unit_section_ranges,
// last_unit.clone(),
// state.current_section.clone(),
// state.last_unit_start..state.last_section_end,
// )
// .with_context(|| {
// format!("TU '{}' already exists in section '{}'", last_unit, state.current_section)
// })?;
// }
Ok(())
}
@@ -483,165 +466,58 @@ impl StateMachine {
if captures["rom_addr"].trim() == "UNUSED" {
return Ok(());
}
let sym_name = captures["sym"].trim();
let mut tu = captures["tu"].trim().to_string();
let mut address = u32::from_str_radix(captures["addr"].trim(), 16)?;
let mut size = u32::from_str_radix(captures["size"].trim(), 16)?;
// For RELs, the each section starts at address 0. For our purposes
// we'll create "fake" addresses by simply starting at the end of the
// previous section.
if state.section_units.is_empty() {
if address == 0 {
state.relative_offset = state.last_section_end;
} else {
state.relative_offset = 0;
if state.current_unit.as_ref() != Some(&tu) || sym_name == state.current_section {
state.current_unit = Some(tu.clone());
state.units.push((address, tu.clone()));
if sym_name == state.current_section {
return Ok(());
}
}
address += state.relative_offset;
let original_tu = tu.clone();
if state.section_units.last() != Some(&tu) || sym_name == state.current_section {
// Set last section size
if let Some(last_unit) = state.section_units.last() {
let last_unit = state.unit_override.as_ref().unwrap_or(last_unit);
nested_try_insert(
&mut entries.unit_section_ranges,
last_unit.clone(),
state.current_section.clone(),
state.last_unit_start..address,
)
.with_context(|| {
format!(
"TU '{}' already exists in section '{}'",
last_unit, state.current_section
)
})?;
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: Some(tu.clone()) };
let entry = if let Some(existing) = entries.symbols.get(&symbol_ref) {
SymbolEntry {
name: existing.name.clone(),
demangled: existing.demangled.clone(),
kind: existing.kind,
visibility: existing.visibility,
unit: existing.unit.clone(),
address,
size,
section: state.current_section.clone(),
}
state.last_unit_start = address;
// Since the map doesn't contain file paths, it's likely that
// a duplicate TU inside of a section is simply a separate file.
// We can rename it and remap symbols to the new TU name.
// TODO: Find symbols in other sections and rename?
if state.section_units.contains(&tu) {
let new_unit = format!("{}_{}_{:08x}", tu, state.current_section, address);
} else {
let visibility = if state.has_link_map {
log::warn!(
"TU order conflict: {} exists multiple times in {}. Renaming to {}.",
"Symbol not in link map: {} ({}). Type and visibility unknown.",
sym_name,
tu,
state.current_section,
new_unit,
);
state.unit_override = Some(new_unit);
SymbolVisibility::Local
} else {
state.unit_override = None;
SymbolVisibility::Global
};
SymbolEntry {
name: sym_name.to_string(),
demangled: None,
kind: SymbolKind::NoType,
visibility,
unit: Some(tu.clone()),
address,
size,
section: state.current_section.clone(),
}
}
if let Some(unit) = &state.unit_override {
tu = unit.clone();
}
// Section symbol (i.e. ".data") indicates section size for a TU
// ...but we can't rely on it because of UNUSED symbols
if sym_name == state.current_section {
// Skip empty sections
if size != 0 {
state.section_units.push(original_tu);
}
return Ok(());
}
// Otherwise, for ASM-generated objects, the first section symbol in a TU
// has the full size of the section.
if state.section_units.last() != Some(&original_tu) {
if size == 0 {
return Err(Error::msg(format!("No section size for {sym_name} in {tu}")));
}
state.section_units.push(original_tu);
// Clear it, so that we guess the "real" symbol size later.
size = 0;
}
// Ignore ...data.0 and similar
if sym_name.starts_with("...") {
return Ok(());
}
// Increment section end
state.last_section_end = address + size;
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: tu.clone() };
match entries.symbols.entry(symbol_ref.clone()) {
hash_map::Entry::Occupied(entry) => {
// let symbol = if tu != original_tu {
// let old_entry = entry.remove();
// match entries.symbols.entry(SymbolRef {
// name: sym_name.to_string(),
// unit: tu.clone(),
// }) {
// Entry::Occupied(entry) => entry.into_mut(),
// Entry::Vacant(entry) => entry.insert(old_entry),
// }
// } else {
// entry.into_mut()
// };
let symbol = entry.into_mut();
symbol.address = address;
symbol.size = size;
symbol.section = state.current_section.clone();
// Move symbol to renamed TU if necessary
// symbol.unit = tu.clone();
match entries.address_to_symbol.entry(address) {
btree_map::Entry::Vacant(entry) => {
entry.insert(symbol_ref);
}
btree_map::Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
symbol.address,
entry.get().name,
sym_name,
tu
);
}
}
}
hash_map::Entry::Vacant(entry) => {
let visibility = if state.has_link_map {
log::warn!(
"Symbol not in link map: {} ({}). Type and visibility unknown.",
sym_name,
tu,
);
SymbolVisibility::Local
} else {
SymbolVisibility::Global
};
entry.insert(SymbolEntry {
name: sym_name.to_string(),
demangled: None,
kind: SymbolKind::NoType,
visibility,
unit: tu.clone(),
address,
size,
section: state.current_section.clone(),
});
match entries.address_to_symbol.entry(address) {
btree_map::Entry::Vacant(entry) => {
entry.insert(symbol_ref);
}
btree_map::Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
address,
entry.get().name,
sym_name,
tu
);
}
}
};
match state.symbols.entry(address) {
btree_map::Entry::Occupied(e) => e.into_mut().push(entry),
btree_map::Entry::Vacant(e) => {
e.insert(vec![entry]);
}
}
Ok(())
@@ -663,9 +539,9 @@ pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
state.end_state()?;
let mut entries = state.entries;
let section_order = resolve_section_order(&entries.address_to_symbol, &mut entries.symbols)?;
entries.symbol_order = section_order.symbol_order;
entries.unit_order = section_order.unit_order;
// let section_order = resolve_section_order(&entries.address_to_symbol, &mut entries.symbols)?;
// entries.symbol_order = section_order.symbol_order;
// entries.unit_order = section_order.unit_order;
Ok(entries)
}
@@ -685,7 +561,7 @@ where
hash_map::Entry::Vacant(entry) => entry.insert(Default::default()),
};
match map.entry(v2) {
hash_map::Entry::Occupied(_) => return Err(Error::msg("Entry already exists")),
hash_map::Entry::Occupied(_) => bail!("Entry already exists"),
hash_map::Entry::Vacant(entry) => entry.insert(v3),
};
Ok(())

View File

@@ -1,5 +1,15 @@
pub(crate) mod asm;
pub(crate) mod cfa;
pub(crate) mod config;
pub(crate) mod dol;
pub(crate) mod dwarf;
pub(crate) mod elf;
pub(crate) mod executor;
pub(crate) mod map;
pub(crate) mod obj;
pub(crate) mod rel;
pub(crate) mod sigs;
pub(crate) mod slices;
pub(crate) mod split;
pub(crate) mod tracker;
pub(crate) mod vm;

View File

@@ -1,12 +1,22 @@
use std::{
cmp::min,
collections::{btree_map, BTreeMap},
fmt,
hash::{Hash, Hasher},
};
use std::marker::PhantomData;
use anyhow::{Error, Result};
use anyhow::{anyhow, bail, Result};
use flagset::{flags, FlagSet};
use serde::{de, de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
use serde_yaml::Value;
use serde_repr::{Serialize_repr, Deserialize_repr};
use crate::util::rel::RelReloc;
flags! {
#[repr(u8)]
#[derive(Deserialize_repr, Serialize_repr)]
pub enum ObjSymbolFlags: u8 {
Global,
Local,
@@ -15,7 +25,7 @@ flags! {
Hidden,
}
}
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq)]
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct ObjSymbolFlagSet(pub(crate) FlagSet<ObjSymbolFlags>);
#[allow(clippy::derive_hash_xor_eq)]
impl Hash for ObjSymbolFlagSet {
@@ -37,11 +47,14 @@ pub struct ObjSection {
pub data: Vec<u8>,
pub align: u64,
pub index: usize,
/// REL files reference the original ELF section indices
pub elf_index: usize,
pub relocations: Vec<ObjReloc>,
pub original_address: u64,
pub file_offset: u64,
pub section_known: bool,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default, Serialize, Deserialize)]
pub enum ObjSymbolKind {
#[default]
Unknown,
@@ -62,9 +75,9 @@ pub struct ObjSymbol {
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum ObjKind {
/// Fully linked file
/// Fully linked object
Executable,
/// Relocatable file
/// Relocatable object
Relocatable,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
@@ -78,9 +91,11 @@ pub struct ObjInfo {
pub name: String,
pub symbols: Vec<ObjSymbol>,
pub sections: Vec<ObjSection>,
pub entry: u32,
pub entry: u64,
// Linker generated
pub sda2_base: Option<u32>,
pub sda_base: Option<u32>,
pub stack_address: Option<u32>,
pub stack_end: Option<u32>,
pub db_stack_addr: Option<u32>,
@@ -90,8 +105,16 @@ pub struct ObjInfo {
// Extracted
pub splits: BTreeMap<u32, String>,
pub link_order: Vec<String>,
// From extab
pub known_functions: BTreeMap<u32, u32>,
// REL
/// Module ID (0 for main)
pub module_id: u32,
pub unresolved_relocations: Vec<RelReloc>,
}
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum ObjRelocKind {
Absolute,
PpcAddr16Hi,
@@ -123,11 +146,30 @@ impl ObjInfo {
pub fn build_symbol_map(&self, section_idx: usize) -> Result<BTreeMap<u32, Vec<usize>>> {
let mut symbols = BTreeMap::<u32, Vec<usize>>::new();
for (symbol_idx, symbol) in self.symbols_for_section(section_idx) {
let address = symbol.address as u32;
nested_push(&mut symbols, address, symbol_idx);
nested_push(&mut symbols, symbol.address as u32, symbol_idx);
}
Ok(symbols)
}
/// Returns the first section whose `[address, address + size)` range contains `addr`.
///
/// # Errors
/// Fails when no section covers `addr`.
pub fn section_at(&self, addr: u32) -> Result<&ObjSection> {
    let target = addr as u64;
    for section in &self.sections {
        if target >= section.address && target < section.address + section.size {
            return Ok(section);
        }
    }
    Err(anyhow!("Failed to locate section @ {:#010X}", addr))
}
pub fn section_data(&self, start: u32, end: u32) -> Result<(&ObjSection, &[u8])> {
let section = self.section_at(start)?;
let data = if end == 0 {
&section.data[(start as u64 - section.address) as usize..]
} else {
&section.data[(start as u64 - section.address) as usize
..min(section.data.len(), (end as u64 - section.address) as usize)]
};
Ok((section, data))
}
}
impl ObjSection {
@@ -139,9 +181,7 @@ impl ObjSection {
btree_map::Entry::Vacant(e) => {
e.insert(reloc.clone());
}
btree_map::Entry::Occupied(_) => {
return Err(Error::msg(format!("Duplicate relocation @ {address:#010X}")));
}
btree_map::Entry::Occupied(_) => bail!("Duplicate relocation @ {address:#010X}"),
}
}
Ok(relocations)

252
src/util/rel.rs Normal file
View File

@@ -0,0 +1,252 @@
use std::{
fs::File,
io::{BufReader, Read, Seek, SeekFrom},
path::Path,
};
use anyhow::{anyhow, bail, ensure, Result};
use byteorder::{BigEndian, ReadBytesExt};
use object::elf::{
R_PPC_ADDR16, R_PPC_ADDR16_HA, R_PPC_ADDR16_HI, R_PPC_ADDR16_LO, R_PPC_ADDR24, R_PPC_ADDR32,
R_PPC_NONE, R_PPC_REL14, R_PPC_REL24, R_PPC_UADDR32,
};
use crate::util::obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
};
/// Do not relocate anything, but add this entry's offset field to the running address used
/// for the next relocation.
/// Used to reach relocations that are more than 0xffff bytes apart from each other.
pub const R_DOLPHIN_NOP: u32 = 201;
/// Change which section subsequent relocations are applied to.
/// Resets the running address within the section to 0.
pub const R_DOLPHIN_SECTION: u32 = 202;
/// Stop parsing the current relocation list.
pub const R_DOLPHIN_END: u32 = 203;
/// Purpose unknown. Not handled by `process_rel`, which rejects it as an unhandled
/// relocation type.
#[allow(unused)]
pub const R_DOLPHIN_MRKREF: u32 = 204;
pub fn process_rel<P: AsRef<Path>>(path: P) -> Result<ObjInfo> {
let mut reader = BufReader::new(File::open(&path)?);
let module_id = reader.read_u32::<BigEndian>()?;
ensure!(reader.read_u32::<BigEndian>()? == 0, "Expected 'next' to be 0");
ensure!(reader.read_u32::<BigEndian>()? == 0, "Expected 'prev' to be 0");
let num_sections = reader.read_u32::<BigEndian>()?;
let section_info_offset = reader.read_u32::<BigEndian>()?;
let name_offset = reader.read_u32::<BigEndian>()?;
let name_size = reader.read_u32::<BigEndian>()?;
let version = reader.read_u32::<BigEndian>()?;
ensure!(matches!(version, 1..=3), "Unsupported REL version {}", version);
let bss_size = reader.read_u32::<BigEndian>()?;
let rel_offset = reader.read_u32::<BigEndian>()?;
let imp_offset = reader.read_u32::<BigEndian>()?;
let imp_size = reader.read_u32::<BigEndian>()?;
let prolog_section = reader.read_u8()?;
let epilog_section = reader.read_u8()?;
let unresolved_section = reader.read_u8()?;
ensure!(reader.read_u8()? == 0, "Expected 'bssSection' to be 0");
let prolog_offset = reader.read_u32::<BigEndian>()?;
let epilog_offset = reader.read_u32::<BigEndian>()?;
let unresolved_offset = reader.read_u32::<BigEndian>()?;
let (align, bss_align) = if version >= 2 {
let align = reader.read_u32::<BigEndian>()?;
let bss_align = reader.read_u32::<BigEndian>()?;
(Some(align), Some(bss_align))
} else {
(None, None)
};
let fix_size = if version >= 3 { Some(reader.read_u32::<BigEndian>()?) } else { None };
let mut sections = Vec::with_capacity(num_sections as usize);
reader.seek(SeekFrom::Start(section_info_offset as u64))?;
let mut total_bss_size = 0;
for idx in 0..num_sections {
let offset = reader.read_u32::<BigEndian>()?;
let size = reader.read_u32::<BigEndian>()?;
if size == 0 {
continue;
}
let exec = (offset & 1) == 1;
let offset = offset & !3;
let data = if offset == 0 {
vec![]
} else {
let position = reader.stream_position()?;
reader.seek(SeekFrom::Start(offset as u64))?;
let mut data = vec![0u8; size as usize];
reader.read_exact(&mut data)?;
reader.seek(SeekFrom::Start(position))?;
data
};
// println!("Section {} offset {:#X} size {:#X}", idx, offset, size);
let index = sections.len();
sections.push(ObjSection {
name: format!(".section{}", idx),
kind: if offset == 0 {
ObjSectionKind::Bss
} else if exec {
ObjSectionKind::Code
} else {
ObjSectionKind::Data
},
address: 0,
size: size as u64,
data,
align: match offset {
0 => bss_align,
_ => align,
}
.unwrap_or_default() as u64,
index,
elf_index: idx as usize,
relocations: vec![],
original_address: 0,
file_offset: offset as u64,
section_known: false,
});
if offset == 0 {
total_bss_size += size;
}
}
ensure!(
total_bss_size == bss_size,
"Mismatched BSS size: {:#X} != {:#X}",
total_bss_size,
bss_size
);
let mut symbols = Vec::new();
let mut add_symbol = |section_idx: u8, offset: u32, name: &str| -> Result<()> {
if section_idx > 0 {
let section = sections
.iter()
.find(|section| section.elf_index == section_idx as usize)
.ok_or_else(|| anyhow!("Failed to locate {name} section {section_idx}"))?;
log::info!("Adding {name} section {section_idx} offset {offset:#X}");
symbols.push(ObjSymbol {
name: name.to_string(),
demangled_name: None,
address: offset as u64,
section: Some(section.index),
size: 0,
size_known: false,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
kind: ObjSymbolKind::Function,
});
}
Ok(())
};
add_symbol(prolog_section, prolog_offset, "_prolog")?;
add_symbol(epilog_section, epilog_offset, "_epilog")?;
add_symbol(unresolved_section, unresolved_offset, "_unresolved")?;
let mut unresolved_relocations = Vec::new();
let mut imp_idx = 0;
let imp_end = (imp_offset + imp_size) as u64;
reader.seek(SeekFrom::Start(imp_offset as u64))?;
while reader.stream_position()? < imp_end {
let reloc_module_id = reader.read_u32::<BigEndian>()?;
let reloc_offset = reader.read_u32::<BigEndian>()?;
if imp_idx == 0 {
ensure!(
reloc_offset == rel_offset,
"imp index 0 offset mismatch: {:#X} != {:#X}",
reloc_offset,
rel_offset
);
}
imp_idx += 1;
if reloc_module_id == module_id {
if let Some(fix_size) = fix_size {
ensure!(fix_size == reloc_offset, "fix_size mismatch: {:#X} != {:#X}", fix_size, reloc_offset);
}
}
let position = reader.stream_position()?;
reader.seek(SeekFrom::Start(reloc_offset as u64))?;
let mut address = 0u32;
let mut section = u8::MAX;
loop {
let offset = reader.read_u16::<BigEndian>()?;
let type_id = reader.read_u8()? as u32;
let target_section = reader.read_u8()?;
let addend = reader.read_u32::<BigEndian>()?;
let kind = match type_id {
R_PPC_NONE => continue,
R_PPC_ADDR32 | R_PPC_UADDR32 => ObjRelocKind::Absolute,
// R_PPC_ADDR24 => ObjRelocKind::PpcAddr24,
// R_PPC_ADDR16 => ObjRelocKind::PpcAddr16,
R_PPC_ADDR16_LO => ObjRelocKind::PpcAddr16Lo,
R_PPC_ADDR16_HI => ObjRelocKind::PpcAddr16Hi,
R_PPC_ADDR16_HA => ObjRelocKind::PpcAddr16Ha,
// R_PPC_ADDR14 => ObjRelocKind::PpcAddr14,
// R_PPC_ADDR14_BRTAKEN => ObjRelocKind::PpcAddr14BrTaken,
// R_PPC_ADDR14_BRNTAKEN => ObjRelocKind::PpcAddr14BrnTaken,
R_PPC_REL24 => ObjRelocKind::PpcRel24,
R_PPC_REL14 => ObjRelocKind::PpcRel14,
// R_PPC_REL14_BRTAKEN => ObjRelocKind::PpcRel14BrTaken,
// R_PPC_REL14_BRNTAKEN => ObjRelocKind::PpcRel14BrnTaken,
R_DOLPHIN_NOP => {
address += offset as u32;
continue;
}
R_DOLPHIN_SECTION => {
address = 0;
section = target_section;
continue;
}
R_DOLPHIN_END => break,
// R_DOLPHIN_MRKREF => ?
reloc_type => bail!("Unhandled REL relocation type {reloc_type}"),
};
address += offset as u32;
unresolved_relocations.push(RelReloc {
kind,
section,
address,
module_id: reloc_module_id,
target_section,
addend,
});
}
reader.seek(SeekFrom::Start(position))?;
}
Ok(ObjInfo {
module_id,
kind: ObjKind::Relocatable,
architecture: ObjArchitecture::PowerPc,
name: "".to_string(),
symbols,
sections,
entry: 0,
sda2_base: None,
sda_base: None,
stack_address: None,
stack_end: None,
db_stack_addr: None,
arena_lo: None,
arena_hi: None,
splits: Default::default(),
link_order: vec![],
known_functions: Default::default(),
unresolved_relocations,
})
}
/// A relocation read from a REL file's relocation list by `process_rel`, not yet resolved
/// to a target symbol.
#[derive(Debug, Clone)]
pub struct RelReloc {
/// Relocation type, mapped from the raw type ID in the file.
pub kind: ObjRelocKind,
/// Section the relocation is applied to, as set by the most recent `R_DOLPHIN_SECTION`
/// entry (`u8::MAX` if none has been seen yet in the list).
pub section: u8,
/// Running offset within `section`, accumulated from the entries' offset fields.
pub address: u32,
/// Module ID from the import table entry whose relocation list contained this relocation.
pub module_id: u32,
/// Section byte read from the relocation entry.
pub target_section: u8,
/// Addend value read from the relocation entry.
pub addend: u32,
}

411
src/util/sigs.rs Normal file
View File

@@ -0,0 +1,411 @@
use std::{
collections::{btree_map, BTreeMap},
path::Path,
};
use anyhow::{anyhow, bail, ensure, Result};
use base64::{engine::general_purpose::STANDARD, Engine};
use cwdemangle::{demangle, DemangleOptions};
use ppc750cl::Ins;
use serde::{forward_to_deserialize_any, Deserialize, Serialize};
use sha1::{Digest, Sha1};
use crate::util::{
elf::process_elf,
obj::{
ObjInfo, ObjReloc, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind,
},
tracker::{Relocation, Tracker},
};
/// A symbol described by a function signature.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct OutSymbol {
/// Kind assigned to the symbol when the signature is applied.
pub kind: ObjSymbolKind,
/// Symbol name (demangled on application via `demangle`).
pub name: String,
/// Symbol size; `apply_symbol` treats 0 as "size not known".
pub size: u32,
/// Flags assigned to the symbol when the signature is applied.
pub flags: ObjSymbolFlagSet,
/// Name of the section the symbol lives in, if recorded. `apply_symbol` uses it to name
/// and classify a section that is not yet known.
pub section: Option<String>,
}
/// A relocation described by a function signature.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct OutReloc {
/// Offset of the relocation relative to the start of the function.
pub offset: u32,
/// Expected relocation kind at that offset.
pub kind: ObjRelocKind,
/// Index into `FunctionSignature::symbols` of the relocation's target symbol.
pub symbol: usize,
/// Addend; `apply_signature` subtracts it from the tracked address to obtain the
/// target symbol's address.
pub addend: i32,
}
/// A function signature: a masked instruction pattern plus the symbols and relocations
/// expected within the function.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct FunctionSignature {
/// Index into `symbols` of the function's own symbol.
pub symbol: usize,
/// NOTE(review): presumably a digest of the function's code (this file imports `sha1`);
/// it is not read by the matching code in this section — confirm against the generator.
pub hash: String,
/// Base64-encoded pattern. Decoded as consecutive 8-byte chunks, each a big-endian
/// 32-bit expected value followed by a big-endian 32-bit mask (see `check_signature`).
pub signature: String,
/// Symbols referenced by `symbol` and by `relocations`.
pub symbols: Vec<OutSymbol>,
/// Relocations expected inside the function.
pub relocations: Vec<OutReloc>,
}
/// Creates a fixed-size array reference from a slice.
///
/// Expands to an expression of type `&[T; $size]` that borrows
/// `$slice[$offset..$offset + $size]`. `$size` must be a constant expression;
/// `$offset` is evaluated exactly once.
///
/// # Panics
/// Panics if the requested range is out of bounds for `$slice`.
#[macro_export]
macro_rules! array_ref {
    ($slice:expr, $offset:expr, $size:expr) => {{
        #[inline]
        fn to_array<T>(slice: &[T]) -> &[T; $size] {
            // The call site below always passes a slice of exactly `$size` elements,
            // so this checked conversion cannot fail and no `unsafe` cast is needed.
            <&[T; $size] as ::std::convert::TryFrom<&[T]>>::try_from(slice)
                .expect("array_ref: slice length must equal the requested array size")
        }
        let offset: usize = $offset;
        to_array(&$slice[offset..offset + $size])
    }};
}
/// Checks whether the bytes at the start of `data` match the signature's masked pattern.
///
/// The base64-decoded signature is read as 8-byte chunks: a big-endian 32-bit expected
/// value followed by a big-endian 32-bit mask. Each successive 32-bit big-endian word of
/// `data`, ANDed with the mask, must equal the expected value.
///
/// Returns `Ok(false)` on the first mismatch, or when `data` ends before the signature
/// does (previously this case panicked).
///
/// # Errors
/// Fails if `sig.signature` is not valid base64.
pub fn check_signature(mut data: &[u8], sig: &FunctionSignature) -> Result<bool> {
    let sig_data = STANDARD.decode(&sig.signature)?;
    for chunk in sig_data.chunks_exact(8) {
        // Not enough bytes left to compare against: the signature cannot match here.
        if data.len() < 4 {
            return Ok(false);
        }
        let ins = u32::from_be_bytes(*array_ref!(chunk, 0, 4));
        let pat = u32::from_be_bytes(*array_ref!(chunk, 4, 4));
        if (u32::from_be_bytes(*array_ref!(data, 0, 4)) & pat) != ins {
            return Ok(false);
        }
        data = &data[4..];
    }
    Ok(true)
}
pub fn check_signatures(obj: &mut ObjInfo, addr: u32, sig_str: &str) -> Result<bool> {
let signatures: Vec<FunctionSignature> = serde_yaml::from_str(sig_str)?;
let (_, data) = obj.section_data(addr, 0)?;
let mut name = None;
for signature in &signatures {
if name.is_none() {
name = Some(signature.symbols[signature.symbol].name.clone());
}
if check_signature(data, signature)? {
log::debug!("Found {} @ {:#010X}", signature.symbols[signature.symbol].name, addr);
apply_signature(obj, addr, signature)?;
return Ok(true);
}
}
if let Some(name) = name {
log::debug!("Didn't find {} @ {:#010X}", name, addr);
}
Ok(false)
}
/// Applies a signature symbol to `obj` at address `target` and returns its symbol index.
///
/// If the section containing `target` is not yet known and the signature symbol names a
/// section, the section is renamed and classified accordingly. An existing symbol at the
/// same address with the same kind is overwritten (keeping its size if that was already
/// known); otherwise a new symbol is appended. Well-known linker symbol names also update
/// the corresponding `ObjInfo` fields.
///
/// # Errors
/// Fails if the signature symbol names a section that is not recognized.
pub fn apply_symbol(obj: &mut ObjInfo, target: u32, sig_symbol: &OutSymbol) -> Result<usize> {
    let target_section_index = obj.section_at(target).ok().map(|section| section.index);
    if let Some(index) = target_section_index {
        let section = &mut obj.sections[index];
        match &sig_symbol.section {
            Some(section_name) if !section.section_known => {
                section.name = section_name.clone();
                section.kind = match section_name.as_str() {
                    ".init" | ".text" | ".dbgtext" => ObjSectionKind::Code,
                    ".ctors" | ".dtors" | ".rodata" | ".sdata2" | "extab" | "extabindex" => {
                        ObjSectionKind::ReadOnlyData
                    }
                    ".bss" | ".sbss" | ".sbss2" => ObjSectionKind::Bss,
                    ".data" | ".sdata" => ObjSectionKind::Data,
                    name => bail!("Unknown section {name}"),
                };
                section.section_known = true;
            }
            _ => {}
        }
    }

    let address = target as u64;
    let demangled_name = demangle(&sig_symbol.name, &DemangleOptions::default());
    let existing_idx = obj.symbols.iter().position(|symbol| {
        symbol.address == address
            && symbol.kind == sig_symbol.kind
            // HACK to avoid replacing different ABS symbols
            && (symbol.section.is_some() || symbol.name == sig_symbol.name)
    });
    let target_symbol_idx = match existing_idx {
        Some(idx) => {
            // TODO apply to existing
            let existing = &mut obj.symbols[idx];
            log::debug!("Replacing {:?} with {}", existing, sig_symbol.name);
            let size = if existing.size_known { existing.size } else { sig_symbol.size as u64 };
            let size_known = existing.size_known || sig_symbol.size != 0;
            *existing = ObjSymbol {
                name: sig_symbol.name.clone(),
                demangled_name,
                address,
                section: target_section_index,
                size,
                size_known,
                flags: sig_symbol.flags,
                kind: sig_symbol.kind,
            };
            idx
        }
        None => {
            obj.symbols.push(ObjSymbol {
                name: sig_symbol.name.clone(),
                demangled_name,
                address,
                section: target_section_index,
                size: sig_symbol.size as u64,
                size_known: sig_symbol.size != 0,
                flags: sig_symbol.flags,
                kind: sig_symbol.kind,
            });
            obj.symbols.len() - 1
        }
    };

    // Linker-generated symbols are mirrored into dedicated `ObjInfo` fields.
    let linker_slot = match sig_symbol.name.as_str() {
        "_SDA_BASE_" => Some(&mut obj.sda_base),
        "_SDA2_BASE_" => Some(&mut obj.sda2_base),
        "_stack_addr" => Some(&mut obj.stack_address),
        "_stack_end" => Some(&mut obj.stack_end),
        "_db_stack_addr" => Some(&mut obj.db_stack_addr),
        "__ArenaLo" => Some(&mut obj.arena_lo),
        "__ArenaHi" => Some(&mut obj.arena_hi),
        _ => None,
    };
    if let Some(slot) = linker_slot {
        *slot = Some(target);
    }
    Ok(target_symbol_idx)
}
/// Applies a matched signature to the function at `addr`.
///
/// Names the function itself, runs the `Tracker` over it to discover relocations, and for
/// every discovered relocation that the signature also describes, names the target symbol
/// and records an `ObjReloc` on the function's section.
///
/// # Errors
/// Fails if no section contains `addr`, if a symbol cannot be applied, if function
/// processing fails, if a discovered relocation's kind disagrees with the signature, or if
/// a relocation listed in the signature was not discovered by the tracker.
pub fn apply_signature(obj: &mut ObjInfo, addr: u32, signature: &FunctionSignature) -> Result<()> {
let section_index = obj.section_at(addr)?.index;
// The signature's own function symbol.
let in_symbol = &signature.symbols[signature.symbol];
let symbol_idx = apply_symbol(obj, addr, in_symbol)?;
let mut tracker = Tracker::new(obj);
// Tell the tracker where the signature says relocations are, before processing.
for reloc in &signature.relocations {
tracker.known_relocations.insert(addr + reloc.offset);
}
tracker.process_function(obj, &obj.symbols[symbol_idx])?;
for (&reloc_addr, reloc) in &tracker.relocations {
// Only consider relocations inside this function's address range.
if reloc_addr < addr || reloc_addr >= addr + in_symbol.size {
continue;
}
let offset = reloc_addr - addr;
// Relocations the signature does not describe are ignored.
let sig_reloc = match signature.relocations.iter().find(|r| r.offset == offset) {
Some(reloc) => reloc,
None => continue,
};
// The tracked relocation kind must agree with the signature's; the target symbol
// address is the tracked address minus the signature's addend.
let target = match (reloc, sig_reloc.kind) {
(&Relocation::Absolute(addr), ObjRelocKind::Absolute)
| (&Relocation::Hi(addr), ObjRelocKind::PpcAddr16Hi)
| (&Relocation::Ha(addr), ObjRelocKind::PpcAddr16Ha)
| (&Relocation::Lo(addr), ObjRelocKind::PpcAddr16Lo)
| (&Relocation::Rel24(addr), ObjRelocKind::PpcRel24)
| (&Relocation::Rel14(addr), ObjRelocKind::PpcRel14)
| (&Relocation::Sda21(addr), ObjRelocKind::PpcEmbSda21) => {
(addr as i64 - sig_reloc.addend as i64) as u32
}
_ => bail!("Relocation mismatch: {:?} != {:?}", reloc, sig_reloc.kind),
};
let sig_symbol = &signature.symbols[sig_reloc.symbol];
let target_symbol_idx = apply_symbol(obj, target, sig_symbol)?;
let obj_reloc = ObjReloc {
kind: sig_reloc.kind,
address: reloc_addr as u64,
target_symbol: target_symbol_idx,
addend: sig_reloc.addend as i64,
};
// log::info!("Applying relocation {:#010X?}", obj_reloc);
obj.sections[section_index].relocations.push(obj_reloc);
}
// Every relocation the signature lists must have been found by the tracker.
for reloc in &signature.relocations {
let addr = addr + reloc.offset;
if !tracker.relocations.contains_key(&addr) {
let sig_symbol = &signature.symbols[reloc.symbol];
bail!("Missing relocation @ {:#010X}: {:?} -> {:?}", addr, reloc, sig_symbol);
}
}
Ok(())
}
pub fn compare_signature(existing: &mut FunctionSignature, new: &FunctionSignature) -> Result<()> {
ensure!(
existing.symbols.len() == new.symbols.len(),
"Mismatched symbol count: {} != {}\n{:?}\n{:?}",
new.symbols.len(),
existing.symbols.len(),
new.symbols,
existing.symbols,
);
ensure!(
existing.relocations.len() == new.relocations.len(),
"Mismatched relocation count: {} != {}",
new.relocations.len(),
existing.relocations.len()
);
for (idx, (a, b)) in existing.symbols.iter_mut().zip(&new.symbols).enumerate() {
if a != b {
// If mismatched sections, clear
if a.name == b.name
&& a.size == b.size
&& a.flags == b.flags
&& a.kind == b.kind
&& a.section != b.section
{
log::warn!("Clearing section for {} ({:?} != {:?})", a.name, a.section, b.section);
a.section = None;
} else if !a.name.starts_with('@') {
log::error!("Symbol {} mismatch: {:?} != {:?}", idx, a, b);
}
}
}
for (a, b) in existing.relocations.iter().zip(&new.relocations) {
if a != b {
log::error!("Relocation {} mismatch: {:?} != {:?}", a.offset, a, b);
}
}
Ok(())
}
pub fn generate_signature(
path: &Path,
symbol_name: &str,
) -> Result<Option<(Vec<u8>, FunctionSignature)>> {
let mut out_symbols: Vec<OutSymbol> = Vec::new();
let mut out_relocs: Vec<OutReloc> = Vec::new();
let mut symbol_map: BTreeMap<usize, usize> = BTreeMap::new();
let mut obj = process_elf(path)?;
if obj.sda2_base.is_none()
|| obj.sda_base.is_none()
|| obj.stack_address.is_none()
|| obj.stack_end.is_none()
|| obj.db_stack_addr.is_none()
// || obj.arena_hi.is_none()
// || obj.arena_lo.is_none()
{
log::warn!("Failed to locate all abs symbols {:#010X?} {:#010X?} {:#010X?} {:#010X?} {:#010X?} {:#010X?} {:#010X?}", obj.sda2_base, obj.sda_base, obj.stack_address, obj.stack_end, obj.db_stack_addr, obj.arena_hi, obj.arena_lo);
return Ok(None);
}
let mut tracker = Tracker::new(&obj);
// tracker.ignore_addresses.insert(0x80004000);
for symbol in &obj.symbols {
if symbol.kind != ObjSymbolKind::Function {
continue;
}
if symbol.name != symbol_name && symbol.name != symbol_name.replace("TRK", "TRK_") {
continue;
}
// log::info!("Tracking {}", symbol.name);
tracker.process_function(&obj, symbol)?;
}
tracker.apply(&mut obj, true)?; // true
for symbol in &obj.symbols {
if symbol.kind != ObjSymbolKind::Function {
continue;
}
if symbol.name != symbol_name && symbol.name != symbol_name.replace("TRK", "TRK_") {
continue;
}
let section_idx = symbol.section.unwrap();
let section = &obj.sections[section_idx];
let out_symbol_idx = out_symbols.len();
out_symbols.push(OutSymbol {
kind: symbol.kind,
name: symbol.name.clone(),
size: symbol.size as u32,
flags: symbol.flags,
section: Some(section.name.clone()),
});
// println!(
// "Building signature for {} ({:#010X}-{:#010X})",
// symbol.name,
// symbol.address,
// symbol.address + symbol.size
// );
let relocations = section.build_relocation_map()?;
let mut instructions = section.data[(symbol.address - section.address) as usize
..(symbol.address - section.address + symbol.size) as usize]
.chunks_exact(4)
.map(|c| (u32::from_be_bytes(c.try_into().unwrap()), !0u32))
.collect::<Vec<(u32, u32)>>();
for (idx, (ins, pat)) in instructions.iter_mut().enumerate() {
let addr = (symbol.address as usize + idx * 4) as u32;
if let Some(reloc) = relocations.get(&addr) {
let symbol_idx = match symbol_map.entry(reloc.target_symbol) {
btree_map::Entry::Vacant(e) => {
let target = &obj.symbols[reloc.target_symbol];
let symbol_idx = out_symbols.len();
e.insert(symbol_idx);
out_symbols.push(OutSymbol {
kind: target.kind,
name: target.name.clone(),
size: if target.kind == ObjSymbolKind::Function {
0
} else {
target.size as u32
},
flags: target.flags,
section: target.section.map(|idx| obj.sections[idx].name.clone()),
});
symbol_idx
}
btree_map::Entry::Occupied(e) => *e.get(),
};
match reloc.kind {
ObjRelocKind::Absolute => {
*ins = 0;
*pat = 0;
}
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo => {
*ins = *ins & !0xFFFF;
*pat = !0xFFFF;
}
ObjRelocKind::PpcRel24 => {
*ins = *ins & !0x3FFFFFC;
*pat = !0x3FFFFFC;
}
ObjRelocKind::PpcRel14 => {
*ins = *ins & !0xFFFC;
*pat = !0xFFFC;
}
ObjRelocKind::PpcEmbSda21 => {
*ins = *ins & !0x1FFFFF;
*pat = !0x1FFFFF;
}
}
out_relocs.push(OutReloc {
offset: addr - (symbol.address as u32),
kind: reloc.kind,
symbol: symbol_idx,
addend: reloc.addend as i32,
// instruction: format!("{}", Ins::new(*ins, addr).simplified()),
});
}
// println!("{}", Ins::new(*ins, addr).simplified());
}
// if out_symbols.is_empty() || out_relocs.is_empty() {
// bail!("Failed to locate any symbols or relocs");
// }
// println!("Data: {:#010X?}", instructions);
let mut data = vec![0u8; instructions.len() * 8];
for (idx, &(ins, pat)) in instructions.iter().enumerate() {
data[idx * 8..idx * 8 + 4].copy_from_slice(&ins.to_be_bytes());
data[idx * 8 + 4..idx * 8 + 8].copy_from_slice(&pat.to_be_bytes());
}
// println!(
// "OK: Data (len {}): {:X?} | SYMBOLS: {:?} | RELOCS: {:?}",
// data.len(),
// data,
// out_symbols,
// out_relocs
// );
let encoded = STANDARD.encode(&data);
let mut hasher = Sha1::new();
hasher.update(&data);
let hash = hasher.finalize();
let mut hash_buf = [0u8; 40];
let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf)
.map_err(|e| anyhow!("Failed to encode hash: {e}"))?;
return Ok(Some((data, FunctionSignature {
symbol: 0,
hash: hash_str.to_string(),
signature: encoded,
symbols: out_symbols,
relocations: out_relocs,
})));
}
Ok(None)
}

504
src/util/slices.rs Normal file
View File

@@ -0,0 +1,504 @@
use std::{
collections::{btree_map, BTreeMap, BTreeSet},
ops::Range,
};
use anyhow::{bail, ensure, Context, Result};
use ppc750cl::{Ins, Opcode};
use crate::util::{
executor::{disassemble, uniq_jump_table_entries, ExecCbData, ExecCbResult, Executor, VMState},
obj::{ObjInfo, ObjSection, ObjSectionKind},
vm::{BranchTarget, StepResult, VM},
};
/// Control-flow information gathered while analyzing a single function.
#[derive(Debug, Default, Clone)]
pub struct FunctionSlices {
/// Discovered blocks, keyed by start address and mapping to the block's end
/// address. An end of 0 means the end has not been determined yet.
pub blocks: BTreeMap<u32, u32>,
/// Branch targets recorded per branching instruction address.
pub branches: BTreeMap<u32, Vec<u32>>,
/// Addresses recorded as references to other functions (linked branches,
/// branches to known functions, and branches assumed to be tail calls).
pub function_references: BTreeSet<u32>,
/// Jump tables referenced by this function, keyed by table address and
/// mapping to the table size.
pub jump_table_references: BTreeMap<u32, u32>,
/// Address of the instruction at which a prologue pattern was detected.
pub prologue: Option<u32>,
/// Address of the instruction at which an epilogue pattern was detected.
pub epilogue: Option<u32>,
// Either a block or tail call
pub possible_blocks: BTreeSet<u32>,
/// Set once a conditional `blr` has been seen (or inferred during `finalize`).
pub has_conditional_blr: bool,
/// Set once an `rfi` instruction has been seen.
pub has_rfi: bool,
/// Set by `finalize`; finalizing twice is rejected.
pub finalized: bool,
}
/// Outcome of checking whether a jump target is a tail call.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TailCallResult {
    /// The target belongs to the current function; it is not a tail call.
    Not,
    /// The target is a tail call into another function.
    Is,
    /// Not enough information yet; the target may be a block or a tail call.
    Possible,
}
/// Half-open address range (`start..end`) of a block.
type BlockRange = Range<u32>;
impl FunctionSlices {
/// Returns the recorded end address of the block with the highest start
/// address, or 0 when no blocks exist.
pub fn end(&self) -> u32 {
    self.blocks.values().next_back().copied().unwrap_or(0)
}
/// Returns the lowest block start address, or 0 when no blocks exist.
pub fn start(&self) -> u32 {
    match self.blocks.keys().next() {
        Some(&lowest) => lowest,
        None => 0,
    }
}
pub fn add_block_start(&mut self, addr: u32) -> bool {
if addr == 0xFFFFFFFF {
panic!();
}
// Slice previous block.
if let Some((_, end)) = self.blocks.range_mut(..addr).last() {
let last_end = *end;
if last_end > addr {
*end = addr;
self.blocks.insert(addr, last_end);
return false;
}
}
// Otherwise, insert with no end.
match self.blocks.entry(addr) {
btree_map::Entry::Vacant(e) => {
e.insert(0);
true
}
btree_map::Entry::Occupied(_) => false,
}
}
/// Records `ins.addr` as the function prologue when `ins` and the instruction
/// following it form one of the recognized prologue sequences.
///
/// Does nothing when the following instruction cannot be disassembled.
///
/// # Errors
///
/// Returns an error if a prologue was already recorded at an address other
/// than `ins.addr` or `ins.addr - 4`.
fn check_prologue(&mut self, section: &ObjSection, ins: &Ins) -> Result<()> {
    let Some(next_ins) = disassemble(section, ins.addr + 4) else {
        return Ok(());
    };

    // mfspr r0, LR
    let is_mflr_r0 =
        |i: &Ins| i.op == Opcode::Mfspr && i.field_rD() == 0 && i.field_spr() == 8;

    let is_prologue =
        // stwu r1, d(r1)
        // mfspr r0, LR
        (ins.op == Opcode::Stwu
            && ins.field_rS() == 1
            && ins.field_rA() == 1
            && is_mflr_r0(&next_ins))
        // mfspr r0, LR
        // stw r0, d(r1)
        || (is_mflr_r0(ins)
            && next_ins.op == Opcode::Stw
            && next_ins.field_rS() == 0
            && next_ins.field_rA() == 1);
    if !is_prologue {
        return Ok(());
    }

    if let Some(prologue) = self.prologue {
        // The same prologue may be seen again, either at this instruction or
        // at the one right before it.
        if prologue != ins.addr && prologue != ins.addr - 4 {
            bail!("Found duplicate prologue: {:#010X} and {:#010X}", prologue, ins.addr)
        }
    }
    self.prologue = Some(ins.addr);
    Ok(())
}
/// Records `ins.addr` as the function epilogue when `ins` and the instruction
/// following it form one of the recognized epilogue sequences.
///
/// Does nothing when the following instruction cannot be disassembled.
///
/// # Errors
///
/// Returns an error if an epilogue was already recorded at a different address.
fn check_epilogue(&mut self, section: &ObjSection, ins: &Ins) -> Result<()> {
    let Some(next_ins) = disassemble(section, ins.addr + 4) else {
        return Ok(());
    };

    // mtspr SPR, r0
    let is_mtlr_r0 =
        |i: &Ins| i.op == Opcode::Mtspr && i.field_rS() == 0 && i.field_spr() == 8;

    let is_epilogue =
        // mtspr SPR, r0
        // addi rD, rA, SIMM
        (is_mtlr_r0(ins)
            && next_ins.op == Opcode::Addi
            && next_ins.field_rD() == 1
            && next_ins.field_rA() == 1)
        // or r1, rA, rB
        // mtspr SPR, r0
        || (ins.op == Opcode::Or && ins.field_rA() == 1 && is_mtlr_r0(&next_ins));
    if !is_epilogue {
        return Ok(());
    }

    if let Some(epilogue) = self.epilogue {
        if epilogue != ins.addr {
            bail!("Found duplicate epilogue: {:#010X} and {:#010X}", epilogue, ins.addr)
        }
    }
    self.epilogue = Some(ins.addr);
    Ok(())
}
/// Executor callback invoked for every instruction visited during analysis.
///
/// Updates prologue/epilogue tracking and the `has_conditional_blr` /
/// `has_rfi` flags, then reacts to the step result: closes the current block
/// where control flow ends, records branch targets, function references and
/// possible blocks, and queues further addresses on the executor.
///
/// Returns `ExecCbResult::End(false)` when an illegal instruction is hit;
/// otherwise tells the executor to continue, end the block, or jump.
///
/// # Errors
///
/// Fails on conflicting prologues/epilogues, on jump table lookup failures,
/// and on conditional jump tables, which are unsupported.
fn instruction_callback(
&mut self,
data: ExecCbData,
obj: &ObjInfo,
function_start: u32,
function_end: Option<u32>,
known_functions: &BTreeSet<u32>,
) -> Result<ExecCbResult<bool>> {
let ExecCbData { executor, vm, result, section, ins, block_start } = data;
// Track discovered prologue(s) and epilogue(s)
self.check_prologue(section, ins)
.with_context(|| format!("While processing {:#010X}", function_start))?;
self.check_epilogue(section, ins)
.with_context(|| format!("While processing {:#010X}", function_start))?;
if !self.has_conditional_blr && is_conditional_blr(&ins) {
self.has_conditional_blr = true;
}
if !self.has_rfi && ins.op == Opcode::Rfi {
self.has_rfi = true;
}
// If control flow hits a block we thought may be a tail call,
// we know it isn't.
if self.possible_blocks.contains(&ins.addr) {
self.possible_blocks.remove(&ins.addr);
}
match result {
StepResult::Continue | StepResult::LoadStore { .. } => {
let next_address = ins.addr + 4;
// If we already visited the next address, connect the blocks and end
if executor.visited(section, next_address) {
self.blocks.insert(block_start, next_address);
self.branches.insert(ins.addr, vec![next_address]);
Ok(ExecCbResult::EndBlock)
} else {
Ok(ExecCbResult::Continue)
}
}
StepResult::Illegal => {
log::debug!("Illegal instruction @ {:#010X}", ins.addr);
Ok(ExecCbResult::End(false))
}
StepResult::Jump(target) => match target {
BranchTarget::Unknown => {
// Likely end of function
let next_addr = ins.addr + 4;
self.blocks.insert(block_start, next_addr);
// If this function has a prologue but no epilogue, and this
// instruction is a bctr, we can assume it's an unrecovered
// jump table and continue analysis.
// NOTE(review): the opcode is not checked here, so any jump with an
// unknown target takes this path, not only bctr -- confirm intent.
if self.prologue.is_some() && self.epilogue.is_none() {
log::debug!("Assuming unrecovered jump table {:#010X}", next_addr);
self.branches.insert(ins.addr, vec![next_addr]);
if self.add_block_start(next_addr) {
executor.push(next_addr, vm.clone_for_return(), true);
}
}
Ok(ExecCbResult::EndBlock)
}
BranchTarget::Return => {
self.blocks.insert(block_start, ins.addr + 4);
Ok(ExecCbResult::EndBlock)
}
BranchTarget::Address(addr) => {
// End of block
self.blocks.insert(block_start, ins.addr + 4);
self.branches.insert(ins.addr, vec![addr]);
if addr == ins.addr {
// pass
} else if addr >= function_start
&& matches!(function_end, Some(known_end) if addr < known_end)
{
// If target is within known function bounds, jump
if self.add_block_start(addr) {
return Ok(ExecCbResult::Jump(addr));
}
// NOTE(review): the range requested below is ins.addr..ins.addr + 4, i.e.
// the branch instruction itself, while the comment inside the branch talks
// about padding *after* it. Confirm against `section_data` whether
// ins.addr + 4..ins.addr + 8 was intended.
} else if matches!(obj.section_data(ins.addr, ins.addr + 4), Ok((_, data)) if data == [0u8; 4])
{
// If this branch has 0'd padding after it, assume tail call.
self.function_references.insert(addr);
} else {
// Undecided: may be another block of this function or a tail call.
self.possible_blocks.insert(addr);
}
Ok(ExecCbResult::EndBlock)
}
BranchTarget::JumpTable { address, size } => {
// End of block
let next_address = ins.addr + 4;
self.blocks.insert(block_start, next_address);
let (mut entries, size) = uniq_jump_table_entries(
obj,
address,
size,
ins.addr,
function_start,
function_end.unwrap_or_else(|| self.end()),
)?;
// Treat the table as local when it targets the instruction right after
// the jump and none of its entries is a known function.
if entries.contains(&next_address)
&& !entries.iter().any(|addr| known_functions.contains(addr))
{
self.jump_table_references.insert(address, size);
let mut branches = vec![];
for addr in entries {
branches.push(addr);
if self.add_block_start(addr) {
executor.push(addr, vm.clone_all(), true);
}
}
self.branches.insert(ins.addr, branches);
} else {
// If the table doesn't contain the next address,
// it could be a function jump table instead
self.possible_blocks.append(&mut entries);
}
Ok(ExecCbResult::EndBlock)
}
},
// NOTE(review): `branches` is never mutated below; the `mut` looks unnecessary.
StepResult::Branch(mut branches) => {
// End of block
self.blocks.insert(block_start, ins.addr + 4);
let mut out_branches = vec![];
for branch in branches {
match branch.target {
BranchTarget::Unknown | BranchTarget::Return => {
continue;
}
BranchTarget::Address(addr) => {
// Linked branches and branches to known functions are calls;
// everything else is a block of this function.
if branch.link || known_functions.contains(&addr) {
self.function_references.insert(addr);
} else {
out_branches.push(addr);
if self.add_block_start(addr) {
executor.push(addr, branch.vm, true);
}
}
}
BranchTarget::JumpTable { .. } => {
bail!("Conditional jump table unsupported @ {:#010X}", ins.addr);
}
}
}
if !out_branches.is_empty() {
self.branches.insert(ins.addr, out_branches);
}
Ok(ExecCbResult::EndBlock)
}
}
}
pub fn analyze(
&mut self,
obj: &ObjInfo,
start: u32,
function_start: u32,
function_end: Option<u32>,
known_functions: &BTreeSet<u32>,
) -> Result<bool> {
if !self.add_block_start(start) {
return Ok(true);
}
let mut executor = Executor::new(obj);
executor.push(start, VM::new_from_obj(obj), false);
let result = executor.run(obj, |data| {
self.instruction_callback(data, obj, function_start, function_end, known_functions)
})?;
if matches!(result, Some(b) if !b) {
return Ok(false);
}
// Visit unreachable blocks
while let Some((first, _)) = self.first_disconnected_block() {
executor.push(first.end, VM::new_from_obj(obj), true);
let result = executor.run(obj, |data| {
self.instruction_callback(data, obj, function_start, function_end, known_functions)
})?;
if matches!(result, Some(b) if !b) {
return Ok(false);
}
}
// Visit trailing blocks
if let Some(known_end) = function_end {
while self.end() < known_end {
executor.push(self.end(), VM::new_from_obj(obj), true);
let result = executor.run(obj, |data| {
self.instruction_callback(
data,
obj,
function_start,
function_end,
known_functions,
)
})?;
if matches!(result, Some(b) if !b) {
return Ok(false);
}
}
}
// Sanity check
for (&start, &end) in &self.blocks {
ensure!(end != 0, "Failed to finalize block @ {start:#010X}");
}
Ok(true)
}
/// Returns `true` once no undecided block-or-tail-call targets remain.
pub fn can_finalize(&self) -> bool {
    self.possible_blocks.is_empty()
}
/// Finalizes the analysis result.
///
/// Besides validating the prologue/epilogue state, this merges a trailing
/// `blr` (for functions with a conditional `blr`) or a trailing `nop` (for
/// functions containing `rfi`) into the function, unless that address is a
/// known function.
///
/// # Errors
///
/// Fails if already finalized, if possible blocks are still undecided, or if
/// an epilogue was found without a prologue.
pub fn finalize(&mut self, obj: &ObjInfo, known_functions: &BTreeSet<u32>) -> Result<()> {
    ensure!(!self.finalized, "Already finalized");
    ensure!(self.can_finalize(), "Can't finalize");

    // An epilogue without a prologue is the only rejected combination.
    if let (None, Some(e)) = (self.prologue, self.epilogue) {
        log::info!("{:#010X?}", self);
        bail!("Unpaired epilogue {:#010X}", e);
    }

    let end = self.end();
    if let Ok(section) = obj.section_at(end) {
        // FIXME this is real bad
        if !self.has_conditional_blr {
            // Does the function end in a `b` to a referenced function?
            let ends_in_function_branch = matches!(
                disassemble(&section, end - 4),
                Some(ins) if ins.op == Opcode::B
                    && self.function_references.contains(&ins.branch_dest().unwrap())
            );
            // If so, and a multi-target branch leads to the last block, treat
            // the function as having a conditional blr.
            if ends_in_function_branch
                && self.branches.values().any(|targets| {
                    targets.len() > 1
                        && targets.contains(self.blocks.last_key_value().unwrap().0)
                })
            {
                self.has_conditional_blr = true;
            }
        }

        // MWCC optimization sometimes leaves an unreachable blr
        // after generating a conditional blr in the function.
        if self.has_conditional_blr
            && matches!(disassemble(&section, end - 4), Some(ins) if !ins.is_blr())
            && matches!(disassemble(&section, end), Some(ins) if ins.is_blr())
            && !known_functions.contains(&end)
        {
            log::trace!("Found trailing blr @ {:#010X}, merging with function", end);
            self.blocks.insert(end, end + 4);
        }

        // Some functions with rfi also include a trailing nop
        if self.has_rfi
            && matches!(disassemble(&section, end), Some(ins) if is_nop(&ins))
            && !known_functions.contains(&end)
        {
            log::trace!("Found trailing nop @ {:#010X}, merging with function", end);
            self.blocks.insert(end, end + 4);
        }
    }

    self.finalized = true;
    Ok(())
}
pub fn check_tail_call(
&mut self,
obj: &ObjInfo,
addr: u32,
function_start: u32,
function_end: u32,
known_functions: &BTreeSet<u32>,
) -> TailCallResult {
// log::info!("Determing if {:#010X} is a tail call", addr);
// If jump target is already a known block or within known function bounds, not a tail call.
if self.blocks.contains_key(&addr) || (addr >= function_start && addr < function_end) {
return TailCallResult::Not;
}
// If there's a prologue in the current function, not a tail call.
if self.prologue.is_some() {
return TailCallResult::Not;
}
// If jump target is before the start of the function, known tail call.
if addr < function_start {
return TailCallResult::Is;
}
// If the jump target has 0'd padding before it, known tail call.
if matches!(obj.section_data(addr - 4, addr), Ok((_, data)) if data == [0u8; 4]) {
return TailCallResult::Is;
}
// If we're not sure where the function ends yet, mark as possible tail call.
// let end = self.end();
if function_end == 0 {
return TailCallResult::Possible;
}
// If jump target is known to be a function, or there's a function in between
// this and the jump target, known tail call.
log::trace!("Checking {:#010X}..={:#010X}", function_start + 4, addr);
if self.function_references.range(function_start + 4..=addr).next().is_some()
|| known_functions.range(function_start + 4..=addr).next().is_some()
{
return TailCallResult::Is;
}
// Perform CFA on jump target to determine more
let mut slices = FunctionSlices::default();
slices.function_references = self.function_references.clone();
if let Ok(result) =
slices.analyze(obj, addr, function_start, Some(function_end), known_functions)
{
// If analysis failed, assume tail call.
if !result {
log::warn!("Tail call analysis failed for {:#010X}", addr);
return TailCallResult::Is;
}
// If control flow jumps below the entry point, not a tail call.
let start = slices.start();
if start < addr {
log::trace!("Tail call possibility eliminated: {:#010X} < {:#010X}", start, addr);
return TailCallResult::Not;
}
// If control flow includes another possible tail call, we know both are not tail calls.
let end = slices.end();
let other_blocks =
self.possible_blocks.range(start + 4..end).cloned().collect::<Vec<u32>>();
if !other_blocks.is_empty() {
for other_addr in other_blocks {
log::trace!("Logically eliminating {:#010X}", other_addr);
self.possible_blocks.remove(&other_addr);
// self.add_block_start(oth);
}
log::trace!("While analyzing {:#010X}", addr);
return TailCallResult::Not;
}
// If we discovered a function prologue, known tail call.
if slices.prologue.is_some() {
log::trace!("Prologue discovered; known tail call: {:#010X}", addr);
return TailCallResult::Is;
}
}
TailCallResult::Possible
}
/// Finds the first pair of consecutive blocks with a gap between them, i.e.
/// where the second block starts after the first one ends.
///
/// Returns the ranges of both blocks, or `None` when no such pair exists.
pub fn first_disconnected_block(&self) -> Option<(BlockRange, BlockRange)> {
    self.blocks
        .iter()
        // Pair every block with its successor.
        .zip(self.blocks.iter().skip(1))
        .find(|&((_, &first_end), (&second_begin, _))| second_begin > first_end)
        .map(|((&first_begin, &first_end), (&second_begin, &second_end))| {
            (first_begin..first_end, second_begin..second_end)
        })
}
}
/// Returns `true` for a `bclr` whose BO field does not have both bits of
/// `0b10100` set.
#[inline]
fn is_conditional_blr(ins: &Ins) -> bool {
    if ins.op != Opcode::Bclr {
        return false;
    }
    let bo = ins.field_BO();
    (bo & 0b10100) != 0b10100
}
/// Returns `true` when the instruction word is `0x60000000` (`ori r0, r0, 0`).
#[inline]
fn is_nop(ins: &Ins) -> bool {
    matches!(ins.code, 0x60000000)
}

View File

@@ -1,6 +1,6 @@
use std::{cmp::min, collections::HashMap};
use anyhow::{Error, Result};
use anyhow::{anyhow, bail, ensure, Result};
use crate::util::obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjSection, ObjSectionKind, ObjSymbol,
@@ -8,9 +8,7 @@ use crate::util::obj::{
/// Split an executable object into relocatable objects.
pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
if obj.kind != ObjKind::Executable {
return Err(Error::msg(format!("Expected executable object, got {:?}", obj.kind)));
}
ensure!(obj.kind == ObjKind::Executable, "Expected executable object");
let mut objects: Vec<ObjInfo> = vec![];
let mut object_symbols: Vec<Vec<Option<usize>>> = vec![];
@@ -19,12 +17,15 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
name_to_obj.insert(unit.clone(), objects.len());
object_symbols.push(vec![None; obj.symbols.len()]);
objects.push(ObjInfo {
module_id: 0,
kind: ObjKind::Relocatable,
architecture: ObjArchitecture::PowerPc,
name: unit.clone(),
symbols: vec![],
sections: vec![],
entry: 0,
sda2_base: None,
sda_base: None,
stack_address: None,
stack_end: None,
db_stack_addr: None,
@@ -32,6 +33,8 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
arena_hi: None,
splits: Default::default(),
link_order: vec![],
known_functions: Default::default(),
unresolved_relocations: vec![],
});
}
@@ -58,14 +61,16 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
let (file_addr, unit) = match file_iter.next() {
Some((&addr, unit)) => (addr, unit),
None => return Err(Error::msg("No file found")),
None => bail!("No file found"),
};
if file_addr > current_address {
return Err(Error::msg(format!(
"Gap in files: {} @ {:#010X}, {} @ {:#010X}",
section.name, section.address, unit, file_addr
)));
}
ensure!(
file_addr <= current_address,
"Gap in files: {} @ {:#010X}, {} @ {:#010X}",
section.name,
section.address,
unit,
file_addr
);
let mut file_end = section_end;
if let Some(&(&next_addr, _)) = file_iter.peek() {
file_end = min(next_addr, section_end);
@@ -74,17 +79,11 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
let file = name_to_obj
.get(unit)
.and_then(|&idx| objects.get_mut(idx))
.ok_or_else(|| Error::msg(format!("Unit '{unit}' not in link order")))?;
.ok_or_else(|| anyhow!("Unit '{unit}' not in link order"))?;
let symbol_idxs = name_to_obj
.get(unit)
.and_then(|&idx| object_symbols.get_mut(idx))
.ok_or_else(|| Error::msg(format!("Unit '{unit}' not in link order")))?;
let data = match section.kind {
ObjSectionKind::Bss => vec![],
_ => section.data[(current_address as u64 - section.address) as usize
..(file_end as u64 - section.address) as usize]
.to_vec(),
};
.ok_or_else(|| anyhow!("Unit '{unit}' not in link order"))?;
// Calculate & verify section alignment
let mut align = default_section_align(section);
@@ -96,14 +95,20 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
align,
current_address
);
align = 4;
}
if current_address & (align as u32 - 1) != 0 {
return Err(Error::msg(format!(
"Invalid alignment for split: {} {} {:#010X}",
unit, section.name, current_address
)));
while align > 4 {
align /= 2;
if current_address & (align as u32 - 1) == 0 {
break;
}
}
}
ensure!(
current_address & (align as u32 - 1) == 0,
"Invalid alignment for split: {} {} {:#010X}",
unit,
section.name,
current_address
);
// Collect relocations; target_symbol will be updated later
let out_relocations = relocations
@@ -116,21 +121,8 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
})
.collect();
let out_section_idx = file.sections.len();
file.sections.push(ObjSection {
name: section.name.clone(),
kind: section.kind,
address: 0,
size: file_end as u64 - current_address as u64,
data,
align,
index: out_section_idx,
relocations: out_relocations,
original_address: current_address as u64,
file_offset: section.file_offset + (current_address as u64 - section.address),
});
// Add section symbols
let out_section_idx = file.sections.len();
for &symbol_idx in symbols.range(current_address..file_end).flat_map(|(_, vec)| vec) {
if symbol_idxs[symbol_idx].is_some() {
continue; // should never happen?
@@ -149,6 +141,27 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
});
}
let data = match section.kind {
ObjSectionKind::Bss => vec![],
_ => section.data[(current_address as u64 - section.address) as usize
..(file_end as u64 - section.address) as usize]
.to_vec(),
};
file.sections.push(ObjSection {
name: section.name.clone(),
kind: section.kind,
address: 0,
size: file_end as u64 - current_address as u64,
data,
align,
index: out_section_idx,
elf_index: out_section_idx + 1,
relocations: out_relocations,
original_address: current_address as u64,
file_offset: section.file_offset + (current_address as u64 - section.address),
section_known: true,
});
current_address = file_end;
}
}

733
src/util/tracker.rs Normal file
View File

@@ -0,0 +1,733 @@
use std::{
collections::{BTreeMap, BTreeSet, VecDeque},
mem::take,
};
use anyhow::{bail, Result};
use ppc750cl::{disasm_iter, Argument, Ins, Opcode};
use crate::util::{
executor::{uniq_jump_table_entries, ExecCbData, ExecCbResult, Executor},
obj::{
nested_push, ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
ObjSymbolKind,
},
slices::FunctionSlices,
vm::{is_store_op, BranchTarget, GprValue, StepResult, VM},
};
/// Kind of label attached to an address by the tracker.
// NOTE(review): no use of these variants is visible in this part of the file;
// their exact meaning should be confirmed against the code that creates them.
#[derive(Debug, Eq, PartialEq)]
pub enum Label {
Local,
Global,
Data,
JumpTable,
VTable,
}
/// A relocation discovered by the tracker. Each variant carries the resolved
/// 32-bit value (an address) the instruction refers to.
#[derive(Debug, Copy, Clone)]
pub enum Relocation {
/// Matched against `ObjRelocKind::PpcAddr16Ha` when applying signatures.
Ha(u32),
/// Matched against `ObjRelocKind::PpcAddr16Hi` when applying signatures.
Hi(u32),
/// Matched against `ObjRelocKind::PpcAddr16Lo` when applying signatures.
Lo(u32),
/// Matched against `ObjRelocKind::PpcEmbSda21`; recorded for accesses
/// relative to r2 / r13 holding the SDA2 / SDA base.
Sda21(u32),
/// Matched against `ObjRelocKind::PpcRel14` when applying signatures.
Rel14(u32),
/// Matched against `ObjRelocKind::PpcRel24`; recorded for jumps leaving
/// the current function.
Rel24(u32),
/// Matched against `ObjRelocKind::Absolute` when applying signatures.
Absolute(u32),
}
/// Kind of data found at an address, recorded for addresses accessed by
/// load/store instructions.
#[derive(Debug)]
pub enum DataKind {
// Explicit discriminant -1; the following variants count up from 0.
Unknown = -1,
Word,
Half,
Byte,
Float,
Double,
String,
String16,
}
/// Tracks relocations and data references discovered while executing the
/// functions of an object.
pub struct Tracker {
/// Addresses that were already handed to function processing.
processed_functions: BTreeSet<u32>,
sda2_base: u32, // r2
sda_base: u32, // r13
// NOTE(review): not written anywhere in the visible part of the file.
labels: BTreeMap<u32, Label>,
/// Discovered relocations, keyed by the address of the relocated instruction.
pub relocations: BTreeMap<u32, Relocation>,
/// Data kind per address accessed by a load/store instruction.
data_types: BTreeMap<u32, DataKind>,
stack_address: Option<u32>,
stack_end: Option<u32>,
db_stack_addr: Option<u32>,
arena_lo: Option<u32>,
arena_hi: Option<u32>,
// NOTE(review): presumably addresses to skip during tracking -- the consumer
// is outside the visible part of the file; confirm.
pub ignore_addresses: BTreeSet<u32>,
/// Addresses at which a relocation is expected; seeded by callers (e.g. when
/// applying a signature) before processing a function.
pub known_relocations: BTreeSet<u32>,
stores_to: BTreeSet<u32>, // for determining data vs rodata, sdata(2)/sbss(2)
sda_to: BTreeSet<u32>, // for determining data vs sdata
hal_to: BTreeSet<u32>, // for determining data vs sdata
}
impl Tracker {
/// Creates a tracker from the absolute symbols known for `obj`.
///
/// Missing values are filled in where possible: the stack end defaults to the
/// end of the last BSS section, `arena_lo` to the debug stack address rounded
/// up to 32 bytes, and `arena_hi` to `0x81700000`.
///
/// # Panics
///
/// Panics if `obj.sda2_base` or `obj.sda_base` is `None`; callers must
/// resolve both SDA bases before constructing a tracker.
pub fn new(obj: &ObjInfo) -> Tracker {
    let sda2_base = obj.sda2_base.expect("Tracker::new requires obj.sda2_base (_SDA2_BASE_)");
    let sda_base = obj.sda_base.expect("Tracker::new requires obj.sda_base (_SDA_BASE_)");
    // Stack ends after all BSS sections
    let stack_end = obj.stack_end.or_else(|| {
        obj.sections
            .iter()
            .rfind(|s| s.kind == ObjSectionKind::Bss)
            .map(|s| (s.address + s.size) as u32)
    });
    // Arena starts at the debug stack address, aligned up to 32 bytes.
    let arena_lo = obj
        .arena_lo
        .or_else(|| obj.db_stack_addr.map(|db_stack_addr| (db_stack_addr + 0x1F) & !0x1F));

    Self {
        processed_functions: Default::default(),
        sda2_base,
        sda_base,
        labels: Default::default(),
        relocations: Default::default(),
        data_types: Default::default(),
        stack_address: obj.stack_address,
        stack_end,
        db_stack_addr: obj.db_stack_addr,
        arena_lo,
        arena_hi: Some(obj.arena_hi.unwrap_or(0x81700000)),
        ignore_addresses: Default::default(),
        known_relocations: Default::default(),
        stores_to: Default::default(),
        sda_to: Default::default(),
        hal_to: Default::default(),
    }
}
/// Processes the whole object: first all code, then every data and
/// read-only data section.
///
/// # Errors
///
/// Propagates the first error from code or data processing.
pub fn process(&mut self, obj: &ObjInfo) -> Result<()> {
    log::debug!("Processing code sections");
    self.process_code(obj)?;

    let data_sections = obj
        .sections
        .iter()
        .filter(|s| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData));
    for section in data_sections {
        log::debug!("Processing section {}, address {:#X}", section.index, section.address);
        self.process_data(obj, section)?;
    }
    Ok(())
}
/// Records `addr` as the stack address and derives the dependent values:
/// the debug stack address (`addr + 0x2000`), `arena_lo` (debug stack address
/// rounded up to 32 bytes) and the fixed `arena_hi`.
///
/// Nothing is changed when `addr` equals the known debug stack address, or
/// when a different stack address is already known (this is logged as an
/// error and the existing value is kept).
fn update_stack_address(&mut self, addr: u32) {
    if self.db_stack_addr == Some(addr) {
        return;
    }
    if let Some(stack_addr) = self.stack_address {
        if stack_addr != addr {
            log::error!("Stack address overridden from {:#010X} to {:#010X}", stack_addr, addr);
            return;
        }
    }
    log::debug!("Located stack address: {:08X}", addr);

    let db_stack_addr = addr + 0x2000;
    let arena_lo = (db_stack_addr + 0x1F) & !0x1F;
    // __ArenaHi is fixed (until it isn't?)
    let arena_hi: u32 = 0x81700000;
    self.stack_address = Some(addr);
    self.db_stack_addr = Some(db_stack_addr);
    self.arena_lo = Some(arena_lo);
    self.arena_hi = Some(arena_hi);

    log::debug!("_stack_addr: {:#010X}", addr);
    // The stack end may be unknown; never panic just to produce a log line.
    match self.stack_end {
        Some(stack_end) => log::debug!("_stack_end: {:#010X}", stack_end),
        None => log::debug!("_stack_end: unknown"),
    }
    log::debug!("_db_stack_addr: {:#010X}", db_stack_addr);
    log::debug!("__ArenaLo: {:#010X}", arena_lo);
    log::debug!("__ArenaHi: {:#010X}", arena_hi);
}
fn process_code(&mut self, obj: &ObjInfo) -> Result<()> {
let mut symbol_map = BTreeMap::new();
for section in obj.sections.iter().filter(|s| s.kind == ObjSectionKind::Code) {
symbol_map.append(&mut obj.build_symbol_map(section.index)?);
}
self.process_function_by_address(obj, &symbol_map, obj.entry as u32)?;
'outer: for (&addr, symbols) in &symbol_map {
if self.processed_functions.contains(&addr) {
continue;
}
self.processed_functions.insert(addr);
for &symbol_idx in symbols {
let symbol = &obj.symbols[symbol_idx];
if symbol.kind == ObjSymbolKind::Function && symbol.size_known {
self.process_function(obj, symbol)?;
continue 'outer;
}
}
}
// Special handling for gTRKInterruptVectorTable
if let (Some(trk_interrupt_table), Some(trk_interrupt_vector_table_end)) = (
obj.symbols.iter().find(|sym| sym.name == "gTRKInterruptVectorTable"),
obj.symbols.iter().find(|sym| sym.name == "gTRKInterruptVectorTableEnd"),
) {}
Ok(())
}
/// Processes the function located at `addr`, if it was not processed before.
///
/// The first function symbol with a known size at `addr` is used. When no
/// such symbol exists a warning is logged and `Ok(())` is returned.
///
/// # Errors
///
/// Propagates errors from processing the function.
fn process_function_by_address(
    &mut self,
    obj: &ObjInfo,
    symbol_map: &BTreeMap<u32, Vec<usize>>,
    addr: u32,
) -> Result<()> {
    // `insert` returns false when the address was already processed.
    if !self.processed_functions.insert(addr) {
        return Ok(());
    }

    let function = symbol_map
        .get(&addr)
        .into_iter()
        .flatten()
        .map(|&symbol_idx| &obj.symbols[symbol_idx])
        .find(|symbol| symbol.kind == ObjSymbolKind::Function && symbol.size_known);
    match function {
        Some(symbol) => {
            self.process_function(obj, symbol)?;
        }
        None => log::warn!("Failed to locate function symbol @ {:#010X}", addr),
    }
    Ok(())
}
fn instruction_callback(
&mut self,
data: ExecCbData,
obj: &ObjInfo,
function_start: u32,
function_end: u32,
possible_missed_branches: &mut BTreeMap<u32, Box<VM>>,
) -> Result<ExecCbResult<()>> {
let ExecCbData { executor, vm, result, section, ins, block_start } = data;
let is_function_addr = |addr: u32| addr >= function_start && addr < function_end;
match result {
StepResult::Continue => {
// if ins.addr == 0x8000ed0c || ins.addr == 0x8000ed08 || ins.addr == 0x8000ca50 {
// println!("ok");
// }
match ins.op {
Opcode::Addi | Opcode::Addic | Opcode::Addic_ => {
// addi rD, rA, SIMM
let source = ins.field_rA();
let target = ins.field_rD();
if let GprValue::Constant(value) = vm.gpr[target].value {
if self.is_valid_address(obj, ins.addr, value) {
if (source == 2
&& vm.gpr[2].value == GprValue::Constant(self.sda2_base))
|| (source == 13
&& vm.gpr[13].value == GprValue::Constant(self.sda_base))
{
self.relocations.insert(ins.addr, Relocation::Sda21(value));
self.sda_to.insert(value);
} else if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr.get()).cloned();
if hi_reloc.is_none() {
self.relocations
.insert(hi_addr.get(), Relocation::Ha(value));
}
let lo_reloc = self.relocations.get(&lo_addr.get()).cloned();
if lo_reloc.is_none() {
self.relocations
.insert(lo_addr.get(), Relocation::Lo(value));
}
self.hal_to.insert(value);
}
}
}
}
Opcode::Ori => {
// ori rA, rS, UIMM
let source = ins.field_rS();
let target = ins.field_rA();
if let GprValue::Constant(value) = vm.gpr[target].value {
// if target == 1 {
// log::debug!("Stack address written from {:#010X}", ins.addr);
// self.update_stack_address(value);
// }
if self.is_valid_address(obj, ins.addr, value) {
if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr.get()).cloned();
if hi_reloc.is_none() {
self.relocations
.insert(hi_addr.get(), Relocation::Hi(value));
}
let lo_reloc = self.relocations.get(&lo_addr.get()).cloned();
if lo_reloc.is_none() {
self.relocations
.insert(lo_addr.get(), Relocation::Lo(value));
}
self.hal_to.insert(value);
}
}
}
}
_ => {}
}
Ok(ExecCbResult::Continue)
}
StepResult::LoadStore { address, source, source_reg } => {
if self.is_valid_address(obj, ins.addr, address) {
if (source_reg == 2 && source.value == GprValue::Constant(self.sda2_base))
|| (source_reg == 13 && source.value == GprValue::Constant(self.sda_base))
{
self.relocations.insert(ins.addr, Relocation::Sda21(address));
self.sda_to.insert(address);
} else {
match (source.hi_addr, source.lo_addr) {
(Some(hi_addr), None) => {
let hi_reloc = self.relocations.get(&hi_addr.get()).cloned();
if hi_reloc.is_none() {
self.relocations.insert(hi_addr.get(), Relocation::Ha(address));
}
if hi_reloc.is_none()
|| matches!(hi_reloc, Some(Relocation::Ha(v)) if v == address)
{
self.relocations.insert(ins.addr, Relocation::Lo(address));
}
self.hal_to.insert(address);
}
(Some(hi_addr), Some(lo_addr)) => {
let hi_reloc = self.relocations.get(&hi_addr.get()).cloned();
if hi_reloc.is_none() {
self.relocations.insert(hi_addr.get(), Relocation::Ha(address));
}
let lo_reloc = self.relocations.get(&lo_addr.get()).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr.get(), Relocation::Lo(address));
}
self.hal_to.insert(address);
}
_ => {}
}
}
self.data_types.insert(address, data_kind_from_op(ins.op));
if is_store_op(ins.op) {
self.stores_to.insert(address);
}
}
Ok(ExecCbResult::Continue)
}
StepResult::Illegal => bail!(
"Illegal instruction hit @ {:#010X} (function {:#010X}-{:#010X})",
ins.addr,
function_start,
function_end
),
StepResult::Jump(target) => match target {
BranchTarget::Unknown | BranchTarget::Return => Ok(ExecCbResult::EndBlock),
BranchTarget::Address(addr) => {
let next_addr = ins.addr + 4;
if next_addr < function_end {
possible_missed_branches.insert(ins.addr + 4, vm.clone_all());
}
if is_function_addr(addr) {
Ok(ExecCbResult::Jump(addr))
} else {
self.relocations.insert(ins.addr, Relocation::Rel24(addr));
Ok(ExecCbResult::EndBlock)
}
}
BranchTarget::JumpTable { address, size } => {
let (entries, _) = uniq_jump_table_entries(
obj,
address,
size,
ins.addr,
function_start,
function_end,
)?;
for target in entries {
if is_function_addr(target) {
executor.push(target, vm.clone_all(), true);
}
}
Ok(ExecCbResult::EndBlock)
}
},
StepResult::Branch(branches) => {
for branch in branches {
match branch.target {
BranchTarget::Unknown | BranchTarget::Return => {}
BranchTarget::Address(addr) => {
if branch.link || !is_function_addr(addr) {
self.relocations.insert(ins.addr, match ins.op {
Opcode::B => Relocation::Rel24(addr),
_ => Relocation::Rel14(addr),
});
} else if is_function_addr(addr) {
executor.push(addr, branch.vm, true);
}
}
BranchTarget::JumpTable { .. } => {
bail!("Conditional jump table unsupported @ {:#010X}", ins.addr)
}
}
}
Ok(ExecCbResult::EndBlock)
}
}
}
/// Symbolically executes one function, feeding every stepped instruction
/// to `instruction_callback`.
///
/// The callback records fall-through addresses after unconditional jumps in
/// `possible_missed_branches`. After each executor run, any of those
/// addresses that the executor has not visited yet are queued and the
/// executor is run again, until a pass queues nothing new.
pub fn process_function(&mut self, obj: &ObjInfo, symbol: &ObjSymbol) -> Result<()> {
    let entry = symbol.address as u32;
    let function_start = entry;
    let function_end = (symbol.address + symbol.size) as u32;

    // The compiler can sometimes create impossible-to-reach branches,
    // but we still want to track them.
    let mut possible_missed_branches = BTreeMap::new();

    let mut executor = Executor::new(obj);
    executor.push(entry, VM::new_with_base(self.sda2_base, self.sda_base), false);

    loop {
        executor.run(obj, |data| -> Result<ExecCbResult<()>> {
            self.instruction_callback(
                data,
                obj,
                function_start,
                function_end,
                &mut possible_missed_branches,
            )
        })?;

        // Drain the candidates; an empty map simply queues nothing and
        // ends the loop below.
        let mut requeued = false;
        for (addr, vm) in take(&mut possible_missed_branches) {
            // Addresses outside every section are silently dropped.
            if let Ok(section) = obj.section_at(addr) {
                if !executor.visited(section, addr) {
                    executor.push(addr, vm, true);
                    requeued = true;
                }
            }
        }
        if !requeued {
            break;
        }
    }
    Ok(())
}
/// Scans a section's data as consecutive big-endian 32-bit words and
/// records an absolute relocation for every word that
/// `is_valid_address` accepts.
///
/// Trailing bytes that do not fill a whole word are ignored.
fn process_data(&mut self, obj: &ObjInfo, section: &ObjSection) -> Result<()> {
    let mut cursor = section.address as u32;
    for word in section.data.chunks_exact(4) {
        let bytes: [u8; 4] = word.try_into()?;
        let candidate = u32::from_be_bytes(bytes);
        if self.is_valid_address(obj, cursor, candidate) {
            self.relocations.insert(cursor, Relocation::Absolute(candidate));
        }
        cursor += 4;
    }
    Ok(())
}
/// Decides whether `addr`, referenced from the instruction or data word at
/// `from`, should be treated as a real address.
///
/// Explicitly ignored addresses are always rejected. Otherwise the value is
/// accepted when `from` is a known relocation site, when `addr` equals one
/// of the tracked special addresses, when it lies strictly between
/// `0x80000000` and `0x80003100`, or when it falls within any section
/// (the section end address itself is accepted too).
fn is_valid_address(&self, obj: &ObjInfo, from: u32, addr: u32) -> bool {
    if self.ignore_addresses.contains(&addr) {
        return false;
    }

    let wanted = Some(addr);
    let is_special = self.stack_address == wanted
        || self.stack_end == wanted
        || self.db_stack_addr == wanted
        || self.arena_lo == wanted
        || self.arena_hi == wanted
        || self.sda2_base == addr
        || self.sda_base == addr;

    self.known_relocations.contains(&from)
        || is_special
        || (addr > 0x80000000 && addr < 0x80003100)
        || obj.sections.iter().any(|section| {
            let start = section.address as u32;
            let end = (section.address + section.size) as u32;
            addr >= start && addr <= end
        })
}
/// Returns the symbol index for `addr` when it matches one of the tracked
/// special addresses, creating the symbol through
/// `generate_special_symbol` if needed.
///
/// Candidates are checked in a fixed order and the first match wins.
fn special_symbol(&self, obj: &mut ObjInfo, addr: u32) -> Option<usize> {
    let candidates = [
        (self.stack_address, "_stack_addr"),
        (self.stack_end, "_stack_end"),
        (self.arena_lo, "__ArenaLo"),
        (self.arena_hi, "__ArenaHi"),
        (self.db_stack_addr, "_db_stack_addr"),
        (Some(self.sda2_base), "_SDA2_BASE_"),
        (Some(self.sda_base), "_SDA_BASE_"),
    ];
    let wanted = Some(addr);
    candidates
        .iter()
        .find(|(value, _)| *value == wanted)
        .map(|&(_, name)| generate_special_symbol(obj, addr, name))
}
/// Writes the analysis results back into `obj`.
///
/// First renames sections whose names are not known, based on how they were
/// referenced. Then turns every collected relocation into an `ObjReloc`,
/// resolving (or creating) a target symbol for each one.
///
/// When a relocation already exists at the same address it must resolve to
/// the same target address, otherwise an error is returned; it is only
/// overwritten when `replace` is true.
pub fn apply(&self, obj: &mut ObjInfo, replace: bool) -> Result<()> {
// Pass 1: guess names for sections that are not already known.
for section in &mut obj.sections {
if !section.section_known {
if section.kind == ObjSectionKind::Code {
log::info!("Renaming {} to .text", section.name);
section.name = ".text".to_string();
continue;
}
let start = section.address as u32;
let end = (section.address + section.size) as u32;
// Referenced through an SDA base: small data. Any recorded store
// into the range makes it writable (.sbss/.sdata), otherwise it is
// treated as read-only (.sbss2/.sdata2).
if self.sda_to.range(start..end).next().is_some() {
if self.stores_to.range(start..end).next().is_some() {
if section.kind == ObjSectionKind::Bss {
log::info!("Renaming {} to .sbss", section.name);
section.name = ".sbss".to_string();
} else {
log::info!("Renaming {} to .sdata", section.name);
section.name = ".sdata".to_string();
}
} else if section.kind == ObjSectionKind::Bss {
log::info!("Renaming {} to .sbss2", section.name);
section.name = ".sbss2".to_string();
} else {
log::info!("Renaming {} to .sdata2", section.name);
section.name = ".sdata2".to_string();
section.kind = ObjSectionKind::ReadOnlyData;
}
// Referenced through hi/lo address pairs: regular data sections.
} else if self.hal_to.range(start..end).next().is_some() {
if section.kind == ObjSectionKind::Bss {
log::info!("Renaming {} to .bss", section.name);
section.name = ".bss".to_string();
} else if self.stores_to.range(start..end).next().is_some() {
log::info!("Renaming {} to .data", section.name);
section.name = ".data".to_string();
} else {
log::info!("Renaming {} to .rodata", section.name);
section.name = ".rodata".to_string();
section.kind = ObjSectionKind::ReadOnlyData;
}
}
}
}
// One symbol map per section, pushed in section order and later indexed
// by `section.index`.
let mut symbol_maps = Vec::new();
for section in &obj.sections {
symbol_maps.push(obj.build_symbol_map(section.index)?);
}
// Pass 2: materialize every collected relocation.
for (addr, reloc) in &self.relocations {
let addr = *addr;
let (reloc_kind, target) = match *reloc {
Relocation::Ha(v) => (ObjRelocKind::PpcAddr16Ha, v),
Relocation::Hi(v) => (ObjRelocKind::PpcAddr16Hi, v),
Relocation::Lo(v) => (ObjRelocKind::PpcAddr16Lo, v),
Relocation::Sda21(v) => (ObjRelocKind::PpcEmbSda21, v),
Relocation::Rel14(v) => (ObjRelocKind::PpcRel14, v),
Relocation::Rel24(v) => (ObjRelocKind::PpcRel24, v),
Relocation::Absolute(v) => (ObjRelocKind::Absolute, v),
};
// Special addresses get their dedicated symbol with a zero addend.
let (target_symbol, addend) = if let Some(symbol) = self.special_symbol(obj, target) {
(symbol, 0)
} else {
// Relocations whose target lies outside every section are skipped.
let target_section =
match obj.sections.iter().find(|s| {
target >= s.address as u32 && target < (s.address + s.size) as u32
}) {
Some(v) => v,
None => continue,
};
// Try to find a previous sized symbol that encompasses the target
let sym_map = &mut symbol_maps[target_section.index];
let target_symbol = {
let mut result = None;
// Walk symbol addresses downwards, starting at the target itself.
for (&addr, symbol_idxs) in sym_map.range(..=target).rev() {
let symbol_idx = if symbol_idxs.len() == 1 {
symbol_idxs.first().cloned().unwrap()
} else {
// Several symbols share this address: pick the best ranked one.
// The sort key is the negated rank, so the highest rank sorts first.
let mut symbol_idxs = symbol_idxs.clone();
symbol_idxs.sort_by_key(|&symbol_idx| {
let symbol = &obj.symbols[symbol_idx];
let mut rank = match symbol.kind {
ObjSymbolKind::Function | ObjSymbolKind::Object => {
match reloc_kind {
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo => 1,
ObjRelocKind::Absolute
| ObjRelocKind::PpcRel24
| ObjRelocKind::PpcRel14
| ObjRelocKind::PpcEmbSda21 => 2,
}
}
// Label
ObjSymbolKind::Unknown => match reloc_kind {
ObjRelocKind::PpcAddr16Hi
| ObjRelocKind::PpcAddr16Ha
| ObjRelocKind::PpcAddr16Lo
if !symbol.name.starts_with("..") =>
{
3
}
_ => 1,
},
ObjSymbolKind::Section => -1,
};
// Sized symbols get a one-point bonus.
if symbol.size > 0 {
rank += 1;
}
-rank
});
match symbol_idxs.first().cloned() {
Some(v) => v,
None => continue,
}
};
let symbol = &obj.symbols[symbol_idx];
// Exact address match: use it.
if symbol.address == target as u64 {
result = Some(symbol_idx);
break;
}
// The first sized symbol below the target ends the search; it is
// only used when its extent actually covers the target.
if symbol.size > 0 {
if symbol.address + symbol.size > target as u64 {
result = Some(symbol_idx);
}
break;
}
}
result
};
if let Some(symbol_idx) = target_symbol {
let symbol = &obj.symbols[symbol_idx];
// Addend is the offset of the target from the symbol start.
(symbol_idx, target as i64 - symbol.address as i64)
} else {
// Create a new label
let symbol_idx = obj.symbols.len();
obj.symbols.push(ObjSymbol {
name: format!("lbl_{:08X}", target),
demangled_name: None,
address: target as u64,
section: Some(target_section.index),
size: 0,
size_known: false,
flags: Default::default(),
kind: Default::default(),
});
// Register the new label so later relocations can find it.
nested_push(sym_map, target, symbol_idx);
(symbol_idx, 0)
}
};
let reloc = ObjReloc { kind: reloc_kind, address: addr as u64, target_symbol, addend };
// Locate the section containing the relocation site itself.
let section = match obj
.sections
.iter_mut()
.find(|s| addr >= s.address as u32 && addr < (s.address + s.size) as u32)
{
Some(v) => v,
None => bail!(
"Failed to locate source section for relocation @ {:#010X} {:#010X?}",
addr,
reloc
),
};
match section.relocations.iter_mut().find(|r| r.address as u32 == addr) {
Some(v) => {
// An existing relocation at this address must resolve to the same
// final address (symbol address + addend) as the new one.
let iter_symbol = &obj.symbols[v.target_symbol];
let reloc_symbol = &obj.symbols[reloc.target_symbol];
if iter_symbol.address as i64 + v.addend
!= reloc_symbol.address as i64 + reloc.addend
{
bail!(
"Conflicting relocations (target {:#010X}): {:#010X?} != {:#010X?}",
target,
v,
reloc
);
}
if replace {
*v = reloc;
}
}
None => section.relocations.push(reloc),
}
}
Ok(())
}
}
/// Returns true when `ins` is a branch instruction with its LK field set.
fn is_branch_with_link(ins: &Ins) -> bool {
    if !ins.is_branch() {
        return false;
    }
    ins.field_LK()
}
fn data_kind_from_op(op: Opcode) -> DataKind {
match op {
Opcode::Lbz => DataKind::Byte,
Opcode::Lbzu => DataKind::Byte,
Opcode::Lbzux => DataKind::Byte,
Opcode::Lbzx => DataKind::Byte,
Opcode::Lfd => DataKind::Double,
Opcode::Lfdu => DataKind::Double,
Opcode::Lfdux => DataKind::Double,
Opcode::Lfdx => DataKind::Double,
Opcode::Lfs => DataKind::Float,
Opcode::Lfsu => DataKind::Float,
Opcode::Lfsux => DataKind::Float,
Opcode::Lfsx => DataKind::Float,
Opcode::Lha => DataKind::Half,
Opcode::Lhau => DataKind::Half,
Opcode::Lhaux => DataKind::Half,
Opcode::Lhax => DataKind::Half,
Opcode::Lhbrx => DataKind::Half,
Opcode::Lhz => DataKind::Half,
Opcode::Lhzu => DataKind::Half,
Opcode::Lhzux => DataKind::Half,
Opcode::Lhzx => DataKind::Half,
Opcode::Lwz => DataKind::Word,
Opcode::Lwzu => DataKind::Word,
Opcode::Lwzux => DataKind::Word,
Opcode::Lwzx => DataKind::Word,
Opcode::Stb => DataKind::Byte,
Opcode::Stbu => DataKind::Byte,
Opcode::Stbux => DataKind::Byte,
Opcode::Stbx => DataKind::Byte,
Opcode::Stfd => DataKind::Double,
Opcode::Stfdu => DataKind::Double,
Opcode::Stfdux => DataKind::Double,
Opcode::Stfdx => DataKind::Double,
Opcode::Stfiwx => DataKind::Float,
Opcode::Stfs => DataKind::Float,
Opcode::Stfsu => DataKind::Float,
Opcode::Stfsux => DataKind::Float,
Opcode::Stfsx => DataKind::Float,
Opcode::Sth => DataKind::Half,
Opcode::Sthbrx => DataKind::Half,
Opcode::Sthu => DataKind::Half,
Opcode::Sthux => DataKind::Half,
Opcode::Sthx => DataKind::Half,
Opcode::Stw => DataKind::Word,
Opcode::Stwbrx => DataKind::Word,
Opcode::Stwcx_ => DataKind::Word,
Opcode::Stwu => DataKind::Word,
Opcode::Stwux => DataKind::Word,
Opcode::Stwx => DataKind::Word,
_ => DataKind::Unknown,
}
}
/// Returns the index of the symbol called `name`, appending a new symbol at
/// `addr` (all other fields defaulted) when none exists yet.
///
/// An existing symbol is matched by name only; its address is not checked.
fn generate_special_symbol(obj: &mut ObjInfo, addr: u32, name: &str) -> usize {
    match obj.symbols.iter().position(|symbol| symbol.name == name) {
        Some(existing) => existing,
        None => {
            let created = obj.symbols.len();
            obj.symbols.push(ObjSymbol {
                name: name.to_string(),
                address: addr as u64,
                ..Default::default()
            });
            created
        }
    }
}

740
src/util/vm.rs Normal file
View File

@@ -0,0 +1,740 @@
use std::num::NonZeroU32;
use ppc750cl::{Argument, Ins, Opcode, GPR};
use crate::util::obj::ObjInfo;
/// Symbolic value tracked for a register while stepping instructions.
///
/// Defaults to [`GprValue::Unknown`].
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
pub enum GprValue {
#[default]
/// GPR value is unknown
Unknown,
/// GPR value is a constant
Constant(u32),
/// Comparison result (CR field)
///
/// Placeholder written into the left-hand register of a compare; it is
/// replaced with a refined value when a conditional branch on that CR
/// field is stepped.
ComparisonResult(u8),
/// GPR value is within a range
///
/// Produced by rotate-and-mask instructions and by refining a compared
/// register on each side of a conditional branch.
Range { min: u32, max: u32, step: u32 },
/// GPR value is loaded from an address with a max offset (jump table)
///
/// `max_offset` is `None` when the index register had no usable range.
LoadIndexed { address: u32, max_offset: Option<NonZeroU32> },
}
/// Tracked state of one general purpose register: its symbolic value plus
/// the addresses of the instructions that built it, so relocations can be
/// attached to those instructions later.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
pub struct Gpr {
/// The current calculated value
pub value: GprValue,
/// Address that loads the hi part of this GPR
///
/// `None` when no such instruction is tracked.
pub hi_addr: Option<NonZeroU32>,
/// Address that loads the lo part of this GPR
///
/// `None` when no such instruction is tracked.
pub lo_addr: Option<NonZeroU32>,
}
impl Gpr {
fn set_direct(&mut self, value: GprValue) {
self.value = value;
self.hi_addr = None;
self.lo_addr = None;
}
fn set_hi(&mut self, value: GprValue, addr: u32) {
self.value = value;
self.hi_addr = NonZeroU32::new(addr);
self.lo_addr = None;
}
fn set_lo(&mut self, value: GprValue, addr: u32, hi_gpr: Gpr) {
self.value = value;
self.hi_addr = hi_gpr.hi_addr;
self.lo_addr = hi_gpr.lo_addr.or_else(|| NonZeroU32::new(addr));
}
}
/// Operands of the most recent comparison recorded for one condition
/// register field.
#[derive(Default, Debug, Clone, Eq, PartialEq)]
struct Cr {
/// The left-hand value of this comparison
left: GprValue,
/// The right-hand value of this comparison
right: GprValue,
/// Whether this comparison is signed
signed: bool,
}
/// Symbolic machine state: tracked values for the general purpose
/// registers, the condition register fields and the count register.
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct VM {
/// General purpose registers
pub gpr: [Gpr; 32],
/// Condition registers
cr: [Cr; 8],
/// Count register
ctr: GprValue,
}
impl VM {
    /// Returns the tracked value of general purpose register `reg`.
    ///
    /// Panics if `reg` is not a valid index into the 32-entry register file.
    pub fn gpr_value(&self, reg: u8) -> GprValue {
        let index = usize::from(reg);
        self.gpr[index].value
    }
}
/// Destination of a branch instruction as far as the VM can determine it.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum BranchTarget {
/// Unknown branch target (CTR without known value)
Unknown,
/// Branch to LR
Return,
/// Branch to address
Address(u32),
/// Branch to jump table
///
/// `size` is `None` when the extent of the table could not be bounded.
JumpTable { address: u32, size: Option<NonZeroU32> },
}
/// One possible continuation after a branch instruction, together with the
/// VM state that applies when execution goes that way.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Branch {
/// Branch target
pub target: BranchTarget,
/// Branch with link
pub link: bool,
/// VM state for this branch
pub vm: Box<VM>,
}
/// Outcome of stepping a single instruction through the [`VM`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum StepResult {
/// Continue normally
Continue,
/// Load from / store to
///
/// `address` is the computed effective address, `source` the state of
/// the base register and `source_reg` its register number.
LoadStore { address: u32, source: Gpr, source_reg: u8 },
/// Hit illegal instruction
Illegal,
/// Jump without affecting VM state
Jump(BranchTarget),
/// Branch with split VM states
Branch(Vec<Branch>),
}
impl VM {
    /// Creates a boxed VM with every register in its default (unknown) state.
    #[inline]
    pub fn new() -> Box<Self> { Box::default() }

    /// Creates a VM seeded with the object's SDA bases when both are known,
    /// otherwise a default VM.
    #[inline]
    pub fn new_from_obj(obj: &ObjInfo) -> Box<Self> {
        match (obj.sda2_base, obj.sda_base) {
            (Some(sda2_base), Some(sda_base)) => Self::new_with_base(sda2_base, sda_base),
            _ => Self::new(),
        }
    }

    /// Creates a VM with r2 and r13 preset to the given SDA2 / SDA bases.
    #[inline]
    pub fn new_with_base(sda2_base: u32, sda_base: u32) -> Box<Self> {
        let mut vm = Self::new();
        vm.gpr[2].value = GprValue::Constant(sda2_base);
        vm.gpr[13].value = GprValue::Constant(sda_base);
        vm
    }

    /// When calling a function, only preserve SDA bases
    #[inline]
    pub fn clone_for_link(&self) -> Box<Self> {
        let mut vm = Self::new();
        vm.gpr[2].value = self.gpr[2].value;
        vm.gpr[13].value = self.gpr[13].value;
        vm
    }

    /// When returning from a function call, only dedicated
    /// and nonvolatile registers are preserved
    #[inline]
    pub fn clone_for_return(&self) -> Box<Self> {
        let mut vm = Self::new();
        // Dedicated registers
        vm.gpr[1].value = self.gpr[1].value;
        vm.gpr[2].value = self.gpr[2].value;
        vm.gpr[13].value = self.gpr[13].value;
        // Non-volatile registers
        for i in 14..32 {
            vm.gpr[i] = self.gpr[i];
        }
        vm
    }

    /// Clones the complete VM state into a new box.
    #[inline]
    pub fn clone_all(&self) -> Box<Self> { Box::new(self.clone()) }

    /// Steps a single instruction, updating the tracked register state.
    ///
    /// Returns what the caller needs to act on: a load/store with a known
    /// address, an illegal instruction, an unconditional jump, or the set of
    /// branches (each with its own VM state) for calls and conditional
    /// branches. Everything else yields [`StepResult::Continue`].
    pub fn step(&mut self, ins: &Ins) -> StepResult {
        match ins.op {
            Opcode::Illegal => {
                return StepResult::Illegal;
            }
            Opcode::Add => {
                // add rD, rA, rB
                let left = self.gpr[ins.field_rA()].value;
                let right = self.gpr[ins.field_rB()].value;
                let value = match (left, right) {
                    (GprValue::Constant(left), GprValue::Constant(right)) => {
                        GprValue::Constant(left.wrapping_add(right))
                    }
                    _ => GprValue::Unknown,
                };
                self.gpr[ins.field_rD()].set_direct(value);
            }
            Opcode::Addis => {
                // addis rD, rA, SIMM
                let left = if ins.field_rA() == 0 {
                    GprValue::Constant(0)
                } else {
                    self.gpr[ins.field_rA()].value
                };
                let value = match left {
                    GprValue::Constant(value) => {
                        GprValue::Constant(value.wrapping_add((ins.field_simm() as u32) << 16))
                    }
                    _ => GprValue::Unknown,
                };
                if ins.field_rA() == 0 {
                    // lis rD, SIMM
                    self.gpr[ins.field_rD()].set_hi(value, ins.addr);
                } else {
                    self.gpr[ins.field_rD()].set_direct(value);
                }
            }
            Opcode::Addi | Opcode::Addic | Opcode::Addic_ => {
                // addi rD, rA, SIMM
                // addic rD, rA, SIMM
                // addic. rD, rA, SIMM
                let left = if ins.field_rA() == 0 && ins.op == Opcode::Addi {
                    GprValue::Constant(0)
                } else {
                    self.gpr[ins.field_rA()].value
                };
                let value = match left {
                    GprValue::Constant(value) => {
                        GprValue::Constant(value.wrapping_add(ins.field_simm() as u32))
                    }
                    _ => GprValue::Unknown,
                };
                if ins.field_rA() == 0 {
                    // li rD, SIMM
                    self.gpr[ins.field_rD()].set_direct(value);
                } else {
                    self.gpr[ins.field_rD()].set_lo(value, ins.addr, self.gpr[ins.field_rA()]);
                }
            }
            Opcode::Ori => {
                // ori rA, rS, UIMM
                let value = match self.gpr[ins.field_rS()].value {
                    GprValue::Constant(value) => {
                        GprValue::Constant(value | ins.field_uimm() as u32)
                    }
                    _ => GprValue::Unknown,
                };
                self.gpr[ins.field_rA()].set_lo(value, ins.addr, self.gpr[ins.field_rS()]);
            }
            Opcode::Or => {
                // or rA, rS, rB
                if ins.field_rS() == ins.field_rB() {
                    // Register copy
                    self.gpr[ins.field_rA()] = self.gpr[ins.field_rS()];
                } else {
                    let left = self.gpr[ins.field_rS()].value;
                    let right = self.gpr[ins.field_rB()].value;
                    let value = match (left, right) {
                        (GprValue::Constant(left), GprValue::Constant(right)) => {
                            GprValue::Constant(left | right)
                        }
                        _ => GprValue::Unknown,
                    };
                    self.gpr[ins.field_rA()].set_direct(value);
                }
            }
            // cmp [crfD], [L], rA, rB
            // cmpi [crfD], [L], rA, SIMM
            // cmpl [crfD], [L], rA, rB
            // cmpli [crfD], [L], rA, UIMM
            Opcode::Cmp | Opcode::Cmpi | Opcode::Cmpl | Opcode::Cmpli => {
                if ins.field_L() == 0 {
                    let left_reg = ins.field_rA();
                    let left = self.gpr[left_reg].value;
                    let (right, signed) = match ins.op {
                        Opcode::Cmp => (self.gpr[ins.field_rB()].value, true),
                        Opcode::Cmpl => (self.gpr[ins.field_rB()].value, false),
                        Opcode::Cmpi => (GprValue::Constant(ins.field_simm() as u32), true),
                        Opcode::Cmpli => (GprValue::Constant(ins.field_uimm() as u32), false),
                        _ => unreachable!(),
                    };
                    let crf = ins.field_crfD();
                    self.cr[crf] = Cr { signed, left, right };
                    // Mark the compared register so a following conditional
                    // branch on this CR field can refine its value.
                    self.gpr[left_reg].value = GprValue::ComparisonResult(crf as u8);
                }
            }
            // rlwinm rA, rS, SH, MB, ME
            // rlwnm rA, rS, rB, MB, ME
            Opcode::Rlwinm | Opcode::Rlwnm => {
                let value = if let Some(shift) = match ins.op {
                    Opcode::Rlwinm => Some(ins.field_SH() as u32),
                    Opcode::Rlwnm => match self.gpr[ins.field_rB()].value {
                        GprValue::Constant(value) => Some(value),
                        _ => None,
                    },
                    _ => unreachable!(),
                } {
                    let mask = mask_value(ins.field_MB() as u32, ins.field_ME() as u32);
                    match self.gpr[ins.field_rS()].value {
                        GprValue::Constant(value) => {
                            GprValue::Constant(value.rotate_left(shift) & mask)
                        }
                        GprValue::Range { min, max, step } => GprValue::Range {
                            min: min.rotate_left(shift) & mask,
                            max: max.rotate_left(shift) & mask,
                            step: step.rotate_left(shift),
                        },
                        _ => GprValue::Range { min: 0, max: mask, step: 1u32.rotate_left(shift) },
                    }
                } else {
                    GprValue::Unknown
                };
                self.gpr[ins.field_rA()].set_direct(value);
            }
            // b[l][a] target_addr
            // b[c][l][a] BO, BI, target_addr
            // b[c]ctr[l] BO, BI
            // b[c]lr[l] BO, BI
            Opcode::B | Opcode::Bc | Opcode::Bcctr | Opcode::Bclr => {
                // HACK for `bla 0x60` in __OSDBJump
                if ins.op == Opcode::B && ins.field_LK() && ins.field_AA() {
                    return StepResult::Jump(BranchTarget::Unknown);
                }

                let branch_target = match ins.op {
                    Opcode::Bcctr => {
                        match self.ctr {
                            GprValue::Constant(value) => BranchTarget::Address(value),
                            GprValue::LoadIndexed { address, max_offset }
                                // FIXME: avoids treating bctrl indirect calls as jump tables
                                if !ins.field_LK() =>
                            {
                                // Table size is the largest offset plus one 4-byte entry.
                                BranchTarget::JumpTable {
                                    address,
                                    size: max_offset.and_then(|n| n.checked_add(4)),
                                }
                            }
                            _ => BranchTarget::Unknown,
                        }
                    }
                    Opcode::Bclr => BranchTarget::Return,
                    _ => BranchTarget::Address(ins.branch_dest().unwrap()),
                };

                // If branching with link, use function call semantics
                if ins.field_LK() {
                    return StepResult::Branch(vec![
                        Branch {
                            target: BranchTarget::Address(ins.addr + 4),
                            link: false,
                            vm: self.clone_for_return(),
                        },
                        Branch { target: branch_target, link: true, vm: self.clone_for_link() },
                    ]);
                }

                // Branch always
                if ins.op == Opcode::B || ins.field_BO() & 0b10100 == 0b10100 {
                    return StepResult::Jump(branch_target);
                }

                // Branch conditionally
                let mut branches = vec![
                    // Branch not taken
                    Branch {
                        target: BranchTarget::Address(ins.addr + 4),
                        link: false,
                        vm: self.clone_all(),
                    },
                    // Branch taken
                    Branch { target: branch_target, link: ins.field_LK(), vm: self.clone_all() },
                ];

                // Use tracked CR to calculate new register values for branches
                let crf = ins.field_BI() >> 2;
                let crb = (ins.field_BI() & 3) as u8;
                let (f_val, t_val) =
                    split_values_by_crb(crb, self.cr[crf].left, self.cr[crf].right);
                if ins.field_BO() & 0b11110 == 0b00100 {
                    // Branch if false
                    branches[0].vm.set_comparison_result(t_val, crf);
                    branches[1].vm.set_comparison_result(f_val, crf);
                } else if ins.field_BO() & 0b11110 == 0b01100 {
                    // Branch if true
                    branches[0].vm.set_comparison_result(f_val, crf);
                    branches[1].vm.set_comparison_result(t_val, crf);
                }
                return StepResult::Branch(branches);
            }
            // lwzx rD, rA, rB
            Opcode::Lwzx => {
                let left = self.gpr[ins.field_rA()].value;
                let right = self.gpr[ins.field_rB()].value;
                let value = match (left, right) {
                    (GprValue::Constant(address), GprValue::Range { min: _, max, .. })
                        if /*min == 0 &&*/ max < u32::MAX - 4 && max & 3 == 0 =>
                    {
                        GprValue::LoadIndexed { address, max_offset: NonZeroU32::new(max) }
                    }
                    (GprValue::Constant(address), _) => {
                        GprValue::LoadIndexed { address, max_offset: None }
                    }
                    _ => GprValue::Unknown,
                };
                self.gpr[ins.field_rD()].set_direct(value);
            }
            // mtspr SPR, rS
            Opcode::Mtspr => {
                if ins.field_spr() == 9 {
                    // CTR
                    self.ctr = self.gpr[ins.field_rS()].value;
                }
            }
            // mfspr rD, SPR
            Opcode::Mfspr => {
                let value = if ins.field_spr() == 9 {
                    // CTR
                    self.ctr
                } else {
                    GprValue::Unknown
                };
                self.gpr[ins.field_rD()].set_direct(value);
            }
            // rfi
            Opcode::Rfi => {
                return StepResult::Jump(BranchTarget::Unknown);
            }
            op if is_load_store_op(op) => {
                // NOTE(review): for these D-form accesses rA == 0 architecturally
                // means a literal 0 base, but r0's tracked value is used here —
                // confirm whether this matters in practice.
                let source = ins.field_rA();
                let mut result = StepResult::Continue;
                if let GprValue::Constant(base) = self.gpr[source].value {
                    let address = base.wrapping_add(ins.field_simm() as u32);
                    if is_update_op(op) {
                        // Update forms write the effective address back to rA.
                        self.gpr[source].set_lo(
                            GprValue::Constant(address),
                            ins.addr,
                            self.gpr[source],
                        );
                    }
                    result = StepResult::LoadStore {
                        address,
                        source: self.gpr[source],
                        source_reg: source as u8,
                    };
                } else if is_update_op(op) {
                    self.gpr[source].set_direct(GprValue::Unknown);
                }
                if is_load_op(op) {
                    if op == Opcode::Lmw {
                        // lmw rD, d(rA) loads every register from rD through r31,
                        // so all of them lose their tracked value.
                        for gpr in &mut self.gpr[ins.field_rD()..] {
                            gpr.set_direct(GprValue::Unknown);
                        }
                    } else {
                        self.gpr[ins.field_rD()].set_direct(GprValue::Unknown);
                    }
                }
                return result;
            }
            _ => {
                // Any other instruction: forget every GPR it defines.
                for field in ins.defs() {
                    match field.argument() {
                        Some(Argument::GPR(GPR(reg))) => {
                            self.gpr[reg as usize].set_direct(GprValue::Unknown);
                        }
                        _ => {}
                    }
                }
            }
        }
        StepResult::Continue
    }

    /// Replaces the `ComparisonResult(crf)` placeholder in every register
    /// that holds it with `value`.
    #[inline]
    fn set_comparison_result(&mut self, value: GprValue, crf: usize) {
        for gpr in &mut self.gpr {
            if gpr.value == GprValue::ComparisonResult(crf as u8) {
                gpr.value = value;
            }
        }
    }
}
/// Refines the left-hand value of a comparison for both outcomes of a
/// condition register bit, returned as `(value if false, value if true)`.
///
/// `crb` selects the bit: 0 = lt, 1 = gt, 2 = eq, 3 = so. Refinement only
/// happens when `right` is a constant; otherwise (and always for `so`) both
/// results are `left` unchanged. Panics on a `crb` above 3.
fn split_values_by_crb(crb: u8, left: GprValue, right: GprValue) -> (GprValue, GprValue) {
    // Pull out the constant bound once; every other combination is a no-op.
    let bound = match (crb, right) {
        (0..=2, GprValue::Constant(value)) => value,
        (0..=3, _) => return (left, left),
        _ => unreachable!(),
    };
    let below = bound.wrapping_sub(1);
    let above = bound.wrapping_add(1);

    match (crb, left) {
        // lt, known range: clamp both ends against the bound
        (0, GprValue::Range { min, max, step }) => (
            // left >= right
            GprValue::Range { min: min.max(bound), max: max.max(bound), step },
            // left < right
            GprValue::Range { min: min.min(below), max: max.min(below), step },
        ),
        // lt, anything else
        (0, _) => (
            // left >= right
            GprValue::Range { min: bound, max: u32::MAX, step: 1 },
            // left < right
            GprValue::Range { min: 0, max: below, step: 1 },
        ),
        // gt, known range
        (1, GprValue::Range { min, max, step }) => (
            // left <= right
            GprValue::Range { min: min.min(bound), max: max.min(bound), step },
            // left > right
            GprValue::Range { min: min.max(above), max: max.max(above), step },
        ),
        // gt, anything else
        (1, _) => (
            // left <= right
            GprValue::Range { min: 0, max: bound, step: 1 },
            // left > right
            GprValue::Range { min: above, max: u32::MAX, step: 1 },
        ),
        // eq, both sides constant
        (2, GprValue::Constant(known)) => (
            // left != right
            if known == bound { GprValue::Unknown } else { left },
            // left == right
            GprValue::Constant(bound),
        ),
        // eq, anything else
        (2, _) => (
            // left != right
            left,
            // left == right
            GprValue::Constant(bound),
        ),
        _ => unreachable!(),
    }
}
/// Builds the PowerPC rotate-instruction mask `MASK(begin, end)`.
///
/// Bits are numbered big-endian: bit 0 is the most significant bit and bit
/// 31 the least significant. When `begin <= end` the mask has ones in bits
/// `begin..=end`. When `begin > end` the mask wraps around: ones in bits
/// `begin..=31` and `0..=end`, zeros in between.
///
/// Only the low 5 bits of each argument are used, matching the width of the
/// MB / ME instruction fields.
#[inline]
fn mask_value(begin: u32, end: u32) -> u32 {
    let begin = begin & 31;
    let end = end & 31;
    // Ones from bit `begin` down to the least significant bit.
    let from_begin = u32::MAX >> begin;
    // Ones from the most significant bit down to bit `end`.
    let through_end = u32::MAX << (31 - end);
    if begin <= end {
        from_begin & through_end
    } else {
        // Wrapped mask: everything except the gap between `end` and `begin`.
        from_begin | through_end
    }
}
/// Returns true for the integer load opcodes with an immediate
/// displacement, including `lmw`.
#[inline]
pub fn is_load_op(op: Opcode) -> bool {
    match op {
        Opcode::Lbz | Opcode::Lbzu => true,
        Opcode::Lha | Opcode::Lhau => true,
        Opcode::Lhz | Opcode::Lhzu => true,
        Opcode::Lwz | Opcode::Lwzu => true,
        Opcode::Lmw => true,
        _ => false,
    }
}
/// Returns true for the floating-point load opcodes with an immediate
/// displacement.
#[inline]
pub fn is_loadf_op(op: Opcode) -> bool {
    match op {
        Opcode::Lfd | Opcode::Lfdu => true,
        Opcode::Lfs | Opcode::Lfsu => true,
        _ => false,
    }
}
/// Returns true for the integer store opcodes with an immediate
/// displacement, including `stmw`.
#[inline]
pub fn is_store_op(op: Opcode) -> bool {
    match op {
        Opcode::Stb | Opcode::Stbu => true,
        Opcode::Sth | Opcode::Sthu => true,
        Opcode::Stw | Opcode::Stwu => true,
        Opcode::Stmw => true,
        _ => false,
    }
}
/// Returns true for the floating-point store opcodes with an immediate
/// displacement.
#[inline]
pub fn is_storef_op(op: Opcode) -> bool {
    match op {
        Opcode::Stfd | Opcode::Stfdu => true,
        Opcode::Stfs | Opcode::Stfsu => true,
        _ => false,
    }
}
/// Returns true when `op` is any of the integer or floating-point
/// load/store opcodes recognized by the four helper predicates.
#[inline]
pub fn is_load_store_op(op: Opcode) -> bool {
    if is_load_op(op) || is_loadf_op(op) {
        return true;
    }
    is_store_op(op) || is_storef_op(op)
}
/// Returns true for load/store opcodes in their "update" form (both the
/// immediate `…u` and indexed `…ux` variants).
#[inline]
pub fn is_update_op(op: Opcode) -> bool {
    match op {
        // Loads
        Opcode::Lbzu | Opcode::Lbzux => true,
        Opcode::Lfdu | Opcode::Lfdux => true,
        Opcode::Lfsu | Opcode::Lfsux => true,
        Opcode::Lhau | Opcode::Lhaux => true,
        Opcode::Lhzu | Opcode::Lhzux => true,
        Opcode::Lwzu | Opcode::Lwzux => true,
        // Stores
        Opcode::Stbu | Opcode::Stbux => true,
        Opcode::Stfdu | Opcode::Stfdux => true,
        Opcode::Stfsu | Opcode::Stfsux => true,
        Opcode::Sthu | Opcode::Sthux => true,
        Opcode::Stwu | Opcode::Stwux => true,
        _ => false,
    }
}
// #[inline]
// fn is_indexed_load_op(op: Opcode) -> bool {
// matches!(
// op,
// Opcode::Lbzux
// | Opcode::Lbzx
// | Opcode::Lhax
// | Opcode::Lhaux
// | Opcode::Lhzx
// | Opcode::Lhzux
// | Opcode::Lwzx
// | Opcode::Lwzux
// )
// }
#[cfg(test)]
mod tests {
    use super::*;

    /// Jump table dispatch where the index register comes straight out of
    /// `rlwinm` on an unknown value.
    #[test]
    fn test_load_indexed_1() {
        let mut vm = VM::new();
        assert_eq!(vm.step(&Ins::new(0x3cc08052, 0x803dfe28)), StepResult::Continue); // lis r6, -0x7fae
        assert_eq!(vm.step(&Ins::new(0x38c60e18, 0x803dfe30)), StepResult::Continue); // addi r6, r6, 0xe18
        assert_eq!(vm.gpr[6].value, GprValue::Constant(0x80520e18));
        assert_eq!(vm.step(&Ins::new(0x550066fa, 0x803dfe34)), StepResult::Continue); // rlwinm r0, r8, 12, 27, 29
        assert_eq!(vm.gpr[0].value, GprValue::Range { min: 0, max: 28, step: 1 << 12 });
        assert_eq!(vm.step(&Ins::new(0x7d86002e, 0x803dfe3c)), StepResult::Continue); // lwzx r12, r6, r0
        assert_eq!(vm.gpr[12].value, GprValue::LoadIndexed {
            address: 0x80520e18,
            max_offset: NonZeroU32::new(28)
        });
        assert_eq!(vm.step(&Ins::new(0x7d8903a6, 0x803dfe4c)), StepResult::Continue); // mtspr CTR, r12
        assert_eq!(vm.ctr, GprValue::LoadIndexed {
            address: 0x80520e18,
            max_offset: NonZeroU32::new(28)
        });
        assert_eq!(
            vm.step(&Ins::new(0x4e800420, 0x803dfe50)), // bctr
            StepResult::Jump(BranchTarget::JumpTable {
                address: 0x80520e18,
                size: NonZeroU32::new(32)
            })
        );
    }

    /// Same as above, but the table base is completed after the index is
    /// computed.
    #[test]
    fn test_load_indexed_2() {
        let mut vm = VM::new();
        assert_eq!(vm.step(&Ins::new(0x3c808057, 0x80465320)), StepResult::Continue); // lis r4, -0x7fa9
        assert_eq!(vm.step(&Ins::new(0x54600e7a, 0x80465324)), StepResult::Continue); // rlwinm r0, r3, 1, 25, 29
        assert_eq!(vm.gpr[0].value, GprValue::Range { min: 0, max: 124, step: 2 });
        assert_eq!(vm.step(&Ins::new(0x38840f70, 0x80465328)), StepResult::Continue); // addi r4, r4, 0xf70
        assert_eq!(vm.gpr[4].value, GprValue::Constant(0x80570f70));
        assert_eq!(vm.step(&Ins::new(0x7d84002e, 0x80465330)), StepResult::Continue); // lwzx r12, r4, r0
        assert_eq!(vm.gpr[12].value, GprValue::LoadIndexed {
            address: 0x80570f70,
            max_offset: NonZeroU32::new(124)
        });
        assert_eq!(vm.step(&Ins::new(0x7d8903a6, 0x80465340)), StepResult::Continue); // mtspr CTR, r12
        assert_eq!(vm.ctr, GprValue::LoadIndexed {
            address: 0x80570f70,
            max_offset: NonZeroU32::new(124)
        });
        assert_eq!(
            vm.step(&Ins::new(0x4e800420, 0x80465344)), // bctr
            StepResult::Jump(BranchTarget::JumpTable {
                address: 0x80570f70,
                size: NonZeroU32::new(128)
            })
        );
    }

    /// Jump table dispatch where the index range comes from a preceding
    /// compare and conditional branch.
    #[test]
    fn test_load_indexed_3() {
        let mut vm = VM::new();
        assert_eq!(vm.step(&Ins::new(0x28000127, 0x800ed458)), StepResult::Continue); // cmplwi r0, 0x127
        // `vm.cr` is the whole array of CR fields; cmplwi without an explicit
        // crfD writes field 0.
        assert_eq!(vm.cr[0], Cr {
            signed: false,
            left: GprValue::Unknown,
            right: GprValue::Constant(295),
        });

        // When branch isn't taken, we know r0 is <= 295
        let mut false_vm = vm.clone();
        false_vm.gpr[0] =
            Gpr { value: GprValue::Range { min: 0, max: 295, step: 1 }, ..Default::default() };
        // When branch is taken, we know r0 is > 295
        let mut true_vm = vm.clone();
        true_vm.gpr[0] = Gpr {
            value: GprValue::Range { min: 296, max: u32::MAX, step: 1 },
            ..Default::default()
        };
        assert_eq!(
            vm.step(&Ins::new(0x418160bc, 0x800ed45c)), // bgt 0x60bc
            StepResult::Branch(vec![
                Branch {
                    target: BranchTarget::Address(0x800ed460),
                    link: false,
                    vm: false_vm.clone()
                },
                Branch { target: BranchTarget::Address(0x800f3518), link: false, vm: true_vm }
            ])
        );

        // Take the false branch
        let mut vm = false_vm;
        assert_eq!(vm.step(&Ins::new(0x3c608053, 0x800ed460)), StepResult::Continue); // lis r3, -0x7fad
        assert_eq!(vm.step(&Ins::new(0x5400103a, 0x800ed464)), StepResult::Continue); // rlwinm r0, r0, 0x2, 0x0, 0x1d
        assert_eq!(vm.gpr[0].value, GprValue::Range { min: 0, max: 1180, step: 4 });
        assert_eq!(vm.step(&Ins::new(0x3863ef6c, 0x800ed468)), StepResult::Continue); // subi r3, r3, 0x1094
        assert_eq!(vm.gpr[3].value, GprValue::Constant(0x8052ef6c));
        assert_eq!(vm.step(&Ins::new(0x7c63002e, 0x800ed46c)), StepResult::Continue); // lwzx r3, r3, r0
        assert_eq!(vm.gpr[3].value, GprValue::LoadIndexed {
            address: 0x8052ef6c,
            max_offset: NonZeroU32::new(1180)
        });
        assert_eq!(vm.step(&Ins::new(0x7c6903a6, 0x800ed470)), StepResult::Continue); // mtspr CTR, r3
        assert_eq!(vm.ctr, GprValue::LoadIndexed {
            address: 0x8052ef6c,
            max_offset: NonZeroU32::new(1180)
        });
        assert_eq!(
            vm.step(&Ins::new(0x4e800420, 0x800ed474)), // bctr
            StepResult::Jump(BranchTarget::JumpTable {
                address: 0x8052ef6c,
                size: NonZeroU32::new(1184)
            })
        );
    }
}