use std::{ collections::{btree_map::Entry, hash_map, BTreeMap, HashMap}, fs, fs::{DirBuilder, File}, io::Write, path::{Path, PathBuf}, }; use anyhow::{anyhow, bail, Context, Result}; use argp::FromArgs; use serde::{Deserialize, Serialize}; use crate::{ analysis::{ cfa::AnalyzerState, objects::{detect_object_boundaries, detect_strings}, pass::{AnalysisPass, FindSaveRestSleds, FindTRKInterruptVectorTable}, signatures::{apply_signatures, apply_signatures_post}, tracker::Tracker, }, obj::{ split::{is_linker_generated_object, split_obj, update_splits}, ObjDataKind, ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex, }, util::{ asm::write_asm, comment::MWComment, config::{apply_splits, apply_symbols_file, write_splits_file, write_symbols_file}, dep::DepFile, dol::process_dol, elf::{process_elf, write_elf}, file::{buf_writer, map_file, map_reader, touch}, lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit}, map::apply_map_file, rel::process_rel, }, }; #[derive(FromArgs, PartialEq, Debug)] /// Commands for processing DOL files. #[argp(subcommand, name = "dol")] pub struct Args { #[argp(subcommand)] command: SubCommand, } #[derive(FromArgs, PartialEq, Debug)] #[argp(subcommand)] enum SubCommand { Info(InfoArgs), Split(SplitArgs), Diff(DiffArgs), Apply(ApplyArgs), } #[derive(FromArgs, PartialEq, Eq, Debug)] /// Views DOL file information. #[argp(subcommand, name = "info")] pub struct InfoArgs { #[argp(positional)] /// DOL file dol_file: PathBuf, } #[derive(FromArgs, PartialEq, Eq, Debug)] /// Splits a DOL into relocatable objects. #[argp(subcommand, name = "split")] pub struct SplitArgs { #[argp(positional)] /// input configuration file config: PathBuf, #[argp(positional)] /// output directory out_dir: PathBuf, #[argp(switch)] /// skip updating splits & symbol files (for build systems) no_update: bool, } #[derive(FromArgs, PartialEq, Eq, Debug)] /// Diffs symbols in a linked ELF. #[argp(subcommand, name = "diff")] pub struct DiffArgs { #[argp(positional)] /// input configuration file config: PathBuf, #[argp(positional)] /// linked ELF elf_file: PathBuf, #[argp(positional)] /// map file map_file: PathBuf, } #[derive(FromArgs, PartialEq, Eq, Debug)] /// Applies updated symbols from a linked ELF to the project configuration. #[argp(subcommand, name = "apply")] pub struct ApplyArgs { #[argp(positional)] /// input configuration file config: PathBuf, #[argp(positional)] /// linked ELF elf_file: PathBuf, #[argp(positional)] /// map file map_file: PathBuf, } #[inline] fn bool_true() -> bool { true } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ProjectConfig { pub object: PathBuf, pub splits: Option, pub symbols: Option, /// Version of the MW `.comment` section format. /// If not present, no `.comment` sections will be written. pub mw_comment_version: Option, pub modules: Vec, // Analysis options #[serde(default = "bool_true")] pub detect_objects: bool, #[serde(default = "bool_true")] pub detect_strings: bool, #[serde(default = "bool_true")] pub write_asm: bool, #[serde(default = "bool_true")] pub auto_force_files: bool, } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct ModuleConfig { pub object: PathBuf, } #[derive(Serialize, Deserialize, Debug, Clone)] pub struct OutputUnit { pub object: PathBuf, pub name: String, pub autogenerated: bool, } #[derive(Serialize, Deserialize, Debug, Clone, Default)] pub struct OutputConfig { pub units: Vec, } pub fn run(args: Args) -> Result<()> { match args.command { SubCommand::Info(c_args) => info(c_args), SubCommand::Split(c_args) => split(c_args), SubCommand::Diff(c_args) => diff(c_args), SubCommand::Apply(c_args) => apply(c_args), } } fn info(args: InfoArgs) -> Result<()> { let mut obj = process_dol(&args.dol_file)?; apply_signatures(&mut obj)?; let mut state = AnalyzerState::default(); state.detect_functions(&obj)?; log::info!("Discovered {} functions", state.function_slices.len()); FindTRKInterruptVectorTable::execute(&mut state, &obj)?; FindSaveRestSleds::execute(&mut state, &obj)?; state.apply(&mut obj)?; apply_signatures_post(&mut obj)?; println!("{}:", obj.name); println!("Entry point: {:#010X}", obj.entry); println!("\nSections:"); println!("\t{: >10} | {: <10} | {: <10} | {: <10}", "Name", "Address", "Size", "File Off"); for section in &obj.sections { println!( "\t{: >10} | {:#010X} | {: <#10X} | {: <#10X}", section.name, section.address, section.size, section.file_offset ); } println!("\nDiscovered symbols:"); println!("\t{: >23} | {: <10} | {: <10}", "Name", "Address", "Size"); for (_, symbol) in obj.symbols.iter_ordered() { if symbol.name.starts_with('@') || symbol.name.starts_with("fn_") { continue; } if symbol.size_known { println!("\t{: >23} | {:#010X} | {: <#10X}", symbol.name, symbol.address, symbol.size); } else { let size_str = if symbol.section.is_none() { "ABS" } else { "?" }; println!("\t{: >23} | {:#010X} | {: <10}", symbol.name, symbol.address, size_str); } } println!("\n{} discovered functions from exception table", obj.known_functions.len()); Ok(()) } fn split(args: SplitArgs) -> Result<()> { log::info!("Loading {}", args.config.display()); let mut config_file = File::open(&args.config) .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?; let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?; let out_config_path = args.out_dir.join("config.json"); let mut dep = DepFile::new(out_config_path.clone()); log::info!("Loading {}", config.object.display()); let mut obj = process_dol(&config.object)?; dep.push(config.object.clone()); if let Some(comment_version) = config.mw_comment_version { obj.mw_comment = Some(MWComment::new(comment_version)?); } let mut modules = BTreeMap::::new(); for module_config in &config.modules { log::info!("Loading {}", module_config.object.display()); let map = map_file(&module_config.object)?; let rel_obj = process_rel(map_reader(&map))?; match modules.entry(rel_obj.module_id) { Entry::Vacant(e) => e.insert(rel_obj), Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id), }; dep.push(module_config.object.clone()); } if let Some(splits_path) = &config.splits { dep.push(splits_path.clone()); if splits_path.is_file() { let map = map_file(splits_path)?; apply_splits(map_reader(&map), &mut obj)?; } } let mut state = AnalyzerState::default(); if let Some(symbols_path) = &config.symbols { dep.push(symbols_path.clone()); apply_symbols_file(symbols_path, &mut obj)?; } // TODO move before symbols? log::info!("Performing signature analysis"); apply_signatures(&mut obj)?; if !modules.is_empty() { log::info!("Applying module relocations"); for (module_id, module_obj) in modules { for rel_reloc in &module_obj.unresolved_relocations { // TODO also apply inter-module relocations if rel_reloc.module_id != 0 { continue; } let target = rel_reloc.addend; if let Some((symbol_index, symbol)) = obj.symbols.for_relocation(target, rel_reloc.kind)? { let addend = target as i64 - symbol.address as i64; if addend != 0 { bail!( "Module {} relocation to {:#010X} for symbol {} has non-zero addend {:#010X}", module_id, symbol.address, symbol.name, addend ); } obj.symbols.set_externally_referenced(symbol_index, true); } else { // Add label let target_section = obj.section_at(target)?; obj.symbols.add_direct(ObjSymbol { name: format!("lbl_{:08X}", target), demangled_name: None, address: target as u64, section: Some(target_section.index), size: 0, size_known: false, flags: ObjSymbolFlagSet(ObjSymbolFlags::ForceActive.into()), kind: Default::default(), align: None, data_kind: ObjDataKind::Unknown, })?; } } } } log::info!("Detecting function boundaries"); state.detect_functions(&obj)?; log::info!("Discovered {} functions", state.function_slices.len()); FindTRKInterruptVectorTable::execute(&mut state, &obj)?; FindSaveRestSleds::execute(&mut state, &obj)?; state.apply(&mut obj)?; apply_signatures_post(&mut obj)?; log::info!("Performing relocation analysis"); let mut tracker = Tracker::new(&obj); tracker.process(&obj)?; log::info!("Applying relocations"); tracker.apply(&mut obj, false)?; if config.detect_objects { log::info!("Detecting object boundaries"); detect_object_boundaries(&mut obj)?; } if config.detect_strings { log::info!("Detecting strings"); detect_strings(&mut obj)?; } log::info!("Adjusting splits"); update_splits(&mut obj)?; if !args.no_update { if let Some(symbols_path) = &config.symbols { write_symbols_file(symbols_path, &obj)?; } if let Some(splits_path) = &config.splits { write_splits_file(splits_path, &obj)?; } } log::info!("Splitting {} objects", obj.link_order.len()); let split_objs = split_obj(&obj)?; // Create out dirs touch(&args.out_dir)?; let asm_dir = args.out_dir.join("asm"); let include_dir = args.out_dir.join("include"); let obj_dir = args.out_dir.join("obj"); DirBuilder::new().recursive(true).create(&include_dir)?; fs::write(include_dir.join("macros.inc"), include_str!("../../assets/macros.inc"))?; log::info!("Writing object files"); let mut file_map = HashMap::>::new(); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_obj = write_elf(split_obj)?; match file_map.entry(unit.name.clone()) { hash_map::Entry::Vacant(e) => e.insert(out_obj), hash_map::Entry::Occupied(_) => bail!("Duplicate file {}", unit.name), }; } let mut out_config = OutputConfig::default(); for unit in &obj.link_order { let object = file_map .get(&unit.name) .ok_or_else(|| anyhow!("Failed to find object file for unit '{}'", unit.name))?; let out_path = obj_dir.join(obj_path_for_unit(&unit.name)); out_config.units.push(OutputUnit { object: out_path.clone(), name: unit.name.clone(), autogenerated: unit.autogenerated, }); if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } fs::write(&out_path, object) .with_context(|| format!("Failed to write '{}'", out_path.display()))?; } { let mut out_file = buf_writer(&out_config_path)?; serde_json::to_writer_pretty(&mut out_file, &out_config)?; out_file.flush()?; } // Generate ldscript.lcf fs::write( args.out_dir.join("ldscript.lcf"), generate_ldscript(&obj, config.auto_force_files)?, )?; log::info!("Writing disassembly"); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_path = asm_dir.join(asm_path_for_unit(&unit.name)); if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } let mut w = buf_writer(&out_path)?; write_asm(&mut w, split_obj)?; w.flush()?; } // Write dep file { let dep_path = args.out_dir.join("dep"); let mut dep_file = buf_writer(dep_path)?; dep.write(&mut dep_file)?; dep_file.flush()?; } // (debugging) validate against ELF // if let Some(file) = &args.elf_file { // validate(&obj, file, &state)?; // } Ok(()) } #[allow(dead_code)] fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> { let real_obj = process_elf(elf_file)?; for real_section in &real_obj.sections { let obj_section = match obj.sections.get(real_section.index) { Some(v) => v, None => { log::error!( "Section {} {} doesn't exist in DOL", real_section.index, real_section.name ); continue; } }; if obj_section.kind != real_section.kind || obj_section.name != real_section.name { log::warn!( "Section mismatch: {} {:?} ({}) should be {} {:?}", obj_section.name, obj_section.kind, obj_section.index, real_section.name, real_section.kind ); } } let mut real_functions = BTreeMap::::new(); for section in &real_obj.sections { if section.kind != ObjSectionKind::Code { continue; } for (_symbol_idx, symbol) in real_obj.symbols.for_section(section) { real_functions.insert(symbol.address as u32, symbol.name.clone()); match state.function_bounds.get(&(symbol.address as u32)) { Some(&end) => { if symbol.size > 0 && end != (symbol.address + symbol.size) as u32 { log::warn!( "Function {:#010X} ({}) ends at {:#010X}, expected {:#010X}", symbol.address, symbol.name, end, symbol.address + symbol.size ); } } None => { log::warn!( "Function {:#010X} ({}) not discovered!", symbol.address, symbol.name ); } } } } for (&start, &end) in &state.function_bounds { if end == 0 { continue; } if !real_functions.contains_key(&start) { let (real_addr, real_name) = real_functions.range(..start).last().unwrap(); log::warn!( "Function {:#010X} not real (actually a part of {} @ {:#010X})", start, real_name, real_addr ); } } // return Ok(()); // TODO for real_section in &real_obj.sections { let obj_section = match obj.sections.get(real_section.index) { Some(v) => v, None => continue, }; let real_map = real_section.build_relocation_map()?; let obj_map = obj_section.build_relocation_map()?; for (&real_addr, &real_reloc_idx) in &real_map { let real_reloc = &real_section.relocations[real_reloc_idx]; let real_symbol = real_obj.symbols.at(real_reloc.target_symbol); let obj_reloc = match obj_map.get(&real_addr) { Some(v) => &obj_section.relocations[*v], None => { // Ignore GCC local jump branches if real_symbol.kind == ObjSymbolKind::Section && real_section.kind == ObjSectionKind::Code && real_reloc.addend != 0 && matches!( real_reloc.kind, ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24 ) { continue; } log::warn!( "Relocation not found @ {:#010X} {:?} to {:#010X}+{:X} ({})", real_addr, real_reloc.kind, real_symbol.address, real_reloc.addend, real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name) ); continue; } }; let obj_symbol = obj.symbols.at(obj_reloc.target_symbol); if real_reloc.kind != obj_reloc.kind { log::warn!( "Relocation type mismatch @ {:#010X}: {:?} != {:?}", real_addr, obj_reloc.kind, real_reloc.kind ); continue; } if real_symbol.address as i64 + real_reloc.addend != obj_symbol.address as i64 + obj_reloc.addend { log::warn!( "Relocation target mismatch @ {:#010X} {:?}: {:#010X}+{:X} != {:#010X}+{:X} ({})", real_addr, real_reloc.kind, obj_symbol.address, obj_reloc.addend, real_symbol.address, real_reloc.addend, real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name) ); continue; } } for (&obj_addr, &obj_reloc_idx) in &obj_map { let obj_reloc = &obj_section.relocations[obj_reloc_idx]; let obj_symbol = obj.symbols.at(obj_reloc.target_symbol); if !real_map.contains_key(&obj_addr) { log::warn!( "Relocation not real @ {:#010X} {:?} to {:#010X}+{:X} ({})", obj_addr, obj_reloc.kind, obj_symbol.address, obj_reloc.addend, obj_symbol.demangled_name.as_ref().unwrap_or(&obj_symbol.name) ); continue; } } } Ok(()) } fn diff(args: DiffArgs) -> Result<()> { log::info!("Loading {}", args.config.display()); let mut config_file = File::open(&args.config) .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?; let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?; log::info!("Loading {}", config.object.display()); let mut obj = process_dol(&config.object)?; if let Some(symbols_path) = &config.symbols { apply_symbols_file(symbols_path, &mut obj)?; } log::info!("Loading {}", args.elf_file.display()); let mut linked_obj = process_elf(&args.elf_file)?; log::info!("Loading {}", args.map_file.display()); apply_map_file(&args.map_file, &mut linked_obj)?; for orig_sym in obj.symbols.iter() { let linked_sym = linked_obj .symbols .at_address(orig_sym.address as u32) .find(|(_, sym)| sym.name == orig_sym.name) .or_else(|| { linked_obj .symbols .at_address(orig_sym.address as u32) .find(|(_, sym)| sym.kind == orig_sym.kind) }); let mut found = false; if let Some((_, linked_sym)) = linked_sym { if linked_sym.name.starts_with(&orig_sym.name) { if linked_sym.size != orig_sym.size { log::error!( "Expected {} (type {:?}) to have size {:#X}, but found {:#X}", orig_sym.name, orig_sym.kind, orig_sym.size, linked_sym.size ); } found = true; } else if linked_sym.kind == orig_sym.kind && linked_sym.size == orig_sym.size { // Fuzzy match let orig_data = obj .section_data( orig_sym.address as u32, orig_sym.address as u32 + orig_sym.size as u32, )? .1; let linked_data = linked_obj .section_data( linked_sym.address as u32, linked_sym.address as u32 + linked_sym.size as u32, )? .1; if orig_data == linked_data { found = true; } } } if !found { log::error!( "Expected to find symbol {} (type {:?}, size {:#X}) at {:#010X}", orig_sym.name, orig_sym.kind, orig_sym.size, orig_sym.address ); for (_, linked_sym) in linked_obj.symbols.at_address(orig_sym.address as u32) { log::error!( "At {:#010X}, found: {} (type {:?}, size {:#X})", linked_sym.address, linked_sym.name, linked_sym.kind, linked_sym.size, ); } for (_, linked_sym) in linked_obj.symbols.for_name(&orig_sym.name) { log::error!( "Instead, found {} (type {:?}, size {:#X}) at {:#010X}", linked_sym.name, linked_sym.kind, linked_sym.size, linked_sym.address, ); } break; } } Ok(()) } fn apply(args: ApplyArgs) -> Result<()> { log::info!("Loading {}", args.config.display()); let mut config_file = File::open(&args.config) .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?; let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?; log::info!("Loading {}", config.object.display()); let mut obj = process_dol(&config.object)?; if let Some(symbols_path) = &config.symbols { if !apply_symbols_file(symbols_path, &mut obj)? { bail!("Symbols file '{}' does not exist", symbols_path.display()); } } else { bail!("No symbols file specified in config"); } log::info!("Loading {}", args.elf_file.display()); let mut linked_obj = process_elf(&args.elf_file)?; log::info!("Loading {}", args.map_file.display()); apply_map_file(&args.map_file, &mut linked_obj)?; let mut replacements: Vec<(SymbolIndex, Option)> = vec![]; for (orig_idx, orig_sym) in obj.symbols.iter().enumerate() { let linked_sym = linked_obj .symbols .at_address(orig_sym.address as u32) .find(|(_, sym)| sym.name == orig_sym.name) .or_else(|| { linked_obj .symbols .at_address(orig_sym.address as u32) .find(|(_, sym)| sym.kind == orig_sym.kind) }); if let Some((_, linked_sym)) = linked_sym { let mut updated_sym = orig_sym.clone(); let is_globalized = linked_sym.name.ends_with(&format!("_{:08X}", linked_sym.address)); if (is_globalized && !linked_sym.name.starts_with(&orig_sym.name)) || (!is_globalized && linked_sym.name != orig_sym.name) { log::info!( "Changing name of {} (type {:?}) to {}", orig_sym.name, orig_sym.kind, linked_sym.name ); updated_sym.name = linked_sym.name.clone(); } if linked_sym.size != orig_sym.size { log::info!( "Changing size of {} (type {:?}) from {:#X} to {:#X}", orig_sym.name, orig_sym.kind, orig_sym.size, linked_sym.size ); updated_sym.size = linked_sym.size; } let linked_scope = linked_sym.flags.scope(); if linked_scope != ObjSymbolScope::Unknown && !is_globalized && linked_scope != orig_sym.flags.scope() { log::info!( "Changing scope of {} (type {:?}) from {:?} to {:?}", orig_sym.name, orig_sym.kind, orig_sym.flags.scope(), linked_scope ); updated_sym.flags.set_scope(linked_scope); } if updated_sym != *orig_sym { replacements.push((orig_idx, Some(updated_sym))); } } else { log::warn!( "Symbol not in linked ELF: {} (type {:?}, size {:#X}) at {:#010X}", orig_sym.name, orig_sym.kind, orig_sym.size, orig_sym.address ); // TODO // replacements.push((orig_idx, None)); } } // Add symbols from the linked object that aren't in the original for linked_sym in linked_obj.symbols.iter() { if matches!(linked_sym.kind, ObjSymbolKind::Section) || is_linker_generated_object(&linked_sym.name) { continue; } let orig_sym = obj .symbols .at_address(linked_sym.address as u32) .find(|(_, sym)| sym.name == linked_sym.name) .or_else(|| { linked_obj .symbols .at_address(linked_sym.address as u32) .find(|(_, sym)| sym.kind == linked_sym.kind) }); if orig_sym.is_none() { log::info!( "Adding symbol {} (type {:?}, size {:#X}) at {:#010X}", linked_sym.name, linked_sym.kind, linked_sym.size, linked_sym.address ); obj.symbols.add_direct(linked_sym.clone())?; } } // Apply replacements for (idx, replacement) in replacements { if let Some(replacement) = replacement { obj.symbols.replace(idx, replacement)?; } else { // TODO // obj.symbols.remove(idx)?; } } write_symbols_file(config.symbols.as_ref().unwrap(), &obj)?; Ok(()) }