use std::{ cmp::min, collections::BTreeMap, fmt::{Debug, Display, Formatter, UpperHex}, ops::{Add, AddAssign, BitAnd, Sub}, }; use anyhow::{bail, ensure, Context, Result}; use itertools::Itertools; use crate::{ analysis::{ executor::{ExecCbData, ExecCbResult, Executor}, skip_alignment, slices::{FunctionSlices, TailCallResult}, vm::{BranchTarget, GprValue, StepResult, VM}, RelocationTarget, }, obj::{ObjInfo, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind}, }; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SectionAddress { pub section: usize, pub address: u32, } impl Debug for SectionAddress { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{:#X}", self.section as isize, self.address) } } impl Display for SectionAddress { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{:#X}", self.section as isize, self.address) } } impl SectionAddress { pub fn new(section: usize, address: u32) -> Self { Self { section, address } } pub fn offset(self, offset: i32) -> Self { Self { section: self.section, address: self.address.wrapping_add_signed(offset) } } pub fn align_up(self, align: u32) -> Self { Self { section: self.section, address: (self.address + align - 1) & !(align - 1) } } pub fn align_down(self, align: u32) -> Self { Self { section: self.section, address: self.address & !(align - 1) } } pub fn is_aligned(self, align: u32) -> bool { self.address & (align - 1) == 0 } } impl Add for SectionAddress { type Output = Self; fn add(self, rhs: u32) -> Self::Output { Self { section: self.section, address: self.address + rhs } } } impl Sub for SectionAddress { type Output = Self; fn sub(self, rhs: u32) -> Self::Output { Self { section: self.section, address: self.address - rhs } } } impl AddAssign for SectionAddress { fn add_assign(&mut self, rhs: u32) { self.address += rhs; } } impl UpperHex for SectionAddress { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}:{:#010X}", self.section as isize, self.address) } } impl BitAnd for SectionAddress { type Output = u32; fn bitand(self, rhs: u32) -> Self::Output { self.address & rhs } } #[derive(Default, Debug, Clone)] pub struct FunctionInfo { pub analyzed: bool, pub end: Option, pub slices: Option, } impl FunctionInfo { pub fn is_analyzed(&self) -> bool { self.analyzed } pub fn is_function(&self) -> bool { self.analyzed && self.end.is_some() && self.slices.is_some() } pub fn is_non_function(&self) -> bool { self.analyzed && self.end.is_none() && self.slices.is_none() } pub fn is_unfinalized(&self) -> bool { self.analyzed && self.end.is_none() && self.slices.is_some() } } #[derive(Debug, Default)] pub struct AnalyzerState { pub sda_bases: Option<(u32, u32)>, pub functions: BTreeMap, pub jump_tables: BTreeMap, pub known_symbols: BTreeMap, pub known_sections: BTreeMap, } impl AnalyzerState { pub fn apply(&self, obj: &mut ObjInfo) -> Result<()> { for (§ion_index, section_name) in &self.known_sections { obj.sections[section_index].rename(section_name.clone())?; } for (&start, FunctionInfo { end, .. }) in self.functions.iter() { let Some(end) = end else { continue }; let section = &obj.sections[start.section]; ensure!( section.contains_range(start.address..end.address), "Function {:#010X}..{:#010X} out of bounds of section {} {:#010X}..{:#010X}", start.address, end, section.name, section.address, section.address + section.size ); let name = if obj.module_id == 0 { format!("fn_{:08X}", start.address) } else { format!("fn_{}_{:X}", obj.module_id, start.address) }; obj.add_symbol( ObjSymbol { name, address: start.address as u64, section: Some(start.section), size: (end.address - start.address) as u64, size_known: true, kind: ObjSymbolKind::Function, ..Default::default() }, false, )?; } let mut iter = self.jump_tables.iter().peekable(); while let Some((&addr, &(mut size))) = iter.next() { // Truncate overlapping jump tables if let Some((&next_addr, _)) = iter.peek() { if next_addr.section == addr.section { size = min(size, next_addr.address - addr.address); } } let section = &obj.sections[addr.section]; ensure!( section.contains_range(addr.address..addr.address + size), "Jump table {:#010X}..{:#010X} out of bounds of section {} {:#010X}..{:#010X}", addr.address, addr.address + size, section.name, section.address, section.address + section.size ); let address_str = if obj.module_id == 0 { format!("{:08X}", addr.address) } else { format!( "{}_{}_{:X}", obj.module_id, section.name.trim_start_matches('.'), addr.address ) }; obj.add_symbol( ObjSymbol { name: format!("jumptable_{}", address_str), address: addr.address as u64, section: Some(addr.section), size: size as u64, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Local.into()), kind: ObjSymbolKind::Object, ..Default::default() }, false, )?; } for (&_addr, symbol) in &self.known_symbols { obj.add_symbol(symbol.clone(), true)?; } Ok(()) } pub fn detect_functions(&mut self, obj: &ObjInfo) -> Result<()> { // Apply known functions from extab for (&addr, &size) in &obj.known_functions { self.functions.insert(addr, FunctionInfo { analyzed: false, end: size.map(|size| addr + size), slices: None, }); } // Apply known functions from symbols for (_, symbol) in obj.symbols.by_kind(ObjSymbolKind::Function) { let Some(section_index) = symbol.section else { continue }; let addr_ref = SectionAddress::new(section_index, symbol.address as u32); self.functions.insert(addr_ref, FunctionInfo { analyzed: false, end: if symbol.size_known { Some(addr_ref + symbol.size as u32) } else { None }, slices: None, }); } // Also check the beginning of every code section for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) { self.functions .entry(SectionAddress::new(section_index, section.address as u32)) .or_default(); } // Process known functions first for addr in self.functions.keys().cloned().collect_vec() { self.process_function_at(obj, addr)?; } if let Some(entry) = obj.entry.map(|n| n as u32) { // Locate entry function bounds let (section_index, _) = obj .sections .at_address(entry) .context(format!("Entry point {:#010X} outside of any section", entry))?; self.process_function_at(obj, SectionAddress::new(section_index, entry))?; } // Locate bounds for referenced functions until none are left self.process_functions(obj)?; // Final pass(es) while self.finalize_functions(obj, true)? { self.process_functions(obj)?; } if self.functions.iter().any(|(_, i)| i.is_unfinalized()) { log::error!("Failed to finalize functions:"); for (addr, info) in self.functions.iter().filter(|(_, i)| i.is_unfinalized()) { log::error!( " {:#010X}: blocks [{:?}]", addr, info.slices.as_ref().unwrap().possible_blocks.keys() ); } bail!("Failed to finalize functions"); } Ok(()) } fn finalize_functions(&mut self, obj: &ObjInfo, finalize: bool) -> Result { let mut finalized_any = false; let unfinalized = self .functions .iter() .filter_map(|(&addr, info)| { if info.is_unfinalized() { info.slices.clone().map(|s| (addr, s)) } else { None } }) .collect_vec(); for (addr, mut slices) in unfinalized { // log::info!("Trying to finalize {:#010X}", addr); let Some(function_start) = slices.start() else { bail!("Function slice without start @ {:#010X}", addr); }; let function_end = slices.end(); let mut current = SectionAddress::new(addr.section, 0); while let Some((&block, vm)) = slices.possible_blocks.range(current..).next() { current = block + 4; let vm = vm.clone(); match slices.check_tail_call( obj, block, function_start, function_end, &self.functions, Some(vm.clone()), ) { TailCallResult::Not => { log::trace!("Finalized block @ {:#010X}", block); slices.possible_blocks.remove(&block); slices.analyze( obj, block, function_start, function_end, &self.functions, Some(vm), )?; // Start at the beginning of the function again current = SectionAddress::new(addr.section, 0); } TailCallResult::Is => { log::trace!("Finalized tail call @ {:#010X}", block); slices.possible_blocks.remove(&block); slices.function_references.insert(block); // Start at the beginning of the function again current = SectionAddress::new(addr.section, 0); } TailCallResult::Possible => { if finalize { log::trace!( "Still couldn't determine {:#010X}, assuming non-tail-call", block ); slices.possible_blocks.remove(&block); slices.analyze( obj, block, function_start, function_end, &self.functions, Some(vm), )?; } } TailCallResult::Error(e) => return Err(e), } } if slices.can_finalize() { log::trace!("Finalizing {:#010X}", addr); slices.finalize(obj, &self.functions)?; for address in slices.function_references.iter().cloned() { // Only create functions for code sections // Some games use branches to data sections to prevent dead stripping (Mario Party) if matches!(obj.sections.get(address.section), Some(section) if section.kind == ObjSectionKind::Code) { self.functions.entry(address).or_default(); } } self.jump_tables.append(&mut slices.jump_table_references.clone()); let end = slices.end(); let info = self.functions.get_mut(&addr).unwrap(); info.analyzed = true; info.end = end; info.slices = Some(slices.clone()); finalized_any = true; } } Ok(finalized_any) } fn first_unbounded_function(&self) -> Option { self.functions.iter().find(|(_, info)| !info.is_analyzed()).map(|(&addr, _)| addr) } fn process_functions(&mut self, obj: &ObjInfo) -> Result<()> { loop { match self.first_unbounded_function() { Some(addr) => { log::trace!("Processing {:#010X}", addr); self.process_function_at(obj, addr)?; } None => { if !self.finalize_functions(obj, false)? && !self.detect_new_functions(obj)? { break; } } } } Ok(()) } pub fn process_function_at(&mut self, obj: &ObjInfo, addr: SectionAddress) -> Result { Ok(if let Some(mut slices) = self.process_function(obj, addr)? { for address in slices.function_references.iter().cloned() { // Only create functions for code sections // Some games use branches to data sections to prevent dead stripping (Mario Party) if matches!(obj.sections.get(address.section), Some(section) if section.kind == ObjSectionKind::Code) { self.functions.entry(address).or_default(); } } self.jump_tables.append(&mut slices.jump_table_references.clone()); if slices.can_finalize() { slices.finalize(obj, &self.functions)?; let info = self.functions.entry(addr).or_default(); info.analyzed = true; info.end = slices.end(); info.slices = Some(slices); } else { let info = self.functions.entry(addr).or_default(); info.analyzed = true; info.end = None; info.slices = Some(slices); } true } else { log::debug!("Not a function @ {:#010X}", addr); let info = self.functions.entry(addr).or_default(); info.analyzed = true; info.end = None; false }) } fn process_function( &mut self, obj: &ObjInfo, start: SectionAddress, ) -> Result> { let mut slices = FunctionSlices::default(); let function_end = self.functions.get(&start).and_then(|info| info.end); Ok(match slices.analyze(obj, start, start, function_end, &self.functions, None)? { true => Some(slices), false => None, }) } fn detect_new_functions(&mut self, obj: &ObjInfo) -> Result { let mut new_functions = vec![]; for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) { let section_start = SectionAddress::new(section_index, section.address as u32); let section_end = section_start + section.size as u32; let mut iter = self.functions.range(section_start..section_end).peekable(); loop { match (iter.next(), iter.peek()) { (Some((&first, first_info)), Some(&(&second, second_info))) => { let Some(first_end) = first_info.end else { continue }; if first_end > second { bail!("Overlapping functions {}-{} -> {}", first, first_end, second); } let addr = match skip_alignment(section, first_end, second) { Some(addr) => addr, None => continue, }; if second > addr { log::trace!( "Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X}-{:#010X?})", addr, first.address, first_end, second.address, second_info.end, ); new_functions.push(addr); } } (Some((last, last_info)), None) => { let Some(last_end) = last_info.end else { continue }; if last_end < section_end { let addr = match skip_alignment(section, last_end, section_end) { Some(addr) => addr, None => continue, }; if addr < section_end { log::trace!( "Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X})", addr, last.address, last_end, section_end, ); new_functions.push(addr); } } } _ => break, } } } let found_new = !new_functions.is_empty(); for addr in new_functions { let opt = self.functions.insert(addr, FunctionInfo::default()); ensure!(opt.is_none(), "Attempted to detect duplicate function @ {:#010X}", addr); } Ok(found_new) } } /// Execute VM from entry point following branches and function calls /// until SDA bases are initialized (__init_registers) pub fn locate_sda_bases(obj: &mut ObjInfo) -> Result { let Some(entry) = obj.entry else { return Ok(false); }; let (section_index, _) = obj .sections .at_address(entry as u32) .context(format!("Entry point {:#010X} outside of any section", entry))?; let entry_addr = SectionAddress::new(section_index, entry as u32); let mut executor = Executor::new(obj); executor.push(entry_addr, VM::new(), false); let result = executor.run( obj, |ExecCbData { executor, vm, result, ins_addr, section: _, ins: _, block_start: _ }| { match result { StepResult::Continue | StepResult::LoadStore { .. } => { return Ok(ExecCbResult::Continue); } StepResult::Illegal => bail!("Illegal instruction @ {}", ins_addr), StepResult::Jump(target) => { if let BranchTarget::Address(RelocationTarget::Address(addr)) = target { return Ok(ExecCbResult::Jump(addr)); } } StepResult::Branch(branches) => { for branch in branches { if let BranchTarget::Address(RelocationTarget::Address(addr)) = branch.target { executor.push(addr, branch.vm, false); } } } } if let (GprValue::Constant(sda2_base), GprValue::Constant(sda_base)) = (vm.gpr_value(2), vm.gpr_value(13)) { return Ok(ExecCbResult::End((sda2_base, sda_base))); } Ok(ExecCbResult::EndBlock) }, )?; match result { Some((sda2_base, sda_base)) => { obj.sda2_base = Some(sda2_base); obj.sda_base = Some(sda_base); Ok(true) } None => Ok(false), } } /// ProDG hardcodes .bss and .sbss section initialization in `entry` /// This function locates the memset calls and returns a list of /// (address, size) pairs for the .bss sections. pub fn locate_bss_memsets(obj: &mut ObjInfo) -> Result> { let mut bss_sections: Vec<(u32, u32)> = Vec::new(); let Some(entry) = obj.entry else { return Ok(bss_sections); }; let (section_index, _) = obj .sections .at_address(entry as u32) .context(format!("Entry point {:#010X} outside of any section", entry))?; let entry_addr = SectionAddress::new(section_index, entry as u32); let mut executor = Executor::new(obj); executor.push(entry_addr, VM::new(), false); executor.run( obj, |ExecCbData { executor: _, vm, result, ins_addr, section: _, ins: _, block_start: _ }| { match result { StepResult::Continue | StepResult::LoadStore { .. } => Ok(ExecCbResult::Continue), StepResult::Illegal => bail!("Illegal instruction @ {}", ins_addr), StepResult::Jump(_target) => Ok(ExecCbResult::End(())), StepResult::Branch(branches) => { for branch in branches { if branch.link { // ProDG bug? Registers are supposed to start at r3 if let ( GprValue::Constant(addr), GprValue::Constant(value), GprValue::Constant(size), ) = (vm.gpr_value(4), vm.gpr_value(5), vm.gpr_value(6)) { if value == 0 && size > 0 { bss_sections.push((addr, size)); } } } } Ok(ExecCbResult::Continue) } } }, )?; Ok(bss_sections) }