589 lines
22 KiB
Rust
589 lines
22 KiB
Rust
use std::{
|
|
cmp::min,
|
|
collections::BTreeMap,
|
|
fmt::{Debug, Display, Formatter, UpperHex},
|
|
ops::{Add, AddAssign, BitAnd, Sub},
|
|
};
|
|
|
|
use anyhow::{bail, ensure, Context, Result};
|
|
use itertools::Itertools;
|
|
|
|
use crate::{
|
|
analysis::{
|
|
executor::{ExecCbData, ExecCbResult, Executor},
|
|
skip_alignment,
|
|
slices::{FunctionSlices, TailCallResult},
|
|
vm::{BranchTarget, GprValue, StepResult, VM},
|
|
RelocationTarget,
|
|
},
|
|
obj::{ObjInfo, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind},
|
|
};
|
|
|
|
/// A location in an object, identified by a section index and a 32-bit address.
///
/// The derived ordering compares `section` first and `address` second, so all addresses
/// belonging to one section are contiguous when used as `BTreeMap` keys.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SectionAddress {
    /// Index of the section this address belongs to.
    pub section: usize,
    /// The 32-bit address.
    pub address: u32,
}

impl Debug for SectionAddress {
    /// Formats as `section:0xADDR`. The section index is printed as `isize`, so
    /// `usize::MAX` is rendered as `-1`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}:{:#X}", self.section as isize, self.address)
    }
}

impl Display for SectionAddress {
    /// Same text as the [`Debug`] representation.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { Debug::fmt(self, f) }
}

impl SectionAddress {
    /// Creates an address from a section index and a 32-bit address.
    pub fn new(section: usize, address: u32) -> Self { Self { section, address } }

    /// Returns this address moved by a signed `offset`, wrapping around on overflow.
    pub fn offset(self, offset: i32) -> Self {
        Self { section: self.section, address: self.address.wrapping_add_signed(offset) }
    }

    /// Rounds the address up to the next multiple of `align`.
    ///
    /// `align` must be a non-zero power of two. The addition is done with `align - 1`
    /// so that an address whose rounded value still fits in `u32` never trips the
    /// overflow check (e.g. `0xFFFF_FFFC` aligned to 4 stays `0xFFFF_FFFC`).
    pub fn align_up(self, align: u32) -> Self {
        let mask = align - 1;
        Self { section: self.section, address: (self.address + mask) & !mask }
    }

    /// Rounds the address down to a multiple of `align` (a non-zero power of two).
    pub fn align_down(self, align: u32) -> Self {
        Self { section: self.section, address: self.address & !(align - 1) }
    }

    /// Returns `true` if the address is a multiple of `align` (a non-zero power of two).
    pub fn is_aligned(self, align: u32) -> bool { self.address & (align - 1) == 0 }
}

impl Add<u32> for SectionAddress {
    type Output = Self;

    /// Advances the address by `rhs`, keeping the section.
    fn add(self, rhs: u32) -> Self::Output {
        Self { section: self.section, address: self.address + rhs }
    }
}

impl Sub<u32> for SectionAddress {
    type Output = Self;

    /// Moves the address back by `rhs`, keeping the section.
    fn sub(self, rhs: u32) -> Self::Output {
        Self { section: self.section, address: self.address - rhs }
    }
}

impl AddAssign<u32> for SectionAddress {
    /// Advances the address in place by `rhs`.
    fn add_assign(&mut self, rhs: u32) { self.address += rhs; }
}

impl UpperHex for SectionAddress {
    /// Formats as `section:0xADDRESS` with the address always zero-padded to eight hex
    /// digits, regardless of the flags given by the caller.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}:{:#010X}", self.section as isize, self.address)
    }
}

impl BitAnd<u32> for SectionAddress {
    type Output = u32;

    /// Masks the raw address with `rhs`; the section is discarded.
    fn bitand(self, rhs: u32) -> Self::Output { self.address & rhs }
}
|
|
|
|
#[derive(Default, Debug, Clone)]
|
|
pub struct FunctionInfo {
|
|
pub analyzed: bool,
|
|
pub end: Option<SectionAddress>,
|
|
pub slices: Option<FunctionSlices>,
|
|
}
|
|
|
|
impl FunctionInfo {
|
|
pub fn is_analyzed(&self) -> bool { self.analyzed }
|
|
|
|
pub fn is_function(&self) -> bool {
|
|
self.analyzed && self.end.is_some() && self.slices.is_some()
|
|
}
|
|
|
|
pub fn is_non_function(&self) -> bool {
|
|
self.analyzed && self.end.is_none() && self.slices.is_none()
|
|
}
|
|
|
|
pub fn is_unfinalized(&self) -> bool {
|
|
self.analyzed && self.end.is_none() && self.slices.is_some()
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Default)]
|
|
pub struct AnalyzerState {
|
|
pub sda_bases: Option<(u32, u32)>,
|
|
pub functions: BTreeMap<SectionAddress, FunctionInfo>,
|
|
pub jump_tables: BTreeMap<SectionAddress, u32>,
|
|
pub known_symbols: BTreeMap<SectionAddress, ObjSymbol>,
|
|
pub known_sections: BTreeMap<usize, String>,
|
|
}
|
|
|
|
impl AnalyzerState {
|
|
pub fn apply(&self, obj: &mut ObjInfo) -> Result<()> {
|
|
for (§ion_index, section_name) in &self.known_sections {
|
|
obj.sections[section_index].rename(section_name.clone())?;
|
|
}
|
|
for (&start, FunctionInfo { end, .. }) in self.functions.iter() {
|
|
let Some(end) = end else { continue };
|
|
let section = &obj.sections[start.section];
|
|
ensure!(
|
|
section.contains_range(start.address..end.address),
|
|
"Function {:#010X}..{:#010X} out of bounds of section {} {:#010X}..{:#010X}",
|
|
start.address,
|
|
end,
|
|
section.name,
|
|
section.address,
|
|
section.address + section.size
|
|
);
|
|
let name = if obj.module_id == 0 {
|
|
format!("fn_{:08X}", start.address)
|
|
} else {
|
|
format!("fn_{}_{:X}", obj.module_id, start.address)
|
|
};
|
|
obj.add_symbol(
|
|
ObjSymbol {
|
|
name,
|
|
address: start.address as u64,
|
|
section: Some(start.section),
|
|
size: (end.address - start.address) as u64,
|
|
size_known: true,
|
|
kind: ObjSymbolKind::Function,
|
|
..Default::default()
|
|
},
|
|
false,
|
|
)?;
|
|
}
|
|
let mut iter = self.jump_tables.iter().peekable();
|
|
while let Some((&addr, &(mut size))) = iter.next() {
|
|
// Truncate overlapping jump tables
|
|
if let Some((&next_addr, _)) = iter.peek() {
|
|
if next_addr.section == addr.section {
|
|
size = min(size, next_addr.address - addr.address);
|
|
}
|
|
}
|
|
let section = &obj.sections[addr.section];
|
|
ensure!(
|
|
section.contains_range(addr.address..addr.address + size),
|
|
"Jump table {:#010X}..{:#010X} out of bounds of section {} {:#010X}..{:#010X}",
|
|
addr.address,
|
|
addr.address + size,
|
|
section.name,
|
|
section.address,
|
|
section.address + section.size
|
|
);
|
|
let address_str = if obj.module_id == 0 {
|
|
format!("{:08X}", addr.address)
|
|
} else {
|
|
format!(
|
|
"{}_{}_{:X}",
|
|
obj.module_id,
|
|
section.name.trim_start_matches('.'),
|
|
addr.address
|
|
)
|
|
};
|
|
obj.add_symbol(
|
|
ObjSymbol {
|
|
name: format!("jumptable_{}", address_str),
|
|
address: addr.address as u64,
|
|
section: Some(addr.section),
|
|
size: size as u64,
|
|
size_known: true,
|
|
flags: ObjSymbolFlagSet(ObjSymbolFlags::Local.into()),
|
|
kind: ObjSymbolKind::Object,
|
|
..Default::default()
|
|
},
|
|
false,
|
|
)?;
|
|
}
|
|
for (&_addr, symbol) in &self.known_symbols {
|
|
obj.add_symbol(symbol.clone(), true)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
pub fn detect_functions(&mut self, obj: &ObjInfo) -> Result<()> {
|
|
// Apply known functions from extab
|
|
for (&addr, &size) in &obj.known_functions {
|
|
self.functions.insert(addr, FunctionInfo {
|
|
analyzed: false,
|
|
end: size.map(|size| addr + size),
|
|
slices: None,
|
|
});
|
|
}
|
|
// Apply known functions from symbols
|
|
for (_, symbol) in obj.symbols.by_kind(ObjSymbolKind::Function) {
|
|
let Some(section_index) = symbol.section else { continue };
|
|
let addr_ref = SectionAddress::new(section_index, symbol.address as u32);
|
|
self.functions.insert(addr_ref, FunctionInfo {
|
|
analyzed: false,
|
|
end: if symbol.size_known { Some(addr_ref + symbol.size as u32) } else { None },
|
|
slices: None,
|
|
});
|
|
}
|
|
// Also check the beginning of every code section
|
|
for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) {
|
|
self.functions
|
|
.entry(SectionAddress::new(section_index, section.address as u32))
|
|
.or_default();
|
|
}
|
|
|
|
// Process known functions first
|
|
for addr in self.functions.keys().cloned().collect_vec() {
|
|
self.process_function_at(obj, addr)?;
|
|
}
|
|
if let Some(entry) = obj.entry.map(|n| n as u32) {
|
|
// Locate entry function bounds
|
|
let (section_index, _) = obj
|
|
.sections
|
|
.at_address(entry)
|
|
.context(format!("Entry point {:#010X} outside of any section", entry))?;
|
|
self.process_function_at(obj, SectionAddress::new(section_index, entry))?;
|
|
}
|
|
// Locate bounds for referenced functions until none are left
|
|
self.process_functions(obj)?;
|
|
// Final pass(es)
|
|
while self.finalize_functions(obj, true)? {
|
|
self.process_functions(obj)?;
|
|
}
|
|
if self.functions.iter().any(|(_, i)| i.is_unfinalized()) {
|
|
log::error!("Failed to finalize functions:");
|
|
for (addr, info) in self.functions.iter().filter(|(_, i)| i.is_unfinalized()) {
|
|
log::error!(
|
|
" {:#010X}: blocks [{:?}]",
|
|
addr,
|
|
info.slices.as_ref().unwrap().possible_blocks.keys()
|
|
);
|
|
}
|
|
bail!("Failed to finalize functions");
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn finalize_functions(&mut self, obj: &ObjInfo, finalize: bool) -> Result<bool> {
|
|
let mut finalized_any = false;
|
|
let unfinalized = self
|
|
.functions
|
|
.iter()
|
|
.filter_map(|(&addr, info)| {
|
|
if info.is_unfinalized() {
|
|
info.slices.clone().map(|s| (addr, s))
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect_vec();
|
|
for (addr, mut slices) in unfinalized {
|
|
// log::info!("Trying to finalize {:#010X}", addr);
|
|
let Some(function_start) = slices.start() else {
|
|
bail!("Function slice without start @ {:#010X}", addr);
|
|
};
|
|
let function_end = slices.end();
|
|
let mut current = SectionAddress::new(addr.section, 0);
|
|
while let Some((&block, vm)) = slices.possible_blocks.range(current..).next() {
|
|
current = block + 4;
|
|
let vm = vm.clone();
|
|
match slices.check_tail_call(
|
|
obj,
|
|
block,
|
|
function_start,
|
|
function_end,
|
|
&self.functions,
|
|
Some(vm.clone()),
|
|
) {
|
|
TailCallResult::Not => {
|
|
log::trace!("Finalized block @ {:#010X}", block);
|
|
slices.possible_blocks.remove(&block);
|
|
slices.analyze(
|
|
obj,
|
|
block,
|
|
function_start,
|
|
function_end,
|
|
&self.functions,
|
|
Some(vm),
|
|
)?;
|
|
// Start at the beginning of the function again
|
|
current = SectionAddress::new(addr.section, 0);
|
|
}
|
|
TailCallResult::Is => {
|
|
log::trace!("Finalized tail call @ {:#010X}", block);
|
|
slices.possible_blocks.remove(&block);
|
|
slices.function_references.insert(block);
|
|
// Start at the beginning of the function again
|
|
current = SectionAddress::new(addr.section, 0);
|
|
}
|
|
TailCallResult::Possible => {
|
|
if finalize {
|
|
log::trace!(
|
|
"Still couldn't determine {:#010X}, assuming non-tail-call",
|
|
block
|
|
);
|
|
slices.possible_blocks.remove(&block);
|
|
slices.analyze(
|
|
obj,
|
|
block,
|
|
function_start,
|
|
function_end,
|
|
&self.functions,
|
|
Some(vm),
|
|
)?;
|
|
}
|
|
}
|
|
TailCallResult::Error(e) => return Err(e),
|
|
}
|
|
}
|
|
if slices.can_finalize() {
|
|
log::trace!("Finalizing {:#010X}", addr);
|
|
slices.finalize(obj, &self.functions)?;
|
|
for address in slices.function_references.iter().cloned() {
|
|
// Only create functions for code sections
|
|
// Some games use branches to data sections to prevent dead stripping (Mario Party)
|
|
if matches!(obj.sections.get(address.section), Some(section) if section.kind == ObjSectionKind::Code)
|
|
{
|
|
self.functions.entry(address).or_default();
|
|
}
|
|
}
|
|
self.jump_tables.append(&mut slices.jump_table_references.clone());
|
|
let end = slices.end();
|
|
let info = self.functions.get_mut(&addr).unwrap();
|
|
info.analyzed = true;
|
|
info.end = end;
|
|
info.slices = Some(slices.clone());
|
|
finalized_any = true;
|
|
}
|
|
}
|
|
Ok(finalized_any)
|
|
}
|
|
|
|
/// Returns the lowest function address that has not been analyzed yet, if any.
fn first_unbounded_function(&self) -> Option<SectionAddress> {
    self.functions.iter().find_map(|(&addr, info)| (!info.is_analyzed()).then_some(addr))
}
|
|
|
|
fn process_functions(&mut self, obj: &ObjInfo) -> Result<()> {
|
|
loop {
|
|
match self.first_unbounded_function() {
|
|
Some(addr) => {
|
|
log::trace!("Processing {:#010X}", addr);
|
|
self.process_function_at(obj, addr)?;
|
|
}
|
|
None => {
|
|
if !self.finalize_functions(obj, false)? && !self.detect_new_functions(obj)? {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
pub fn process_function_at(&mut self, obj: &ObjInfo, addr: SectionAddress) -> Result<bool> {
|
|
Ok(if let Some(mut slices) = self.process_function(obj, addr)? {
|
|
for address in slices.function_references.iter().cloned() {
|
|
// Only create functions for code sections
|
|
// Some games use branches to data sections to prevent dead stripping (Mario Party)
|
|
if matches!(obj.sections.get(address.section), Some(section) if section.kind == ObjSectionKind::Code)
|
|
{
|
|
self.functions.entry(address).or_default();
|
|
}
|
|
}
|
|
self.jump_tables.append(&mut slices.jump_table_references.clone());
|
|
if slices.can_finalize() {
|
|
slices.finalize(obj, &self.functions)?;
|
|
let info = self.functions.entry(addr).or_default();
|
|
info.analyzed = true;
|
|
info.end = slices.end();
|
|
info.slices = Some(slices);
|
|
} else {
|
|
let info = self.functions.entry(addr).or_default();
|
|
info.analyzed = true;
|
|
info.end = None;
|
|
info.slices = Some(slices);
|
|
}
|
|
true
|
|
} else {
|
|
log::debug!("Not a function @ {:#010X}", addr);
|
|
let info = self.functions.entry(addr).or_default();
|
|
info.analyzed = true;
|
|
info.end = None;
|
|
false
|
|
})
|
|
}
|
|
|
|
fn process_function(
|
|
&mut self,
|
|
obj: &ObjInfo,
|
|
start: SectionAddress,
|
|
) -> Result<Option<FunctionSlices>> {
|
|
let mut slices = FunctionSlices::default();
|
|
let function_end = self.functions.get(&start).and_then(|info| info.end);
|
|
Ok(match slices.analyze(obj, start, start, function_end, &self.functions, None)? {
|
|
true => Some(slices),
|
|
false => None,
|
|
})
|
|
}
|
|
|
|
fn detect_new_functions(&mut self, obj: &ObjInfo) -> Result<bool> {
|
|
let mut new_functions = vec![];
|
|
for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) {
|
|
let section_start = SectionAddress::new(section_index, section.address as u32);
|
|
let section_end = section_start + section.size as u32;
|
|
let mut iter = self.functions.range(section_start..section_end).peekable();
|
|
loop {
|
|
match (iter.next(), iter.peek()) {
|
|
(Some((&first, first_info)), Some(&(&second, second_info))) => {
|
|
let Some(first_end) = first_info.end else { continue };
|
|
if first_end > second {
|
|
bail!("Overlapping functions {}-{} -> {}", first, first_end, second);
|
|
}
|
|
let addr = match skip_alignment(section, first_end, second) {
|
|
Some(addr) => addr,
|
|
None => continue,
|
|
};
|
|
if second > addr {
|
|
log::trace!(
|
|
"Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X}-{:#010X?})",
|
|
addr,
|
|
first.address,
|
|
first_end,
|
|
second.address,
|
|
second_info.end,
|
|
);
|
|
new_functions.push(addr);
|
|
}
|
|
}
|
|
(Some((last, last_info)), None) => {
|
|
let Some(last_end) = last_info.end else { continue };
|
|
if last_end < section_end {
|
|
let addr = match skip_alignment(section, last_end, section_end) {
|
|
Some(addr) => addr,
|
|
None => continue,
|
|
};
|
|
if addr < section_end {
|
|
log::trace!(
|
|
"Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X})",
|
|
addr,
|
|
last.address,
|
|
last_end,
|
|
section_end,
|
|
);
|
|
new_functions.push(addr);
|
|
}
|
|
}
|
|
}
|
|
_ => break,
|
|
}
|
|
}
|
|
}
|
|
let found_new = !new_functions.is_empty();
|
|
for addr in new_functions {
|
|
let opt = self.functions.insert(addr, FunctionInfo::default());
|
|
ensure!(opt.is_none(), "Attempted to detect duplicate function @ {:#010X}", addr);
|
|
}
|
|
Ok(found_new)
|
|
}
|
|
}
|
|
|
|
/// Execute VM from entry point following branches and function calls
|
|
/// until SDA bases are initialized (__init_registers)
|
|
pub fn locate_sda_bases(obj: &mut ObjInfo) -> Result<bool> {
|
|
let Some(entry) = obj.entry else {
|
|
return Ok(false);
|
|
};
|
|
let (section_index, _) = obj
|
|
.sections
|
|
.at_address(entry as u32)
|
|
.context(format!("Entry point {:#010X} outside of any section", entry))?;
|
|
let entry_addr = SectionAddress::new(section_index, entry as u32);
|
|
|
|
let mut executor = Executor::new(obj);
|
|
executor.push(entry_addr, VM::new(), false);
|
|
let result = executor.run(
|
|
obj,
|
|
|ExecCbData { executor, vm, result, ins_addr: _, section: _, ins, block_start: _ }| {
|
|
match result {
|
|
StepResult::Continue | StepResult::LoadStore { .. } => {
|
|
return Ok(ExecCbResult::Continue);
|
|
}
|
|
StepResult::Illegal => bail!("Illegal instruction @ {:#010X}", ins.addr),
|
|
StepResult::Jump(target) => {
|
|
if let BranchTarget::Address(RelocationTarget::Address(addr)) = target {
|
|
return Ok(ExecCbResult::Jump(addr));
|
|
}
|
|
}
|
|
StepResult::Branch(branches) => {
|
|
for branch in branches {
|
|
if let BranchTarget::Address(RelocationTarget::Address(addr)) =
|
|
branch.target
|
|
{
|
|
executor.push(addr, branch.vm, false);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let (GprValue::Constant(sda2_base), GprValue::Constant(sda_base)) =
|
|
(vm.gpr_value(2), vm.gpr_value(13))
|
|
{
|
|
return Ok(ExecCbResult::End((sda2_base, sda_base)));
|
|
}
|
|
|
|
Ok(ExecCbResult::EndBlock)
|
|
},
|
|
)?;
|
|
match result {
|
|
Some((sda2_base, sda_base)) => {
|
|
obj.sda2_base = Some(sda2_base);
|
|
obj.sda_base = Some(sda_base);
|
|
Ok(true)
|
|
}
|
|
None => Ok(false),
|
|
}
|
|
}
|
|
|
|
/// ProDG hardcodes .bss and .sbss section initialization in `entry`
|
|
/// This function locates the memset calls and returns a list of
|
|
/// (address, size) pairs for the .bss sections.
|
|
pub fn locate_bss_memsets(obj: &mut ObjInfo) -> Result<Vec<(u32, u32)>> {
|
|
let mut bss_sections: Vec<(u32, u32)> = Vec::new();
|
|
let Some(entry) = obj.entry else {
|
|
return Ok(bss_sections);
|
|
};
|
|
let (section_index, _) = obj
|
|
.sections
|
|
.at_address(entry as u32)
|
|
.context(format!("Entry point {:#010X} outside of any section", entry))?;
|
|
let entry_addr = SectionAddress::new(section_index, entry as u32);
|
|
|
|
let mut executor = Executor::new(obj);
|
|
executor.push(entry_addr, VM::new(), false);
|
|
executor.run(
|
|
obj,
|
|
|ExecCbData { executor: _, vm, result, ins_addr: _, section: _, ins, block_start: _ }| {
|
|
match result {
|
|
StepResult::Continue | StepResult::LoadStore { .. } => Ok(ExecCbResult::Continue),
|
|
StepResult::Illegal => bail!("Illegal instruction @ {:#010X}", ins.addr),
|
|
StepResult::Jump(_target) => Ok(ExecCbResult::End(())),
|
|
StepResult::Branch(branches) => {
|
|
for branch in branches {
|
|
if branch.link {
|
|
// ProDG bug? Registers are supposed to start at r3
|
|
if let (
|
|
GprValue::Constant(addr),
|
|
GprValue::Constant(value),
|
|
GprValue::Constant(size),
|
|
) = (vm.gpr_value(4), vm.gpr_value(5), vm.gpr_value(6))
|
|
{
|
|
if value == 0 && size > 0 {
|
|
bss_sections.push((addr, size));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(ExecCbResult::Continue)
|
|
}
|
|
}
|
|
},
|
|
)?;
|
|
Ok(bss_sections)
|
|
}
|