Analyzer fixes galore

- Transparent NLZSS decompression (add `:nlzss` to path; see the sketch below)
- Overhaul portions of the analyzer to support more games
- Reject some invalid data relocations automatically
- Jump table analysis fixes
Luke Street 2023-09-13 02:08:51 -04:00
parent 50f913c4eb
commit d4ef1ce16a
29 changed files with 1533 additions and 669 deletions
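Regarding the first bullet: the dependency changes below add the `nintendo-lz` crate, which provides Nintendo LZSS (LZ10/LZ11) decompression. The snippet that follows is a minimal sketch of how a `:nlzss` path marker can be handled, not the code from this commit; `read_maybe_nlzss` is a hypothetical helper, and the reader-based `nintendo_lz::decompress` call is an assumption about that crate's API that should be checked against its docs.

```rust
// Hypothetical sketch (not the actual decomp-toolkit implementation): strip a
// trailing ":nlzss" marker from a user-supplied path and transparently
// decompress the underlying file in memory before it is parsed.
// Assumption: nintendo-lz exposes a reader-based `decompress` returning the
// decompressed bytes.
use std::{fs::File, io::BufReader};

use anyhow::{anyhow, Context, Result};

fn read_maybe_nlzss(path: &str) -> Result<Vec<u8>> {
    if let Some(real_path) = path.strip_suffix(":nlzss") {
        // ":nlzss" requested: open the underlying file and decompress it.
        let file = File::open(real_path)
            .with_context(|| format!("Failed to open '{}'", real_path))?;
        let mut reader = BufReader::new(file);
        nintendo_lz::decompress(&mut reader)
            .map_err(|e| anyhow!("NLZSS decompression of '{}' failed: {}", real_path, e))
    } else {
        // No marker: return the raw file contents unchanged.
        std::fs::read(path).with_context(|| format!("Failed to read '{}'", path))
    }
}
```

Per the commit message, the intent is that any path accepted by the tool can opt into decompression simply by appending the marker, with no separate decompression step.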

Cargo.lock (generated)

@ -37,6 +37,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anyhow"
version = "1.0.71"
@ -225,6 +234,21 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "cpufeatures"
version = "0.2.5"
@ -307,7 +331,7 @@ dependencies = [
[[package]]
name = "decomp-toolkit"
version = "0.4.2"
version = "0.5.0"
dependencies = [
"anyhow",
"ar",
@ -330,6 +354,7 @@ dependencies = [
"memchr",
"memmap2",
"multimap",
"nintendo-lz",
"num_enum",
"object 0.31.1",
"once_cell",
@ -613,6 +638,16 @@ dependencies = [
"serde",
]
[[package]]
name = "nintendo-lz"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "768b95cd65a1a8b82d6c7e90a69d080b20996a876cd62987ab5bcb350c5ae646"
dependencies = [
"byteorder",
"clap",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@ -985,6 +1020,12 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "supports-color"
version = "1.3.1"
@ -1017,6 +1058,15 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.37"
@ -1157,6 +1207,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.4"


@ -3,7 +3,7 @@ name = "decomp-toolkit"
description = "Yet another GameCube/Wii decompilation toolkit."
authors = ["Luke Street <luke@street.dev>"]
license = "MIT OR Apache-2.0"
version = "0.4.2"
version = "0.5.0"
edition = "2021"
publish = false
build = "build.rs"
@ -42,6 +42,7 @@ log = "0.4.19"
memchr = "2.5.0"
memmap2 = "0.7.1"
multimap = "0.9.0"
nintendo-lz = "0.1.3"
num_enum = "0.6.1"
object = { version = "0.31.1", features = ["read_core", "std", "elf", "write_std"], default-features = false }
once_cell = "1.18.0"


@ -14,8 +14,5 @@ SECTIONS
FORCEACTIVE
{
_unresolved
_prolog
_epilog
$FORCEACTIVE
}


@ -1,10 +1,12 @@
use std::{
collections::{BTreeMap, BTreeSet},
cmp::min,
collections::BTreeMap,
fmt::{Debug, Display, Formatter, UpperHex},
ops::{Add, AddAssign, BitAnd, Sub},
};
use anyhow::{bail, ensure, Context, Result};
use itertools::Itertools;
use crate::{
analysis::{
@ -12,6 +14,7 @@ use crate::{
skip_alignment,
slices::{FunctionSlices, TailCallResult},
vm::{BranchTarget, GprValue, StepResult, VM},
RelocationTarget,
},
obj::{ObjInfo, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind},
};
@ -36,6 +39,20 @@ impl Display for SectionAddress {
impl SectionAddress {
pub fn new(section: usize, address: u32) -> Self { Self { section, address } }
pub fn offset(self, offset: i32) -> Self {
Self { section: self.section, address: self.address.wrapping_add_signed(offset) }
}
pub fn align_up(self, align: u32) -> Self {
Self { section: self.section, address: (self.address + align - 1) & !(align - 1) }
}
pub fn align_down(self, align: u32) -> Self {
Self { section: self.section, address: self.address & !(align - 1) }
}
pub fn is_aligned(self, align: u32) -> bool { self.address & (align - 1) == 0 }
}
impl Add<u32> for SectionAddress {
@ -70,16 +87,36 @@ impl BitAnd<u32> for SectionAddress {
fn bitand(self, rhs: u32) -> Self::Output { self.address & rhs }
}
#[derive(Default, Debug, Clone)]
pub struct FunctionInfo {
pub analyzed: bool,
pub end: Option<SectionAddress>,
pub slices: Option<FunctionSlices>,
}
impl FunctionInfo {
pub fn is_analyzed(&self) -> bool { self.analyzed }
pub fn is_function(&self) -> bool {
self.analyzed && self.end.is_some() && self.slices.is_some()
}
pub fn is_non_function(&self) -> bool {
self.analyzed && self.end.is_none() && self.slices.is_none()
}
pub fn is_unfinalized(&self) -> bool {
self.analyzed && self.end.is_none() && self.slices.is_some()
}
}
#[derive(Debug, Default)]
pub struct AnalyzerState {
pub sda_bases: Option<(u32, u32)>,
pub function_entries: BTreeSet<SectionAddress>,
pub function_bounds: BTreeMap<SectionAddress, Option<SectionAddress>>,
pub function_slices: BTreeMap<SectionAddress, FunctionSlices>,
pub functions: BTreeMap<SectionAddress, FunctionInfo>,
pub jump_tables: BTreeMap<SectionAddress, u32>,
pub known_symbols: BTreeMap<SectionAddress, ObjSymbol>,
pub known_sections: BTreeMap<usize, String>,
pub non_finalized_functions: BTreeMap<SectionAddress, FunctionSlices>,
}
impl AnalyzerState {
@ -87,7 +124,7 @@ impl AnalyzerState {
for (&section_index, section_name) in &self.known_sections {
obj.sections[section_index].rename(section_name.clone())?;
}
for (&start, &end) in &self.function_bounds {
for (&start, FunctionInfo { end, .. }) in self.functions.iter() {
let Some(end) = end else { continue };
let section = &obj.sections[start.section];
ensure!(
@ -120,7 +157,14 @@ impl AnalyzerState {
false,
)?;
}
for (&addr, &size) in &self.jump_tables {
let mut iter = self.jump_tables.iter().peekable();
while let Some((&addr, &(mut size))) = iter.next() {
// Truncate overlapping jump tables
if let Some((&next_addr, _)) = iter.peek() {
if next_addr.section == addr.section {
size = min(size, next_addr.address - addr.address);
}
}
let section = &obj.sections[addr.section];
ensure!(
section.contains_range(addr.address..addr.address + size),
@ -166,27 +210,31 @@ impl AnalyzerState {
pub fn detect_functions(&mut self, obj: &ObjInfo) -> Result<()> {
// Apply known functions from extab
for (&addr, &size) in &obj.known_functions {
self.function_entries.insert(addr);
self.function_bounds.insert(addr, Some(addr + size));
self.functions.insert(addr, FunctionInfo {
analyzed: false,
end: size.map(|size| addr + size),
slices: None,
});
}
// Apply known functions from symbols
for (_, symbol) in obj.symbols.by_kind(ObjSymbolKind::Function) {
let Some(section_index) = symbol.section else { continue };
let addr_ref = SectionAddress::new(section_index, symbol.address as u32);
self.function_entries.insert(addr_ref);
if symbol.size_known {
self.function_bounds.insert(addr_ref, Some(addr_ref + symbol.size as u32));
}
self.functions.insert(addr_ref, FunctionInfo {
analyzed: false,
end: if symbol.size_known { Some(addr_ref + symbol.size as u32) } else { None },
slices: None,
});
}
// Also check the beginning of every code section
for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) {
self.function_entries
.insert(SectionAddress::new(section_index, section.address as u32));
self.functions
.entry(SectionAddress::new(section_index, section.address as u32))
.or_default();
}
// Process known functions first
let known_functions = self.function_entries.clone();
for addr in known_functions {
for addr in self.functions.keys().cloned().collect_vec() {
self.process_function_at(obj, addr)?;
}
if let Some(entry) = obj.entry.map(|n| n as u32) {
@ -203,26 +251,46 @@ impl AnalyzerState {
while self.finalize_functions(obj, true)? {
self.process_functions(obj)?;
}
if self.functions.iter().any(|(_, i)| i.is_unfinalized()) {
log::error!("Failed to finalize functions:");
for (addr, _) in self.functions.iter().filter(|(_, i)| i.is_unfinalized()) {
log::error!(" {:#010X}", addr);
}
bail!("Failed to finalize functions");
}
Ok(())
}
fn finalize_functions(&mut self, obj: &ObjInfo, finalize: bool) -> Result<bool> {
let mut finalized = Vec::new();
for (&addr, slices) in &mut self.non_finalized_functions {
let mut finalized_any = false;
let unfinalized = self
.functions
.iter()
.filter_map(|(&addr, info)| {
if info.is_unfinalized() {
info.slices.clone().map(|s| (addr, s))
} else {
None
}
})
.collect_vec();
for (addr, mut slices) in unfinalized {
// log::info!("Trying to finalize {:#010X}", addr);
let Some(function_start) = slices.start() else {
bail!("Function slice without start @ {:#010X}", addr);
};
let function_end = slices.end();
let mut current = SectionAddress::new(addr.section, 0);
while let Some(&block) = slices.possible_blocks.range(current + 4..).next() {
current = block;
while let Some((&block, vm)) = slices.possible_blocks.range(current..).next() {
current = block + 4;
let vm = vm.clone();
match slices.check_tail_call(
obj,
block,
function_start,
function_end,
&self.function_entries,
&self.functions,
Some(vm.clone()),
) {
TailCallResult::Not => {
log::trace!("Finalized block @ {:#010X}", block);
@ -232,7 +300,8 @@ impl AnalyzerState {
block,
function_start,
function_end,
&self.function_entries,
&self.functions,
Some(vm),
)?;
}
TailCallResult::Is => {
@ -252,7 +321,8 @@ impl AnalyzerState {
block,
function_start,
function_end,
&self.function_entries,
&self.functions,
Some(vm),
)?;
}
}
@ -261,55 +331,24 @@ impl AnalyzerState {
}
if slices.can_finalize() {
log::trace!("Finalizing {:#010X}", addr);
slices.finalize(obj, &self.function_entries)?;
self.function_entries.append(&mut slices.function_references.clone());
slices.finalize(obj, &self.functions)?;
for address in slices.function_references.iter().cloned() {
self.functions.entry(address).or_default();
}
self.jump_tables.append(&mut slices.jump_table_references.clone());
let end = slices.end();
self.function_bounds.insert(addr, end);
self.function_slices.insert(addr, slices.clone());
finalized.push(addr);
let info = self.functions.get_mut(&addr).unwrap();
info.analyzed = true;
info.end = end;
info.slices = Some(slices.clone());
finalized_any = true;
}
}
let finalized_new = !finalized.is_empty();
for addr in finalized {
self.non_finalized_functions.remove(&addr);
}
Ok(finalized_new)
Ok(finalized_any)
}
fn first_unbounded_function(&self) -> Option<SectionAddress> {
let mut entries_iter = self.function_entries.iter().cloned();
let mut bounds_iter = self.function_bounds.keys().cloned();
let mut entry = entries_iter.next();
let mut bound = bounds_iter.next();
loop {
match (entry, bound) {
(Some(a), Some(b)) => {
if b < a {
bound = bounds_iter.next();
continue;
} else if a != b {
if self.non_finalized_functions.contains_key(&a) {
entry = entries_iter.next();
continue;
} else {
break Some(a);
}
}
}
(Some(a), None) => {
if self.non_finalized_functions.contains_key(&a) {
entry = entries_iter.next();
continue;
} else {
break Some(a);
}
}
_ => break None,
}
entry = entries_iter.next();
bound = bounds_iter.next();
}
self.functions.iter().find(|(_, info)| !info.is_analyzed()).map(|(&addr, _)| addr)
}
fn process_functions(&mut self, obj: &ObjInfo) -> Result<()> {
@ -330,26 +369,29 @@ impl AnalyzerState {
}
pub fn process_function_at(&mut self, obj: &ObjInfo, addr: SectionAddress) -> Result<bool> {
// if addr == 0 || addr == 0xFFFFFFFF {
// log::warn!("Tried to detect @ {:#010X}", addr);
// self.function_bounds.insert(addr, 0);
// return Ok(false);
// }
Ok(if let Some(mut slices) = self.process_function(obj, addr)? {
self.function_entries.insert(addr);
self.function_entries.append(&mut slices.function_references.clone());
for address in slices.function_references.iter().cloned() {
self.functions.entry(address).or_default();
}
self.jump_tables.append(&mut slices.jump_table_references.clone());
if slices.can_finalize() {
slices.finalize(obj, &self.function_entries)?;
self.function_bounds.insert(addr, slices.end());
self.function_slices.insert(addr, slices);
slices.finalize(obj, &self.functions)?;
let info = self.functions.entry(addr).or_default();
info.analyzed = true;
info.end = slices.end();
info.slices = Some(slices);
} else {
self.non_finalized_functions.insert(addr, slices);
let info = self.functions.entry(addr).or_default();
info.analyzed = true;
info.end = None;
info.slices = Some(slices);
}
true
} else {
log::debug!("Not a function @ {:#010X}", addr);
self.function_bounds.insert(addr, None);
let info = self.functions.entry(addr).or_default();
info.analyzed = true;
info.end = None;
false
})
}
@ -360,58 +402,58 @@ impl AnalyzerState {
start: SectionAddress,
) -> Result<Option<FunctionSlices>> {
let mut slices = FunctionSlices::default();
let function_end = self.function_bounds.get(&start).cloned().flatten();
Ok(match slices.analyze(obj, start, start, function_end, &self.function_entries)? {
let function_end = self.functions.get(&start).and_then(|info| info.end);
Ok(match slices.analyze(obj, start, start, function_end, &self.functions, None)? {
true => Some(slices),
false => None,
})
}
fn detect_new_functions(&mut self, obj: &ObjInfo) -> Result<bool> {
let mut found_new = false;
let mut new_functions = vec![];
for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) {
let section_start = SectionAddress::new(section_index, section.address as u32);
let section_end = section_start + section.size as u32;
let mut iter = self.function_bounds.range(section_start..section_end).peekable();
let mut iter = self.functions.range(section_start..section_end).peekable();
loop {
match (iter.next(), iter.peek()) {
(Some((&first_begin, &first_end)), Some(&(&second_begin, &second_end))) => {
let Some(first_end) = first_end else { continue };
if first_end > second_begin {
continue;
(Some((&first, first_info)), Some(&(&second, second_info))) => {
let Some(first_end) = first_info.end else { continue };
if first_end > second {
bail!("Overlapping functions {}-{} -> {}", first, first_end, second);
}
let addr = match skip_alignment(section, first_end, second_begin) {
let addr = match skip_alignment(section, first_end, second) {
Some(addr) => addr,
None => continue,
};
if second_begin > addr && self.function_entries.insert(addr) {
if second > addr {
log::trace!(
"Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X}-{:#010X?})",
addr,
first_begin,
first.address,
first_end,
second_begin,
second_end,
second.address,
second_info.end,
);
found_new = true;
new_functions.push(addr);
}
}
(Some((&last_begin, &last_end)), None) => {
let Some(last_end) = last_end else { continue };
(Some((last, last_info)), None) => {
let Some(last_end) = last_info.end else { continue };
if last_end < section_end {
let addr = match skip_alignment(section, last_end, section_end) {
Some(addr) => addr,
None => continue,
};
if addr < section_end && self.function_entries.insert(addr) {
log::debug!(
if addr < section_end {
log::trace!(
"Trying function @ {:#010X} (from {:#010X}-{:#010X} <-> {:#010X})",
addr,
last_begin,
last.address,
last_end,
section_end,
);
found_new = true;
new_functions.push(addr);
}
}
}
@ -419,6 +461,11 @@ impl AnalyzerState {
}
}
}
let found_new = !new_functions.is_empty();
for addr in new_functions {
let opt = self.functions.insert(addr, FunctionInfo::default());
ensure!(opt.is_none(), "Attempted to detect duplicate function @ {:#010X}", addr);
}
Ok(found_new)
}
}
@ -446,13 +493,15 @@ pub fn locate_sda_bases(obj: &mut ObjInfo) -> Result<bool> {
}
StepResult::Illegal => bail!("Illegal instruction @ {:#010X}", ins.addr),
StepResult::Jump(target) => {
if let BranchTarget::Address(addr) = target {
if let BranchTarget::Address(RelocationTarget::Address(addr)) = target {
return Ok(ExecCbResult::Jump(addr));
}
}
StepResult::Branch(branches) => {
for branch in branches {
if let BranchTarget::Address(addr) = branch.target {
if let BranchTarget::Address(RelocationTarget::Address(addr)) =
branch.target
{
executor.push(addr, branch.vm, false);
}
}


@ -1,6 +1,6 @@
use std::{collections::BTreeSet, num::NonZeroU32};
use anyhow::{anyhow, ensure, Context, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use ppc750cl::Ins;
use crate::{
@ -35,13 +35,15 @@ fn read_unresolved_relocation_address(
section: &ObjSection,
address: u32,
reloc_kind: Option<ObjRelocKind>,
) -> Result<Option<SectionAddress>> {
) -> Result<Option<RelocationTarget>> {
if let Some(reloc) = obj
.unresolved_relocations
.iter()
.find(|reloc| reloc.section as usize == section.elf_index && reloc.address == address)
{
ensure!(reloc.module_id == obj.module_id);
if reloc.module_id != obj.module_id {
return Ok(Some(RelocationTarget::External));
}
if let Some(reloc_kind) = reloc_kind {
ensure!(reloc.kind == reloc_kind);
}
@ -52,10 +54,10 @@ fn read_unresolved_relocation_address(
reloc.target_section
)
})?;
Ok(Some(SectionAddress {
Ok(Some(RelocationTarget::Address(SectionAddress {
section: target_section_index,
address: target_section.address as u32 + reloc.addend,
}))
})))
} else {
Ok(None)
}
@ -66,7 +68,7 @@ fn read_relocation_address(
section: &ObjSection,
address: u32,
reloc_kind: Option<ObjRelocKind>,
) -> Result<Option<SectionAddress>> {
) -> Result<Option<RelocationTarget>> {
let Some(reloc) = section.relocations.at(address) else {
return Ok(None);
};
@ -74,13 +76,13 @@ fn read_relocation_address(
ensure!(reloc.kind == reloc_kind);
}
let symbol = &obj.symbols[reloc.target_symbol];
let section_index = symbol.section.with_context(|| {
format!("Symbol '{}' @ {:#010X} missing section", symbol.name, symbol.address)
})?;
Ok(Some(SectionAddress {
let Some(section_index) = symbol.section else {
return Ok(Some(RelocationTarget::External));
};
Ok(Some(RelocationTarget::Address(SectionAddress {
section: section_index,
address: (symbol.address as i64 + reloc.addend) as u32,
}))
})))
}
pub fn read_address(obj: &ObjInfo, section: &ObjSection, address: u32) -> Result<SectionAddress> {
@ -94,7 +96,11 @@ pub fn read_address(obj: &ObjInfo, section: &ObjSection, address: u32) -> Result
Some(ObjRelocKind::Absolute),
)?;
}
opt.with_context(|| {
opt.and_then(|t| match t {
RelocationTarget::Address(addr) => Some(addr),
RelocationTarget::External => None,
})
.with_context(|| {
format!("Failed to find relocation for {:#010X} in section {}", address, section.name)
})
} else {
@ -109,12 +115,18 @@ fn is_valid_jump_table_addr(obj: &ObjInfo, addr: SectionAddress) -> bool {
!matches!(obj.sections[addr.section].kind, ObjSectionKind::Code | ObjSectionKind::Bss)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RelocationTarget {
Address(SectionAddress),
External,
}
#[inline(never)]
pub fn relocation_target_for(
obj: &ObjInfo,
addr: SectionAddress,
reloc_kind: Option<ObjRelocKind>,
) -> Result<Option<SectionAddress>> {
) -> Result<Option<RelocationTarget>> {
let section = &obj.sections[addr.section];
let mut opt = read_relocation_address(obj, section, addr.address, reloc_kind)?;
if opt.is_none() {
@ -149,17 +161,24 @@ fn get_jump_table_entries(
if let Some(target) =
relocation_target_for(obj, cur_addr, Some(ObjRelocKind::Absolute))?
{
entries.push(target);
match target {
RelocationTarget::Address(addr) => entries.push(addr),
RelocationTarget::External => {
bail!("Jump table entry at {:#010X} points to external symbol", cur_addr)
}
}
} else {
let entry_addr = u32::from_be_bytes(*array_ref!(data, 0, 4));
let (section_index, _) =
obj.sections.at_address(entry_addr).with_context(|| {
format!(
"Invalid jump table entry {:#010X} at {:#010X}",
entry_addr, cur_addr
)
})?;
entries.push(SectionAddress::new(section_index, entry_addr));
if entry_addr > 0 {
let (section_index, _) =
obj.sections.at_address(entry_addr).with_context(|| {
format!(
"Invalid jump table entry {:#010X} at {:#010X}",
entry_addr, cur_addr
)
})?;
entries.push(SectionAddress::new(section_index, entry_addr));
}
}
data = &data[4..];
cur_addr += 4;
@ -172,7 +191,10 @@ fn get_jump_table_entries(
let target = if let Some(target) =
relocation_target_for(obj, cur_addr, Some(ObjRelocKind::Absolute))?
{
target
match target {
RelocationTarget::Address(addr) => addr,
RelocationTarget::External => break,
}
} else if obj.kind == ObjKind::Executable {
let Some(value) = read_u32(section, cur_addr.address) else {
break;


@ -1,11 +1,10 @@
use std::ops::Range;
use anyhow::{bail, ensure, Result};
use flagset::FlagSet;
use itertools::Itertools;
use memchr::memmem;
use crate::{
analysis::cfa::{AnalyzerState, SectionAddress},
analysis::cfa::{AnalyzerState, FunctionInfo, SectionAddress},
obj::{
ObjInfo, ObjKind, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind,
@ -24,7 +23,9 @@ pub const TRK_TABLE_SIZE: u32 = 0x1F34; // always?
// TRK_MINNOW_DOLPHIN.a __exception.s
impl AnalysisPass for FindTRKInterruptVectorTable {
fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()> {
for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end.is_none()) {
for (&start, _) in
state.functions.iter().filter(|(_, info)| info.analyzed && info.end.is_none())
{
let section = &obj.sections[start.section];
let data = match section.data_range(start.address, 0) {
Ok(ret) => ret,
@ -70,67 +71,58 @@ impl AnalysisPass for FindTRKInterruptVectorTable {
pub struct FindSaveRestSleds {}
const SLEDS: [([u8; 4], &str, &str); 4] = [
([0xd9, 0xcb, 0xff, 0x70], "__save_fpr", "_savefpr_"),
([0xc9, 0xcb, 0xff, 0x70], "__restore_fpr", "_restfpr_"),
([0x91, 0xcb, 0xff, 0xb8], "__save_gpr", "_savegpr_"),
([0x81, 0xcb, 0xff, 0xb8], "__restore_gpr", "_restgpr_"),
const SLEDS: [([u8; 8], &str, &str); 4] = [
([0xd9, 0xcb, 0xff, 0x70, 0xd9, 0xeb, 0xff, 0x78], "__save_fpr", "_savefpr_"),
([0xc9, 0xcb, 0xff, 0x70, 0xc9, 0xeb, 0xff, 0x78], "__restore_fpr", "_restfpr_"),
([0x91, 0xcb, 0xff, 0xb8, 0x91, 0xeb, 0xff, 0xbc], "__save_gpr", "_savegpr_"),
([0x81, 0xcb, 0xff, 0xb8, 0x81, 0xeb, 0xff, 0xbc], "__restore_gpr", "_restgpr_"),
];
// Runtime.PPCEABI.H.a runtime.c
impl AnalysisPass for FindSaveRestSleds {
fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()> {
const SLED_SIZE: usize = 19 * 4; // registers 14-31 + blr
let mut clear_ranges: Vec<Range<SectionAddress>> = vec![];
for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end.is_some()) {
let section = &obj.sections[start.section];
let data = match section.data_range(start.address, 0) {
Ok(ret) => ret,
Err(_) => continue,
};
for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) {
for (needle, func, label) in &SLEDS {
if data.starts_with(needle) {
log::debug!("Found {} @ {:#010X}", func, start);
clear_ranges.push(start + 4..start + SLED_SIZE as u32);
state.known_symbols.insert(start, ObjSymbol {
name: func.to_string(),
let Some(pos) = memmem::find(&section.data, needle) else {
continue;
};
let start = SectionAddress::new(section_index, section.address as u32 + pos as u32);
log::debug!("Found {} @ {:#010X}", func, start);
state.functions.insert(start, FunctionInfo {
analyzed: false,
end: Some(start + SLED_SIZE as u32),
slices: None,
});
state.known_symbols.insert(start, ObjSymbol {
name: func.to_string(),
demangled_name: None,
address: start.address as u64,
section: Some(start.section),
size: SLED_SIZE as u64,
size_known: true,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
kind: ObjSymbolKind::Function,
align: None,
data_kind: Default::default(),
});
for i in 14..=31 {
let addr = start + (i - 14) * 4;
state.known_symbols.insert(addr, ObjSymbol {
name: format!("{}{}", label, i),
demangled_name: None,
address: start.address as u64,
address: addr.address as u64,
section: Some(start.section),
size: SLED_SIZE as u64,
size: 0,
size_known: true,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
kind: ObjSymbolKind::Function,
kind: ObjSymbolKind::Unknown,
align: None,
data_kind: Default::default(),
});
for i in 14..=31 {
let addr = start + (i - 14) * 4;
state.known_symbols.insert(addr, ObjSymbol {
name: format!("{}{}", label, i),
demangled_name: None,
address: addr.address as u64,
section: Some(start.section),
size: 0,
size_known: true,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
kind: ObjSymbolKind::Unknown,
align: None,
data_kind: Default::default(),
});
}
}
}
}
for range in clear_ranges {
let mut addr = range.start;
while addr < range.end {
state.function_entries.remove(&addr);
state.function_bounds.remove(&addr);
state.function_slices.remove(&addr);
addr += 4;
}
}
Ok(())
}
}
@ -179,7 +171,7 @@ impl AnalysisPass for FindRelCtorsDtors {
};
if target_section.kind != ObjSectionKind::Code
|| !state
.function_bounds
.functions
.contains_key(&SectionAddress::new(target_section_index, reloc.addend))
{
return false;
@ -197,7 +189,7 @@ impl AnalysisPass for FindRelCtorsDtors {
.collect_vec();
if possible_sections.len() != 2 {
log::warn!("Failed to find .ctors and .dtors");
log::debug!("Failed to find .ctors and .dtors");
return Ok(());
}
@ -311,7 +303,7 @@ impl AnalysisPass for FindRelRodataData {
.collect_vec();
if possible_sections.len() != 2 {
log::warn!("Failed to find .rodata and .data");
log::debug!("Failed to find .rodata and .data");
return Ok(());
}


@ -382,7 +382,7 @@ fn apply_init_user_signatures(obj: &mut ObjInfo) -> Result<()> {
// __init_user can be overridden, but we can still look for __init_cpp from it
let mut analyzer = AnalyzerState::default();
analyzer.process_function_at(obj, SectionAddress::new(section_index, symbol.address as u32))?;
for addr in analyzer.function_entries {
for (addr, _) in analyzer.functions {
let section = &obj.sections[addr.section];
if let Some(signature) = check_signatures_str(
section,


@ -8,11 +8,12 @@ use ppc750cl::{Ins, Opcode};
use crate::{
analysis::{
cfa::SectionAddress,
cfa::{FunctionInfo, SectionAddress},
disassemble,
executor::{ExecCbData, ExecCbResult, Executor},
uniq_jump_table_entries,
vm::{section_address_for, BranchTarget, StepResult, VM},
RelocationTarget,
},
obj::{ObjInfo, ObjKind, ObjSection},
};
@ -26,10 +27,11 @@ pub struct FunctionSlices {
pub prologue: Option<SectionAddress>,
pub epilogue: Option<SectionAddress>,
// Either a block or tail call
pub possible_blocks: BTreeSet<SectionAddress>,
pub possible_blocks: BTreeMap<SectionAddress, Box<VM>>,
pub has_conditional_blr: bool,
pub has_rfi: bool,
pub finalized: bool,
pub has_r1_load: bool, // Possibly instead of a prologue
}
pub enum TailCallResult {
@ -72,6 +74,25 @@ fn check_sequence(
Ok(found)
}
fn check_prologue_sequence(section: &ObjSection, ins: &Ins) -> Result<bool> {
#[inline(always)]
fn is_mflr(ins: &Ins) -> bool {
// mfspr r0, LR
ins.op == Opcode::Mfspr && ins.field_rD() == 0 && ins.field_spr() == 8
}
#[inline(always)]
fn is_stwu(ins: &Ins) -> bool {
// stwu r1, d(r1)
ins.op == Opcode::Stwu && ins.field_rS() == 1 && ins.field_rA() == 1
}
#[inline(always)]
fn is_stw(ins: &Ins) -> bool {
// stw r0, d(r1)
ins.op == Opcode::Stw && ins.field_rS() == 0 && ins.field_rA() == 1
}
check_sequence(section, ins, &[(&is_stwu, &is_mflr), (&is_mflr, &is_stw)])
}
impl FunctionSlices {
pub fn end(&self) -> Option<SectionAddress> {
self.blocks.last_key_value().and_then(|(_, &end)| end)
@ -109,22 +130,16 @@ impl FunctionSlices {
ins: &Ins,
) -> Result<()> {
#[inline(always)]
fn is_mflr(ins: &Ins) -> bool {
// mfspr r0, LR
ins.op == Opcode::Mfspr && ins.field_rD() == 0 && ins.field_spr() == 8
}
#[inline(always)]
fn is_stwu(ins: &Ins) -> bool {
// stwu r1, d(r1)
ins.op == Opcode::Stwu && ins.field_rS() == 1 && ins.field_rA() == 1
}
#[inline(always)]
fn is_stw(ins: &Ins) -> bool {
// stw r0, d(r1)
ins.op == Opcode::Stw && ins.field_rS() == 0 && ins.field_rA() == 1
fn is_lwz(ins: &Ins) -> bool {
// lwz r1, d(r)
ins.op == Opcode::Lwz && ins.field_rD() == 1
}
if check_sequence(section, ins, &[(&is_stwu, &is_mflr), (&is_mflr, &is_stw)])? {
if is_lwz(ins) {
self.has_r1_load = true;
return Ok(()); // Possibly instead of a prologue
}
if check_prologue_sequence(section, ins)? {
if let Some(prologue) = self.prologue {
if prologue != addr && prologue != addr - 4 {
bail!("Found duplicate prologue: {:#010X} and {:#010X}", prologue, addr)
@ -170,21 +185,37 @@ impl FunctionSlices {
Ok(())
}
fn is_known_function(
&self,
known_functions: &BTreeMap<SectionAddress, FunctionInfo>,
addr: SectionAddress,
) -> Option<SectionAddress> {
if self.function_references.contains(&addr) {
return Some(addr);
}
if let Some((&fn_addr, info)) = known_functions.range(..=addr).next_back() {
if fn_addr == addr || info.end.is_some_and(|end| addr < end) {
return Some(fn_addr);
}
}
None
}
fn instruction_callback(
&mut self,
data: ExecCbData,
obj: &ObjInfo,
function_start: SectionAddress,
function_end: Option<SectionAddress>,
known_functions: &BTreeSet<SectionAddress>,
known_functions: &BTreeMap<SectionAddress, FunctionInfo>,
) -> Result<ExecCbResult<bool>> {
let ExecCbData { executor, vm, result, ins_addr, section, ins, block_start } = data;
// Track discovered prologue(s) and epilogue(s)
self.check_prologue(section, ins_addr, ins)
.with_context(|| format!("While processing {:#010X}", function_start))?;
.with_context(|| format!("While processing {:#010X}: {:#?}", function_start, self))?;
self.check_epilogue(section, ins_addr, ins)
.with_context(|| format!("While processing {:#010X}", function_start))?;
.with_context(|| format!("While processing {:#010X}: {:#?}", function_start, self))?;
if !self.has_conditional_blr && is_conditional_blr(ins) {
self.has_conditional_blr = true;
}
@ -193,9 +224,20 @@ impl FunctionSlices {
}
// If control flow hits a block we thought may be a tail call,
// we know it isn't.
if self.possible_blocks.contains(&ins_addr) {
if self.possible_blocks.contains_key(&ins_addr) {
self.possible_blocks.remove(&ins_addr);
}
if let Some(fn_addr) = self.is_known_function(known_functions, ins_addr) {
if fn_addr != function_start {
log::warn!(
"Control flow from {} hit known function {} (instruction: {})",
function_start,
fn_addr,
ins_addr
);
return Ok(ExecCbResult::End(false));
}
}
match result {
StepResult::Continue | StepResult::LoadStore { .. } => {
@ -214,7 +256,9 @@ impl FunctionSlices {
Ok(ExecCbResult::End(false))
}
StepResult::Jump(target) => match target {
BranchTarget::Unknown => {
BranchTarget::Unknown
| BranchTarget::Address(RelocationTarget::External)
| BranchTarget::JumpTable { address: RelocationTarget::External, .. } => {
// Likely end of function
let next_addr = ins_addr + 4;
self.blocks.insert(block_start, Some(next_addr));
@ -234,34 +278,41 @@ impl FunctionSlices {
self.blocks.insert(block_start, Some(ins_addr + 4));
Ok(ExecCbResult::EndBlock)
}
BranchTarget::Address(addr) => {
BranchTarget::Address(RelocationTarget::Address(addr)) => {
// End of block
self.blocks.insert(block_start, Some(ins_addr + 4));
self.branches.insert(ins_addr, vec![addr]);
if addr == ins_addr {
// Infinite loop
} else if addr >= function_start
&& matches!(function_end, Some(known_end) if addr < known_end)
&& (matches!(function_end, Some(known_end) if addr < known_end)
|| matches!(self.end(), Some(end) if addr < end)
|| addr < ins_addr)
{
// If target is within known function bounds, jump
if self.add_block_start(addr) {
return Ok(ExecCbResult::Jump(addr));
}
} else if matches!(section.data_range(ins_addr.address, ins_addr.address + 4), Ok(data) if data == [0u8; 4])
{
} else if let Some(fn_addr) = self.is_known_function(known_functions, addr) {
ensure!(fn_addr != function_start); // Sanity check
self.function_references.insert(fn_addr);
} else if addr.section != ins_addr.section
// If this branch has zeroed padding after it, assume tail call.
|| matches!(section.data_range(ins_addr.address, ins_addr.address + 4), Ok(data) if data == [0u8; 4])
{
self.function_references.insert(addr);
} else {
self.possible_blocks.insert(addr);
self.possible_blocks.insert(addr, vm.clone_all());
}
Ok(ExecCbResult::EndBlock)
}
BranchTarget::JumpTable { address, size } => {
BranchTarget::JumpTable { address: RelocationTarget::Address(address), size } => {
// End of block
let next_address = ins_addr + 4;
self.blocks.insert(block_start, Some(next_address));
let (mut entries, size) = uniq_jump_table_entries(
log::debug!("Fetching jump table entries @ {} with size {:?}", address, size);
let (entries, size) = uniq_jump_table_entries(
obj,
address,
size,
@ -269,8 +320,12 @@ impl FunctionSlices {
function_start,
function_end.or_else(|| self.end()),
)?;
log::debug!("-> size {}: {:?}", size, entries);
if entries.contains(&next_address)
&& !entries.iter().any(|addr| known_functions.contains(addr))
&& !entries.iter().any(|&addr| {
self.is_known_function(known_functions, addr)
.is_some_and(|fn_addr| fn_addr != function_start)
})
{
self.jump_table_references.insert(address, size);
let mut branches = vec![];
@ -284,7 +339,8 @@ impl FunctionSlices {
} else {
// If the table doesn't contain the next address,
// it could be a function jump table instead
self.possible_blocks.append(&mut entries);
self.possible_blocks
.extend(entries.into_iter().map(|addr| (addr, vm.clone_all())));
}
Ok(ExecCbResult::EndBlock)
}
@ -296,11 +352,15 @@ impl FunctionSlices {
let mut out_branches = vec![];
for branch in branches {
match branch.target {
BranchTarget::Unknown | BranchTarget::Return => {
continue;
}
BranchTarget::Address(addr) => {
if branch.link || known_functions.contains(&addr) {
BranchTarget::Address(RelocationTarget::Address(addr)) => {
let known = self.is_known_function(known_functions, addr);
if let Some(fn_addr) = known {
if fn_addr != function_start {
self.function_references.insert(fn_addr);
continue;
}
}
if branch.link {
self.function_references.insert(addr);
} else {
out_branches.push(addr);
@ -310,8 +370,14 @@ impl FunctionSlices {
}
}
BranchTarget::JumpTable { address, size } => {
bail!("Conditional jump table unsupported @ {:#010X} -> {:#010X} size {:#X?}", ins_addr, address, size);
bail!(
"Conditional jump table unsupported @ {:#010X} -> {:?} size {:#X?}",
ins_addr,
address,
size
);
}
_ => continue,
}
}
if !out_branches.is_empty() {
@ -328,14 +394,15 @@ impl FunctionSlices {
start: SectionAddress,
function_start: SectionAddress,
function_end: Option<SectionAddress>,
known_functions: &BTreeSet<SectionAddress>,
known_functions: &BTreeMap<SectionAddress, FunctionInfo>,
vm: Option<Box<VM>>,
) -> Result<bool> {
if !self.add_block_start(start) {
return Ok(true);
}
let mut executor = Executor::new(obj);
executor.push(start, VM::new_from_obj(obj), false);
executor.push(start, vm.unwrap_or_else(|| VM::new_from_obj(obj)), false);
let result = executor.run(obj, |data| {
self.instruction_callback(data, obj, function_start, function_end, known_functions)
})?;
@ -345,7 +412,8 @@ impl FunctionSlices {
// Visit unreachable blocks
while let Some((first, _)) = self.first_disconnected_block() {
executor.push(first.end, VM::new_from_obj(obj), true);
let vm = self.possible_blocks.remove(&first.start);
executor.push(first.end, vm.unwrap_or_else(|| VM::new_from_obj(obj)), true);
let result = executor.run(obj, |data| {
self.instruction_callback(data, obj, function_start, function_end, known_functions)
})?;
@ -356,13 +424,25 @@ impl FunctionSlices {
// Visit trailing blocks
if let Some(known_end) = function_end {
loop {
let Some(end) = self.end() else {
'outer: loop {
let Some(mut end) = self.end() else {
log::warn!("Trailing block analysis failed @ {:#010X}", function_start);
break;
};
if end >= known_end {
break;
loop {
if end >= known_end {
break 'outer;
}
// Skip nops
match disassemble(&obj.sections[end.section], end.address) {
Some(ins) => {
if !is_nop(&ins) {
break;
}
}
_ => break,
}
end += 4;
}
executor.push(end, VM::new_from_obj(obj), true);
let result = executor.run(obj, |data| {
@ -393,20 +473,23 @@ impl FunctionSlices {
pub fn finalize(
&mut self,
obj: &ObjInfo,
known_functions: &BTreeSet<SectionAddress>,
known_functions: &BTreeMap<SectionAddress, FunctionInfo>,
) -> Result<()> {
ensure!(!self.finalized, "Already finalized");
ensure!(self.can_finalize(), "Can't finalize");
match (self.prologue, self.epilogue) {
(Some(_), Some(_)) | (None, None) => {}
(Some(_), None) => {
match (self.prologue, self.epilogue, self.has_r1_load) {
(Some(_), Some(_), _) | (None, None, _) => {}
(Some(_), None, _) => {
// Likely __noreturn
}
(None, Some(e)) => {
(None, Some(e), false) => {
log::warn!("{:#010X?}", self);
bail!("Unpaired epilogue {:#010X}", e);
}
(None, Some(_), true) => {
// Possible stack setup
}
}
let Some(end) = self.end() else {
@ -425,7 +508,7 @@ impl FunctionSlices {
if !self.has_conditional_blr {
if let Some(ins) = disassemble(section, end.address - 4) {
if ins.op == Opcode::B {
if let Some(target) = ins
if let Some(RelocationTarget::Address(target)) = ins
.branch_dest()
.and_then(|addr| section_address_for(obj, end - 4, addr))
{
@ -450,7 +533,7 @@ impl FunctionSlices {
if self.has_conditional_blr
&& matches!(disassemble(section, end.address - 4), Some(ins) if !ins.is_blr())
&& matches!(disassemble(section, end.address), Some(ins) if ins.is_blr())
&& !known_functions.contains(&end)
&& !known_functions.contains_key(&end)
{
log::trace!("Found trailing blr @ {:#010X}, merging with function", end);
self.blocks.insert(end, Some(end + 4));
@ -459,7 +542,7 @@ impl FunctionSlices {
// Some functions with rfi also include a trailing nop
if self.has_rfi
&& matches!(disassemble(section, end.address), Some(ins) if is_nop(&ins))
&& !known_functions.contains(&end)
&& !known_functions.contains_key(&end)
{
log::trace!("Found trailing nop @ {:#010X}, merging with function", end);
self.blocks.insert(end, Some(end + 4));
@ -480,7 +563,8 @@ impl FunctionSlices {
addr: SectionAddress,
function_start: SectionAddress,
function_end: Option<SectionAddress>,
known_functions: &BTreeSet<SectionAddress>,
known_functions: &BTreeMap<SectionAddress, FunctionInfo>,
vm: Option<Box<VM>>,
) -> TailCallResult {
// If jump target is already a known block or within known function bounds, not a tail call.
if self.blocks.contains_key(&addr) {
@ -521,12 +605,37 @@ impl FunctionSlices {
{
return TailCallResult::Is;
}
// If we haven't discovered a prologue yet, and one exists between the function
// start and the jump target, known tail call.
if self.prologue.is_none() {
let mut current_address = function_start;
while current_address < addr {
let ins = disassemble(target_section, current_address.address).unwrap();
match check_prologue_sequence(target_section, &ins) {
Ok(true) => {
log::debug!(
"Prologue discovered @ {}; known tail call: {}",
current_address,
addr
);
return TailCallResult::Is;
}
Ok(false) => {}
Err(e) => {
log::warn!("Error while checking prologue sequence: {}", e);
return TailCallResult::Error(e);
}
}
current_address += 4;
}
}
// Perform CFA on jump target to determine more
let mut slices = FunctionSlices {
function_references: self.function_references.clone(),
..Default::default()
};
if let Ok(result) = slices.analyze(obj, addr, function_start, function_end, known_functions)
if let Ok(result) =
slices.analyze(obj, addr, function_start, function_end, known_functions, vm)
{
// If analysis failed, assume tail call.
if !result {
@ -545,7 +654,7 @@ impl FunctionSlices {
let other_blocks = self
.possible_blocks
.range(start + 4..end)
.cloned()
.map(|(&addr, _)| addr)
.collect::<Vec<SectionAddress>>();
if !other_blocks.is_empty() {
for other_addr in other_blocks {
@ -563,6 +672,7 @@ impl FunctionSlices {
return TailCallResult::Is;
}
}
// If all else fails, try again later.
TailCallResult::Possible
}


@ -14,6 +14,7 @@ use crate::{
executor::{ExecCbData, ExecCbResult, Executor},
relocation_target_for, uniq_jump_table_entries,
vm::{is_store_op, BranchTarget, GprValue, StepResult, VM},
RelocationTarget,
},
obj::{
ObjDataKind, ObjInfo, ObjKind, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind,
@ -23,13 +24,31 @@ use crate::{
#[derive(Debug, Copy, Clone)]
pub enum Relocation {
Ha(SectionAddress),
Hi(SectionAddress),
Lo(SectionAddress),
Sda21(SectionAddress),
Rel14(SectionAddress),
Rel24(SectionAddress),
Absolute(SectionAddress),
Ha(RelocationTarget),
Hi(RelocationTarget),
Lo(RelocationTarget),
Sda21(RelocationTarget),
Rel14(RelocationTarget),
Rel24(RelocationTarget),
Absolute(RelocationTarget),
}
impl Relocation {
fn kind_and_address(&self) -> Option<(ObjRelocKind, SectionAddress)> {
let (reloc_kind, target) = match self {
Relocation::Ha(v) => (ObjRelocKind::PpcAddr16Ha, v),
Relocation::Hi(v) => (ObjRelocKind::PpcAddr16Hi, v),
Relocation::Lo(v) => (ObjRelocKind::PpcAddr16Lo, v),
Relocation::Sda21(v) => (ObjRelocKind::PpcEmbSda21, v),
Relocation::Rel14(v) => (ObjRelocKind::PpcRel14, v),
Relocation::Rel24(v) => (ObjRelocKind::PpcRel24, v),
Relocation::Absolute(v) => (ObjRelocKind::Absolute, v),
};
match *target {
RelocationTarget::Address(address) => Some((reloc_kind, address)),
RelocationTarget::External => None,
}
}
}
#[derive(Debug)]
@ -93,13 +112,37 @@ impl Tracker {
#[instrument(name = "tracker", skip(self, obj))]
pub fn process(&mut self, obj: &ObjInfo) -> Result<()> {
self.process_code(obj)?;
for (section_index, section) in obj
.sections
.iter()
.filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData))
{
log::debug!("Processing section {}, address {:#X}", section_index, section.address);
self.process_data(obj, section_index, section)?;
if obj.kind == ObjKind::Executable {
for (section_index, section) in obj.sections.iter().filter(|(_, s)| {
matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)
}) {
log::debug!("Processing section {}, address {:#X}", section_index, section.address);
self.process_data(obj, section_index, section)?;
}
}
self.reject_invalid_relocations(obj)?;
Ok(())
}
/// Remove data relocations that point to an unaligned address if the aligned address has a
/// relocation. A relocation will never point to the middle of an address.
fn reject_invalid_relocations(&mut self, obj: &ObjInfo) -> Result<()> {
let mut to_reject = vec![];
for (&address, reloc) in &self.relocations {
let section = &obj.sections[address.section];
if !matches!(section.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData) {
continue;
}
let Some((_, target)) = reloc.kind_and_address() else {
continue;
};
if !target.is_aligned(4) && self.relocations.contains_key(&target.align_down(4)) {
log::debug!("Rejecting invalid relocation @ {} -> {}", address, target);
to_reject.push(address);
}
}
for address in to_reject {
self.relocations.remove(&address);
}
Ok(())
}
@ -143,6 +186,22 @@ impl Tracker {
Ok(())
}
#[inline]
fn gpr_address(
&self,
obj: &ObjInfo,
ins_addr: SectionAddress,
value: &GprValue,
) -> Option<RelocationTarget> {
match *value {
GprValue::Constant(value) => {
self.is_valid_address(obj, ins_addr, value).map(RelocationTarget::Address)
}
GprValue::Address(address) => Some(address),
_ => None,
}
}
fn instruction_callback(
&mut self,
data: ExecCbData,
@ -162,28 +221,37 @@ impl Tracker {
Opcode::Addi | Opcode::Addic | Opcode::Addic_ => {
let source = ins.field_rA();
let target = ins.field_rD();
if let GprValue::Constant(value) = vm.gpr[target].value {
if let Some(value) = self.is_valid_address(obj, ins_addr, value) {
if (source == 2
&& matches!(self.sda2_base, Some(v) if vm.gpr[2].value == GprValue::Constant(v)))
|| (source == 13
&& matches!(self.sda_base, Some(v) if vm.gpr[13].value == GprValue::Constant(v)))
{
self.relocations.insert(ins_addr, Relocation::Sda21(value));
self.sda_to.insert(value);
} else if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
debug_assert_ne!(value, SectionAddress::new(usize::MAX, 0));
self.relocations.insert(hi_addr, Relocation::Ha(value));
}
let lo_reloc = self.relocations.get(&lo_addr).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr, Relocation::Lo(value));
}
self.hal_to.insert(value);
if let Some(value) = self.gpr_address(obj, ins_addr, &vm.gpr[target].value)
{
if (source == 2
&& matches!(self.sda2_base, Some(v) if vm.gpr[2].value == GprValue::Constant(v)))
|| (source == 13
&& matches!(self.sda_base, Some(v) if vm.gpr[13].value == GprValue::Constant(v)))
{
self.relocations.insert(ins_addr, Relocation::Sda21(value));
if let RelocationTarget::Address(address) = value {
self.sda_to.insert(address);
}
} else if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
debug_assert_ne!(
value,
RelocationTarget::Address(SectionAddress::new(
usize::MAX,
0
))
);
self.relocations.insert(hi_addr, Relocation::Ha(value));
}
let lo_reloc = self.relocations.get(&lo_addr).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr, Relocation::Lo(value));
}
if let RelocationTarget::Address(address) = value {
self.hal_to.insert(address);
}
}
}
@ -191,20 +259,21 @@ impl Tracker {
// ori rA, rS, UIMM
Opcode::Ori => {
let target = ins.field_rA();
if let GprValue::Constant(value) = vm.gpr[target].value {
if let Some(value) = self.is_valid_address(obj, ins_addr, value) {
if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
self.relocations.insert(hi_addr, Relocation::Hi(value));
}
let lo_reloc = self.relocations.get(&lo_addr).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr, Relocation::Lo(value));
}
self.hal_to.insert(value);
if let Some(value) = self.gpr_address(obj, ins_addr, &vm.gpr[target].value)
{
if let (Some(hi_addr), Some(lo_addr)) =
(vm.gpr[target].hi_addr, vm.gpr[target].lo_addr)
{
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
self.relocations.insert(hi_addr, Relocation::Hi(value));
}
let lo_reloc = self.relocations.get(&lo_addr).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr, Relocation::Lo(value));
}
if let RelocationTarget::Address(address) = value {
self.hal_to.insert(address);
}
}
}
@ -214,20 +283,28 @@ impl Tracker {
Ok(ExecCbResult::Continue)
}
StepResult::LoadStore { address, source, source_reg } => {
if let Some(address) = self.is_valid_address(obj, ins_addr, address.address) {
if self.is_valid_section_address(obj, ins_addr) {
if (source_reg == 2
&& matches!(self.sda2_base, Some(v) if source.value == GprValue::Constant(v)))
|| (source_reg == 13
&& matches!(self.sda_base, Some(v) if source.value == GprValue::Constant(v)))
{
self.relocations.insert(ins_addr, Relocation::Sda21(address));
self.sda_to.insert(address);
if let RelocationTarget::Address(address) = address {
self.sda_to.insert(address);
}
} else {
match (source.hi_addr, source.lo_addr) {
(Some(hi_addr), None) => {
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
debug_assert_ne!(address, SectionAddress::new(usize::MAX, 0));
debug_assert_ne!(
address,
RelocationTarget::Address(SectionAddress::new(
usize::MAX,
0
))
);
self.relocations.insert(hi_addr, Relocation::Ha(address));
}
if hi_reloc.is_none()
@ -235,26 +312,38 @@ impl Tracker {
{
self.relocations.insert(ins_addr, Relocation::Lo(address));
}
self.hal_to.insert(address);
if let RelocationTarget::Address(address) = address {
self.hal_to.insert(address);
}
}
(Some(hi_addr), Some(lo_addr)) => {
let hi_reloc = self.relocations.get(&hi_addr).cloned();
if hi_reloc.is_none() {
debug_assert_ne!(address, SectionAddress::new(usize::MAX, 0));
debug_assert_ne!(
address,
RelocationTarget::Address(SectionAddress::new(
usize::MAX,
0
))
);
self.relocations.insert(hi_addr, Relocation::Ha(address));
}
let lo_reloc = self.relocations.get(&lo_addr).cloned();
if lo_reloc.is_none() {
self.relocations.insert(lo_addr, Relocation::Lo(address));
}
self.hal_to.insert(address);
if let RelocationTarget::Address(address) = address {
self.hal_to.insert(address);
}
}
_ => {}
}
}
self.data_types.insert(address, data_kind_from_op(ins.op));
if is_store_op(ins.op) {
self.stores_to.insert(address);
if let RelocationTarget::Address(address) = address {
self.data_types.insert(address, data_kind_from_op(ins.op));
if is_store_op(ins.op) {
self.stores_to.insert(address);
}
}
}
Ok(ExecCbResult::Continue)
@ -266,22 +355,27 @@ impl Tracker {
function_end
),
StepResult::Jump(target) => match target {
BranchTarget::Unknown | BranchTarget::Return => Ok(ExecCbResult::EndBlock),
BranchTarget::Unknown
| BranchTarget::Return
| BranchTarget::JumpTable { address: RelocationTarget::External, .. } => {
Ok(ExecCbResult::EndBlock)
}
BranchTarget::Address(addr) => {
let next_addr = ins_addr + 4;
if next_addr < function_end {
possible_missed_branches.insert(ins_addr + 4, vm.clone_all());
}
if is_function_addr(addr) {
Ok(ExecCbResult::Jump(addr))
} else {
if ins.is_direct_branch() {
self.relocations.insert(ins_addr, Relocation::Rel24(addr));
if let RelocationTarget::Address(addr) = addr {
if is_function_addr(addr) {
return Ok(ExecCbResult::Jump(addr));
}
Ok(ExecCbResult::EndBlock)
}
if ins.is_direct_branch() {
self.relocations.insert(ins_addr, Relocation::Rel24(addr));
}
Ok(ExecCbResult::EndBlock)
}
BranchTarget::JumpTable { address, size } => {
BranchTarget::JumpTable { address: RelocationTarget::Address(address), size } => {
let (entries, _) = uniq_jump_table_entries(
obj,
address,
@ -301,19 +395,30 @@ impl Tracker {
StepResult::Branch(branches) => {
for branch in branches {
match branch.target {
BranchTarget::Unknown | BranchTarget::Return => {}
BranchTarget::Address(addr) => {
if branch.link || !is_function_addr(addr) {
BranchTarget::Unknown
| BranchTarget::Return
| BranchTarget::JumpTable { address: RelocationTarget::External, .. } => {}
BranchTarget::Address(target) => {
let (addr, is_fn_addr) = if let RelocationTarget::Address(addr) = target
{
(addr, is_function_addr(addr))
} else {
(SectionAddress::new(usize::MAX, 0), false)
};
if branch.link || !is_fn_addr {
self.relocations.insert(ins_addr, match ins.op {
Opcode::B => Relocation::Rel24(addr),
Opcode::Bc => Relocation::Rel14(addr),
Opcode::B => Relocation::Rel24(target),
Opcode::Bc => Relocation::Rel14(target),
_ => continue,
});
} else if is_function_addr(addr) {
} else if is_fn_addr {
executor.push(addr, branch.vm, true);
}
}
BranchTarget::JumpTable { address, size } => {
BranchTarget::JumpTable {
address: RelocationTarget::Address(address),
size,
} => {
let (entries, _) = uniq_jump_table_entries(
obj,
address,
@ -390,13 +495,24 @@ impl Tracker {
for chunk in section.data.chunks_exact(4) {
let value = u32::from_be_bytes(chunk.try_into()?);
if let Some(value) = self.is_valid_address(obj, addr, value) {
self.relocations.insert(addr, Relocation::Absolute(value));
self.relocations
.insert(addr, Relocation::Absolute(RelocationTarget::Address(value)));
}
addr += 4;
}
Ok(())
}
fn is_valid_section_address(&self, obj: &ObjInfo, from: SectionAddress) -> bool {
if let Some((&start, &end)) = obj.blocked_ranges.range(..=from).next_back() {
if from.section == start.section && from.address >= start.address && from.address < end
{
return false;
}
}
true
}
fn is_valid_address(
&self,
obj: &ObjInfo,
@ -410,11 +526,12 @@ impl Tracker {
}
}
// Check for an existing relocation
if let Some(target) = relocation_target_for(obj, from, None).ok().flatten() {
if obj.kind == ObjKind::Executable {
debug_assert_eq!(target.address, addr);
if cfg!(debug_assertions) {
let relocation_target = relocation_target_for(obj, from, None).ok().flatten();
if !matches!(relocation_target, None | Some(RelocationTarget::External)) {
// VM should have already handled this
panic!("Relocation already exists for {:#010X} (from {:#010X})", addr, from);
}
return Some(target);
}
// Remainder of this function is for executable objects only
if obj.kind == ObjKind::Relocatable {
@ -530,17 +647,27 @@ impl Tracker {
}
}
for (addr, reloc) in &self.relocations {
let addr = *addr;
let (reloc_kind, target) = match *reloc {
Relocation::Ha(v) => (ObjRelocKind::PpcAddr16Ha, v),
Relocation::Hi(v) => (ObjRelocKind::PpcAddr16Hi, v),
Relocation::Lo(v) => (ObjRelocKind::PpcAddr16Lo, v),
Relocation::Sda21(v) => (ObjRelocKind::PpcEmbSda21, v),
Relocation::Rel14(v) => (ObjRelocKind::PpcRel14, v),
Relocation::Rel24(v) => (ObjRelocKind::PpcRel24, v),
Relocation::Absolute(v) => (ObjRelocKind::Absolute, v),
for (&addr, reloc) in &self.relocations {
let Some((reloc_kind, target)) = reloc.kind_and_address() else {
// Skip external relocations, they already exist
continue;
};
if obj.kind == ObjKind::Relocatable {
// Sanity check: relocatable objects already have relocations,
// did our analyzer find one that isn't real?
let section = &obj.sections[addr.section];
if section.relocations.at(addr.address).is_none()
// We _do_ want to rebuild missing R_PPC_REL24 relocations
&& !matches!(reloc_kind, ObjRelocKind::PpcRel24)
{
bail!(
"Found invalid relocation {} {:#?} (target {}) in relocatable object",
addr,
reloc,
target
);
}
}
let data_kind = self
.data_types
.get(&target)
@ -607,12 +734,14 @@ impl Tracker {
!= reloc_symbol.address as i64 + addend
{
bail!(
"Conflicting relocations (target {:#010X}): {:#010X?} ({}) != {:#010X?} ({})",
"Conflicting relocations (target {:#010X}): {:#010X?} ({} {:#X}) != {:#010X?} ({} {:#X})",
target,
e.value,
iter_symbol.name,
iter_symbol.address as i64 + e.value.addend,
reloc,
reloc_symbol.name
reloc_symbol.name,
reloc_symbol.address as i64 + addend,
);
}
}


@ -3,7 +3,7 @@ use std::num::NonZeroU32;
use ppc750cl::{Argument, Ins, Opcode, GPR};
use crate::{
analysis::{cfa::SectionAddress, relocation_target_for},
analysis::{cfa::SectionAddress, relocation_target_for, RelocationTarget},
obj::{ObjInfo, ObjKind},
};
@ -15,13 +15,13 @@ pub enum GprValue {
/// GPR value is a constant
Constant(u32),
/// GPR value is a known relocated address
Address(SectionAddress),
Address(RelocationTarget),
/// Comparison result (CR field)
ComparisonResult(u8),
/// GPR value is within a range
Range { min: u32, max: u32, step: u32 },
/// GPR value is loaded from an address with a max offset (jump table)
LoadIndexed { address: u32, max_offset: Option<NonZeroU32> },
LoadIndexed { address: RelocationTarget, max_offset: Option<NonZeroU32> },
}
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq)]
@ -52,6 +52,14 @@ impl Gpr {
self.hi_addr = hi_gpr.hi_addr;
self.lo_addr = Some(hi_gpr.lo_addr.unwrap_or(addr));
}
fn address(&self, obj: &ObjInfo, ins_addr: SectionAddress) -> Option<RelocationTarget> {
match self.value {
GprValue::Constant(value) => section_address_for(obj, ins_addr, value),
GprValue::Address(target) => Some(target),
_ => None,
}
}
}
#[derive(Default, Debug, Clone, Eq, PartialEq)]
@ -85,9 +93,9 @@ pub enum BranchTarget {
/// Branch to LR
Return,
/// Branch to address
Address(SectionAddress),
Address(RelocationTarget),
/// Branch to jump table
JumpTable { address: SectionAddress, size: Option<NonZeroU32> },
JumpTable { address: RelocationTarget, size: Option<NonZeroU32> },
}
#[derive(Debug, Clone, Eq, PartialEq)]
@ -105,7 +113,7 @@ pub enum StepResult {
/// Continue normally
Continue,
/// Load from / store to
LoadStore { address: SectionAddress, source: Gpr, source_reg: u8 },
LoadStore { address: RelocationTarget, source: Gpr, source_reg: u8 },
/// Hit illegal instruction
Illegal,
/// Jump without affecting VM state
@ -118,16 +126,16 @@ pub fn section_address_for(
obj: &ObjInfo,
ins_addr: SectionAddress,
target_addr: u32,
) -> Option<SectionAddress> {
) -> Option<RelocationTarget> {
if let Some(target) = relocation_target_for(obj, ins_addr, None).ok().flatten() {
return Some(target);
}
if obj.kind == ObjKind::Executable {
let (section_index, _) = obj.sections.at_address(target_addr).ok()?;
return Some(SectionAddress::new(section_index, target_addr));
return Some(RelocationTarget::Address(SectionAddress::new(section_index, target_addr)));
}
if obj.sections[ins_addr.section].contains(target_addr) {
Some(SectionAddress::new(ins_addr.section, target_addr))
Some(RelocationTarget::Address(SectionAddress::new(ins_addr.section, target_addr)))
} else {
None
}
@ -183,12 +191,6 @@ impl VM {
pub fn clone_all(&self) -> Box<Self> { Box::new(self.clone()) }
pub fn step(&mut self, obj: &ObjInfo, ins_addr: SectionAddress, ins: &Ins) -> StepResult {
// let relocation_target = relocation_target_for(obj, ins_addr, None).ok().flatten();
// if let Some(_target) = relocation_target {
// let _defs = ins.defs();
// // TODO
// }
match ins.op {
Opcode::Illegal => {
return StepResult::Illegal;
@ -201,61 +203,99 @@ impl VM {
(GprValue::Constant(left), GprValue::Constant(right)) => {
GprValue::Constant(left.wrapping_add(right))
}
(
GprValue::Address(RelocationTarget::Address(left)),
GprValue::Constant(right),
) => GprValue::Address(RelocationTarget::Address(left + right)),
(
GprValue::Constant(left),
GprValue::Address(RelocationTarget::Address(right)),
) => GprValue::Address(RelocationTarget::Address(right + left)),
_ => GprValue::Unknown,
};
self.gpr[ins.field_rD()].set_direct(value);
}
// addis rD, rA, SIMM
Opcode::Addis => {
let left = if ins.field_rA() == 0 {
GprValue::Constant(0)
if let Some(target) =
relocation_target_for(obj, ins_addr, None /* TODO */).ok().flatten()
{
debug_assert_eq!(ins.field_rA(), 0);
self.gpr[ins.field_rD()].set_hi(GprValue::Address(target), ins_addr);
} else {
self.gpr[ins.field_rA()].value
};
let value = match left {
GprValue::Constant(value) => {
GprValue::Constant(value.wrapping_add((ins.field_simm() as u32) << 16))
let left = if ins.field_rA() == 0 {
GprValue::Constant(0)
} else {
self.gpr[ins.field_rA()].value
};
let value = match left {
GprValue::Constant(value) => {
GprValue::Constant(value.wrapping_add((ins.field_simm() as u32) << 16))
}
_ => GprValue::Unknown,
};
if ins.field_rA() == 0 {
// lis rD, SIMM
self.gpr[ins.field_rD()].set_hi(value, ins_addr);
} else {
self.gpr[ins.field_rD()].set_direct(value);
}
_ => GprValue::Unknown,
};
if ins.field_rA() == 0 {
// lis rD, SIMM
self.gpr[ins.field_rD()].set_hi(value, ins_addr);
} else {
self.gpr[ins.field_rD()].set_direct(value);
}
}
// addi rD, rA, SIMM
// addic rD, rA, SIMM
// addic. rD, rA, SIMM
Opcode::Addi | Opcode::Addic | Opcode::Addic_ => {
let left = if ins.field_rA() == 0 && ins.op == Opcode::Addi {
GprValue::Constant(0)
if let Some(target) =
relocation_target_for(obj, ins_addr, None /* TODO */).ok().flatten()
{
self.gpr[ins.field_rD()].set_lo(
GprValue::Address(target),
ins_addr,
self.gpr[ins.field_rA()],
);
} else {
self.gpr[ins.field_rA()].value
};
let value = match left {
GprValue::Constant(value) => {
GprValue::Constant(value.wrapping_add(ins.field_simm() as u32))
let left = if ins.field_rA() == 0 && ins.op == Opcode::Addi {
GprValue::Constant(0)
} else {
self.gpr[ins.field_rA()].value
};
let value = match left {
GprValue::Constant(value) => {
GprValue::Constant(value.wrapping_add(ins.field_simm() as u32))
}
GprValue::Address(RelocationTarget::Address(address)) => GprValue::Address(
RelocationTarget::Address(address.offset(ins.field_simm() as i32)),
),
_ => GprValue::Unknown,
};
if ins.field_rA() == 0 {
// li rD, SIMM
self.gpr[ins.field_rD()].set_direct(value);
} else {
self.gpr[ins.field_rD()].set_lo(value, ins_addr, self.gpr[ins.field_rA()]);
}
_ => GprValue::Unknown,
};
if ins.field_rA() == 0 {
// li rD, SIMM
self.gpr[ins.field_rD()].set_direct(value);
} else {
self.gpr[ins.field_rD()].set_lo(value, ins_addr, self.gpr[ins.field_rA()]);
}
}
// ori rA, rS, UIMM
Opcode::Ori => {
let value = match self.gpr[ins.field_rS()].value {
GprValue::Constant(value) => {
GprValue::Constant(value | ins.field_uimm() as u32)
}
_ => GprValue::Unknown,
};
self.gpr[ins.field_rA()].set_lo(value, ins_addr, self.gpr[ins.field_rS()]);
if let Some(target) =
relocation_target_for(obj, ins_addr, None /* TODO */).ok().flatten()
{
self.gpr[ins.field_rA()].set_lo(
GprValue::Address(target),
ins_addr,
self.gpr[ins.field_rS()],
);
} else {
let value = match self.gpr[ins.field_rS()].value {
GprValue::Constant(value) => {
GprValue::Constant(value | ins.field_uimm() as u32)
}
_ => GprValue::Unknown,
};
self.gpr[ins.field_rA()].set_lo(value, ins_addr, self.gpr[ins.field_rS()]);
}
}
// or rA, rS, rB
Opcode::Or => {
@ -336,20 +376,18 @@ impl VM {
Opcode::Bcctr => {
match self.ctr {
GprValue::Constant(value) => {
// TODO only check valid target?
if let Some(target) = section_address_for(obj, ins_addr, value) {
BranchTarget::Address(target)
} else {
BranchTarget::Unknown
}
},
GprValue::Address(target) => BranchTarget::Address(target),
GprValue::LoadIndexed { address, max_offset }
// FIXME: avoids treating bctrl indirect calls as jump tables
if !ins.field_LK() => {
if let Some(target) = section_address_for(obj, ins_addr, address) {
BranchTarget::JumpTable { address: target, size: max_offset.and_then(|n| n.checked_add(4)) }
} else {
BranchTarget::Unknown
}
BranchTarget::JumpTable { address, size: max_offset.and_then(|n| n.checked_add(4)) }
}
_ => BranchTarget::Unknown,
}
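For a concrete picture of the jump-table case handled above: only a plain bctr (not bctrl, per the FIXME) whose CTR was loaded via lwzx from a table base, with the index bounded by an earlier compare, becomes a JumpTable target, and its size is the maximum byte offset plus one final entry. A hypothetical sketch, not part of this commit (section index and addresses are made up):

    // A bounded lwzx/mtctr/bctr sequence over a 9-entry table at 0x80401000
    // (section index 4 is invented) would be represented roughly as:
    fn example_jump_table_target() -> BranchTarget {
        BranchTarget::JumpTable {
            address: RelocationTarget::Address(SectionAddress::new(4, 0x8040_1000)),
            size: NonZeroU32::new(0x20 + 4), // max byte offset + one final entry
        }
    }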
@ -369,7 +407,7 @@ impl VM {
if ins.field_LK() {
return StepResult::Branch(vec![
Branch {
target: BranchTarget::Address(ins_addr + 4),
target: BranchTarget::Address(RelocationTarget::Address(ins_addr + 4)),
link: false,
vm: self.clone_for_return(),
},
@ -386,7 +424,7 @@ impl VM {
let mut branches = vec![
// Branch not taken
Branch {
target: BranchTarget::Address(ins_addr + 4),
target: BranchTarget::Address(RelocationTarget::Address(ins_addr + 4)),
link: false,
vm: self.clone_all(),
},
@ -413,15 +451,20 @@ impl VM {
}
// lwzx rD, rA, rB
Opcode::Lwzx => {
let left = self.gpr[ins.field_rA()].value;
let left = self.gpr[ins.field_rA()].address(obj, ins_addr);
let right = self.gpr[ins.field_rB()].value;
let value = match (left, right) {
(GprValue::Constant(address), GprValue::Range { min: _, max, .. })
(Some(address), GprValue::Range { min: _, max, .. })
if /*min == 0 &&*/ max < u32::MAX - 4 && max & 3 == 0 =>
{
GprValue::LoadIndexed { address, max_offset: NonZeroU32::new(max) }
}
(GprValue::Constant(address), _) => {
(Some(address), GprValue::Range { min: _, max, .. })
if /*min == 0 &&*/ max < u32::MAX - 4 && max & 3 == 0 =>
{
GprValue::LoadIndexed { address, max_offset: NonZeroU32::new(max) }
}
(Some(address), _) => {
GprValue::LoadIndexed { address, max_offset: None }
}
_ => GprValue::Unknown,
@ -452,16 +495,29 @@ impl VM {
op if is_load_store_op(op) => {
let source = ins.field_rA();
let mut result = StepResult::Continue;
if let GprValue::Constant(base) = self.gpr[source].value {
let address = base.wrapping_add(ins.field_simm() as u32);
if let GprValue::Address(target) = self.gpr[source].value {
if is_update_op(op) {
self.gpr[source].set_lo(
GprValue::Constant(address),
GprValue::Address(target),
ins_addr,
self.gpr[source],
);
}
result = StepResult::LoadStore {
address: target,
source: self.gpr[source],
source_reg: source as u8,
};
} else if let GprValue::Constant(base) = self.gpr[source].value {
let address = base.wrapping_add(ins.field_simm() as u32);
if let Some(target) = section_address_for(obj, ins_addr, address) {
if is_update_op(op) {
self.gpr[source].set_lo(
GprValue::Address(target),
ins_addr,
self.gpr[source],
);
}
result = StepResult::LoadStore {
address: target,
source: self.gpr[source],
@ -573,11 +629,21 @@ fn split_values_by_crb(crb: u8, left: GprValue, right: GprValue) -> (GprValue, G
#[inline]
fn mask_value(begin: u32, end: u32) -> u32 {
let mut mask = 0u32;
for bit in begin..=end {
mask |= 1 << (31 - bit);
if begin <= end {
let mut mask = 0u32;
for bit in begin..=end {
mask |= 1 << (31 - bit);
}
mask
} else if begin == end + 1 {
u32::MAX
} else {
let mut mask = u32::MAX;
for bit in end + 1..begin {
mask &= !(1 << (31 - bit));
}
mask
}
mask
}
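The reworked mask_value also covers the wrapping form used by PowerPC rotate-and-mask encodings (mask begin greater than end). A quick sanity check of all three branches, written as a hypothetical unit test for this module (bit 0 is the most significant bit; not part of the diff):

    #[cfg(test)]
    mod mask_value_tests {
        use super::mask_value;

        #[test]
        fn contiguous_and_wrapping_masks() {
            assert_eq!(mask_value(26, 31), 0x0000_003F); // begin <= end: contiguous run
            assert_eq!(mask_value(0, 31), 0xFFFF_FFFF);  // full-width mask
            assert_eq!(mask_value(6, 5), 0xFFFF_FFFF);   // begin == end + 1: all ones
            assert_eq!(mask_value(31, 0), 0x8000_0001);  // wrap: everything except bits 1..=30
        }
    }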
#[inline]

View File

@ -5,7 +5,7 @@ use std::{
ffi::OsStr,
fs,
fs::DirBuilder,
io::Write,
io::{Cursor, Write},
mem::take,
path::{Path, PathBuf},
time::Instant,
@ -31,8 +31,9 @@ use crate::{
},
cmd::shasum::file_sha1_string,
obj::{
ObjDataKind, ObjInfo, ObjReloc, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex,
best_match_for_reloc, ObjDataKind, ObjInfo, ObjKind, ObjReloc, ObjRelocKind,
ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
SymbolIndex,
},
util::{
asm::write_asm,
@ -44,10 +45,7 @@ use crate::{
dep::DepFile,
dol::process_dol,
elf::{process_elf, write_elf},
file::{
buf_reader, buf_writer, decompress_if_needed, map_file, touch, verify_hash,
FileIterator, Reader,
},
file::{buf_reader, buf_writer, map_file, touch, verify_hash, FileIterator},
lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit},
map::apply_map_file,
rel::{process_rel, process_rel_header},
@ -272,6 +270,7 @@ pub struct OutputModule {
pub module_id: u32,
#[serde(with = "path_slash_serde")]
pub ldscript: PathBuf,
pub entry: Option<String>,
pub units: Vec<OutputUnit>,
}
@ -293,7 +292,7 @@ pub fn run(args: Args) -> Result<()> {
}
fn apply_selfile(obj: &mut ObjInfo, buf: &[u8]) -> Result<()> {
let rso = process_rso(&mut Reader::new(buf))?;
let rso = process_rso(&mut Cursor::new(buf))?;
for symbol in rso.symbols.iter() {
let dol_section_index = match symbol.section {
Some(section) => section,
@ -373,25 +372,26 @@ fn apply_selfile(obj: &mut ObjInfo, buf: &[u8]) -> Result<()> {
fn info(args: InfoArgs) -> Result<()> {
let mut obj = {
let file = map_file(&args.dol_file)?;
let data = decompress_if_needed(file.as_slice())?;
process_dol(data.as_ref(), "")?
process_dol(file.as_slice(), "")?
};
apply_signatures(&mut obj)?;
let mut state = AnalyzerState::default();
FindSaveRestSleds::execute(&mut state, &obj)?;
state.detect_functions(&obj)?;
log::debug!("Discovered {} functions", state.function_slices.len());
log::debug!(
"Discovered {} functions",
state.functions.iter().filter(|(_, i)| i.end.is_some()).count()
);
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
apply_signatures_post(&mut obj)?;
if let Some(selfile) = &args.selfile {
let file = map_file(selfile)?;
let data = decompress_if_needed(file.as_slice())?;
apply_selfile(&mut obj, data.as_ref())?;
apply_selfile(&mut obj, file.as_slice())?;
}
println!("{}:", obj.name);
@ -450,19 +450,31 @@ fn update_symbols(obj: &mut ObjInfo, modules: &ModuleMap<'_>, create_symbols: bo
{
if source_module_id == obj.module_id {
// Skip if already resolved
let (_, source_section) = obj
.sections
.get_elf_index(rel_reloc.section as usize)
.ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.section))?;
let (_, source_section) =
obj.sections.get_elf_index(rel_reloc.section as usize).ok_or_else(|| {
anyhow!(
"Failed to locate REL section {} in module ID {}: source module {}, {:?}",
rel_reloc.section,
obj.module_id,
source_module_id,
rel_reloc
)
})?;
if source_section.relocations.contains(rel_reloc.address) {
continue;
}
}
let (target_section_index, target_section) = obj
.sections
.get_elf_index(rel_reloc.target_section as usize)
.ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.target_section))?;
let (target_section_index, target_section) =
obj.sections.get_elf_index(rel_reloc.target_section as usize).ok_or_else(|| {
anyhow!(
"Failed to locate REL section {} in module ID {}: source module {}, {:?}",
rel_reloc.target_section,
obj.module_id,
source_module_id,
rel_reloc
)
})?;
if let Some((symbol_index, symbol)) = obj.symbols.for_relocation(
SectionAddress::new(target_section_index, rel_reloc.addend),
@ -517,10 +529,15 @@ fn create_relocations(obj: &mut ObjInfo, modules: &ModuleMap<'_>, dol_obj: &ObjI
// Resolve all relocations in this module
for rel_reloc in take(&mut obj.unresolved_relocations) {
// Skip if already resolved
let (_, source_section) = obj
.sections
.get_elf_index(rel_reloc.section as usize)
.ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.section))?;
let (_, source_section) =
obj.sections.get_elf_index(rel_reloc.section as usize).ok_or_else(|| {
anyhow!(
"Failed to locate REL section {} in module ID {}: {:?}",
rel_reloc.section,
obj.module_id,
rel_reloc
)
})?;
if source_section.relocations.contains(rel_reloc.address) {
continue;
}
@ -575,10 +592,8 @@ fn create_relocations(obj: &mut ObjInfo, modules: &ModuleMap<'_>, dol_obj: &ObjI
Some(rel_reloc.module_id)
},
};
let (_, source_section) = obj
.sections
.get_elf_index_mut(rel_reloc.section as usize)
.ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.section))?;
let (_, source_section) =
obj.sections.get_elf_index_mut(rel_reloc.section as usize).unwrap();
source_section.relocations.insert(rel_reloc.address, reloc)?;
}
@ -653,11 +668,10 @@ fn load_analyze_dol(config: &ProjectConfig) -> Result<AnalyzeResult> {
log::debug!("Loading {}", config.base.object.display());
let mut obj = {
let file = map_file(&config.base.object)?;
let data = decompress_if_needed(file.as_slice())?;
if let Some(hash_str) = &config.base.hash {
verify_hash(data.as_ref(), hash_str)?;
verify_hash(file.as_slice(), hash_str)?;
}
process_dol(data.as_ref(), config.base.name().as_ref())?
process_dol(file.as_slice(), config.base.name().as_ref())?
};
let mut dep = vec![config.base.object.clone()];
@ -688,10 +702,9 @@ fn load_analyze_dol(config: &ProjectConfig) -> Result<AnalyzeResult> {
if !config.quick_analysis {
let mut state = AnalyzerState::default();
debug!("Detecting function boundaries");
state.detect_functions(&obj)?;
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.detect_functions(&obj)?;
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
}
@ -701,11 +714,10 @@ fn load_analyze_dol(config: &ProjectConfig) -> Result<AnalyzeResult> {
if let Some(selfile) = &config.selfile {
log::info!("Loading {}", selfile.display());
let file = map_file(selfile)?;
let data = decompress_if_needed(file.as_slice())?;
if let Some(hash) = &config.selfile_hash {
verify_hash(data.as_ref(), hash)?;
verify_hash(file.as_slice(), hash)?;
}
apply_selfile(&mut obj, data.as_ref())?;
apply_selfile(&mut obj, file.as_slice())?;
dep.push(selfile.clone());
}
@ -761,11 +773,21 @@ fn split_write_obj(
debug!("Writing object files");
let obj_dir = out_dir.join("obj");
let entry = if obj.kind == ObjKind::Executable {
obj.entry.and_then(|e| {
let (section_index, _) = obj.sections.at_address(e as u32).ok()?;
let symbols = obj.symbols.at_section_address(section_index, e as u32).collect_vec();
best_match_for_reloc(symbols, ObjRelocKind::PpcRel24).map(|(_, s)| s.name.clone())
})
} else {
obj.symbols.by_name("_prolog")?.map(|(_, s)| s.name.clone())
};
let mut out_config = OutputModule {
name: module_config.name().to_string(),
module_id: obj.module_id,
ldscript: out_dir.join("ldscript.lcf"),
units: Vec::with_capacity(split_objs.len()),
entry,
};
for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) {
let out_obj = write_elf(split_obj)?;
@ -802,13 +824,12 @@ fn split_write_obj(
fn load_analyze_rel(config: &ProjectConfig, module_config: &ModuleConfig) -> Result<AnalyzeResult> {
debug!("Loading {}", module_config.object.display());
let map = map_file(&module_config.object)?;
let buf = decompress_if_needed(map.as_slice())?;
let file = map_file(&module_config.object)?;
if let Some(hash_str) = &module_config.hash {
verify_hash(buf.as_ref(), hash_str)?;
verify_hash(file.as_slice(), hash_str)?;
}
let (_, mut module_obj) =
process_rel(&mut Reader::new(buf.as_ref()), module_config.name().as_ref())?;
process_rel(&mut Cursor::new(file.as_slice()), module_config.name().as_ref())?;
if let Some(comment_version) = config.mw_comment_version {
module_obj.mw_comment = Some(MWComment::new(comment_version)?);
@ -863,11 +884,10 @@ fn split(args: SplitArgs) -> Result<()> {
for module_config in config.modules.iter_mut() {
let file = map_file(&module_config.object)?;
let buf = decompress_if_needed(file.as_slice())?;
if let Some(hash_str) = &module_config.hash {
verify_hash(buf.as_ref(), hash_str)?;
verify_hash(file.as_slice(), hash_str)?;
} else {
module_config.hash = Some(file_sha1_string(&mut Reader::new(buf.as_ref()))?);
module_config.hash = Some(file_sha1_string(&mut file.as_reader())?);
}
}
@ -1080,21 +1100,26 @@ fn validate<P: AsRef<Path>>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -
for (_symbol_idx, symbol) in real_obj.symbols.for_section(section_index) {
let symbol_addr = SectionAddress::new(section_index, symbol.address as u32);
real_functions.insert(symbol_addr, symbol.name.clone());
match state.function_bounds.get(&symbol_addr) {
Some(&Some(end)) => {
if symbol.size > 0 && end != (symbol_addr + symbol.size as u32) {
match state.functions.get(&symbol_addr) {
Some(info) => {
if let Some(end) = info.end {
if symbol.size > 0 && end != (symbol_addr + symbol.size as u32) {
log::warn!(
"Function {:#010X} ({}) ends at {:#010X}, expected {:#010X}",
symbol.address,
symbol.name,
end,
symbol.address + symbol.size
);
}
} else {
log::warn!(
"Function {:#010X} ({}) ends at {:#010X}, expected {:#010X}",
"Function {:#010X} ({}) has no end",
symbol.address,
symbol.name,
end,
symbol.address + symbol.size
symbol.name
);
}
}
Some(_) => {
log::warn!("Function {:#010X} ({}) has no end", symbol.address, symbol.name);
}
None => {
log::warn!(
"Function {:#010X} ({}) not discovered!",
@ -1105,8 +1130,8 @@ fn validate<P: AsRef<Path>>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -
}
}
}
for (&start, &end) in &state.function_bounds {
let Some(end) = end else {
for (&start, info) in &state.functions {
let Some(end) = info.end else {
continue;
};
if !real_functions.contains_key(&start) {
@ -1206,11 +1231,10 @@ fn diff(args: DiffArgs) -> Result<()> {
log::info!("Loading {}", config.base.object.display());
let mut obj = {
let file = map_file(&config.base.object)?;
let data = decompress_if_needed(file.as_slice())?;
if let Some(hash_str) = &config.base.hash {
verify_hash(data.as_ref(), hash_str)?;
verify_hash(file.as_slice(), hash_str)?;
}
process_dol(data.as_ref(), config.base.name().as_ref())?
process_dol(file.as_slice(), config.base.name().as_ref())?
};
if let Some(symbols_path) = &config.base.symbols {
@ -1353,11 +1377,10 @@ fn apply(args: ApplyArgs) -> Result<()> {
log::info!("Loading {}", config.base.object.display());
let mut obj = {
let file = map_file(&config.base.object)?;
let data = decompress_if_needed(file.as_slice())?;
if let Some(hash_str) = &config.base.hash {
verify_hash(data.as_ref(), hash_str)?;
verify_hash(file.as_slice(), hash_str)?;
}
process_dol(data.as_ref(), config.base.name().as_ref())?
process_dol(file.as_slice(), config.base.name().as_ref())?
};
if let Some(symbols_path) = &config.base.symbols {

View File

@ -2,7 +2,7 @@ use std::{
collections::{btree_map, hash_map, BTreeMap, HashMap},
fs,
fs::DirBuilder,
io::Write,
io::{Cursor, Write},
path::PathBuf,
};
@ -22,7 +22,7 @@ use crate::{
comment::{read_comment_sym, MWComment},
config::{write_splits_file, write_symbols_file},
elf::{process_elf, write_elf},
file::{buf_writer, process_rsp, Reader},
file::{buf_writer, process_rsp},
signatures::{compare_signature, generate_signature, FunctionSignature},
split::split_obj,
IntoCow, ToCow,
@ -544,7 +544,7 @@ fn info(args: InfoArgs) -> Result<()> {
if let Some(comment_section) = in_file.section_by_name(".comment") {
let data = comment_section.uncompressed_data()?;
if !data.is_empty() {
let mut reader = Reader::new(&*data);
let mut reader = Cursor::new(&*data);
let header =
MWComment::parse_header(&mut reader).context("While reading .comment section")?;
println!("\nMetrowerks metadata (.comment):");

View File

@ -6,6 +6,7 @@ pub mod elf;
pub mod elf2dol;
pub mod map;
pub mod metroidbuildinfo;
pub mod nlzss;
pub mod rarc;
pub mod rel;
pub mod rso;

63
src/cmd/nlzss.rs Normal file
View File

@ -0,0 +1,63 @@
use std::{fs, path::PathBuf};
use anyhow::{anyhow, Context, Result};
use argp::FromArgs;
use crate::util::{
file::{open_file, process_rsp},
IntoCow, ToCow,
};
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing NLZSS-compressed files.
#[argp(subcommand, name = "nlzss")]
pub struct Args {
#[argp(subcommand)]
command: SubCommand,
}
#[derive(FromArgs, PartialEq, Debug)]
#[argp(subcommand)]
enum SubCommand {
Decompress(DecompressArgs),
}
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Decompresses NLZSS-compressed files.
#[argp(subcommand, name = "decompress")]
pub struct DecompressArgs {
#[argp(positional)]
/// NLZSS-compressed file(s)
files: Vec<PathBuf>,
#[argp(option, short = 'o')]
/// Output file (or directory, if multiple files are specified).
/// If not specified, decompresses in-place.
output: Option<PathBuf>,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Decompress(args) => decompress(args),
}
}
fn decompress(args: DecompressArgs) -> Result<()> {
let files = process_rsp(&args.files)?;
let single_file = files.len() == 1;
for path in files {
let data = nintendo_lz::decompress(&mut open_file(&path)?)
.map_err(|e| anyhow!("Failed to decompress '{}' with NLZSS: {}", path.display(), e))?;
let out_path = if let Some(output) = &args.output {
if single_file {
output.as_path().to_cow()
} else {
output.join(path.file_name().unwrap()).into_cow()
}
} else {
path.as_path().to_cow()
};
fs::write(out_path.as_ref(), data)
.with_context(|| format!("Failed to write '{}'", out_path.display()))?;
}
Ok(())
}
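For reference, the decompression itself is a single nintendo_lz call; a minimal standalone sketch of what the subcommand does per input file (function name and paths are placeholders, and the crate's error is assumed to be Display-able, as in the map_err above):

    use std::{fs, io::Cursor};

    use anyhow::{anyhow, Result};

    fn decompress_one(input: &str, output: &str) -> Result<()> {
        let compressed = fs::read(input)?;
        // Same call the subcommand uses; the error only implements Display, so map it.
        let data = nintendo_lz::decompress(&mut Cursor::new(compressed))
            .map_err(|e| anyhow!("Failed to decompress '{}' with NLZSS: {}", input, e))?;
        fs::write(output, data)?;
        Ok(())
    }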

View File

@ -1,8 +1,7 @@
use std::{
collections::{btree_map, BTreeMap},
ffi::OsStr,
fs,
io::Write,
io::{ Write},
path::PathBuf,
time::Instant,
};
@ -10,7 +9,8 @@ use std::{
use anyhow::{anyhow, bail, ensure, Context, Result};
use argp::FromArgs;
use object::{
Architecture, Endianness, Object, ObjectSection, ObjectSymbol, RelocationTarget, SymbolIndex,
Architecture, Endianness, File, Object, ObjectSection, ObjectSymbol, RelocationTarget,
SectionIndex, SymbolIndex,
};
use rayon::prelude::*;
use rustc_hash::FxHashMap;
@ -27,18 +27,18 @@ use crate::{
tracker::Tracker,
},
array_ref_mut,
cmd::dol::ProjectConfig,
cmd::dol::{ModuleConfig, ProjectConfig},
obj::{ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol},
util::{
config::is_auto_symbol,
dol::process_dol,
elf::{to_obj_reloc_kind, write_elf},
file::{
buf_reader, buf_writer, decompress_if_needed, map_file, process_rsp, verify_hash,
FileIterator, Reader,
},
file::{buf_reader, buf_writer, map_file, process_rsp, verify_hash, FileIterator},
nested::NestedMap,
rel::{process_rel, process_rel_header, write_rel, RelHeader, RelReloc, RelWriteInfo},
rel::{
process_rel, process_rel_header, process_rel_sections, write_rel, RelHeader, RelReloc,
RelSectionHeader, RelWriteInfo, PERMITTED_SECTIONS,
},
IntoCow, ToCow,
},
};
@ -106,8 +106,8 @@ pub fn run(args: Args) -> Result<()> {
}
}
fn load_obj(buf: &[u8]) -> Result<object::File> {
let obj = object::read::File::parse(buf)?;
fn load_obj(buf: &[u8]) -> Result<File> {
let obj = File::parse(buf)?;
match obj.architecture() {
Architecture::PowerPc => {}
arch => bail!("Unexpected architecture: {arch:?}"),
@ -116,21 +116,134 @@ fn load_obj(buf: &[u8]) -> Result<object::File> {
Ok(obj)
}
/// Attempt to match the section index from the ELF to the original REL.
/// Our built ELFs may be missing sections that were present in the original RELs.
fn match_section_index(
obj: &File,
section_index: SectionIndex,
rel_sections: &[RelSectionHeader],
) -> Result<usize> {
let (_, _) = (obj, rel_sections);
Ok(section_index.0)
// TODO
// rel_sections
// .iter()
// .enumerate()
// .filter(|(_, s)| s.size() > 0)
// .zip(obj.sections().filter(|s| s.size() > 0))
// .find_map(
// |((rel_section_index, _), obj_section)| {
// if obj_section.index() == section_index {
// Some(rel_section_index)
// } else {
// None
// }
// },
// )
// .ok_or_else(|| {
// anyhow!(
// "Failed to find matching section index for {} ({}), REL section count: {}",
// obj.section_by_index(section_index)
// .ok()
// .and_then(|s| s.name().ok().map(|s| s.to_string()))
// .unwrap_or("[invalid]".to_string()),
// section_index.0,
// rel_sections.len()
// )
// })
}
fn load_rel(module_config: &ModuleConfig) -> Result<(RelHeader, Vec<RelSectionHeader>)> {
let file = map_file(&module_config.object)?;
if let Some(hash_str) = &module_config.hash {
verify_hash(file.as_slice(), hash_str)?;
}
let mut reader = file.as_reader();
let header = process_rel_header(&mut reader)?;
let sections = process_rel_sections(&mut reader, &header)?;
Ok((header, sections))
}
fn resolve_relocations(
module: &File,
existing_headers: &BTreeMap<u32, (RelHeader, Vec<RelSectionHeader>)>,
module_id: usize,
symbol_map: &FxHashMap<&[u8], (usize, SymbolIndex)>,
modules: &[(File, PathBuf)],
relocations: &mut Vec<RelReloc>,
) -> Result<usize> {
let mut resolved = 0usize;
for section in module.sections() {
if !matches!(section.name(), Ok(name) if PERMITTED_SECTIONS.contains(&name)) {
continue;
}
let section_index = if let Some((_, sections)) = existing_headers.get(&(module_id as u32)) {
match_section_index(module, section.index(), sections)?
} else {
section.index().0
} as u8;
for (address, reloc) in section.relocations() {
let reloc_target = match reloc.target() {
RelocationTarget::Symbol(idx) => {
module.symbol_by_index(idx).with_context(|| {
format!("Relocation against invalid symbol index {}", idx.0)
})?
}
reloc_target => bail!("Unsupported relocation target: {reloc_target:?}"),
};
let (target_module_id, target_symbol) = if reloc_target.is_undefined() {
resolved += 1;
symbol_map
.get(reloc_target.name_bytes()?)
.map(|&(module_id, symbol_idx)| {
(module_id, modules[module_id].0.symbol_by_index(symbol_idx).unwrap())
})
.ok_or_else(|| {
anyhow!(
"Failed to find symbol {} in any module",
reloc_target.name().unwrap_or("[invalid]")
)
})?
} else {
(module_id, reloc_target)
};
let target_section_index = target_symbol.section_index().unwrap();
let target_section = if let Some((_, sections)) =
existing_headers.get(&(target_module_id as u32))
{
match_section_index(&modules[target_module_id].0, target_section_index, sections)?
} else {
target_section_index.0
} as u8;
relocations.push(RelReloc {
kind: to_obj_reloc_kind(reloc.kind())?,
section: section_index,
address: address as u32,
module_id: target_module_id as u32,
target_section,
addend: (target_symbol.address() as i64 + reloc.addend()) as u32,
// Extra
original_section: section.index().0 as u8,
original_target_section: target_section_index.0 as u8,
});
}
}
Ok(resolved)
}
fn make(args: MakeArgs) -> Result<()> {
let total = Instant::now();
// Load existing REL headers (if specified)
let mut existing_headers = BTreeMap::<u32, RelHeader>::new();
let mut existing_headers = BTreeMap::<u32, (RelHeader, Vec<RelSectionHeader>)>::new();
if let Some(config_path) = &args.config {
let config: ProjectConfig = serde_yaml::from_reader(&mut buf_reader(config_path)?)?;
for module_config in &config.modules {
let map = map_file(&module_config.object)?;
let buf = decompress_if_needed(map.as_slice())?;
if let Some(hash_str) = &module_config.hash {
verify_hash(buf.as_ref(), hash_str)?;
}
let header = process_rel_header(&mut Reader::new(buf.as_ref()))?;
existing_headers.insert(header.module_id, header);
let _span = info_span!("module", name = %module_config.name()).entered();
let (header, sections) = load_rel(module_config).with_context(|| {
format!("While loading REL '{}'", module_config.object.display())
})?;
existing_headers.insert(header.module_id, (header, sections));
}
}
@ -144,6 +257,7 @@ fn make(args: MakeArgs) -> Result<()> {
.zip(&paths)
.map(|(file, path)| {
load_obj(file.as_slice())
.map(|o| (o, path.clone()))
.with_context(|| format!("Failed to load '{}'", path.display()))
})
.collect::<Result<Vec<_>>>()?;
@ -151,7 +265,8 @@ fn make(args: MakeArgs) -> Result<()> {
// Create symbol map
let start = Instant::now();
let mut symbol_map = FxHashMap::<&[u8], (usize, SymbolIndex)>::default();
for (module_id, module) in modules.iter().enumerate() {
for (module_id, (module, path)) in modules.iter().enumerate() {
let _span = info_span!("file", path = %path.display()).entered();
for symbol in module.symbols() {
if symbol.is_definition() && symbol.scope() == object::SymbolScope::Dynamic {
symbol_map.entry(symbol.name_bytes()?).or_insert((module_id, symbol.index()));
@ -163,45 +278,19 @@ fn make(args: MakeArgs) -> Result<()> {
let mut resolved = 0usize;
let mut relocations = Vec::<Vec<RelReloc>>::with_capacity(modules.len() - 1);
relocations.resize_with(modules.len() - 1, Vec::new);
for ((module_id, module), relocations) in
for ((module_id, (module, path)), relocations) in
modules.iter().enumerate().skip(1).zip(&mut relocations)
{
for section in module.sections() {
for (address, reloc) in section.relocations() {
let reloc_target = match reloc.target() {
RelocationTarget::Symbol(idx) => {
module.symbol_by_index(idx).with_context(|| {
format!("Relocation against invalid symbol index {}", idx.0)
})?
}
reloc_target => bail!("Unsupported relocation target: {reloc_target:?}"),
};
let (target_module_id, target_symbol) = if reloc_target.is_undefined() {
resolved += 1;
symbol_map
.get(reloc_target.name_bytes()?)
.map(|&(module_id, symbol_idx)| {
(module_id, modules[module_id].symbol_by_index(symbol_idx).unwrap())
})
.ok_or_else(|| {
anyhow!(
"Failed to find symbol {} in any module",
reloc_target.name().unwrap_or("[invalid]")
)
})?
} else {
(module_id, reloc_target)
};
relocations.push(RelReloc {
kind: to_obj_reloc_kind(reloc.kind())?,
section: section.index().0 as u8,
address: address as u32,
module_id: target_module_id as u32,
target_section: target_symbol.section_index().unwrap().0 as u8,
addend: (target_symbol.address() as i64 + reloc.addend()) as u32,
});
}
}
let _span = info_span!("file", path = %path.display()).entered();
resolved += resolve_relocations(
module,
&existing_headers,
module_id,
&symbol_map,
&modules,
relocations,
)
.with_context(|| format!("While resolving relocations in '{}'", path.display()))?;
}
let duration = start.elapsed();
@ -214,12 +303,10 @@ fn make(args: MakeArgs) -> Result<()> {
// Write RELs
let start = Instant::now();
for (((module_id, module), path), relocations) in
modules.iter().enumerate().zip(&paths).skip(1).zip(relocations)
for ((module_id, (module, path)), relocations) in
modules.iter().enumerate().skip(1).zip(relocations)
{
let name =
path.file_stem().unwrap_or(OsStr::new("[unknown]")).to_str().unwrap_or("[invalid]");
let _span = info_span!("module", name = %name).entered();
let _span = info_span!("file", path = %path.display()).entered();
let mut info = RelWriteInfo {
module_id: module_id as u32,
version: 3,
@ -230,13 +317,13 @@ fn make(args: MakeArgs) -> Result<()> {
section_count: None,
quiet: args.no_warn,
};
if let Some(existing_module) = existing_headers.get(&(module_id as u32)) {
info.version = existing_module.version;
info.name_offset = Some(existing_module.name_offset);
info.name_size = Some(existing_module.name_size);
info.align = existing_module.align;
info.bss_align = existing_module.bss_align;
info.section_count = Some(existing_module.num_sections as usize);
if let Some((header, _)) = existing_headers.get(&(module_id as u32)) {
info.version = header.version;
info.name_offset = Some(header.name_offset);
info.name_size = Some(header.name_size);
info.align = header.align;
info.bss_align = header.bss_align;
info.section_count = Some(header.num_sections as usize);
}
let rel_path = path.with_extension("rel");
let mut w = buf_writer(&rel_path)?;
@ -254,8 +341,7 @@ fn make(args: MakeArgs) -> Result<()> {
fn info(args: InfoArgs) -> Result<()> {
let file = map_file(args.rel_file)?;
let buf = decompress_if_needed(file.as_slice())?;
let (header, mut module_obj) = process_rel(&mut Reader::new(buf.as_ref()), "")?;
let (header, mut module_obj) = process_rel(&mut file.as_reader(), "")?;
let mut state = AnalyzerState::default();
state.detect_functions(&module_obj)?;
@ -319,9 +405,8 @@ fn merge(args: MergeArgs) -> Result<()> {
log::info!("Loading {}", args.dol_file.display());
let mut obj = {
let file = map_file(&args.dol_file)?;
let buf = decompress_if_needed(file.as_slice())?;
let name = args.dol_file.file_stem().map(|s| s.to_string_lossy()).unwrap_or_default();
process_dol(buf.as_ref(), name.as_ref())?
process_dol(file.as_slice(), name.as_ref())?
};
log::info!("Performing signature analysis");
@ -434,11 +519,14 @@ fn merge(args: MergeArgs) -> Result<()> {
log::info!("Detecting function boundaries");
let mut state = AnalyzerState::default();
FindSaveRestSleds::execute(&mut state, &obj)?;
state.detect_functions(&obj)?;
log::info!("Discovered {} functions", state.function_slices.len());
log::info!(
"Discovered {} functions",
state.functions.iter().filter(|(_, i)| i.is_function()).count()
);
FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
FindSaveRestSleds::execute(&mut state, &obj)?;
state.apply(&mut obj)?;
apply_signatures_post(&mut obj)?;

View File

@ -1,12 +1,9 @@
use std::path::PathBuf;
use anyhow::{Context, Result};
use anyhow::Result;
use argp::FromArgs;
use crate::util::{
file::{decompress_if_needed, map_file, Reader},
rso::process_rso,
};
use crate::util::{file::map_file, rso::process_rso};
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing RSO files.
@ -39,10 +36,10 @@ pub fn run(args: Args) -> Result<()> {
fn info(args: InfoArgs) -> Result<()> {
let rso = {
let file = map_file(&args.rso_file)?;
let data = decompress_if_needed(file.as_slice())
.with_context(|| format!("Failed to decompress '{}'", args.rso_file.display()))?;
process_rso(&mut Reader::new(data.as_ref()))?
let file = map_file(args.rso_file)?;
let obj = process_rso(&mut file.as_reader())?;
#[allow(clippy::let_and_return)]
obj
};
println!("Read RSO module {}", rso.name);
Ok(())

View File

@ -30,7 +30,8 @@ pub struct DecompressArgs {
/// YAZ0-compressed files
files: Vec<PathBuf>,
#[argp(option, short = 'o')]
/// Output directory. If not specified, decompresses in-place.
/// Output file (or directory, if multiple files are specified).
/// If not specified, decompresses in-place.
output: Option<PathBuf>,
}
@ -41,10 +42,16 @@ pub fn run(args: Args) -> Result<()> {
}
fn decompress(args: DecompressArgs) -> Result<()> {
for path in process_rsp(&args.files)? {
let files = process_rsp(&args.files)?;
let single_file = files.len() == 1;
for path in files {
let data = decompress_reader(&mut open_file(&path)?)?;
let out_path = if let Some(output) = &args.output {
output.join(path.file_name().unwrap()).into_cow()
if single_file {
output.as_path().to_cow()
} else {
output.join(path.file_name().unwrap()).into_cow()
}
} else {
path.as_path().to_cow()
};

View File

@ -82,6 +82,7 @@ enum SubCommand {
Elf2Dol(cmd::elf2dol::Args),
// Map(cmd::map::Args),
MetroidBuildInfo(cmd::metroidbuildinfo::Args),
Nlzss(cmd::nlzss::Args),
Rarc(cmd::rarc::Args),
Rel(cmd::rel::Args),
Rso(cmd::rso::Args),
@ -129,6 +130,7 @@ fn main() {
SubCommand::Elf2Dol(c_args) => cmd::elf2dol::run(c_args),
// SubCommand::Map(c_args) => cmd::map::run(c_args),
SubCommand::MetroidBuildInfo(c_args) => cmd::metroidbuildinfo::run(c_args),
SubCommand::Nlzss(c_args) => cmd::nlzss::run(c_args),
SubCommand::Rarc(c_args) => cmd::rarc::run(c_args),
SubCommand::Rel(c_args) => cmd::rel::run(c_args),
SubCommand::Rso(c_args) => cmd::rso::run(c_args),

View File

@ -69,8 +69,8 @@ pub struct ObjInfo {
pub link_order: Vec<ObjUnit>,
pub blocked_ranges: BTreeMap<SectionAddress, u32>, // start -> end
// From extab
pub known_functions: BTreeMap<SectionAddress, u32>,
// From .ctors, .dtors and extab
pub known_functions: BTreeMap<SectionAddress, Option<u32>>,
// REL
/// Module ID (0 for main)

View File

@ -1,9 +1,12 @@
use std::{collections::BTreeMap, ops::RangeBounds};
use std::{cmp::max, collections::BTreeMap, ops::RangeBounds};
use anyhow::{anyhow, Result};
use itertools::Itertools;
use crate::util::nested::NestedVec;
use crate::{
obj::{ObjInfo, ObjSection},
util::{nested::NestedVec, split::default_section_align},
};
/// Marks a split point within a section.
#[derive(Debug, Clone, Eq, PartialEq)]
@ -21,6 +24,30 @@ pub struct ObjSplit {
pub rename: Option<String>,
}
impl ObjSplit {
pub fn alignment(
&self,
obj: &ObjInfo,
section_index: usize,
section: &ObjSection,
split_addr: u32,
) -> u32 {
self.align.unwrap_or_else(|| {
let default_align = default_section_align(section) as u32;
max(
// Maximum alignment of any symbol in this split
obj.symbols
.for_section_range(section_index, split_addr..self.end)
.filter(|&(_, s)| s.size_known && s.size > 0)
.filter_map(|(_, s)| s.align)
.max()
.unwrap_or(default_align),
default_align,
)
})
}
}
/// Splits within a section.
#[derive(Debug, Clone, Default)]
pub struct ObjSplits {
@ -46,6 +73,13 @@ impl ObjSplits {
}
}
pub fn at_mut(&mut self, address: u32) -> Option<&mut ObjSplit> {
match self.for_range_mut(..=address).next_back() {
Some((_, split)) if split.end == 0 || split.end > address => Some(split),
_ => None,
}
}
/// Locate existing splits within the given address range.
pub fn for_range<R>(&self, range: R) -> impl DoubleEndedIterator<Item = (u32, &ObjSplit)>
where R: RangeBounds<u32> {

View File

@ -112,6 +112,15 @@ impl ObjSymbolFlagSet {
self.0 &= !ObjSymbolFlags::ForceActive;
}
}
/// Special flags to keep when merging symbols.
#[inline]
pub fn keep_flags(&self) -> FlagSet<ObjSymbolFlags> {
self.0
& (ObjSymbolFlags::ForceActive
| ObjSymbolFlags::NoWrite
| ObjSymbolFlags::RelocationIgnore)
}
}
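A hypothetical illustration of keep_flags() during a merge, mirroring the flag OR used when merging symbols further down in this file: flags like ForceActive on the existing symbol survive even when the incoming symbol does not carry them (assumes flagset's contains() helper):

    #[cfg(test)]
    mod keep_flags_tests {
        use super::{ObjSymbolFlagSet, ObjSymbolFlags};

        #[test]
        fn force_active_survives_merge() {
            let mut existing = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
            existing.set_force_active(true);
            let incoming = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
            // Same shape as the merge below: incoming flags plus the kept flags.
            let merged = ObjSymbolFlagSet(incoming.0 | existing.keep_flags());
            assert!(merged.0.contains(ObjSymbolFlags::ForceActive));
        }
    }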
#[allow(clippy::derived_hash_with_manual_eq)]
@ -213,8 +222,7 @@ impl ObjSymbols {
let replace = replace || (is_auto_symbol(existing) && !is_auto_symbol(&in_symbol));
let size =
if existing.size_known && in_symbol.size_known && existing.size != in_symbol.size {
// TODO fix and promote back to warning
log::debug!(
log::warn!(
"Conflicting size for {}: was {:#X}, now {:#X}",
existing.name,
existing.size,
@ -248,7 +256,7 @@ impl ObjSymbols {
section: in_symbol.section,
size,
size_known: existing.size_known || in_symbol.size != 0,
flags: in_symbol.flags,
flags: ObjSymbolFlagSet(in_symbol.flags.0 | existing.flags.keep_flags()),
kind: in_symbol.kind,
align: in_symbol.align.or(existing.align),
data_kind: match in_symbol.data_kind {
@ -511,7 +519,8 @@ impl ObjSymbol {
ObjSymbolKind::Unknown => true,
ObjSymbolKind::Function => !matches!(reloc_kind, ObjRelocKind::PpcEmbSda21),
ObjSymbolKind::Object => {
!matches!(reloc_kind, ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24)
// !matches!(reloc_kind, ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24)
true // SADX has bugged relocations that jump from .text to .bss, how awful
}
ObjSymbolKind::Section => {
matches!(

View File

@ -32,7 +32,7 @@ impl DepFile {
pub fn write<W: Write>(&self, w: &mut W) -> std::io::Result<()> {
write!(w, "{}:", self.name.to_slash_lossy())?;
for dep in self.dependencies.iter().unique() {
write!(w, " \\\n {}", dep.to_slash_lossy())?;
write!(w, " \\\n {}", dep.to_slash_lossy().replace(' ', "\\ "))?;
}
Ok(())
}
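Make treats an unescaped space in a prerequisite as a separator, so the added replace() keeps a path containing spaces as a single dependency. A tiny hypothetical check of the escaping (path is made up):

    #[cfg(test)]
    mod dep_escape_tests {
        #[test]
        fn spaces_are_escaped() {
            // Mirrors the replace() above.
            let escaped = "orig/files/Stage One.arc".replace(' ', "\\ ");
            assert_eq!(escaped, "orig/files/Stage\\ One.arc");
        }
    }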

View File

@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::{collections::BTreeMap, io::Cursor};
use anyhow::{anyhow, bail, ensure, Result};
use dol::{Dol, DolSection, DolSectionType};
@ -9,7 +9,6 @@ use crate::{
ObjArchitecture, ObjInfo, ObjKind, ObjSection, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind,
},
util::file::Reader,
};
const MAX_TEXT_SECTIONS: usize = 7;
@ -23,7 +22,7 @@ fn read_u32(dol: &Dol, addr: u32) -> Result<u32> {
}
pub fn process_dol(buf: &[u8], name: &str) -> Result<ObjInfo> {
let dol = Dol::read_from(Reader::new(buf))?;
let dol = Dol::read_from(Cursor::new(buf))?;
// Locate _rom_copy_info
let first_rom_section = dol
@ -410,7 +409,9 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result<ObjInfo> {
)
})?;
let addr = SectionAddress::new(section_index, entry.function);
if let Some(old_value) = obj.known_functions.insert(addr, entry.function_size) {
if let Some(Some(old_value)) =
obj.known_functions.insert(addr, Some(entry.function_size))
{
if old_value != entry.function_size {
log::warn!(
"Conflicting sizes for {:#010X}: {:#X} != {:#X}",
@ -465,6 +466,36 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result<ObjInfo> {
}
}
// Add .ctors and .dtors functions to known functions if they exist
for (_, section) in obj.sections.iter() {
if section.size & 3 != 0 {
continue;
}
let mut entries = vec![];
let mut current_addr = section.address as u32;
for chunk in section.data.chunks_exact(4) {
let addr = u32::from_be_bytes(chunk.try_into()?);
if addr == 0 || addr & 3 != 0 {
break;
}
let Ok((section_index, section)) = obj.sections.at_address(addr) else {
break;
};
if section.kind != ObjSectionKind::Code {
break;
}
entries.push(SectionAddress::new(section_index, addr));
current_addr += 4;
}
// .ctors and .dtors end with a null pointer
if current_addr != (section.address + section.size) as u32 - 4
|| section.data_range(current_addr, 0)?.iter().any(|&b| b != 0)
{
continue;
}
obj.known_functions.extend(entries.into_iter().map(|addr| (addr, None)));
}
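Concretely, the scan above only records a section shaped like the hypothetical layout below: a run of 4-byte-aligned pointers into code sections, terminated by a single null word that is also the final word of the section. Anything else (extra padding after the terminator, a misaligned word, a pointer outside code) causes the section to be skipped. Illustrative values only, not from this commit:

    // Hypothetical .ctors-like section contents (big-endian words, as read above):
    const CTORS_LIKE: [u32; 3] = [
        0x8000_3100, // pointer into a code section (e.g. a static initializer)
        0x8000_31A0, // another initializer pointer
        0x0000_0000, // null terminator; must occupy the last 4 bytes of the section
    ];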
// Locate _SDA2_BASE_ & _SDA_BASE_
match locate_sda_bases(&mut obj) {
Ok(true) => {

View File

@ -23,9 +23,7 @@ pub struct MappedFile {
}
impl MappedFile {
pub fn new(mmap: Mmap, offset: u64, len: u64) -> Self { Self { mmap, offset, len } }
pub fn as_reader(&self) -> Reader { Reader::new(self.as_slice()) }
pub fn as_reader(&self) -> Cursor<&[u8]> { Cursor::new(self.as_slice()) }
pub fn as_slice(&self) -> &[u8] {
&self.mmap[self.offset as usize..self.offset as usize + self.len as usize]
@ -59,28 +57,47 @@ pub fn split_path<P: AsRef<Path>>(path: P) -> Result<(PathBuf, Option<PathBuf>)>
Ok((base_path, sub_path))
}
/// Opens a memory mapped file.
pub fn map_file<P: AsRef<Path>>(path: P) -> Result<MappedFile> {
let (base_path, sub_path) = split_path(path)?;
/// Opens a memory-mapped file and decompresses it if needed.
pub fn map_file<P: AsRef<Path>>(path: P) -> Result<FileEntry> {
let (base_path, sub_path) = split_path(path.as_ref())?;
let file = File::open(&base_path)
.with_context(|| format!("Failed to open file '{}'", base_path.display()))?;
let mmap = unsafe { MmapOptions::new().map(&file) }
.with_context(|| format!("Failed to mmap file: '{}'", base_path.display()))?;
let (offset, len) = if let Some(sub_path) = sub_path {
let rarc = rarc::RarcReader::new(&mut Reader::new(&*mmap))
.with_context(|| format!("Failed to read RARC '{}'", base_path.display()))?;
let mut reader = Cursor::new(&*mmap);
if sub_path.as_os_str() == OsStr::new("nlzss") {
return Ok(FileEntry::Buffer(nintendo_lz::decompress(&mut reader).map_err(|e| {
anyhow!("Failed to decompress '{}' with NLZSS: {}", path.as_ref().display(), e)
})?));
} else if sub_path.as_os_str() == OsStr::new("yaz0") {
return Ok(FileEntry::Buffer(yaz0::decompress_file(&mut reader).with_context(
|| format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()),
)?));
}
let rarc = rarc::RarcReader::new(&mut reader)
.with_context(|| format!("Failed to open '{}' as RARC archive", base_path.display()))?;
rarc.find_file(&sub_path)?.map(|(o, s)| (o, s as u64)).ok_or_else(|| {
anyhow!("File '{}' not found in '{}'", sub_path.display(), base_path.display())
})?
} else {
(0, mmap.len() as u64)
};
Ok(MappedFile { mmap, offset, len })
let map = MappedFile { mmap, offset, len };
let buf = map.as_slice();
// Auto-detect compression if there's a magic number.
if buf.len() > 4 && buf[0..4] == *b"Yaz0" {
return Ok(FileEntry::Buffer(yaz0::decompress_file(&mut map.as_reader()).with_context(
|| format!("Failed to decompress '{}' with Yaz0", path.as_ref().display()),
)?));
}
Ok(FileEntry::MappedFile(map))
}
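Callers can now stay oblivious to compression: the "nlzss" sub-path selector (parsed by split_path) and the Yaz0 magic check both return a decompressed FileEntry::Buffer, while plain files remain memory mapped. A short sketch of a hypothetical caller:

    use anyhow::Result;

    fn load_possibly_compressed(path: &str) -> Result<Vec<u8>> {
        // The path may carry an "nlzss" sub-path selector to force NLZSS decompression;
        // plain and Yaz0 files work the same way.
        let entry = map_file(path)?;
        Ok(entry.as_slice().to_vec())
    }

Both variants expose as_slice(), so nothing downstream has to branch on whether decompression happened.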
pub type OpenedFile = TakeSeek<File>;
/// Opens a file (not memory mapped).
/// Opens a file (not memory mapped). No decompression is performed.
pub fn open_file<P: AsRef<Path>>(path: P) -> Result<OpenedFile> {
let (base_path, sub_path) = split_path(path)?;
let mut file = File::open(&base_path)
@ -98,7 +115,10 @@ pub fn open_file<P: AsRef<Path>>(path: P) -> Result<OpenedFile> {
Ok(file.take_seek(size))
}
pub type Reader<'a> = Cursor<&'a [u8]>;
pub trait Reader: BufRead + Seek {}
impl Reader for Cursor<&[u8]> {}
// impl Reader for &mut OpenedFile {}
/// Creates a buffered reader around a file (not memory mapped).
pub fn buf_reader<P: AsRef<Path>>(path: P) -> Result<BufReader<File>> {
@ -178,7 +198,7 @@ struct RarcIterator {
impl RarcIterator {
pub fn new(file: Mmap, base_path: &Path) -> Result<Self> {
let reader = rarc::RarcReader::new(&mut Reader::new(&*file))?;
let reader = rarc::RarcReader::new(&mut Cursor::new(&*file))?;
let paths = Self::collect_paths(&reader, base_path);
Ok(Self { file, base_path: base_path.to_owned(), paths, index: 0 })
}
@ -233,10 +253,31 @@ pub enum FileEntry {
impl FileEntry {
/// Creates a reader for the file.
pub fn as_reader(&self) -> Reader {
pub fn as_reader(&self) -> Box<dyn Reader + '_> {
match self {
Self::MappedFile(file) => file.as_reader(),
Self::Buffer(slice) => Reader::new(slice),
Self::MappedFile(file) => Box::new(file.as_reader()),
Self::Buffer(slice) => Box::new(Cursor::new(slice.as_slice())),
}
}
pub fn as_slice(&self) -> &[u8] {
match self {
Self::MappedFile(file) => file.as_slice(),
Self::Buffer(slice) => slice.as_slice(),
}
}
pub fn len(&self) -> u64 {
match self {
Self::MappedFile(file) => file.len(),
Self::Buffer(slice) => slice.len() as u64,
}
}
pub fn is_empty(&self) -> bool {
match self {
Self::MappedFile(file) => file.is_empty(),
Self::Buffer(slice) => slice.is_empty(),
}
}
}
@ -279,7 +320,8 @@ impl FileIterator {
let path = self.paths[self.index].clone();
self.index += 1;
match map_file(&path) {
Ok(map) => self.handle_file(map, path),
Ok(FileEntry::MappedFile(map)) => self.handle_file(map, path),
Ok(FileEntry::Buffer(_)) => todo!(),
Err(err) => Some(Err(err)),
}
}
@ -303,7 +345,7 @@ impl FileIterator {
fn handle_yaz0(
&mut self,
mut reader: Reader,
mut reader: Cursor<&[u8]>,
path: PathBuf,
) -> Option<Result<(PathBuf, FileEntry)>> {
Some(match yaz0::decompress_file(&mut reader) {
@ -340,7 +382,7 @@ pub fn touch<P: AsRef<Path>>(path: P) -> std::io::Result<()> {
pub fn decompress_if_needed(buf: &[u8]) -> Result<Cow<[u8]>> {
Ok(if buf.len() > 4 && buf[0..4] == *b"Yaz0" {
yaz0::decompress_file(&mut Reader::new(buf))?.into_cow()
yaz0::decompress_file(&mut Cursor::new(buf))?.into_cow()
} else {
buf.to_cow()
})

View File

@ -78,9 +78,6 @@ pub fn generate_ldscript(obj: &ObjInfo, force_active: &[String]) -> Result<Strin
}
pub fn generate_ldscript_partial(obj: &ObjInfo, force_active: &[String]) -> Result<String> {
let section_defs =
obj.sections.iter().map(|(_, s)| format!("{} :{{}}", s.name)).join("\n ");
let mut force_files = Vec::with_capacity(obj.link_order.len());
for unit in &obj.link_order {
let obj_path = obj_path_for_unit(&unit.name);
@ -96,7 +93,6 @@ pub fn generate_ldscript_partial(obj: &ObjInfo, force_active: &[String]) -> Resu
}
let out = include_str!("../../assets/ldscript_partial.lcf")
.replace("$SECTIONS", &section_defs)
.replace("$FORCEACTIVE", &force_active.join("\n "));
Ok(out)
}

View File

@ -564,7 +564,7 @@ impl StateMachine {
}
}
pub fn process_map<R: BufRead>(reader: &mut R) -> Result<MapInfo> {
pub fn process_map<R: BufRead + ?Sized>(reader: &mut R) -> Result<MapInfo> {
let mut sm = StateMachine {
state: ProcessMapState::None,
result: Default::default(),

View File

@ -110,7 +110,7 @@ struct RelImport {
#[binrw]
#[derive(Copy, Clone, Debug)]
struct RelSectionHeader {
pub struct RelSectionHeader {
offset_and_flags: u32,
size: u32,
}
@ -120,11 +120,11 @@ impl RelSectionHeader {
Self { offset_and_flags: offset | (exec as u32), size }
}
fn offset(&self) -> u32 { self.offset_and_flags & !1 }
pub fn offset(&self) -> u32 { self.offset_and_flags & !1 }
fn size(&self) -> u32 { self.size }
pub fn size(&self) -> u32 { self.size }
fn exec(&self) -> bool { self.offset_and_flags & 1 != 0 }
pub fn exec(&self) -> bool { self.offset_and_flags & 1 != 0 }
}
#[binrw]
@ -140,15 +140,26 @@ pub fn process_rel_header<R: Read + Seek>(reader: &mut R) -> Result<RelHeader> {
RelHeader::read_be(reader).context("Failed to read REL header")
}
pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHeader, ObjInfo)> {
let header = process_rel_header(reader)?;
pub fn process_rel_sections<R: Read + Seek>(
reader: &mut R,
header: &RelHeader,
) -> Result<Vec<RelSectionHeader>> {
let mut sections = Vec::with_capacity(header.num_sections as usize);
reader.seek(SeekFrom::Start(header.section_info_offset as u64))?;
let mut found_text = false;
let mut total_bss_size = 0;
for idx in 0..header.num_sections {
let section = RelSectionHeader::read_be(reader)
.with_context(|| format!("Failed to read REL section header {}", idx))?;
sections.push(section);
}
Ok(sections)
}
pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHeader, ObjInfo)> {
let header = process_rel_header(reader)?;
let mut sections = Vec::with_capacity(header.num_sections as usize);
let mut text_section = None;
let mut total_bss_size = 0;
for (idx, section) in process_rel_sections(reader, &header)?.iter().enumerate() {
let offset = section.offset();
let size = section.size();
if size == 0 {
@ -173,8 +184,8 @@ pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHea
total_bss_size = size;
(".bss".to_string(), ObjSectionKind::Bss, true)
} else if section.exec() {
ensure!(!found_text, "Multiple text sections in REL");
found_text = true;
ensure!(text_section.is_none(), "Multiple text sections in REL");
text_section = Some(idx as u8);
(".text".to_string(), ObjSectionKind::Code, true)
} else {
(format!(".section{}", idx), ObjSectionKind::Data, false)
@ -190,7 +201,7 @@ pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHea
_ => header.align,
}
.unwrap_or_default() as u64,
elf_index: idx as usize,
elf_index: idx,
relocations: Default::default(),
original_address: 0,
file_offset: offset as u64,
@ -206,32 +217,37 @@ pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHea
);
let mut symbols = Vec::new();
let mut add_symbol = |rel_section_idx: u8, offset: u32, name: &str| -> Result<()> {
if rel_section_idx > 0 {
let (section_index, _) = sections
.iter()
.enumerate()
.find(|&(_, section)| section.elf_index == rel_section_idx as usize)
.ok_or_else(|| anyhow!("Failed to locate {name} section {rel_section_idx}"))?;
log::debug!("Adding {name} section {rel_section_idx} offset {offset:#X}");
symbols.push(ObjSymbol {
name: name.to_string(),
demangled_name: None,
address: offset as u64,
section: Some(section_index),
size: 0,
size_known: false,
flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()),
kind: ObjSymbolKind::Function,
align: None,
data_kind: Default::default(),
});
}
Ok(())
};
add_symbol(header.prolog_section, header.prolog_offset, "_prolog")?;
add_symbol(header.epilog_section, header.epilog_offset, "_epilog")?;
add_symbol(header.unresolved_section, header.unresolved_offset, "_unresolved")?;
let mut add_symbol =
|rel_section_idx: u8, offset: u32, name: &str, force_active: bool| -> Result<()> {
if rel_section_idx > 0 {
let (section_index, _) = sections
.iter()
.enumerate()
.find(|&(_, section)| section.elf_index == rel_section_idx as usize)
.ok_or_else(|| anyhow!("Failed to locate {name} section {rel_section_idx}"))?;
log::debug!("Adding {name} section {rel_section_idx} offset {offset:#X}");
let mut flags = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
if force_active {
flags.set_force_active(true);
}
symbols.push(ObjSymbol {
name: name.to_string(),
demangled_name: None,
address: offset as u64,
section: Some(section_index),
size: 0,
size_known: false,
flags,
kind: ObjSymbolKind::Function,
align: None,
data_kind: Default::default(),
});
}
Ok(())
};
add_symbol(header.prolog_section, header.prolog_offset, "_prolog", true)?;
add_symbol(header.epilog_section, header.epilog_offset, "_epilog", true)?;
add_symbol(header.unresolved_section, header.unresolved_offset, "_unresolved", true)?;
let mut unresolved_relocations = Vec::new();
let mut imp_idx = 0;
@ -303,6 +319,8 @@ pub fn process_rel<R: Read + Seek>(reader: &mut R, name: &str) -> Result<(RelHea
module_id: import.module_id,
target_section: reloc.section,
addend: reloc.addend,
original_section: section,
original_target_section: reloc.section,
};
unresolved_relocations.push(reloc);
}
@ -338,6 +356,10 @@ pub struct RelReloc {
/// Target addend within section.
/// If target module ID is 0 (DOL), this is an absolute address.
pub addend: u32,
// EXTRA for matching
pub original_section: u8,
pub original_target_section: u8,
}
#[inline]
@ -356,13 +378,15 @@ fn apply_relocation(
data: &mut [u8],
module_id: u32,
rel_reloc: &RelReloc,
unresolved: u32,
header: &RelHeader,
) -> Result<()> {
let diff = if rel_reloc.module_id == module_id && rel_reloc.section == rel_reloc.target_section
{
rel_reloc.addend as i32 - rel_reloc.address as i32
} else if header.unresolved_section == rel_reloc.section {
header.unresolved_offset as i32 - rel_reloc.address as i32
} else {
unresolved as i32 - rel_reloc.address as i32
return Ok(());
};
let ins_ref = array_ref_mut!(data, rel_reloc.address as usize, 4);
let mut ins = u32::from_be_bytes(*ins_ref);
@ -404,7 +428,7 @@ pub struct RelWriteInfo {
pub quiet: bool,
}
const PERMITTED_SECTIONS: [&str; 7] =
pub const PERMITTED_SECTIONS: [&str; 7] =
[".init", ".text", ".ctors", ".dtors", ".rodata", ".data", ".bss"];
pub fn should_write_section(section: &object::Section) -> bool {
@ -445,7 +469,7 @@ pub fn write_rel<W: Write>(
let mut apply_relocations = vec![];
relocations.retain(|r| {
if !should_write_section(
&file.section_by_index(object::SectionIndex(r.section as usize)).unwrap(),
&file.section_by_index(object::SectionIndex(r.original_section as usize)).unwrap(),
) {
return false;
}
@ -660,10 +684,10 @@ pub fn write_rel<W: Write>(
let section_index = section.index().0 as u8;
let mut section_data = section.uncompressed_data()?;
if apply_relocations.iter().any(|r| r.section == section_index) {
if apply_relocations.iter().any(|r| r.original_section == section_index) {
let mut data = section_data.into_owned();
for reloc in apply_relocations.iter().filter(|r| r.section == section_index) {
apply_relocation(&mut data, info.module_id, reloc, header.unresolved_offset)?;
for reloc in apply_relocations.iter().filter(|r| r.original_section == section_index) {
apply_relocation(&mut data, info.module_id, reloc, &header)?;
}
section_data = data.into_cow();
}

View File

@ -13,6 +13,7 @@ use crate::{
analysis::{
cfa::SectionAddress,
tracker::{Relocation, Tracker},
RelocationTarget,
},
array_ref,
obj::{
@ -171,16 +172,18 @@ pub fn apply_signature(
None => continue,
};
let target = match (reloc, sig_reloc.kind) {
(&Relocation::Absolute(addr), ObjRelocKind::Absolute)
| (&Relocation::Hi(addr), ObjRelocKind::PpcAddr16Hi)
| (&Relocation::Ha(addr), ObjRelocKind::PpcAddr16Ha)
| (&Relocation::Lo(addr), ObjRelocKind::PpcAddr16Lo)
| (&Relocation::Rel24(addr), ObjRelocKind::PpcRel24)
| (&Relocation::Rel14(addr), ObjRelocKind::PpcRel14)
| (&Relocation::Sda21(addr), ObjRelocKind::PpcEmbSda21) => SectionAddress::new(
addr.section,
(addr.address as i64 - sig_reloc.addend as i64) as u32,
),
(&Relocation::Absolute(RelocationTarget::Address(addr)), ObjRelocKind::Absolute)
| (&Relocation::Hi(RelocationTarget::Address(addr)), ObjRelocKind::PpcAddr16Hi)
| (&Relocation::Ha(RelocationTarget::Address(addr)), ObjRelocKind::PpcAddr16Ha)
| (&Relocation::Lo(RelocationTarget::Address(addr)), ObjRelocKind::PpcAddr16Lo)
| (&Relocation::Rel24(RelocationTarget::Address(addr)), ObjRelocKind::PpcRel24)
| (&Relocation::Rel14(RelocationTarget::Address(addr)), ObjRelocKind::PpcRel14)
| (&Relocation::Sda21(RelocationTarget::Address(addr)), ObjRelocKind::PpcEmbSda21) => {
SectionAddress::new(
addr.section,
(addr.address as i64 - sig_reloc.addend as i64) as u32,
)
}
_ => bail!("Relocation mismatch: {:?} != {:?}", reloc, sig_reloc.kind),
};
let sig_symbol = &signature.symbols[sig_reloc.symbol];

View File

@ -69,7 +69,7 @@ fn split_ctors_dtors(obj: &mut ObjInfo, start: SectionAddress, end: SectionAddre
.section
.and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
.unwrap_or_else(|| "unknown".to_string());
format!("{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
format!("auto_{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
});
log::debug!("Adding splits to unit {}", unit);
@ -250,7 +250,7 @@ fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> {
.section
.and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
.unwrap_or_else(|| "unknown".to_string());
format!("{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
format!("auto_{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
});
log::debug!("Adding splits to unit {}", unit);
@ -333,12 +333,20 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
break;
}
let (split_start, split_end) = match file_iter.peek() {
let (split_start, split_end, split_align) = match file_iter.peek() {
Some(&(addr, split)) => {
log::debug!("Found split {} ({:#010X}..{:#010X})", split.unit, addr, split.end);
(addr, SectionAddress::new(section_index, split.end))
(
addr,
SectionAddress::new(section_index, split.end),
split.alignment(obj, section_index, section, addr.address),
)
}
None => (section_end, SectionAddress::new(section_index, 0)),
None => (
section_end,
SectionAddress::new(section_index, 0),
default_section_align(section) as u32,
),
};
ensure!(
split_start >= current_address,
@ -347,7 +355,15 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
split_end
);
if split_start > current_address {
let aligned_addr = current_address.align_up(split_align);
if split_start > aligned_addr {
log::debug!(
"Creating auto split: {} > {} (orig: {}, align {})",
split_start,
aligned_addr,
current_address,
split_align
);
// Find any duplicate symbols in this range
let mut new_split_end = split_start;
let symbols = obj
@ -355,7 +371,7 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
.for_section_range(section_index, current_address.address..split_start.address)
.collect_vec();
let mut existing_symbols = HashSet::new();
for (_, symbol) in symbols {
for &(_, symbol) in &symbols {
if !existing_symbols.insert(symbol.name.clone()) {
log::debug!(
"Found duplicate symbol {} at {:#010X}",
@ -367,13 +383,22 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
}
}
ensure!(
new_split_end > current_address,
"Duplicate symbols at {:#010X}: {:?}",
current_address,
symbols
.iter()
.filter(|(_, s)| s.address == current_address.address as u64)
.collect_vec(),
);
log::debug!(
"Creating split from {:#010X}..{:#010X}",
current_address,
new_split_end
);
let unit = format!(
"{:02}_{:08X}_{}",
"auto_{:02}_{:08X}_{}",
current_address.section,
current_address.address,
section.name.trim_start_matches('.')
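
Gap splits get the same auto_ prefix, with the section index zero-padded to two digits and the start address in hex; for example (values invented):

    fn main() {
        let unit = format!("auto_{:02}_{:08X}_{}", 1usize, 0x8004_5000u32, "text");
        assert_eq!(unit, "auto_01_80045000_text");
    }
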
@ -622,6 +647,85 @@ fn add_padding_symbols(obj: &mut ObjInfo) -> Result<()> {
#[inline]
const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) }
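
align_up is the usual power-of-two rounding trick: add align - 1, then mask off the low bits. It assumes align is a non-zero power of two; a few sanity checks:

    // Same helper as above, repeated so the snippet stands alone.
    const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) }

    fn main() {
        assert_eq!(align_up(0x13, 8), 0x18);                  // rounds up to the next 8-byte boundary
        assert_eq!(align_up(0x18, 8), 0x18);                  // already-aligned values are unchanged
        assert_eq!(align_up(0x8004_5001, 0x20), 0x8004_5020); // works the same on full addresses
    }
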
#[allow(dead_code)]
fn trim_split_alignment(obj: &mut ObjInfo) -> Result<()> {
// For each split, set the end of split to the end of the last symbol in the split.
let mut split_updates = vec![];
let mut iter = obj.sections.all_splits().peekable();
while let Some((section_index, section, addr, split)) = iter.next() {
let next_split = iter
.peek()
.filter(|&&(idx, _, _, _)| section_index == idx)
.map(|&(_, _, addr, split)| (addr, split));
let mut split_end = split.end;
if let Some((_, symbol)) = obj
.symbols
.for_section_range(section_index, addr..split.end)
.filter(|&(_, s)| s.size_known && s.size > 0)
.next_back()
{
split_end = symbol.address as u32 + symbol.size as u32;
}
split_end = align_up(split_end, split.alignment(obj, section_index, section, addr));
if split_end < split.end {
if let Some((next_addr, next_split)) = next_split {
let next_split_align = next_split.alignment(obj, section_index, section, addr);
if align_up(split_end, next_split_align) < next_addr {
log::warn!(
"Tried to trim {} split {} {:#010X}..{:#010X} to {:#010X}, but next split {} starts at {:#010X} with alignment {}",
section.name,
split.unit,
addr,
split.end,
split_end,
next_split.unit,
next_addr,
next_split_align
);
}
}
log::info!(
"Trimming {} split {} {:#010X}..{:#010X} to {:#010X}",
section.name,
split.unit,
addr,
split.end,
split_end
);
split_updates.push((section_index, addr, split_end));
}
}
drop(iter);
for (section_index, addr, split_end) in split_updates {
obj.sections[section_index].splits.at_mut(addr).unwrap().end = split_end;
}
Ok(())
}
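
In effect, trim_split_alignment pulls each split's end back to just past its last symbol with a known size, then re-applies the split's alignment; it only ever shrinks a split. A rough worked example with invented numbers (the real code walks ObjInfo splits and symbols rather than bare integers):

    const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) }

    fn main() {
        // A split declared as 0x100..0x180 whose last sized symbol ends at 0x158.
        let declared_end = 0x180u32;
        let last_symbol_end = 0x158u32;
        let split_align = 0x10u32;
        let trimmed_end = align_up(last_symbol_end, split_align);
        assert_eq!(trimmed_end, 0x160);
        assert!(trimmed_end < declared_end); // mirrors the `split_end < split.end` check above
    }
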
/// Trim splits if they contain linker generated symbols.
fn trim_linker_generated_symbols(obj: &mut ObjInfo) -> Result<()> {
for section_index in 0..obj.sections.count() {
let section_end = end_for_section(obj, section_index)?;
let section = &mut obj.sections[section_index];
if section.address as u32 + section.size as u32 == section_end.address {
continue;
}
if let Some((addr, split)) = section.splits.iter_mut().next_back() {
if split.end > section_end.address {
log::debug!(
"Trimming split {} {:#010X}..{:#010X} to {:#010X}",
split.unit,
addr,
split.end,
section_end.address
);
split.end = section_end.address;
}
}
}
Ok(())
}
/// Perform any necessary adjustments to allow relinking.
/// This includes:
/// - Ensuring .ctors & .dtors entries are split with their associated function
@ -654,6 +758,9 @@ pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bo
split_ctors_dtors(obj, start, end)?;
}
// Remove linker generated symbols from splits
trim_linker_generated_symbols(obj)?;
// Create gap splits
create_gap_splits(obj)?;
@ -663,6 +770,10 @@ pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bo
// Ensure splits don't overlap symbols or each other
validate_splits(obj)?;
// Trim alignment from splits
// TODO figure out mwld pooled data alignment
// trim_split_alignment(obj)?;
if fill_gaps {
// Add symbols to beginning of any split that doesn't start with a symbol
add_padding_symbols(obj)?;
@ -793,7 +904,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
for (section_index, section) in obj.sections.iter() {
let mut current_address = SectionAddress::new(section_index, section.address as u32);
let section_end = end_for_section(obj, section_index)?;
let mut file_iter = section
let mut split_iter = section
.splits
.for_range(current_address.address..section_end.address)
.map(|(addr, split)| (SectionAddress::new(section_index, addr), split))
@ -804,30 +915,38 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
break;
}
let (file_addr, split) = match file_iter.next() {
let (split_addr, split) = match split_iter.next() {
Some((addr, split)) => (addr, split),
None => bail!("No file found"),
None => bail!("No split found"),
};
ensure!(
file_addr <= current_address,
"Gap in files: {} @ {:#010X}, {} @ {:#010X}",
split_addr == current_address,
"Split @ {} {} not found",
section.name,
section.address,
split.unit,
file_addr
current_address
);
let mut file_end = section_end;
if let Some(&(next_addr, _next_split)) = file_iter.peek() {
file_end = min(next_addr, section_end);
let split_end = SectionAddress::new(section_index, split.end);
let next_addr = split_iter.peek().map(|&(addr, _)| addr).unwrap_or(section_end);
if next_addr > split_end
&& section.data_range(split_end.address, next_addr.address)?.iter().any(|&b| b != 0)
{
bail!(
"Unsplit data in {} from {} {} to next split {}",
section.name,
split.unit,
split_end,
next_addr
);
}
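
The new guard refuses to silently drop real data that falls between a split's declared end and the next split: the gap may only contain zero padding. The core of the check is just a byte scan; with an invented gap:

    fn main() {
        // Invented contents of the gap between a split's declared end and the next split.
        let gap: &[u8] = &[0x00, 0x00, 0xFF, 0x00];
        let has_unsplit_data = gap.iter().any(|&b| b != 0);
        assert!(has_unsplit_data); // a single non-zero byte is enough to trigger the bail!()
    }
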
// Skip over this data
if split.skip {
current_address = file_end;
current_address = next_addr;
continue;
}
let file = name_to_obj
let split_obj = name_to_obj
.get(&split.unit)
.and_then(|&idx| objects.get_mut(idx))
.ok_or_else(|| anyhow!("Unit '{}' not in link order", split.unit))?;
@ -842,7 +961,10 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
max(
// Maximum alignment of any symbol in this split
obj.symbols
.for_section_range(section_index, current_address.address..file_end.address)
.for_section_range(
section_index,
current_address.address..split_end.address,
)
.filter(|&(_, s)| s.size_known && s.size > 0)
.filter_map(|(_, s)| s.align)
.max()
@ -877,7 +999,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
// Collect relocations; target_symbol will be updated later
let out_relocations = section
.relocations
.range(current_address.address..file_end.address)
.range(current_address.address..split_end.address)
.map(|(addr, o)| {
(addr - current_address.address, ObjReloc {
kind: o.kind,
@ -889,10 +1011,10 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
.collect_vec();
// Add section symbols
let out_section_idx = file.sections.next_section_index();
let out_section_idx = split_obj.sections.next_section_index();
for (symbol_idx, symbol) in obj
.symbols
.for_section_range(section_index, current_address.address..=file_end.address)
.for_section_range(section_index, current_address.address..=split_end.address)
.filter(|&(_, s)| {
s.section == Some(section_index) && !is_linker_generated_label(&s.name)
})
@ -902,7 +1024,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
}
// TODO hack for gTRKInterruptVectorTableEnd
if (symbol.address == file_end.address as u64
if (symbol.address == split_end.address as u64
&& symbol.name != "gTRKInterruptVectorTableEnd")
|| (symbol.address == current_address.address as u64
&& symbol.name == "gTRKInterruptVectorTableEnd")
@ -910,7 +1032,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
continue;
}
symbol_idxs[symbol_idx] = Some(file.symbols.add_direct(ObjSymbol {
symbol_idxs[symbol_idx] = Some(split_obj.symbols.add_direct(ObjSymbol {
name: symbol.name.clone(),
demangled_name: symbol.demangled_name.clone(),
address: if split.common {
@ -934,22 +1056,22 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
// For mwldeppc 2.7 and above, a .comment section is required to link without error
// when common symbols are present. Automatically add one if needed.
if split.common && file.mw_comment.is_none() {
file.mw_comment = Some(MWComment::new(8)?);
if split.common && split_obj.mw_comment.is_none() {
split_obj.mw_comment = Some(MWComment::new(8)?);
}
if !split.common {
let data = match section.kind {
ObjSectionKind::Bss => vec![],
_ => section.data[(current_address.address as u64 - section.address) as usize
..(file_end.address as u64 - section.address) as usize]
..(split_end.address as u64 - section.address) as usize]
.to_vec(),
};
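
The slice bounds convert absolute split addresses into offsets relative to the section's load address before copying the bytes into the new object; with invented addresses:

    fn main() {
        // A section loaded at 0x80004000 and a split covering 0x80004120..0x80004180.
        let section_address = 0x8000_4000u64;
        let split_start = 0x8000_4120u32;
        let split_end = 0x8000_4180u32;
        let start_off = (split_start as u64 - section_address) as usize;
        let end_off = (split_end as u64 - section_address) as usize;
        assert_eq!((start_off, end_off), (0x120, 0x180)); // byte range copied into the split object
    }
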
file.sections.push(ObjSection {
split_obj.sections.push(ObjSection {
name: split.rename.as_ref().unwrap_or(&section.name).clone(),
kind: section.kind,
address: 0,
size: file_end.address as u64 - current_address.address as u64,
size: split_end.address as u64 - current_address.address as u64,
data,
align,
elf_index: out_section_idx + 1,
@ -962,7 +1084,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
});
}
current_address = file_end;
current_address = next_addr;
}
}