From 5843ee021efc1074d6b25d266cd62bf3703be20a Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 17 Aug 2023 22:09:45 -0400 Subject: [PATCH] Begin REL analysis & rework lots of code to be section-address aware --- src/analysis/cfa.rs | 28 +- src/analysis/executor.rs | 33 +- src/analysis/mod.rs | 17 +- src/analysis/objects.rs | 29 +- src/analysis/pass.rs | 19 +- src/analysis/signatures.rs | 116 +++--- src/analysis/slices.rs | 18 +- src/analysis/tracker.rs | 73 ++-- src/cmd/dol.rs | 510 +++++++++++++++++------- src/cmd/elf.rs | 14 +- src/cmd/rel.rs | 18 +- src/obj/mod.rs | 682 ++------------------------------ src/obj/sections.rs | 247 ++++++++++++ src/obj/splits.rs | 76 ++++ src/obj/symbols.rs | 536 +++++++++++++++++++++++++ src/util/asm.rs | 27 +- src/util/config.rs | 71 ++-- src/util/dol.rs | 28 +- src/util/elf.rs | 55 +-- src/util/file.rs | 5 +- src/util/lcf.rs | 10 +- src/util/map.rs | 31 +- src/util/mod.rs | 2 + src/util/rel.rs | 18 +- src/util/rso.rs | 23 +- src/{obj => util}/signatures.rs | 32 +- src/{obj => util}/split.rs | 322 ++++++++------- 27 files changed, 1813 insertions(+), 1227 deletions(-) create mode 100644 src/obj/sections.rs create mode 100644 src/obj/splits.rs create mode 100644 src/obj/symbols.rs rename src/{obj => util}/signatures.rs (93%) rename src/{obj => util}/split.rs (77%) diff --git a/src/analysis/cfa.rs b/src/analysis/cfa.rs index fa09c5c..5e07eba 100644 --- a/src/analysis/cfa.rs +++ b/src/analysis/cfa.rs @@ -29,8 +29,10 @@ impl AnalyzerState { if end == 0 { continue; } - let section_index = - obj.section_for(start..end).context("Failed to locate section for function")?.index; + let (section_index, _) = obj + .sections + .with_range(start..end) + .context("Failed to locate section for function")?; obj.add_symbol( ObjSymbol { name: format!("fn_{:08X}", start), @@ -48,10 +50,10 @@ impl AnalyzerState { )?; } for (&addr, &size) in &self.jump_tables { - let section_index = obj - .section_for(addr..addr + size) - .context("Failed to locate 
section for jump table")? - .index; + let (section_index, _) = obj + .sections + .with_range(addr..addr + size) + .context("Failed to locate section for jump table")?; obj.add_symbol( ObjSymbol { name: format!("jumptable_{:08X}", addr), @@ -89,7 +91,7 @@ impl AnalyzerState { } } // Also check the beginning of every code section - for section in obj.sections.iter().filter(|s| s.kind == ObjSectionKind::Code) { + for (_, section) in obj.sections.by_kind(ObjSectionKind::Code) { self.function_entries.insert(section.address as u32); } @@ -266,11 +268,7 @@ impl AnalyzerState { fn detect_new_functions(&mut self, obj: &ObjInfo) -> Result { let mut found_new = false; - for section in &obj.sections { - if section.kind != ObjSectionKind::Code { - continue; - } - + for (_, section) in obj.sections.by_kind(ObjSectionKind::Code) { let section_start = section.address as u32; let section_end = (section.address + section.size) as u32; let mut iter = self.function_bounds.range(section_start..section_end).peekable(); @@ -280,7 +278,7 @@ impl AnalyzerState { if first_end == 0 || first_end > second_begin { continue; } - let addr = match skip_alignment(obj, first_end, second_begin) { + let addr = match skip_alignment(section, first_end, second_begin) { Some(addr) => addr, None => continue, }; @@ -298,7 +296,7 @@ impl AnalyzerState { } (Some((&last_begin, &last_end)), None) => { if last_end > 0 && last_end < section_end { - let addr = match skip_alignment(obj, last_end, section_end) { + let addr = match skip_alignment(section, last_end, section_end) { Some(addr) => addr, None => continue, }; @@ -329,7 +327,7 @@ pub fn locate_sda_bases(obj: &mut ObjInfo) -> Result { executor.push(obj.entry as u32, VM::new(), false); let result = executor.run( obj, - |ExecCbData { executor, vm, result, section: _, ins, block_start: _ }| { + |ExecCbData { executor, vm, result, section_index: _, section: _, ins, block_start: _ }| { match result { StepResult::Continue | StepResult::LoadStore { .. 
} => { return Ok(ExecCbResult::Continue); diff --git a/src/analysis/executor.rs b/src/analysis/executor.rs index ba06ef2..4792eaa 100644 --- a/src/analysis/executor.rs +++ b/src/analysis/executor.rs @@ -17,8 +17,8 @@ struct VisitedAddresses { impl VisitedAddresses { pub fn new(obj: &ObjInfo) -> Self { - let mut inner = Vec::with_capacity(obj.sections.len()); - for section in &obj.sections { + let mut inner = Vec::with_capacity(obj.sections.count()); + for (_, section) in obj.sections.iter() { if section.kind == ObjSectionKind::Code { let size = (section.size / 4) as usize; inner.push(FixedBitSet::with_capacity(size)); @@ -30,17 +30,17 @@ impl VisitedAddresses { Self { inner } } - pub fn contains(&self, section: &ObjSection, address: u32) -> bool { - self.inner[section.index].contains(Self::bit_for(section, address)) + pub fn contains(&self, section_index: usize, section_address: u32, address: u32) -> bool { + self.inner[section_index].contains(Self::bit_for(section_address, address)) } - pub fn insert(&mut self, section: &ObjSection, address: u32) { - self.inner[section.index].insert(Self::bit_for(section, address)); + pub fn insert(&mut self, section_index: usize, section_address: u32, address: u32) { + self.inner[section_index].insert(Self::bit_for(section_address, address)); } #[inline] - fn bit_for(section: &ObjSection, address: u32) -> usize { - ((address as u64 - section.address) / 4) as usize + fn bit_for(section_address: u32, address: u32) -> usize { + ((address - section_address) / 4) as usize } } @@ -59,6 +59,7 @@ pub struct ExecCbData<'a> { pub executor: &'a mut Executor, pub vm: &'a mut VM, pub result: StepResult, + pub section_index: usize, pub section: &'a ObjSection, pub ins: &'a Ins, pub block_start: u32, @@ -79,8 +80,8 @@ impl Executor { pub fn run(&mut self, obj: &ObjInfo, mut cb: Cb) -> Result> where Cb: FnMut(ExecCbData) -> Result> { while let Some(mut state) = self.vm_stack.pop() { - let section = match obj.section_at(state.address) { - 
Ok(section) => section, + let (section_index, section) = match obj.sections.at_address(state.address) { + Ok(ret) => ret, Err(e) => { log::error!("{}", e); // return Ok(None); @@ -93,13 +94,14 @@ impl Executor { } // Already visited block - if self.visited.contains(section, state.address) { + let section_address = section.address as u32; + if self.visited.contains(section_index, section_address, state.address) { continue; } let mut block_start = state.address; loop { - self.visited.insert(section, state.address); + self.visited.insert(section_index, section_address, state.address); let ins = match disassemble(section, state.address) { Some(ins) => ins, @@ -110,6 +112,7 @@ impl Executor { executor: self, vm: &mut state.vm, result, + section_index, section, ins: &ins, block_start, @@ -118,7 +121,7 @@ impl Executor { state.address += 4; } ExecCbResult::Jump(addr) => { - if self.visited.contains(section, addr) { + if self.visited.contains(section_index, section_address, addr) { break; } block_start = addr; @@ -140,7 +143,7 @@ impl Executor { } } - pub fn visited(&self, section: &ObjSection, address: u32) -> bool { - self.visited.contains(section, address) + pub fn visited(&self, section_index: usize, section_address: u32, address: u32) -> bool { + self.visited.contains(section_index, section_address, address) } } diff --git a/src/analysis/mod.rs b/src/analysis/mod.rs index f91d89a..d8bbb04 100644 --- a/src/analysis/mod.rs +++ b/src/analysis/mod.rs @@ -3,7 +3,10 @@ use std::{collections::BTreeSet, num::NonZeroU32}; use anyhow::{Context, Result}; use ppc750cl::Ins; -use crate::obj::{ObjInfo, ObjSection, ObjSectionKind}; +use crate::{ + array_ref, + obj::{ObjInfo, ObjSection, ObjSectionKind}, +}; pub mod cfa; pub mod executor; @@ -23,11 +26,11 @@ pub fn read_u32(data: &[u8], address: u32, section_address: u32) -> Option if data.len() < offset + 4 { return None; } - Some(u32::from_be_bytes(data[offset..offset + 4].try_into().unwrap())) + 
Some(u32::from_be_bytes(*array_ref!(data, offset, 4))) } fn is_valid_jump_table_addr(obj: &ObjInfo, addr: u32) -> bool { - matches!(obj.section_at(addr), Ok(section) if section.kind != ObjSectionKind::Bss) + matches!(obj.sections.at_address(addr), Ok((_, section)) if section.kind != ObjSectionKind::Bss) } fn get_jump_table_entries( @@ -38,7 +41,7 @@ fn get_jump_table_entries( function_start: u32, function_end: u32, ) -> Result<(Vec, u32)> { - let section = obj.section_at(addr).with_context(|| { + let (_, section) = obj.sections.at_address(addr).with_context(|| { format!("Failed to get jump table entries @ {:#010X} size {:?}", addr, size) })?; let offset = (addr as u64 - section.address) as usize; @@ -90,9 +93,9 @@ pub fn uniq_jump_table_entries( Ok((BTreeSet::from_iter(entries.iter().cloned().filter(|&addr| addr != 0)), size)) } -pub fn skip_alignment(obj: &ObjInfo, mut addr: u32, end: u32) -> Option { - let mut data = match obj.section_data(addr, end) { - Ok((_, data)) => data, +pub fn skip_alignment(section: &ObjSection, mut addr: u32, end: u32) -> Option { + let mut data = match section.data_range(addr, end) { + Ok(data) => data, Err(_) => return None, }; loop { diff --git a/src/analysis/objects.rs b/src/analysis/objects.rs index 8e55406..a81f861 100644 --- a/src/analysis/objects.rs +++ b/src/analysis/objects.rs @@ -1,16 +1,18 @@ use anyhow::Result; -use crate::obj::{ - split::is_linker_generated_label, ObjDataKind, ObjInfo, ObjSectionKind, ObjSymbolKind, +use crate::{ + obj::{ObjDataKind, ObjInfo, ObjSectionKind, ObjSymbolKind}, + util::split::is_linker_generated_label, }; pub fn detect_object_boundaries(obj: &mut ObjInfo) -> Result<()> { - for section in obj.sections.iter().filter(|s| s.kind != ObjSectionKind::Code) { - let section_start = section.address as u32; + for (section_index, section) in + obj.sections.iter_mut().filter(|(_, s)| s.kind != ObjSectionKind::Code) + { let section_end = (section.address + section.size) as u32; let mut replace_symbols = 
vec![]; - for (idx, symbol) in obj.symbols.for_range(section_start..section_end) { + for (idx, symbol) in obj.symbols.for_section(section_index) { let mut symbol = symbol.clone(); if is_linker_generated_label(&symbol.name) { continue; @@ -25,7 +27,7 @@ pub fn detect_object_boundaries(obj: &mut ObjInfo) -> Result<()> { if !symbol.size_known { let next_addr = obj .symbols - .for_range(symbol.address as u32 + 1..section_end) + .for_section_range(section_index, symbol.address as u32 + 1..) .next() .map_or(section_end, |(_, symbol)| symbol.address as u32); let new_size = next_addr - symbol.address as u32; @@ -35,9 +37,9 @@ pub fn detect_object_boundaries(obj: &mut ObjInfo) -> Result<()> { (2 | 4, 2) => expected_size, (..=8, 1 | 2 | 4) => { // alignment to double - if obj.symbols.at_address(next_addr).any(|(_, sym)| sym.data_kind == ObjDataKind::Double) + if obj.symbols.at_section_address(section_index, next_addr).any(|(_, sym)| sym.data_kind == ObjDataKind::Double) // If we're at a TU boundary, we can assume it's just padding - || obj.splits.contains_key(&(symbol.address as u32 + new_size)) + || section.splits.has_split_at(symbol.address as u32 + new_size) { expected_size } else { @@ -63,10 +65,10 @@ pub fn detect_object_boundaries(obj: &mut ObjInfo) -> Result<()> { pub fn detect_strings(obj: &mut ObjInfo) -> Result<()> { let mut symbols_set = Vec::<(usize, ObjDataKind, usize)>::new(); - for section in obj + for (section_index, section) in obj .sections .iter() - .filter(|s| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) + .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) { enum StringResult { None, @@ -119,11 +121,10 @@ pub fn detect_strings(obj: &mut ObjInfo) -> Result<()> { } for (symbol_idx, symbol) in obj .symbols - .for_section(section) + .for_section(section_index) .filter(|(_, sym)| sym.data_kind == ObjDataKind::Unknown) { - let (_section, data) = - obj.section_data(symbol.address as u32, 
(symbol.address + symbol.size) as u32)?; + let data = section.symbol_data(symbol)?; match is_string(data) { StringResult::None => {} StringResult::String { length, terminated } => { @@ -146,7 +147,7 @@ pub fn detect_strings(obj: &mut ObjInfo) -> Result<()> { } for (symbol_idx, data_kind, size) in symbols_set { - let mut symbol = obj.symbols.at(symbol_idx).clone(); + let mut symbol = obj.symbols[symbol_idx].clone(); log::debug!("Setting {} ({:#010X}) to size {:#X}", symbol.name, symbol.address, size); symbol.data_kind = data_kind; symbol.size = size as u64; diff --git a/src/analysis/pass.rs b/src/analysis/pass.rs index e0352cd..2c62c61 100644 --- a/src/analysis/pass.rs +++ b/src/analysis/pass.rs @@ -21,8 +21,9 @@ pub const TRK_TABLE_SIZE: u32 = 0x1F34; // always? impl AnalysisPass for FindTRKInterruptVectorTable { fn execute(state: &mut AnalyzerState, obj: &ObjInfo) -> Result<()> { for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end == 0) { - let (section, data) = match obj.section_data(start, 0) { - Ok((section, data)) => (section, data), + let (section_index, section) = obj.sections.at_address(start)?; + let data = match section.data_range(start, 0) { + Ok(ret) => ret, Err(_) => continue, }; if data.starts_with(TRK_TABLE_HEADER.as_bytes()) @@ -33,7 +34,7 @@ impl AnalysisPass for FindTRKInterruptVectorTable { name: "gTRKInterruptVectorTable".to_string(), demangled_name: None, address: start as u64, - section: Some(section.index), + section: Some(section_index), size: 0, size_known: true, flags: ObjSymbolFlagSet(FlagSet::from(ObjSymbolFlags::Global)), @@ -46,7 +47,7 @@ impl AnalysisPass for FindTRKInterruptVectorTable { name: "gTRKInterruptVectorTableEnd".to_string(), demangled_name: None, address: end as u64, - section: Some(section.index), + section: Some(section_index), size: 0, size_known: true, flags: ObjSymbolFlagSet(FlagSet::from(ObjSymbolFlags::Global)), @@ -78,7 +79,11 @@ impl AnalysisPass for FindSaveRestSleds { const SLED_SIZE: usize = 
19 * 4; // registers 14-31 + blr let mut clear_ranges: Vec> = vec![]; for (&start, _) in state.function_bounds.iter().filter(|&(_, &end)| end != 0) { - let (section, data) = obj.section_data(start, 0)?; + let (section_index, section) = obj.sections.at_address(start)?; + let data = match section.data_range(start, 0) { + Ok(ret) => ret, + Err(_) => continue, + }; for (needle, func, label) in &SLEDS { if data.starts_with(needle) { log::debug!("Found {} @ {:#010X}", func, start); @@ -87,7 +92,7 @@ impl AnalysisPass for FindSaveRestSleds { name: func.to_string(), demangled_name: None, address: start as u64, - section: Some(section.index), + section: Some(section_index), size: SLED_SIZE as u64, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), @@ -101,7 +106,7 @@ impl AnalysisPass for FindSaveRestSleds { name: format!("{}{}", label, i), demangled_name: None, address: addr as u64, - section: Some(section.index), + section: Some(section_index), size: 0, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), diff --git a/src/analysis/signatures.rs b/src/analysis/signatures.rs index f9fecfc..e8ec6f9 100644 --- a/src/analysis/signatures.rs +++ b/src/analysis/signatures.rs @@ -3,12 +3,10 @@ use anyhow::{anyhow, Result}; use crate::{ analysis::{cfa::AnalyzerState, read_u32}, obj::{ - signatures::{ - apply_signature, check_signatures, check_signatures_str, parse_signatures, - FunctionSignature, - }, - ObjInfo, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, + ObjInfo, ObjSectionKind, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, + ObjSymbolKind, }, + util::signatures::{apply_signature, check_signatures, check_signatures_str, parse_signatures}, }; const SIGNATURES: &[(&str, &str)] = &[ @@ -197,17 +195,23 @@ const POST_SIGNATURES: &[(&str, &str)] = &[ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { let entry = obj.entry as u32; - if let Some(signature) = - check_signatures_str(obj, entry, 
include_str!("../../assets/signatures/__start.yml"))? - { - apply_signature(obj, entry, &signature)?; + let (entry_section_index, entry_section) = obj.sections.at_address(entry)?; + if let Some(signature) = check_signatures_str( + entry_section, + entry, + include_str!("../../assets/signatures/__start.yml"), + )? { + apply_signature(obj, entry_section_index, entry, &signature)?; } for &(name, sig_str) in SIGNATURES { if let Some((_, symbol)) = obj.symbols.by_name(name)? { let addr = symbol.address as u32; - if let Some(signature) = check_signatures_str(obj, addr, sig_str)? { - apply_signature(obj, addr, &signature)?; + let section_index = + symbol.section.ok_or_else(|| anyhow!("Symbol '{}' missing section", name))?; + let section = &obj.sections[section_index]; + if let Some(signature) = check_signatures_str(section, addr, sig_str)? { + apply_signature(obj, section_index, addr, &signature)?; } } } @@ -217,12 +221,13 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { let mut analyzer = AnalyzerState::default(); analyzer.process_function_at(obj, symbol.address as u32)?; for addr in analyzer.function_entries { + let (section_index, section) = obj.sections.at_address(addr)?; if let Some(signature) = check_signatures_str( - obj, + section, addr, include_str!("../../assets/signatures/__init_cpp.yml"), )? { - apply_signature(obj, addr, &signature)?; + apply_signature(obj, section_index, addr, &signature)?; break; } } @@ -230,24 +235,27 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { if let Some((_, symbol)) = obj.symbols.by_name("_ctors")? 
{ // First entry of ctors is __init_cpp_exceptions - let section = obj.section_at(symbol.address as u32)?; - let target = read_u32(&section.data, symbol.address as u32, section.address as u32) - .ok_or_else(|| anyhow!("Failed to read _ctors data"))?; + let ctors_section_index = + symbol.section.ok_or_else(|| anyhow!("Missing _ctors symbol section"))?; + let ctors_section = &obj.sections[ctors_section_index]; + let target = + read_u32(&ctors_section.data, symbol.address as u32, ctors_section.address as u32) + .ok_or_else(|| anyhow!("Failed to read _ctors data"))?; if target != 0 { + let (target_section_index, target_section) = obj.sections.at_address(target)?; if let Some(signature) = check_signatures_str( - obj, + target_section, target, include_str!("../../assets/signatures/__init_cpp_exceptions.yml"), )? { let address = symbol.address; - let section_index = section.index; - apply_signature(obj, target, &signature)?; - obj.add_symbol( + apply_signature(obj, target_section_index, target, &signature)?; + obj.symbols.add( ObjSymbol { name: "__init_cpp_exceptions_reference".to_string(), demangled_name: None, address, - section: Some(section_index), + section: Some(ctors_section_index), size: 4, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), @@ -257,8 +265,8 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { }, true, )?; - if obj.split_for(address as u32).is_none() { - obj.add_split(address as u32, ObjSplit { + if obj.sections[ctors_section_index].splits.for_address(address as u32).is_none() { + obj.add_split(ctors_section_index, address as u32, ObjSplit { unit: "__init_cpp_exceptions.cpp".to_string(), end: address as u32 + 4, align: None, @@ -271,30 +279,32 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { } if let Some((_, symbol)) = obj.symbols.by_name("_dtors")? 
{ - let section = obj.section_at(symbol.address as u32)?; + let dtors_section_index = + symbol.section.ok_or_else(|| anyhow!("Missing _dtors symbol section"))?; + let dtors_section = &obj.sections[dtors_section_index]; let address = symbol.address; - let section_address = section.address; - let section_index = section.index; + let section_address = dtors_section.address; // First entry of dtors is __destroy_global_chain - let dgc_target = read_u32(&section.data, address as u32, section_address as u32) + let dgc_target = read_u32(&dtors_section.data, address as u32, section_address as u32) .ok_or_else(|| anyhow!("Failed to read _dtors data"))?; - let fce_target = read_u32(&section.data, address as u32 + 4, section_address as u32) + let fce_target = read_u32(&dtors_section.data, address as u32 + 4, section_address as u32) .ok_or_else(|| anyhow!("Failed to read _dtors data"))?; let mut found_dgc = false; let mut found_fce = false; if dgc_target != 0 { + let (target_section_index, target_section) = obj.sections.at_address(dgc_target)?; if let Some(signature) = check_signatures_str( - obj, + target_section, dgc_target, include_str!("../../assets/signatures/__destroy_global_chain.yml"), )? { - apply_signature(obj, dgc_target, &signature)?; + apply_signature(obj, target_section_index, dgc_target, &signature)?; obj.add_symbol( ObjSymbol { name: "__destroy_global_chain_reference".to_string(), demangled_name: None, address, - section: Some(section_index), + section: Some(dtors_section_index), size: 4, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), @@ -314,18 +324,19 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { } // Second entry of dtors is __fini_cpp_exceptions if fce_target != 0 { + let (target_section_index, target_section) = obj.sections.at_address(fce_target)?; if let Some(signature) = check_signatures_str( - obj, + target_section, fce_target, include_str!("../../assets/signatures/__fini_cpp_exceptions.yml"), )? 
{ - apply_signature(obj, fce_target, &signature)?; + apply_signature(obj, target_section_index, fce_target, &signature)?; obj.add_symbol( ObjSymbol { name: "__fini_cpp_exceptions_reference".to_string(), demangled_name: None, address: address + 4, - section: Some(section_index), + section: Some(dtors_section_index), size: 4, size_known: true, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), @@ -344,8 +355,8 @@ pub fn apply_signatures(obj: &mut ObjInfo) -> Result<()> { if found_fce { end += 4; } - if obj.split_for(address as u32).is_none() { - obj.add_split(address as u32, ObjSplit { + if obj.sections[dtors_section_index].splits.for_address(address as u32).is_none() { + obj.add_split(dtors_section_index, address as u32, ObjSplit { unit: "__init_cpp_exceptions.cpp".to_string(), end, align: None, @@ -363,19 +374,28 @@ pub fn apply_signatures_post(obj: &mut ObjInfo) -> Result<()> { log::info!("Checking post CFA signatures..."); for &(_name, sig_str) in POST_SIGNATURES { let signatures = parse_signatures(sig_str)?; - let mut iter = obj.symbols.by_kind(ObjSymbolKind::Function); - let opt = loop { - let Some((_, symbol)) = iter.next() else { - break Option::<(u32, FunctionSignature)>::None; - }; - if let Some(signature) = check_signatures(obj, symbol.address as u32, &signatures)? { - break Some((symbol.address as u32, signature)); + let mut found_signature = None; + 'outer: for (section_index, section) in obj.sections.by_kind(ObjSectionKind::Code) { + for (symbol_index, symbol) in obj + .symbols + .for_section(section_index) + .filter(|(_, sym)| sym.kind == ObjSymbolKind::Function) + { + if let Some(signature) = + check_signatures(section, symbol.address as u32, &signatures)? 
+ { + found_signature = Some((symbol_index, signature)); + break 'outer; + } } - }; - if let Some((addr, signature)) = opt { - drop(iter); - apply_signature(obj, addr, &signature)?; - break; + } + if let Some((symbol_index, signature)) = found_signature { + let symbol = &obj.symbols[symbol_index]; + let section_index = symbol + .section + .ok_or_else(|| anyhow!("Symbol '{}' missing section", symbol.name))?; + let address = symbol.address as u32; + apply_signature(obj, section_index, address, &signature)?; } } log::info!("Done!"); diff --git a/src/analysis/slices.rs b/src/analysis/slices.rs index 99cfcc3..dd28a6f 100644 --- a/src/analysis/slices.rs +++ b/src/analysis/slices.rs @@ -164,7 +164,7 @@ impl FunctionSlices { function_end: Option, known_functions: &BTreeSet, ) -> Result> { - let ExecCbData { executor, vm, result, section, ins, block_start } = data; + let ExecCbData { executor, vm, result, section_index, section, ins, block_start } = data; // Track discovered prologue(s) and epilogue(s) self.check_prologue(section, ins) @@ -187,7 +187,7 @@ impl FunctionSlices { StepResult::Continue | StepResult::LoadStore { .. } => { let next_address = ins.addr + 4; // If we already visited the next address, connect the blocks and end - if executor.visited(section, next_address) { + if executor.visited(section_index, section.address as u32, next_address) { self.blocks.insert(block_start, next_address); self.branches.insert(ins.addr, vec![next_address]); Ok(ExecCbResult::EndBlock) @@ -233,7 +233,7 @@ impl FunctionSlices { if self.add_block_start(addr) { return Ok(ExecCbResult::Jump(addr)); } - } else if matches!(obj.section_data(ins.addr, ins.addr + 4), Ok((_, data)) if data == [0u8; 4]) + } else if matches!(section.data_range(ins.addr, ins.addr + 4), Ok(data) if data == [0u8; 4]) { // If this branch has zeroed padding after it, assume tail call. 
self.function_references.insert(addr); @@ -385,8 +385,10 @@ impl FunctionSlices { } let end = self.end(); - match (obj.section_at(end), obj.section_at(end - 4)) { - (Ok(section), Ok(other_section)) if section.index == other_section.index => { + match (obj.sections.at_address(end), obj.sections.at_address(end - 4)) { + (Ok((section_index, section)), Ok((other_section_index, _other_section))) + if section_index == other_section_index => + { // FIXME this is real bad if !self.has_conditional_blr { if let Some(ins) = disassemble(section, end - 4) { @@ -453,15 +455,15 @@ impl FunctionSlices { return TailCallResult::Is; } // If the jump target is in a different section, known tail call. - let section = match obj.section_at(function_start) { + let (_, target_section) = match obj.sections.at_address(addr) { Ok(section) => section, Err(e) => return TailCallResult::Error(e), }; - if !section.contains(addr) { + if !target_section.contains(function_start) { return TailCallResult::Is; } // If the jump target has 0'd padding before it, known tail call. - if matches!(obj.section_data(addr - 4, addr), Ok((_, data)) if data == [0u8; 4]) { + if matches!(target_section.data_range(addr - 4, addr), Ok(data) if data == [0u8; 4]) { return TailCallResult::Is; } // If we're not sure where the function ends yet, mark as possible tail call. 
diff --git a/src/analysis/tracker.rs b/src/analysis/tracker.rs index 96e7173..bb763db 100644 --- a/src/analysis/tracker.rs +++ b/src/analysis/tracker.rs @@ -73,8 +73,8 @@ impl Tracker { // Stack ends after all BSS sections obj.sections .iter() - .rfind(|s| s.kind == ObjSectionKind::Bss) - .map(|s| (s.address + s.size) as u32) + .rfind(|&(_, s)| s.kind == ObjSectionKind::Bss) + .map(|(_, s)| (s.address + s.size) as u32) }), db_stack_addr: obj.db_stack_addr, arena_lo: obj @@ -92,21 +92,23 @@ impl Tracker { pub fn process(&mut self, obj: &ObjInfo) -> Result<()> { log::debug!("Processing code sections"); self.process_code(obj)?; - for section in &obj.sections { - if matches!(section.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData) { - log::debug!("Processing section {}, address {:#X}", section.index, section.address); - self.process_data(obj, section)?; - } + for (section_index, section) in obj + .sections + .iter() + .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) + { + log::debug!("Processing section {}, address {:#X}", section_index, section.address); + self.process_data(obj, section)?; } Ok(()) } fn process_code(&mut self, obj: &ObjInfo) -> Result<()> { self.process_function_by_address(obj, obj.entry as u32)?; - for section in obj.sections.iter().filter(|s| s.kind == ObjSectionKind::Code) { + for (section_index, _) in obj.sections.by_kind(ObjSectionKind::Code) { for (_, symbol) in obj .symbols - .for_range(section.address as u32..(section.address + section.size) as u32) + .for_section(section_index) .filter(|(_, symbol)| symbol.kind == ObjSymbolKind::Function && symbol.size_known) { let addr = symbol.address as u32; @@ -124,9 +126,10 @@ impl Tracker { return Ok(()); } self.processed_functions.insert(addr); + let (section_index, _) = obj.sections.at_address(addr)?; if let Some((_, symbol)) = obj .symbols - .at_address(addr) + .at_section_address(section_index, addr) .find(|(_, symbol)| symbol.kind == 
ObjSymbolKind::Function && symbol.size_known) { self.process_function(obj, symbol)?; @@ -144,7 +147,8 @@ impl Tracker { function_end: u32, possible_missed_branches: &mut BTreeMap>, ) -> Result> { - let ExecCbData { executor, vm, result, section: _, ins, block_start: _ } = data; + let ExecCbData { executor, vm, result, section_index: _, section: _, ins, block_start: _ } = + data; let is_function_addr = |addr: u32| addr >= function_start && addr < function_end; match result { @@ -344,11 +348,11 @@ impl Tracker { } let mut added = false; for (addr, vm) in take(&mut possible_missed_branches) { - let section = match obj.section_at(addr) { + let (section_index, section) = match obj.sections.at_address(addr) { Ok(section) => section, Err(_) => continue, }; - if !executor.visited(section, addr) { + if !executor.visited(section_index, section.address as u32, addr) { executor.push(addr, vm, true); added = true; } @@ -397,7 +401,7 @@ impl Tracker { // if addr > 0x80000000 && addr < 0x80003100 { // return true; // } - if let Ok(section) = obj.section_at(addr) { + if let Ok((_, section)) = obj.sections.at_address(addr) { // References to code sections will never be unaligned return section.kind != ObjSectionKind::Code || addr & 3 == 0; } @@ -450,7 +454,7 @@ impl Tracker { section.name = new_name; } - for section in &mut obj.sections { + for (_, section) in obj.sections.iter_mut() { if !section.section_known { if section.kind == ObjSectionKind::Code { apply_section_name(section, ".text"); @@ -485,7 +489,7 @@ impl Tracker { } let mut relocation_maps = Vec::new(); - for section in &obj.sections { + for (_, section) in obj.sections.iter() { relocation_maps.push(section.build_relocation_map()?); } @@ -516,7 +520,7 @@ impl Tracker { if let Some(symbol) = self.special_symbol(obj, target, reloc_kind) { (symbol, 0) } else { - let target_section = match obj.sections.iter().find(|s| { + let (target_section_index, _) = match obj.sections.iter().find(|&(_, s)| { target >= s.address as u32 
&& target < (s.address + s.size) as u32 }) { Some(v) => v, @@ -541,7 +545,7 @@ impl Tracker { name: format!("lbl_{:08X}", target), demangled_name: None, address: target as u64, - section: Some(target_section.index), + section: Some(target_section_index), size: 0, size_known: false, flags: Default::default(), @@ -552,31 +556,34 @@ impl Tracker { (symbol_idx, 0) } }; - let reloc = ObjReloc { kind: reloc_kind, address: addr as u64, target_symbol, addend }; - let section = match obj - .sections - .iter_mut() - .find(|s| addr >= s.address as u32 && addr < (s.address + s.size) as u32) - { - Some(v) => v, - None => bail!( - "Failed to locate source section for relocation @ {:#010X} {:#010X?}", - addr, - reloc - ), + let reloc = ObjReloc { + kind: reloc_kind, + address: addr as u64, + target_symbol, + addend, + module: None, }; + let (section_index, section) = + match obj.sections.iter_mut().find(|(_, s)| s.contains(addr)) { + Some(v) => v, + None => bail!( + "Failed to locate source section for relocation @ {:#010X} {:#010X?}", + addr, + reloc + ), + }; - let reloc_map = &mut relocation_maps[section.index]; + let reloc_map = &mut relocation_maps[section_index]; match reloc_map.entry(addr) { Entry::Vacant(e) => { e.insert(section.relocations.len()); section.relocations.push(reloc); } Entry::Occupied(e) => { - let reloc_symbol = obj.symbols.at(reloc.target_symbol); + let reloc_symbol = &obj.symbols[reloc.target_symbol]; if reloc_symbol.name != "_unresolved" { let v = &mut section.relocations[*e.get()]; - let iter_symbol = obj.symbols.at(v.target_symbol); + let iter_symbol = &obj.symbols[v.target_symbol]; if iter_symbol.address as i64 + v.addend != reloc_symbol.address as i64 + reloc.addend { diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs index e8b0255..47c79d0 100644 --- a/src/cmd/dol.rs +++ b/src/cmd/dol.rs @@ -5,6 +5,7 @@ use std::{ io::Write, path::{Path, PathBuf}, }; +use std::mem::take; use anyhow::{anyhow, bail, Context, Result}; use argp::FromArgs; @@ -21,14 +22,15 @@ 
use crate::{ }, cmd::shasum::file_sha1, obj::{ - split::{is_linker_generated_object, split_obj, update_splits}, - ObjDataKind, ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, + ObjDataKind, ObjInfo, ObjReloc, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex, }, util::{ asm::write_asm, comment::MWComment, - config::{apply_splits, apply_symbols_file, write_splits_file, write_symbols_file}, + config::{ + apply_splits, apply_symbols_file, is_auto_symbol, write_splits_file, write_symbols_file, + }, dep::DepFile, dol::process_dol, elf::{process_elf, write_elf}, @@ -37,6 +39,7 @@ use crate::{ map::apply_map_file, rel::process_rel, rso::{process_rso, DOL_SECTION_ABS, DOL_SECTION_NAMES}, + split::{is_linker_generated_object, split_obj, update_splits}, }, }; @@ -150,6 +153,8 @@ pub struct ProjectConfig { pub struct ModuleConfig { pub object: PathBuf, pub hash: Option, + pub splits: Option, + pub symbols: Option, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -159,9 +164,18 @@ pub struct OutputUnit { pub autogenerated: bool, } +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct OutputModule { + pub name: String, + pub ldscript: PathBuf, + pub units: Vec, +} + #[derive(Serialize, Deserialize, Debug, Clone, Default)] pub struct OutputConfig { + pub ldscript: PathBuf, pub units: Vec, + pub modules: Vec, } pub fn run(args: Args) -> Result<()> { @@ -174,6 +188,7 @@ pub fn run(args: Args) -> Result<()> { } fn apply_selfile(obj: &mut ObjInfo, selfile: &Path) -> Result<()> { + log::info!("Loading {}", selfile.display()); let rso = process_rso(selfile)?; for symbol in rso.symbols.iter() { let dol_section_index = match symbol.section { @@ -191,13 +206,13 @@ fn apply_selfile(obj: &mut ObjInfo, selfile: &Path) -> Result<()> { DOL_SECTION_NAMES.get(dol_section_index).and_then(|&opt| opt).ok_or_else(|| { anyhow!("Can't add symbol for unknown DOL section {}", dol_section_index) })?; - let 
dol_section = obj + let (dol_section_index, dol_section) = obj .sections .iter() - .find(|section| section.name == dol_section_name) + .find(|&(_, section)| section.name == dol_section_name) .ok_or_else(|| anyhow!("Failed to locate DOL section {}", dol_section_name))?; ( - Some(dol_section.index), + Some(dol_section_index), dol_section.address as u32 + symbol.address as u32, Some(dol_section.kind), ) @@ -208,17 +223,17 @@ fn apply_selfile(obj: &mut ObjInfo, selfile: &Path) -> Result<()> { Some(_) => ObjSymbolKind::Object, None => ObjSymbolKind::Unknown, }; - let existing_symbols = obj.symbols.at_address(address).collect_vec(); + let existing_symbols = if let Some(section_index) = section { + obj.symbols.at_section_address(section_index, address).collect_vec() + } else { + // TODO hmmm + obj.symbols.iter_abs().filter(|(_, s)| s.address == address as u64).collect_vec() + }; let existing_symbol = existing_symbols .iter() - .find(|(_, s)| s.section == section && s.name == symbol.name) + .find(|(_, s)| s.name == symbol.name) .cloned() - .or_else(|| { - existing_symbols - .iter() - .find(|(_, s)| s.section == section && s.kind == symbol_kind) - .cloned() - }); + .or_else(|| existing_symbols.iter().find(|(_, s)| s.kind == symbol_kind).cloned()); if let Some((existing_symbol_idx, existing_symbol)) = existing_symbol { log::debug!("Mapping symbol {} to {}", symbol.name, existing_symbol.name); obj.symbols.replace(existing_symbol_idx, ObjSymbol { @@ -273,7 +288,7 @@ fn info(args: InfoArgs) -> Result<()> { println!("Entry point: {:#010X}", obj.entry); println!("\nSections:"); println!("\t{: >10} | {: <10} | {: <10} | {: <10}", "Name", "Address", "Size", "File Off"); - for section in &obj.sections { + for (_, section) in obj.sections.iter() { println!( "\t{: >10} | {:#010X} | {: <#10X} | {: <#10X}", section.name, section.address, section.size, section.file_offset @@ -281,8 +296,8 @@ fn info(args: InfoArgs) -> Result<()> { } println!("\nDiscovered symbols:"); println!("\t{: >23} 
| {: <10} | {: <10}", "Name", "Address", "Size"); - for (_, symbol) in obj.symbols.iter_ordered() { - if symbol.name.starts_with('@') || symbol.name.starts_with("fn_") { + for (_, symbol) in obj.symbols.iter_ordered().chain(obj.symbols.iter_abs()) { + if symbol.name.starts_with('@') || is_auto_symbol(&symbol.name) { continue; } if symbol.size_known { @@ -314,6 +329,214 @@ fn verify_hash>(path: P, hash_str: &str) -> Result<()> { } } +fn update_symbols(obj: &mut ObjInfo, modules: &BTreeMap) -> Result<()> { + log::info!("Updating symbols for module {}", obj.module_id); + + // Find all references to this module from other modules + for rel_reloc in obj + .unresolved_relocations + .iter() + .chain(modules.iter().flat_map(|(_, obj)| obj.unresolved_relocations.iter())) + .filter(|r| r.module_id == obj.module_id) + { + let (target_section_index, target_section) = obj + .sections + .get_elf_index(rel_reloc.target_section as usize) + .ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.section))?; + + let target_symbol = obj + .symbols + .at_section_address(target_section_index, rel_reloc.addend) + .filter(|(_, s)| s.referenced_by(rel_reloc.kind)) + .at_most_one() + .map_err(|e| { + for (_, symbol) in e { + log::warn!( + "Multiple symbols found for {:#010X}: {}", + rel_reloc.addend, + symbol.name + ); + } + anyhow!("Multiple symbols found for {:#010X}", rel_reloc.addend) + })?; + + if let Some((symbol_index, symbol)) = target_symbol { + // Update symbol + log::trace!( + "Found symbol in section {} at {:#010X}: {}", + rel_reloc.target_section, + rel_reloc.addend, + symbol.name + ); + obj.symbols.flags(symbol_index).set_force_active(true); + } else { + // Add label + log::trace!( + "Creating label in section {} at {:#010X}", + rel_reloc.target_section, + rel_reloc.addend + ); + obj.symbols.add_direct(ObjSymbol { + name: format!( + "lbl_mod{}_{}_{:08X}", + obj.module_id, + target_section.name.trim_start_matches('.'), + rel_reloc.addend + ), + demangled_name: 
None, + address: rel_reloc.addend as u64, + section: Some(target_section_index), + size: 0, + size_known: false, + flags: ObjSymbolFlagSet(ObjSymbolFlags::ForceActive.into()), + kind: Default::default(), + align: None, + data_kind: ObjDataKind::Unknown, + })?; + } + } + + Ok(()) +} + +fn create_relocations( + obj: &mut ObjInfo, + modules: &BTreeMap, + dol_obj: &ObjInfo, +) -> Result<()> { + log::info!("Creating relocations for module {}", obj.module_id); + + // Resolve all relocations in this module + for rel_reloc in take(&mut obj.unresolved_relocations) { + let target_obj = if rel_reloc.module_id == 0 { + dol_obj + } else if rel_reloc.module_id == obj.module_id { + &*obj + } else { + modules + .get(&rel_reloc.module_id) + .ok_or_else(|| anyhow!("Failed to locate module {}", rel_reloc.module_id))? + }; + + let (target_section_index, _target_section) = if rel_reloc.module_id == 0 { + target_obj.sections.at_address(rel_reloc.addend)? + } else { + target_obj.sections.get_elf_index(rel_reloc.target_section as usize).ok_or_else( + || { + anyhow!( + "Failed to locate module {} section {}", + rel_reloc.module_id, + rel_reloc.target_section + ) + }, + )? + }; + + if let Some((symbol_index, symbol)) = target_obj + .symbols + .at_section_address(target_section_index, rel_reloc.addend) + .filter(|(_, s)| s.referenced_by(rel_reloc.kind)) + .at_most_one() + .map_err(|e| { + for (_, symbol) in e { + log::warn!( + "Multiple symbols found for {:#010X}: {}", + rel_reloc.addend, + symbol.name + ); + } + anyhow!("Multiple symbols found for {:#010X}", rel_reloc.addend) + })? 
+ { + // log::info!("Would create relocation to symbol {}", symbol.name); + let reloc = ObjReloc { + kind: rel_reloc.kind, + address: rel_reloc.address as u64 & !3, + target_symbol: symbol_index, + addend: rel_reloc.addend as i64 - symbol.address as i64, + module: if rel_reloc.module_id == obj.module_id { + None + } else { + Some(rel_reloc.module_id) + }, + }; + let (_, source_section) = obj + .sections + .get_elf_index_mut(rel_reloc.section as usize) + .ok_or_else(|| anyhow!("Failed to locate REL section {}", rel_reloc.section))?; + source_section.relocations.push(reloc); + } else { + bail!( + "Couldn't find module {} symbol in section {} at {:#010X}", + rel_reloc.module_id, + rel_reloc.target_section, + rel_reloc.addend + ); + } + } + + Ok(()) +} + +fn resolve_external_relocations( + obj: &mut ObjInfo, + modules: &BTreeMap, + dol_obj: Option<&ObjInfo>, +) -> Result<()> { + log::info!("Resolving relocations for module {}", obj.module_id); + + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] + struct RelocRef { + module_id: u32, + symbol_index: SymbolIndex, + } + let mut reloc_to_symbol = HashMap::::new(); + + for (_section_index, section) in obj.sections.iter_mut() { + for reloc in section.relocations.iter_mut() { + if let Some(module_id) = reloc.module { + let reloc_ref = RelocRef { module_id, symbol_index: reloc.target_symbol }; + let symbol_idx = match reloc_to_symbol.entry(reloc_ref) { + hash_map::Entry::Occupied(e) => *e.get(), + hash_map::Entry::Vacant(e) => { + let target_obj = if module_id == obj.module_id { + bail!("Relocation to self in module {}", obj.module_id) + } else if module_id == 0 { + dol_obj.unwrap() + } else { + modules.get(&module_id).ok_or_else(|| { + anyhow!("Failed to locate module {}", reloc.module.unwrap()) + })? 
+ }; + + let target_symbol = &target_obj.symbols[reloc.target_symbol]; + let symbol_idx = obj.symbols.add_direct(ObjSymbol { + name: target_symbol.name.clone(), + demangled_name: target_symbol.demangled_name.clone(), + address: 0, + section: None, + size: 0, + size_known: false, + flags: Default::default(), + kind: Default::default(), + align: None, + data_kind: Default::default(), + })?; + + e.insert(symbol_idx); + symbol_idx + } + }; + + reloc.target_symbol = symbol_idx; + reloc.module = None; + } + } + } + + Ok(()) +} + fn split(args: SplitArgs) -> Result<()> { log::info!("Loading {}", args.config.display()); let mut config_file = File::open(&args.config) @@ -335,6 +558,7 @@ fn split(args: SplitArgs) -> Result<()> { } let mut modules = BTreeMap::::new(); + let mut module_ids = Vec::with_capacity(config.modules.len()); for module_config in &config.modules { log::info!("Loading {}", module_config.object.display()); if let Some(hash_str) = &module_config.hash { @@ -342,6 +566,7 @@ fn split(args: SplitArgs) -> Result<()> { } let map = map_file(&module_config.object)?; let rel_obj = process_rel(map_reader(&map))?; + module_ids.push(rel_obj.module_id); match modules.entry(rel_obj.module_id) { Entry::Vacant(e) => e.insert(rel_obj), Entry::Occupied(_) => bail!("Duplicate module ID {}", obj.module_id), @@ -370,52 +595,20 @@ fn split(args: SplitArgs) -> Result<()> { if !modules.is_empty() { log::info!("Applying module relocations"); - for (module_id, module_obj) in modules { - for rel_reloc in &module_obj.unresolved_relocations { - // TODO also apply inter-module relocations - if rel_reloc.module_id != 0 { - continue; - } - let target = rel_reloc.addend; - if let Some((symbol_index, symbol)) = - obj.symbols.for_relocation(target, rel_reloc.kind)? 
- { - if symbol.flags.is_local() { - bail!( - "Module {} relocation to {:#010X} found local symbol {}", - module_id, - symbol.address, - symbol.name - ); - } - let addend = target as i64 - symbol.address as i64; - if addend != 0 { - bail!( - "Module {} relocation to {:#010X} for symbol {} has non-zero addend {:#010X}", - module_id, - symbol.address, - symbol.name, - addend - ); - } - obj.symbols.flags(symbol_index).set_force_active(true); - } else { - // Add label - let target_section = obj.section_at(target)?; - obj.symbols.add_direct(ObjSymbol { - name: format!("lbl_{:08X}", target), - demangled_name: None, - address: target as u64, - section: Some(target_section.index), - size: 0, - size_known: false, - flags: ObjSymbolFlagSet(ObjSymbolFlags::ForceActive.into()), - kind: Default::default(), - align: None, - data_kind: ObjDataKind::Unknown, - })?; - } - } + + // Step 1: For each module, create any missing symbols (referenced from other modules) and set FORCEACTIVE + update_symbols(&mut obj, &modules)?; + for &module_id in &module_ids { + let mut module_obj = modules.remove(&module_id).unwrap(); + update_symbols(&mut module_obj, &modules)?; + modules.insert(module_id, module_obj); + } + + // Step 2: For each module, create relocations to symbols in other modules + for &module_id in &module_ids { + let mut module_obj = modules.remove(&module_id).unwrap(); + create_relocations(&mut module_obj, &modules, &obj)?; + modules.insert(module_id, module_obj); } } @@ -467,6 +660,17 @@ fn split(args: SplitArgs) -> Result<()> { } } + if !modules.is_empty() { + log::info!("Resolving module relocations"); + + resolve_external_relocations(&mut obj, &modules, None)?; + for &module_id in &module_ids { + let mut module_obj = modules.remove(&module_id).unwrap(); + resolve_external_relocations(&mut module_obj, &modules, Some(&obj))?; + modules.insert(module_id, module_obj); + } + } + log::info!("Splitting {} objects", obj.link_order.len()); let split_objs = split_obj(&obj)?; @@ 
-479,20 +683,9 @@ fn split(args: SplitArgs) -> Result<()> { fs::write(include_dir.join("macros.inc"), include_str!("../../assets/macros.inc"))?; log::info!("Writing object files"); - let mut file_map = HashMap::>::new(); + let mut out_config = OutputConfig::default(); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_obj = write_elf(split_obj)?; - match file_map.entry(unit.name.clone()) { - hash_map::Entry::Vacant(e) => e.insert(out_obj), - hash_map::Entry::Occupied(_) => bail!("Duplicate file {}", unit.name), - }; - } - - let mut out_config = OutputConfig::default(); - for unit in &obj.link_order { - let object = file_map - .get(&unit.name) - .ok_or_else(|| anyhow!("Failed to find object file for unit '{}'", unit.name))?; let out_path = obj_dir.join(obj_path_for_unit(&unit.name)); out_config.units.push(OutputUnit { object: out_path.clone(), @@ -502,7 +695,7 @@ fn split(args: SplitArgs) -> Result<()> { if let Some(parent) = out_path.parent() { DirBuilder::new().recursive(true).create(parent)?; } - fs::write(&out_path, object) + fs::write(&out_path, out_obj) .with_context(|| format!("Failed to write '{}'", out_path.display()))?; } { @@ -512,24 +705,46 @@ fn split(args: SplitArgs) -> Result<()> { } // Generate ldscript.lcf - fs::write( - args.out_dir.join("ldscript.lcf"), - generate_ldscript(&obj, config.auto_force_files)?, - )?; + let ldscript_path = args.out_dir.join("ldscript.lcf"); + fs::write(&ldscript_path, generate_ldscript(&obj, config.auto_force_files)?)?; + out_config.ldscript = ldscript_path; log::info!("Writing disassembly"); for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) { let out_path = asm_dir.join(asm_path_for_unit(&unit.name)); - if let Some(parent) = out_path.parent() { - DirBuilder::new().recursive(true).create(parent)?; - } let mut w = buf_writer(&out_path)?; write_asm(&mut w, split_obj) .with_context(|| format!("Failed to write {}", out_path.display()))?; w.flush()?; } + // Split and write modules + for 
(config, &module_id) in config.modules.iter().zip(&module_ids) { + let obj = modules.get(&module_id).unwrap(); + + let out_dir = args.out_dir.join(format!("module_{}", module_id)); + let asm_dir = out_dir.join("asm"); + // let obj_dir = out_dir.join("obj"); + + if !args.no_update { + if let Some(symbols_path) = &config.symbols { + write_symbols_file(symbols_path, obj)?; + } + if let Some(splits_path) = &config.splits { + write_splits_file(splits_path, obj)?; + } + } + + log::info!("Writing disassembly"); + let filename = config.object.file_name().unwrap().to_str().unwrap(); + let out_path = asm_dir.join(asm_path_for_unit(filename)); + let mut w = buf_writer(&out_path)?; + write_asm(&mut w, obj) + .with_context(|| format!("Failed to write {}", out_path.display()))?; + w.flush()?; + } + // Write dep file { let dep_path = args.out_dir.join("dep"); @@ -549,15 +764,11 @@ fn split(args: SplitArgs) -> Result<()> { #[allow(dead_code)] fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> { let real_obj = process_elf(elf_file)?; - for real_section in &real_obj.sections { - let obj_section = match obj.sections.get(real_section.index) { + for (section_index, real_section) in real_obj.sections.iter() { + let obj_section = match obj.sections.get(section_index) { Some(v) => v, None => { - log::error!( - "Section {} {} doesn't exist in DOL", - real_section.index, - real_section.name - ); + log::error!("Section {} {} doesn't exist in DOL", section_index, real_section.name); continue; } }; @@ -566,18 +777,15 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - "Section mismatch: {} {:?} ({}) should be {} {:?}", obj_section.name, obj_section.kind, - obj_section.index, + section_index, real_section.name, real_section.kind ); } } let mut real_functions = BTreeMap::::new(); - for section in &real_obj.sections { - if section.kind != ObjSectionKind::Code { - continue; - } - for (_symbol_idx, symbol) in real_obj.symbols.for_section(section) { + for 
(section_index, _section) in real_obj.sections.by_kind(ObjSectionKind::Code) { + for (_symbol_idx, symbol) in real_obj.symbols.for_section(section_index) { real_functions.insert(symbol.address as u32, symbol.name.clone()); match state.function_bounds.get(&(symbol.address as u32)) { Some(&end) => { @@ -617,8 +825,8 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - } // return Ok(()); // TODO - for real_section in &real_obj.sections { - let obj_section = match obj.sections.get(real_section.index) { + for (real_section_index, real_section) in real_obj.sections.iter() { + let obj_section = match obj.sections.get(real_section_index) { Some(v) => v, None => continue, }; @@ -626,7 +834,7 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - let obj_map = obj_section.build_relocation_map()?; for (&real_addr, &real_reloc_idx) in &real_map { let real_reloc = &real_section.relocations[real_reloc_idx]; - let real_symbol = real_obj.symbols.at(real_reloc.target_symbol); + let real_symbol = &real_obj.symbols[real_reloc.target_symbol]; let obj_reloc = match obj_map.get(&real_addr) { Some(v) => &obj_section.relocations[*v], None => { @@ -652,7 +860,7 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - continue; } }; - let obj_symbol = obj.symbols.at(obj_reloc.target_symbol); + let obj_symbol = &obj.symbols[obj_reloc.target_symbol]; if real_reloc.kind != obj_reloc.kind { log::warn!( "Relocation type mismatch @ {:#010X}: {:?} != {:?}", @@ -680,7 +888,7 @@ fn validate>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) - } for (&obj_addr, &obj_reloc_idx) in &obj_map { let obj_reloc = &obj_section.relocations[obj_reloc_idx]; - let obj_symbol = obj.symbols.at(obj_reloc.target_symbol); + let obj_symbol = &obj.symbols[obj_reloc.target_symbol]; if !real_map.contains_key(&obj_addr) { log::warn!( "Relocation not real @ {:#010X} {:?} to {:#010X}+{:X} ({})", @@ -716,19 +924,20 @@ fn diff(args: DiffArgs) -> Result<()> { log::info!("Loading {}", 
args.map_file.display()); apply_map_file(&args.map_file, &mut linked_obj)?; - for orig_sym in obj.symbols.iter() { - if orig_sym.kind == ObjSymbolKind::Section || orig_sym.section.is_none() { - continue; - } + for orig_sym in obj.symbols.iter().filter(|s| s.kind != ObjSymbolKind::Section) { + let Some(orig_section_index) = orig_sym.section else { continue }; + let orig_section = &obj.sections[orig_section_index]; + let (linked_section_index, linked_section) = + linked_obj.sections.at_address(orig_sym.address as u32)?; let linked_sym = linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.name == orig_sym.name) .or_else(|| { linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.kind == orig_sym.kind) }); let mut found = false; @@ -746,18 +955,14 @@ fn diff(args: DiffArgs) -> Result<()> { found = true; } else if linked_sym.kind == orig_sym.kind && linked_sym.size == orig_sym.size { // Fuzzy match - let orig_data = obj - .section_data( - orig_sym.address as u32, - orig_sym.address as u32 + orig_sym.size as u32, - )? - .1; - let linked_data = linked_obj - .section_data( - linked_sym.address as u32, - linked_sym.address as u32 + linked_sym.size as u32, - )? 
- .1; + let orig_data = orig_section.data_range( + orig_sym.address as u32, + orig_sym.address as u32 + orig_sym.size as u32, + )?; + let linked_data = linked_section.data_range( + linked_sym.address as u32, + linked_sym.address as u32 + linked_sym.size as u32, + )?; if orig_data == linked_data { found = true; } @@ -771,7 +976,9 @@ fn diff(args: DiffArgs) -> Result<()> { orig_sym.size, orig_sym.address ); - for (_, linked_sym) in linked_obj.symbols.at_address(orig_sym.address as u32) { + for (_, linked_sym) in + linked_obj.symbols.at_section_address(linked_section_index, orig_sym.address as u32) + { log::error!( "At {:#010X}, found: {} (type {:?}, size {:#X})", linked_sym.address, @@ -794,32 +1001,30 @@ fn diff(args: DiffArgs) -> Result<()> { } // Data diff - for orig_sym in obj.symbols.iter() { - if orig_sym.kind == ObjSymbolKind::Section || orig_sym.section.is_none() { - continue; - } + for orig_sym in obj.symbols.iter().filter(|s| s.kind != ObjSymbolKind::Section) { + let Some(orig_section_index) = orig_sym.section else { continue }; + let orig_section = &obj.sections[orig_section_index]; + let (linked_section_index, linked_section) = + linked_obj.sections.at_address(orig_sym.address as u32)?; let (_, linked_sym) = linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.name == orig_sym.name) .or_else(|| { linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.kind == orig_sym.kind) }) .unwrap(); - let orig_data = obj - .section_data(orig_sym.address as u32, orig_sym.address as u32 + orig_sym.size as u32)? - .1; - let linked_data = linked_obj - .section_data( - linked_sym.address as u32, - linked_sym.address as u32 + linked_sym.size as u32, - )? 
- .1; + let orig_data = orig_section + .data_range(orig_sym.address as u32, orig_sym.address as u32 + orig_sym.size as u32)?; + let linked_data = linked_section.data_range( + linked_sym.address as u32, + linked_sym.address as u32 + linked_sym.size as u32, + )?; if orig_data != linked_data { log::error!( "Data mismatch for {} (type {:?}, size {:#X}) at {:#010X}", @@ -861,14 +1066,21 @@ fn apply(args: ApplyArgs) -> Result<()> { let mut replacements: Vec<(SymbolIndex, Option)> = vec![]; for (orig_idx, orig_sym) in obj.symbols.iter().enumerate() { + // skip ABS for now + if orig_sym.section.is_none() { + continue; + } + let (linked_section_index, _linked_section) = + linked_obj.sections.at_address(orig_sym.address as u32)?; + let linked_sym = linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.name == orig_sym.name) .or_else(|| { linked_obj .symbols - .at_address(orig_sym.address as u32) + .at_section_address(linked_section_index, orig_sym.address as u32) .find(|(_, sym)| sym.kind == orig_sym.kind) }); if let Some((_, linked_sym)) = linked_sym { @@ -929,18 +1141,21 @@ fn apply(args: ApplyArgs) -> Result<()> { for linked_sym in linked_obj.symbols.iter() { if matches!(linked_sym.kind, ObjSymbolKind::Section) || is_linker_generated_object(&linked_sym.name) + // skip ABS for now + || linked_sym.section.is_none() { continue; } + let (orig_section_index, _orig_section) = + obj.sections.at_address(linked_sym.address as u32)?; let orig_sym = obj .symbols - .at_address(linked_sym.address as u32) + .at_section_address(orig_section_index, linked_sym.address as u32) .find(|(_, sym)| sym.name == linked_sym.name) .or_else(|| { - linked_obj - .symbols - .at_address(linked_sym.address as u32) + obj.symbols + .at_section_address(orig_section_index, linked_sym.address as u32) .find(|(_, sym)| sym.kind == linked_sym.kind) }); if orig_sym.is_none() { @@ -951,7 +1166,18 @@ fn apply(args: 
ApplyArgs) -> Result<()> { linked_sym.size, linked_sym.address ); - obj.symbols.add_direct(linked_sym.clone())?; + obj.symbols.add_direct(ObjSymbol { + name: linked_sym.name.clone(), + demangled_name: linked_sym.demangled_name.clone(), + address: linked_sym.address, + section: Some(orig_section_index), + size: linked_sym.size, + size_known: linked_sym.size_known, + flags: linked_sym.flags, + kind: linked_sym.kind, + align: linked_sym.align, + data_kind: linked_sym.data_kind, + })?; } } diff --git a/src/cmd/elf.rs b/src/cmd/elf.rs index b78b932..53e5e4d 100644 --- a/src/cmd/elf.rs +++ b/src/cmd/elf.rs @@ -16,16 +16,14 @@ use object::{ }; use crate::{ - obj::{ - signatures::{compare_signature, generate_signature, FunctionSignature}, - split::split_obj, - ObjKind, - }, + obj::ObjKind, util::{ asm::write_asm, config::{write_splits_file, write_symbols_file}, elf::{process_elf, write_elf}, file::{buf_writer, process_rsp}, + signatures::{compare_signature, generate_signature, FunctionSignature}, + split::split_obj, }, }; @@ -148,9 +146,6 @@ fn disasm(args: DisasmArgs) -> Result<()> { let out_path = asm_dir.join(file_name_from_unit(&unit.name, ".s")); log::info!("Writing {}", out_path.display()); - if let Some(parent) = out_path.parent() { - DirBuilder::new().recursive(true).create(parent)?; - } let mut w = buf_writer(out_path)?; write_asm(&mut w, split_obj)?; w.flush()?; @@ -160,9 +155,6 @@ fn disasm(args: DisasmArgs) -> Result<()> { files_out.flush()?; } ObjKind::Relocatable => { - if let Some(parent) = args.out.parent() { - DirBuilder::new().recursive(true).create(parent)?; - } let mut w = buf_writer(args.out)?; write_asm(&mut w, &obj)?; w.flush()?; diff --git a/src/cmd/rel.rs b/src/cmd/rel.rs index 25ad47d..3cf4fa2 100644 --- a/src/cmd/rel.rs +++ b/src/cmd/rel.rs @@ -107,25 +107,24 @@ fn merge(args: MergeArgs) -> Result<()> { let mut section_map: BTreeMap> = BTreeMap::new(); let mut offset = align32(arena_lo + 0x2000); for module in module_map.values() { - for 
mod_section in &module.sections { - let section_idx = obj.sections.len(); + for (mod_section_index, mod_section) in module.sections.iter() { ensure!(mod_section.relocations.is_empty(), "Unsupported relocations during merge"); - obj.sections.push(ObjSection { + let section_idx = obj.sections.push(ObjSection { name: format!("{}:{}", mod_section.name, module.module_id), kind: mod_section.kind, address: offset as u64, size: mod_section.size, data: mod_section.data.clone(), align: mod_section.align, - index: section_idx, elf_index: mod_section.elf_index, relocations: vec![], original_address: mod_section.original_address, file_offset: mod_section.file_offset, section_known: mod_section.section_known, + splits: mod_section.splits.clone(), }); section_map.nested_insert(module.module_id, mod_section.elf_index as u32, offset)?; - for (_, mod_symbol) in module.symbols.for_section(mod_section) { + for (_, mod_symbol) in module.symbols.for_section(mod_section_index) { obj.symbols.add_direct(ObjSymbol { name: mod_symbol.name.clone(), demangled_name: mod_symbol.demangled_name.clone(), @@ -157,8 +156,8 @@ fn merge(args: MergeArgs) -> Result<()> { })?; section_map[&(rel_reloc.target_section as u32)] + rel_reloc.addend }; - let source_section_index = obj.section_at(source_addr)?.index; - let target_section_index = obj.section_at(target_addr)?.index; + let (source_section_index, _) = obj.sections.at_address(source_addr)?; + let (target_section_index, _) = obj.sections.at_address(target_addr)?; let (symbol_idx, addend) = if let Some((symbol_idx, symbol)) = obj.symbols.for_relocation(target_addr, rel_reloc.kind)? 
@@ -185,6 +184,7 @@ fn merge(args: MergeArgs) -> Result<()> { address: source_addr as u64, target_symbol: symbol_idx, addend, + module: None, }); } } @@ -217,11 +217,11 @@ fn merge(args: MergeArgs) -> Result<()> { } fn link_relocations(obj: &mut ObjInfo) -> Result<()> { - for section in &mut obj.sections { + for (_, section) in obj.sections.iter_mut() { for reloc in §ion.relocations { let source_address = reloc.address /*& !3*/; let target_address = - (obj.symbols.address_of(reloc.target_symbol) as i64 + reloc.addend) as u32; + (obj.symbols[reloc.target_symbol].address as i64 + reloc.addend) as u32; let ins_ref = array_ref_mut!(section.data, (source_address - section.address) as usize, 4); let mut ins = u32::from_be_bytes(*ins_ref); diff --git a/src/obj/mod.rs b/src/obj/mod.rs index ce296de..7248f51 100644 --- a/src/obj/mod.rs +++ b/src/obj/mod.rs @@ -1,180 +1,23 @@ -pub mod signatures; -pub mod split; +mod sections; +mod splits; +mod symbols; use std::{ cmp::{max, min}, - collections::{btree_map, BTreeMap, BTreeSet, HashMap}, - hash::{Hash, Hasher}, - ops::{Range, RangeBounds}, + collections::{BTreeMap, BTreeSet}, + hash::Hash, }; use anyhow::{anyhow, bail, ensure, Result}; -use flagset::{flags, FlagSet}; -use itertools::Itertools; +pub use sections::{section_kind_for_section, ObjSection, ObjSectionKind, ObjSections}; use serde::{Deserialize, Serialize}; -use serde_repr::{Deserialize_repr, Serialize_repr}; - -use crate::{ - obj::split::is_linker_generated_label, - util::{comment::MWComment, nested::NestedVec, rel::RelReloc}, +pub use splits::{ObjSplit, ObjSplits}; +pub use symbols::{ + ObjDataKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, + ObjSymbols, SymbolIndex, }; -#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Default)] -pub enum ObjSymbolScope { - #[default] - Unknown, - Global, - Weak, - Local, -} - -flags! 
{ - #[repr(u8)] - #[derive(Deserialize_repr, Serialize_repr)] - pub enum ObjSymbolFlags: u8 { - Global, - Local, - Weak, - Common, - Hidden, - ForceActive, - } -} - -#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize)] -pub struct ObjSymbolFlagSet(pub FlagSet); - -impl ObjSymbolFlagSet { - #[inline] - pub fn scope(&self) -> ObjSymbolScope { - if self.is_local() { - ObjSymbolScope::Local - } else if self.is_weak() { - ObjSymbolScope::Weak - } else if self.0.contains(ObjSymbolFlags::Global) { - ObjSymbolScope::Global - } else { - ObjSymbolScope::Unknown - } - } - - #[inline] - pub fn is_local(&self) -> bool { self.0.contains(ObjSymbolFlags::Local) } - - #[inline] - pub fn is_global(&self) -> bool { !self.is_local() } - - #[inline] - pub fn is_common(&self) -> bool { self.0.contains(ObjSymbolFlags::Common) } - - #[inline] - pub fn is_weak(&self) -> bool { self.0.contains(ObjSymbolFlags::Weak) } - - #[inline] - pub fn is_hidden(&self) -> bool { self.0.contains(ObjSymbolFlags::Hidden) } - - #[inline] - pub fn is_force_active(&self) -> bool { self.0.contains(ObjSymbolFlags::ForceActive) } - - #[inline] - pub fn set_scope(&mut self, scope: ObjSymbolScope) { - match scope { - ObjSymbolScope::Unknown => { - self.0 &= !(ObjSymbolFlags::Local | ObjSymbolFlags::Global | ObjSymbolFlags::Weak) - } - ObjSymbolScope::Global => { - self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Weak)) - | ObjSymbolFlags::Global - } - ObjSymbolScope::Weak => { - self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Global)) - | ObjSymbolFlags::Weak - } - ObjSymbolScope::Local => { - self.0 = (self.0 & !(ObjSymbolFlags::Global | ObjSymbolFlags::Weak)) - | ObjSymbolFlags::Local - } - } - } - - #[inline] - pub fn set_force_active(&mut self, value: bool) { - if value { - self.0 |= ObjSymbolFlags::ForceActive; - } else { - self.0 &= !ObjSymbolFlags::ForceActive; - } - } -} - -#[allow(clippy::derived_hash_with_manual_eq)] -impl Hash for ObjSymbolFlagSet { - fn 
hash(&self, state: &mut H) { self.0.bits().hash(state) } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum ObjSectionKind { - Code, - Data, - ReadOnlyData, - Bss, -} - -#[derive(Debug, Clone)] -pub struct ObjSection { - pub name: String, - pub kind: ObjSectionKind, - pub address: u64, - pub size: u64, - pub data: Vec, - pub align: u64, - pub index: usize, - /// REL files reference the original ELF section indices - pub elf_index: usize, - pub relocations: Vec, - pub original_address: u64, - pub file_offset: u64, - pub section_known: bool, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] -pub enum ObjSymbolKind { - #[default] - Unknown, - Function, - Object, - Section, -} - -#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] -pub enum ObjDataKind { - #[default] - Unknown, - Byte, - Byte2, - Byte4, - Byte8, - Float, - Double, - String, - String16, - StringTable, - String16Table, -} - -#[derive(Debug, Clone, Default, Eq, PartialEq)] -pub struct ObjSymbol { - pub name: String, - pub demangled_name: Option, - pub address: u64, - pub section: Option, - pub size: u64, - pub size_known: bool, - pub flags: ObjSymbolFlagSet, - pub kind: ObjSymbolKind, - pub align: Option, - pub data_kind: ObjDataKind, -} +use crate::util::{comment::MWComment, rel::RelReloc}; #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] pub enum ObjKind { @@ -199,34 +42,13 @@ pub struct ObjUnit { pub comment_version: Option, } -/// Marks a split point within a section. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct ObjSplit { - pub unit: String, - pub end: u32, - pub align: Option, - /// Whether this is a part of common BSS. - pub common: bool, - /// Generated, replaceable by user. 
- pub autogenerated: bool, -} - -pub type SymbolIndex = usize; - -#[derive(Debug, Clone)] -pub struct ObjSymbols { - symbols: Vec, - symbols_by_address: BTreeMap>, - symbols_by_name: HashMap>, -} - #[derive(Debug, Clone)] pub struct ObjInfo { pub kind: ObjKind, pub architecture: ObjArchitecture, pub name: String, pub symbols: ObjSymbols, - pub sections: Vec, + pub sections: ObjSections, pub entry: u64, pub mw_comment: Option, @@ -240,7 +62,6 @@ pub struct ObjInfo { pub arena_hi: Option, // Extracted - pub splits: BTreeMap>, pub named_sections: BTreeMap, pub link_order: Vec, pub blocked_ranges: BTreeMap, // start -> end @@ -271,312 +92,8 @@ pub struct ObjReloc { pub address: u64, pub target_symbol: SymbolIndex, pub addend: i64, -} - -impl ObjSymbols { - pub fn new(symbols: Vec) -> Self { - let mut symbols_by_address = BTreeMap::>::new(); - let mut symbols_by_name = HashMap::>::new(); - for (idx, symbol) in symbols.iter().enumerate() { - symbols_by_address.nested_push(symbol.address as u32, idx); - if !symbol.name.is_empty() { - symbols_by_name.nested_push(symbol.name.clone(), idx); - } - } - Self { symbols, symbols_by_address, symbols_by_name } - } - - pub fn add(&mut self, in_symbol: ObjSymbol, replace: bool) -> Result { - let opt = self.at_address(in_symbol.address as u32).find(|(_, symbol)| { - (symbol.kind == in_symbol.kind || - // Replace lbl_* with real symbols - (symbol.kind == ObjSymbolKind::Unknown && symbol.name.starts_with("lbl_"))) - // Hack to avoid replacing different ABS symbols - && (symbol.section.is_some() || symbol.name == in_symbol.name) - // Avoid replacing symbols with ABS symbols, and vice versa - && (symbol.section == in_symbol.section) - }); - let target_symbol_idx = if let Some((symbol_idx, existing)) = opt { - let size = - if existing.size_known && in_symbol.size_known && existing.size != in_symbol.size { - log::warn!( - "Conflicting size for {}: was {:#X}, now {:#X}", - existing.name, - existing.size, - in_symbol.size - ); - if replace { 
- in_symbol.size - } else { - existing.size - } - } else if in_symbol.size_known { - in_symbol.size - } else { - existing.size - }; - if !replace { - // Not replacing existing symbol, but update size - if in_symbol.size_known && !existing.size_known { - self.replace(symbol_idx, ObjSymbol { - size: in_symbol.size, - size_known: true, - ..existing.clone() - })?; - } - return Ok(symbol_idx); - } - let new_symbol = ObjSymbol { - name: in_symbol.name, - demangled_name: in_symbol.demangled_name, - address: in_symbol.address, - section: in_symbol.section, - size, - size_known: existing.size_known || in_symbol.size != 0, - flags: in_symbol.flags, - kind: in_symbol.kind, - align: in_symbol.align.or(existing.align), - data_kind: match in_symbol.data_kind { - ObjDataKind::Unknown => existing.data_kind, - kind => kind, - }, - }; - if existing != &new_symbol { - log::debug!("Replacing {:?} with {:?}", existing, new_symbol); - self.replace(symbol_idx, new_symbol)?; - } - symbol_idx - } else { - let target_symbol_idx = self.symbols.len(); - self.add_direct(ObjSymbol { - name: in_symbol.name, - demangled_name: in_symbol.demangled_name, - address: in_symbol.address, - section: in_symbol.section, - size: in_symbol.size, - size_known: in_symbol.size != 0, - flags: in_symbol.flags, - kind: in_symbol.kind, - align: in_symbol.align, - data_kind: in_symbol.data_kind, - })?; - target_symbol_idx - }; - Ok(target_symbol_idx) - } - - pub fn add_direct(&mut self, in_symbol: ObjSymbol) -> Result { - let symbol_idx = self.symbols.len(); - self.symbols_by_address.nested_push(in_symbol.address as u32, symbol_idx); - if !in_symbol.name.is_empty() { - self.symbols_by_name.nested_push(in_symbol.name.clone(), symbol_idx); - } - self.symbols.push(in_symbol); - Ok(symbol_idx) - } - - pub fn at(&self, symbol_idx: SymbolIndex) -> &ObjSymbol { &self.symbols[symbol_idx] } - - pub fn address_of(&self, symbol_idx: SymbolIndex) -> u64 { self.symbols[symbol_idx].address } - - pub fn iter(&self) -> impl 
DoubleEndedIterator { self.symbols.iter() } - - pub fn count(&self) -> usize { self.symbols.len() } - - pub fn at_address( - &self, - addr: u32, - ) -> impl DoubleEndedIterator { - self.symbols_by_address - .get(&addr) - .into_iter() - .flatten() - .map(move |&idx| (idx, &self.symbols[idx])) - } - - pub fn kind_at_address( - &self, - addr: u32, - kind: ObjSymbolKind, - ) -> Result> { - let (count, result) = self - .at_address(addr) - .filter(|(_, sym)| sym.kind == kind) - .fold((0, None), |(i, _), v| (i + 1, Some(v))); - ensure!(count <= 1, "Multiple symbols of kind {:?} at address {:#010X}", kind, addr); - Ok(result) - } - - // Iterate over all in address ascending order, including ABS symbols - pub fn iter_ordered(&self) -> impl DoubleEndedIterator { - self.symbols_by_address - .iter() - .flat_map(move |(_, v)| v.iter().map(move |u| (*u, &self.symbols[*u]))) - } - - // Iterate over range in address ascending order, excluding ABS symbols - pub fn for_range( - &self, - range: R, - ) -> impl DoubleEndedIterator - where - R: RangeBounds, - { - self.symbols_by_address - .range(range) - .flat_map(move |(_, v)| v.iter().map(move |u| (*u, &self.symbols[*u]))) - // Ignore ABS symbols - .filter(move |(_, sym)| sym.section.is_some() || sym.flags.is_common()) - } - - pub fn indexes_for_range( - &self, - range: R, - ) -> impl DoubleEndedIterator - where - R: RangeBounds, - { - self.symbols_by_address.range(range).map(|(k, v)| (*k, v.as_ref())) - } - - pub fn for_section( - &self, - section: &ObjSection, - ) -> impl DoubleEndedIterator { - let section_index = section.index; - self.for_range(section.address as u32..(section.address + section.size) as u32) - .filter(move |(_, symbol)| symbol.section == Some(section_index)) - } - - pub fn for_name( - &self, - name: &str, - ) -> impl DoubleEndedIterator { - self.symbols_by_name - .get(name) - .into_iter() - .flat_map(move |v| v.iter().map(move |u| (*u, &self.symbols[*u]))) - } - - pub fn by_name(&self, name: &str) -> Result> { - 
let mut iter = self.for_name(name); - let result = iter.next(); - if let Some((index, symbol)) = result { - if let Some((other_index, other_symbol)) = iter.next() { - bail!( - "Multiple symbols with name {}: {} {:?} {:#010X} and {} {:?} {:#010X}", - name, - index, - symbol.kind, - symbol.address, - other_index, - other_symbol.kind, - other_symbol.address - ); - } - } - Ok(result) - } - - pub fn by_kind( - &self, - kind: ObjSymbolKind, - ) -> impl DoubleEndedIterator { - self.symbols.iter().enumerate().filter(move |(_, sym)| sym.kind == kind) - } - - pub fn replace(&mut self, index: SymbolIndex, symbol: ObjSymbol) -> Result<()> { - let symbol_ref = &mut self.symbols[index]; - ensure!(symbol_ref.address == symbol.address, "Can't modify address with replace_symbol"); - if symbol_ref.name != symbol.name { - if !symbol_ref.name.is_empty() { - self.symbols_by_name.nested_remove(&symbol_ref.name, &index); - } - if !symbol.name.is_empty() { - self.symbols_by_name.nested_push(symbol.name.clone(), index); - } - } - *symbol_ref = symbol; - Ok(()) - } - - // Try to find a previous sized symbol that encompasses the target - pub fn for_relocation( - &self, - target_addr: u32, - reloc_kind: ObjRelocKind, - ) -> Result> { - let mut result = None; - for (_addr, symbol_idxs) in self.indexes_for_range(..=target_addr).rev() { - let mut symbols = symbol_idxs - .iter() - .map(|&idx| (idx, self.at(idx))) - .filter(|(_, sym)| { - // Linker generated labels can only be used with @ha/@h/@l relocations - !is_linker_generated_label(&sym.name) - || (matches!( - reloc_kind, - ObjRelocKind::PpcAddr16Ha - | ObjRelocKind::PpcAddr16Hi - | ObjRelocKind::PpcAddr16Lo - )) - }) - .collect_vec(); - let (symbol_idx, symbol) = if symbols.len() == 1 { - symbols.pop().unwrap() - } else { - symbols.sort_by_key(|&(_, symbol)| { - let mut rank = match symbol.kind { - ObjSymbolKind::Function | ObjSymbolKind::Object => match reloc_kind { - ObjRelocKind::PpcAddr16Hi - | ObjRelocKind::PpcAddr16Ha - | 
ObjRelocKind::PpcAddr16Lo => 1, - ObjRelocKind::Absolute - | ObjRelocKind::PpcRel24 - | ObjRelocKind::PpcRel14 - | ObjRelocKind::PpcEmbSda21 => 2, - }, - // Label - ObjSymbolKind::Unknown => match reloc_kind { - ObjRelocKind::PpcAddr16Hi - | ObjRelocKind::PpcAddr16Ha - | ObjRelocKind::PpcAddr16Lo - if !symbol.name.starts_with("..") => - { - 3 - } - _ => 1, - }, - ObjSymbolKind::Section => -1, - }; - if symbol.size > 0 { - rank += 1; - } - -rank - }); - match symbols.first() { - Some(&v) => v, - None => continue, - } - }; - if symbol.address == target_addr as u64 { - result = Some((symbol_idx, symbol)); - break; - } - if symbol.size > 0 { - if symbol.address + symbol.size > target_addr as u64 { - result = Some((symbol_idx, symbol)); - } - break; - } - } - Ok(result) - } - - #[inline] - pub fn flags(&mut self, idx: SymbolIndex) -> &mut ObjSymbolFlagSet { - &mut self.symbols[idx].flags - } + /// If present, relocation against external module + pub module: Option, } impl ObjInfo { @@ -591,8 +108,8 @@ impl ObjInfo { kind, architecture, name, - symbols: ObjSymbols::new(symbols), - sections, + symbols: ObjSymbols::new(kind, symbols), + sections: ObjSections::new(kind, sections), entry: 0, mw_comment: Default::default(), sda2_base: None, @@ -602,7 +119,7 @@ impl ObjInfo { db_stack_addr: None, arena_lo: None, arena_hi: None, - splits: Default::default(), + // splits: Default::default(), named_sections: Default::default(), link_order: vec![], blocked_ranges: Default::default(), @@ -626,86 +143,11 @@ impl ObjInfo { self.symbols.add(in_symbol, replace) } - pub fn section_at(&self, addr: u32) -> Result<&ObjSection> { - self.sections - .iter() - .find(|s| s.contains(addr)) - .ok_or_else(|| anyhow!("Failed to locate section @ {:#010X}", addr)) - } - - pub fn section_for(&self, range: Range) -> Result<&ObjSection> { - self.sections.iter().find(|s| s.contains_range(range.clone())).ok_or_else(|| { - anyhow!("Failed to locate section @ {:#010X}-{:#010X}", range.start, range.end) - }) 
- } - - pub fn section_data(&self, start: u32, end: u32) -> Result<(&ObjSection, &[u8])> { - let section = self.section_at(start)?; - ensure!( - section.contains_range(start..end), - "Range {:#010X}-{:#010X} outside of section {}: {:#010X}-{:#010X}", - start, - end, - section.name, - section.address, - section.address + section.size - ); - if section.kind == ObjSectionKind::Bss { - return Ok((section, &[])); - } - let data = if end == 0 { - §ion.data[(start as u64 - section.address) as usize..] - } else { - §ion.data[(start as u64 - section.address) as usize - ..min(section.data.len(), (end as u64 - section.address) as usize)] - }; - Ok((section, data)) - } - - /// Locate an existing split for the given address. - pub fn split_for(&self, address: u32) -> Option<(u32, &ObjSplit)> { - match self.splits_for_range(..=address).next_back() { - Some((addr, split)) if split.end == 0 || split.end > address => Some((addr, split)), - _ => None, - } - } - - /// Locate existing splits within the given address range. 
- pub fn splits_for_range( - &self, - range: R, - ) -> impl DoubleEndedIterator - where - R: RangeBounds, - { - self.splits.range(range).flat_map(|(addr, v)| v.iter().map(move |u| (*addr, u))) - } - - pub fn split_for_unit( - &self, - unit: &str, - section: &ObjSection, - ) -> Result> { - let mut result = None::<(u32, &ObjSplit)>; - for (addr, split) in self - .splits_for_range(section.address as u32..(section.address + section.size) as u32) - .filter(|(_, split)| split.unit == unit) - { - ensure!( - result.is_none(), - "Multiple splits for unit {} in section {}: {:#010X}, {:#010X}", - unit, - section.name, - result.unwrap().0, - addr - ); - result = Some((addr, split)); - } - Ok(result) - } - - pub fn add_split(&mut self, address: u32, split: ObjSplit) -> Result<()> { - let section = self.section_at(address)?; + pub fn add_split(&mut self, section_index: usize, address: u32, split: ObjSplit) -> Result<()> { + let section = self + .sections + .get_mut(section_index) + .ok_or_else(|| anyhow!("Invalid section index {}", section_index))?; let section_start = section.address as u32; let section_end = (section.address + section.size) as u32; ensure!( @@ -719,7 +161,7 @@ impl ObjInfo { section_end ); - if let Some((existing_addr, existing_split)) = self.split_for_unit(&split.unit, section)? { + if let Some((existing_addr, existing_split)) = section.splits.for_unit(&split.unit)? 
{ let new_start = min(existing_addr, address); let new_end = max(existing_split.end, split.end); @@ -788,7 +230,7 @@ impl ObjInfo { // Check if new split overlaps any existing splits let mut to_remove = BTreeSet::new(); let mut to_rename = BTreeSet::new(); - for (existing_addr, existing_split) in self.splits_for_range(new_start..new_end) { + for (existing_addr, existing_split) in section.splits.for_range(new_start..new_end) { // TODO the logic in this method should be reworked, this is a hack if split.autogenerated && !existing_split.autogenerated { log::debug!( @@ -827,15 +269,15 @@ impl ObjInfo { // Remove overlapping splits for addr in to_remove { - self.splits.remove(&addr); + section.splits.remove(addr); } // Rename any units that were overwritten // TODO this should also merge with existing splits for unit in to_rename { for (existing_addr, existing) in self - .splits + .sections .iter_mut() - .flat_map(|(addr, v)| v.iter_mut().map(move |u| (addr, u))) + .flat_map(|(_, section)| section.splits.iter_mut()) .filter(|(_, split)| split.unit == unit) { log::debug!( @@ -848,7 +290,7 @@ impl ObjInfo { existing.unit = split.unit.clone(); } } - self.add_split(new_start, ObjSplit { + self.add_split(section_index, new_start, ObjSplit { unit: split.unit, end: new_end, align: new_align, @@ -859,72 +301,14 @@ impl ObjInfo { } log::debug!("Adding split @ {} {:#010X}: {:?}", section.name, address, split); - self.splits.entry(address).or_default().push(split); + section.splits.push(address, split); Ok(()) } pub fn is_unit_autogenerated(&self, unit: &str) -> bool { - self.splits_for_range(..) 
- .filter(|(_, split)| split.unit == unit) - .all(|(_, split)| split.autogenerated) + self.sections + .all_splits() + .filter(|(_, _, _, split)| split.unit == unit) + .all(|(_, _, _, split)| split.autogenerated) } } - -impl ObjSection { - pub fn build_relocation_map(&self) -> Result> { - let mut relocations = BTreeMap::new(); - for (idx, reloc) in self.relocations.iter().enumerate() { - let address = reloc.address as u32; - match relocations.entry(address) { - btree_map::Entry::Vacant(e) => { - e.insert(idx); - } - btree_map::Entry::Occupied(_) => bail!("Duplicate relocation @ {address:#010X}"), - } - } - Ok(relocations) - } - - pub fn build_relocation_map_cloned(&self) -> Result> { - let mut relocations = BTreeMap::new(); - for reloc in self.relocations.iter().cloned() { - let address = reloc.address as u32; - match relocations.entry(address) { - btree_map::Entry::Vacant(e) => { - e.insert(reloc); - } - btree_map::Entry::Occupied(_) => bail!("Duplicate relocation @ {address:#010X}"), - } - } - Ok(relocations) - } - - #[inline] - pub fn contains(&self, addr: u32) -> bool { - (self.address..self.address + self.size).contains(&(addr as u64)) - } - - #[inline] - pub fn contains_range(&self, range: Range) -> bool { - (range.start as u64) >= self.address && (range.end as u64) <= self.address + self.size - } - - pub fn rename(&mut self, name: String) -> Result<()> { - self.kind = section_kind_for_section(&name)?; - self.name = name; - self.section_known = true; - Ok(()) - } -} - -pub fn section_kind_for_section(section_name: &str) -> Result { - Ok(match section_name { - ".init" | ".text" | ".dbgtext" | ".vmtext" => ObjSectionKind::Code, - ".ctors" | ".dtors" | ".rodata" | ".sdata2" | "extab" | "extabindex" => { - ObjSectionKind::ReadOnlyData - } - ".bss" | ".sbss" | ".sbss2" => ObjSectionKind::Bss, - ".data" | ".sdata" => ObjSectionKind::Data, - name => bail!("Unknown section {name}"), - }) -} diff --git a/src/obj/sections.rs b/src/obj/sections.rs new file mode 100644 
index 0000000..ad7f993 --- /dev/null +++ b/src/obj/sections.rs @@ -0,0 +1,247 @@ +use std::{ + cmp::min, + collections::{btree_map, BTreeMap, Bound}, + ops::{Index, IndexMut, Range, RangeBounds}, +}; + +use anyhow::{anyhow, bail, ensure, Result}; +use itertools::Itertools; + +use crate::obj::{ObjKind, ObjReloc, ObjSplit, ObjSplits, ObjSymbol}; + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum ObjSectionKind { + Code, + Data, + ReadOnlyData, + Bss, +} + +#[derive(Debug, Clone)] +pub struct ObjSection { + pub name: String, + pub kind: ObjSectionKind, + pub address: u64, + pub size: u64, + pub data: Vec, + pub align: u64, + /// REL files reference the original ELF section indices + pub elf_index: usize, + pub relocations: Vec, + pub original_address: u64, + pub file_offset: u64, + pub section_known: bool, + pub splits: ObjSplits, +} + +#[derive(Debug, Clone)] +pub struct ObjSections { + obj_kind: ObjKind, + sections: Vec, +} + +impl ObjSections { + pub fn new(obj_kind: ObjKind, sections: Vec) -> Self { Self { obj_kind, sections } } + + pub fn iter(&self) -> impl DoubleEndedIterator { + self.sections.iter().enumerate() + } + + pub fn iter_mut(&mut self) -> impl DoubleEndedIterator { + self.sections.iter_mut().enumerate() + } + + pub fn count(&self) -> usize { self.sections.len() } + + pub fn next_section_index(&self) -> usize { self.sections.len() } + + pub fn get(&self, index: usize) -> Option<&ObjSection> { self.sections.get(index) } + + pub fn get_mut(&mut self, index: usize) -> Option<&mut ObjSection> { + self.sections.get_mut(index) + } + + pub fn get_elf_index(&self, elf_index: usize) -> Option<(usize, &ObjSection)> { + self.iter().find(|&(_, s)| s.elf_index == elf_index) + } + + pub fn get_elf_index_mut(&mut self, elf_index: usize) -> Option<(usize, &mut ObjSection)> { + self.iter_mut().find(|(_, s)| s.elf_index == elf_index) + } + + pub fn at_address(&self, addr: u32) -> Result<(usize, &ObjSection)> { + ensure!( + self.obj_kind == 
ObjKind::Executable, + "Use of ObjSections::at_address in relocatable object" + ); + self.iter() + .find(|&(_, s)| s.contains(addr)) + .ok_or_else(|| anyhow!("Failed to locate section @ {:#010X}", addr)) + } + + pub fn at_address_mut(&mut self, addr: u32) -> Result<(usize, &mut ObjSection)> { + ensure!( + self.obj_kind == ObjKind::Executable, + "Use of ObjSections::at_address_mut in relocatable object" + ); + self.iter_mut() + .find(|(_, s)| s.contains(addr)) + .ok_or_else(|| anyhow!("Failed to locate section @ {:#010X}", addr)) + } + + pub fn with_range(&self, range: Range) -> Result<(usize, &ObjSection)> { + ensure!( + self.obj_kind == ObjKind::Executable, + "Use of ObjSections::with_range in relocatable object" + ); + self.iter().find(|&(_, s)| s.contains_range(range.clone())).ok_or_else(|| { + anyhow!("Failed to locate section @ {:#010X}-{:#010X}", range.start, range.end) + }) + } + + pub fn by_kind( + &self, + kind: ObjSectionKind, + ) -> impl DoubleEndedIterator { + self.iter().filter(move |(_, s)| s.kind == kind) + } + + pub fn by_name(&self, name: &str) -> Result> { + self.iter() + .filter(move |(_, s)| s.name == name) + .at_most_one() + .map_err(|_| anyhow!("Multiple sections with name {}", name)) + } + + pub fn push(&mut self, section: ObjSection) -> usize { + let index = self.sections.len(); + self.sections.push(section); + index + } + + pub fn all_splits( + &self, + ) -> impl DoubleEndedIterator { + self.iter() + .flat_map(|(idx, s)| s.splits.iter().map(move |(addr, split)| (idx, s, addr, split))) + } +} + +impl Index for ObjSections { + type Output = ObjSection; + + fn index(&self, index: usize) -> &Self::Output { &self.sections[index] } +} + +impl IndexMut for ObjSections { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { &mut self.sections[index] } +} + +impl ObjSection { + pub fn data_range(&self, start: u32, end: u32) -> Result<&[u8]> { + if end == 0 { + ensure!( + self.contains(start), + "Address {:#010X} outside of section {}: 
{:#010X}-{:#010X}", + start, + self.name, + self.address, + self.address + self.size + ); + } else { + ensure!( + self.contains_range(start..end), + "Range {:#010X}-{:#010X} outside of section {}: {:#010X}-{:#010X}", + start, + end, + self.name, + self.address, + self.address + self.size + ); + } + if self.kind == ObjSectionKind::Bss { + return Ok(&[]); + } + let start = (start as u64 - self.address) as usize; + Ok(if end == 0 { + &self.data[start..] + } else { + &self.data[start..min(self.data.len(), (end as u64 - self.address) as usize)] + }) + } + + #[inline] + pub fn symbol_data(&self, symbol: &ObjSymbol) -> Result<&[u8]> { + self.data_range(symbol.address as u32, symbol.address as u32 + symbol.size as u32) + } + + pub fn build_relocation_map(&self) -> Result> { + let mut relocations = BTreeMap::new(); + for (idx, reloc) in self.relocations.iter().enumerate() { + let address = reloc.address as u32; + match relocations.entry(address) { + btree_map::Entry::Vacant(e) => { + e.insert(idx); + } + btree_map::Entry::Occupied(_) => bail!("Duplicate relocation @ {address:#010X}"), + } + } + Ok(relocations) + } + + pub fn build_relocation_map_cloned(&self) -> Result> { + let mut relocations = BTreeMap::new(); + for reloc in self.relocations.iter().cloned() { + let address = reloc.address as u32; + match relocations.entry(address) { + btree_map::Entry::Vacant(e) => { + e.insert(reloc); + } + btree_map::Entry::Occupied(_) => bail!("Duplicate relocation @ {address:#010X}"), + } + } + Ok(relocations) + } + + #[inline] + pub fn contains(&self, addr: u32) -> bool { + (self.address..self.address + self.size).contains(&(addr as u64)) + } + + #[inline] + pub fn contains_range(&self, range: R) -> bool + where R: RangeBounds { + let start = self.address as u32; + let end = self.address as u32 + self.size as u32; + let start_in_range = match range.start_bound() { + Bound::Included(&n) => n >= start && n < end, + Bound::Excluded(&n) => n > start && n < end, + Bound::Unbounded => 
true, + }; + let end_in_range = match range.end_bound() { + Bound::Included(&n) => n > start && n < end, + Bound::Excluded(&n) => n > start && n <= end, + Bound::Unbounded => true, + }; + start_in_range && end_in_range + } + + pub fn rename(&mut self, name: String) -> Result<()> { + self.kind = section_kind_for_section(&name)?; + self.name = name; + self.section_known = true; + Ok(()) + } +} + +pub fn section_kind_for_section(section_name: &str) -> Result { + Ok(match section_name { + ".init" | ".text" | ".dbgtext" | ".vmtext" => ObjSectionKind::Code, + ".ctors" | ".dtors" | ".rodata" | ".sdata2" | "extab" | "extabindex" => { + ObjSectionKind::ReadOnlyData + } + ".bss" | ".sbss" | ".sbss2" => ObjSectionKind::Bss, + ".data" | ".sdata" => ObjSectionKind::Data, + name => bail!("Unknown section {name}"), + }) +} diff --git a/src/obj/splits.rs b/src/obj/splits.rs new file mode 100644 index 0000000..88e61d9 --- /dev/null +++ b/src/obj/splits.rs @@ -0,0 +1,76 @@ +use std::{collections::BTreeMap, ops::RangeBounds}; + +use anyhow::{anyhow, Result}; +use itertools::Itertools; + +use crate::util::nested::NestedVec; + +/// Marks a split point within a section. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ObjSplit { + pub unit: String, + pub end: u32, + pub align: Option, + /// Whether this is a part of common BSS. + pub common: bool, + /// Generated, replaceable by user. + pub autogenerated: bool, +} + +/// Splits within a section. +#[derive(Debug, Clone, Default)] +pub struct ObjSplits { + splits: BTreeMap>, +} + +impl ObjSplits { + pub fn iter(&self) -> impl DoubleEndedIterator { + self.splits.iter().flat_map(|(addr, v)| v.iter().map(move |u| (*addr, u))) + } + + pub fn iter_mut(&mut self) -> impl DoubleEndedIterator { + self.splits.iter_mut().flat_map(|(addr, v)| v.iter_mut().map(move |u| (*addr, u))) + } + + pub fn has_split_at(&self, address: u32) -> bool { self.splits.contains_key(&address) } + + /// Locate an existing split for the given address. 
+ pub fn for_address(&self, address: u32) -> Option<(u32, &ObjSplit)> { + match self.for_range(..=address).next_back() { + Some((addr, split)) if split.end == 0 || split.end > address => Some((addr, split)), + _ => None, + } + } + + /// Locate existing splits within the given address range. + pub fn for_range(&self, range: R) -> impl DoubleEndedIterator + where R: RangeBounds { + self.splits.range(range).flat_map(|(addr, v)| v.iter().map(move |u| (*addr, u))) + } + + /// Locate existing splits within the given address range. + pub fn for_range_mut( + &mut self, + range: R, + ) -> impl DoubleEndedIterator + where + R: RangeBounds, + { + self.splits.range_mut(range).flat_map(|(addr, v)| v.iter_mut().map(move |u| (*addr, u))) + } + + pub fn for_unit(&self, unit: &str) -> Result> { + self.splits + .iter() + .flat_map(|(addr, v)| v.iter().map(move |u| (*addr, u))) + .filter(|&(_, split)| split.unit == unit) + .at_most_one() + .map_err(|_| anyhow!("Multiple splits for unit {}", unit)) + } + + pub fn push(&mut self, address: u32, split: ObjSplit) { + self.splits.nested_push(address, split); + } + + pub fn remove(&mut self, address: u32) -> Option> { self.splits.remove(&address) } +} diff --git a/src/obj/symbols.rs b/src/obj/symbols.rs new file mode 100644 index 0000000..c1bb1bd --- /dev/null +++ b/src/obj/symbols.rs @@ -0,0 +1,536 @@ +use std::{ + collections::{BTreeMap, HashMap}, + hash::{Hash, Hasher}, + ops::{Index, RangeBounds}, +}; + +use anyhow::{anyhow, bail, ensure, Result}; +use flagset::{flags, FlagSet}; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use serde_repr::{Deserialize_repr, Serialize_repr}; + +use crate::{ + obj::{ObjKind, ObjRelocKind}, + util::{config::is_auto_symbol, nested::NestedVec, split::is_linker_generated_label}, +}; + +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Default)] +pub enum ObjSymbolScope { + #[default] + Unknown, + Global, + Weak, + Local, +} + +flags! 
{ + #[repr(u8)] + #[derive(Deserialize_repr, Serialize_repr)] + pub enum ObjSymbolFlags: u8 { + Global, + Local, + Weak, + Common, + Hidden, + ForceActive, + } +} + +#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct ObjSymbolFlagSet(pub FlagSet); + +impl ObjSymbolFlagSet { + #[inline] + pub fn scope(&self) -> ObjSymbolScope { + if self.is_local() { + ObjSymbolScope::Local + } else if self.is_weak() { + ObjSymbolScope::Weak + } else if self.0.contains(ObjSymbolFlags::Global) { + ObjSymbolScope::Global + } else { + ObjSymbolScope::Unknown + } + } + + #[inline] + pub fn is_local(&self) -> bool { self.0.contains(ObjSymbolFlags::Local) } + + #[inline] + pub fn is_global(&self) -> bool { !self.is_local() } + + #[inline] + pub fn is_common(&self) -> bool { self.0.contains(ObjSymbolFlags::Common) } + + #[inline] + pub fn is_weak(&self) -> bool { self.0.contains(ObjSymbolFlags::Weak) } + + #[inline] + pub fn is_hidden(&self) -> bool { self.0.contains(ObjSymbolFlags::Hidden) } + + #[inline] + pub fn is_force_active(&self) -> bool { self.0.contains(ObjSymbolFlags::ForceActive) } + + #[inline] + pub fn set_scope(&mut self, scope: ObjSymbolScope) { + match scope { + ObjSymbolScope::Unknown => { + self.0 &= !(ObjSymbolFlags::Local | ObjSymbolFlags::Global | ObjSymbolFlags::Weak) + } + ObjSymbolScope::Global => { + self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Weak)) + | ObjSymbolFlags::Global + } + ObjSymbolScope::Weak => { + self.0 = (self.0 & !(ObjSymbolFlags::Local | ObjSymbolFlags::Global)) + | ObjSymbolFlags::Weak + } + ObjSymbolScope::Local => { + self.0 = (self.0 & !(ObjSymbolFlags::Global | ObjSymbolFlags::Weak)) + | ObjSymbolFlags::Local + } + } + } + + #[inline] + pub fn set_force_active(&mut self, value: bool) { + if value { + self.0 |= ObjSymbolFlags::ForceActive; + } else { + self.0 &= !ObjSymbolFlags::ForceActive; + } + } +} + +#[allow(clippy::derived_hash_with_manual_eq)] +impl Hash for ObjSymbolFlagSet { + fn 
hash(&self, state: &mut H) { self.0.bits().hash(state) } +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] +pub enum ObjSymbolKind { + #[default] + Unknown, + Function, + Object, + Section, +} + +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] +pub enum ObjDataKind { + #[default] + Unknown, + Byte, + Byte2, + Byte4, + Byte8, + Float, + Double, + String, + String16, + StringTable, + String16Table, +} + +#[derive(Debug, Clone, Default, Eq, PartialEq)] +pub struct ObjSymbol { + pub name: String, + pub demangled_name: Option, + pub address: u64, + pub section: Option, + pub size: u64, + pub size_known: bool, + pub flags: ObjSymbolFlagSet, + pub kind: ObjSymbolKind, + pub align: Option, + pub data_kind: ObjDataKind, +} + +pub type SymbolIndex = usize; + +#[derive(Debug, Clone)] +pub struct ObjSymbols { + obj_kind: ObjKind, + symbols: Vec, + symbols_by_address: BTreeMap>, + symbols_by_name: HashMap>, + symbols_by_section: Vec>>, +} + +impl ObjSymbols { + pub fn new(obj_kind: ObjKind, symbols: Vec) -> Self { + let mut symbols_by_address = BTreeMap::>::new(); + let mut symbols_by_section: Vec>> = vec![]; + let mut symbols_by_name = HashMap::>::new(); + for (idx, symbol) in symbols.iter().enumerate() { + if obj_kind == ObjKind::Executable { + symbols_by_address.nested_push(symbol.address as u32, idx); + } + if let Some(section_idx) = symbol.section { + if section_idx >= symbols_by_section.len() { + symbols_by_section.resize_with(section_idx + 1, BTreeMap::new); + } + symbols_by_section[section_idx].nested_push(symbol.address as u32, idx); + } else { + debug_assert!( + symbol.address == 0 + || symbol.flags.is_common() + || obj_kind == ObjKind::Executable, + "ABS symbol in relocatable object" + ); + } + if !symbol.name.is_empty() { + symbols_by_name.nested_push(symbol.name.clone(), idx); + } + } + Self { obj_kind, symbols, symbols_by_address, symbols_by_name, symbols_by_section } + } + + pub fn add(&mut self, in_symbol: ObjSymbol, 
replace: bool) -> Result { + let opt = if let Some(section_index) = in_symbol.section { + self.at_section_address(section_index, in_symbol.address as u32).find(|(_, symbol)| { + symbol.kind == in_symbol.kind || + // Replace auto symbols with real symbols + (symbol.kind == ObjSymbolKind::Unknown && is_auto_symbol(&symbol.name)) + }) + } else { + // TODO hmmm + self.iter_abs().find(|(_, symbol)| symbol.name == in_symbol.name) + }; + let target_symbol_idx = if let Some((symbol_idx, existing)) = opt { + let size = + if existing.size_known && in_symbol.size_known && existing.size != in_symbol.size { + log::warn!( + "Conflicting size for {}: was {:#X}, now {:#X}", + existing.name, + existing.size, + in_symbol.size + ); + if replace { + in_symbol.size + } else { + existing.size + } + } else if in_symbol.size_known { + in_symbol.size + } else { + existing.size + }; + if !replace { + // Not replacing existing symbol, but update size + if in_symbol.size_known && !existing.size_known { + self.replace(symbol_idx, ObjSymbol { + size: in_symbol.size, + size_known: true, + ..existing.clone() + })?; + } + return Ok(symbol_idx); + } + let new_symbol = ObjSymbol { + name: in_symbol.name, + demangled_name: in_symbol.demangled_name, + address: in_symbol.address, + section: in_symbol.section, + size, + size_known: existing.size_known || in_symbol.size != 0, + flags: in_symbol.flags, + kind: in_symbol.kind, + align: in_symbol.align.or(existing.align), + data_kind: match in_symbol.data_kind { + ObjDataKind::Unknown => existing.data_kind, + kind => kind, + }, + }; + if existing != &new_symbol { + log::debug!("Replacing {:?} with {:?}", existing, new_symbol); + self.replace(symbol_idx, new_symbol)?; + } + symbol_idx + } else { + let target_symbol_idx = self.symbols.len(); + self.add_direct(ObjSymbol { + name: in_symbol.name, + demangled_name: in_symbol.demangled_name, + address: in_symbol.address, + section: in_symbol.section, + size: in_symbol.size, + size_known: in_symbol.size != 0, + 
flags: in_symbol.flags, + kind: in_symbol.kind, + align: in_symbol.align, + data_kind: in_symbol.data_kind, + })?; + target_symbol_idx + }; + Ok(target_symbol_idx) + } + + pub fn add_direct(&mut self, in_symbol: ObjSymbol) -> Result { + let symbol_idx = self.symbols.len(); + if self.obj_kind == ObjKind::Executable { + self.symbols_by_address.nested_push(in_symbol.address as u32, symbol_idx); + } + if let Some(section_idx) = in_symbol.section { + if section_idx >= self.symbols_by_section.len() { + self.symbols_by_section.resize_with(section_idx + 1, BTreeMap::new); + } + self.symbols_by_section[section_idx].nested_push(in_symbol.address as u32, symbol_idx); + } else { + ensure!( + in_symbol.address == 0 + || in_symbol.flags.is_common() + || self.obj_kind == ObjKind::Executable, + "ABS symbol in relocatable object" + ); + } + if !in_symbol.name.is_empty() { + self.symbols_by_name.nested_push(in_symbol.name.clone(), symbol_idx); + } + self.symbols.push(in_symbol); + Ok(symbol_idx) + } + + pub fn iter(&self) -> impl DoubleEndedIterator { self.symbols.iter() } + + pub fn count(&self) -> usize { self.symbols.len() } + + pub fn at_section_address( + &self, + section_idx: usize, + addr: u32, + ) -> impl DoubleEndedIterator { + self.symbols_by_section + .get(section_idx) + .and_then(|v| v.get(&addr)) + .into_iter() + .flatten() + .map(move |&idx| (idx, &self.symbols[idx])) + } + + pub fn kind_at_section_address( + &self, + section_idx: usize, + addr: u32, + kind: ObjSymbolKind, + ) -> Result> { + self.at_section_address(section_idx, addr) + .filter(|(_, sym)| sym.kind == kind) + .at_most_one() + .map_err(|_| anyhow!("Multiple symbols of kind {:?} at address {:#010X}", kind, addr)) + } + + // Iterate over all in address ascending order, excluding ABS symbols + pub fn iter_ordered(&self) -> impl DoubleEndedIterator { + self.symbols_by_section + .iter() + .flat_map(|v| v.iter().map(|(_, v)| v)) + .flat_map(move |v| v.iter().map(move |u| (*u, &self.symbols[*u]))) + } + + // 
Iterate over all ABS symbols + pub fn iter_abs(&self) -> impl DoubleEndedIterator { + debug_assert!(self.obj_kind == ObjKind::Executable); + self.symbols_by_address + .iter() + .flat_map(|(_, v)| v.iter().map(|&u| (u, &self.symbols[u]))) + .filter(|(_, s)| s.section.is_none()) + } + + // Iterate over range in address ascending order, excluding ABS symbols + pub fn for_section_range( + &self, + section_index: usize, + range: R, + ) -> impl DoubleEndedIterator + where + R: RangeBounds + Clone, + { + self.symbols_by_section + .get(section_index) + .into_iter() + .flat_map(move |v| v.range(range.clone())) + .flat_map(move |(_, v)| v.iter().map(move |u| (*u, &self.symbols[*u]))) + } + + pub fn indexes_for_range( + &self, + range: R, + ) -> impl DoubleEndedIterator + where + R: RangeBounds, + { + debug_assert!(self.obj_kind == ObjKind::Executable); + self.symbols_by_address.range(range).map(|(k, v)| (*k, v.as_ref())) + } + + pub fn for_section( + &self, + section_idx: usize, + ) -> impl DoubleEndedIterator { + self.symbols_by_section + .get(section_idx) + .into_iter() + .flat_map(|v| v.iter().map(|(_, v)| v)) + .flat_map(move |v| v.iter().map(move |u| (*u, &self.symbols[*u]))) + } + + pub fn for_name( + &self, + name: &str, + ) -> impl DoubleEndedIterator { + self.symbols_by_name + .get(name) + .into_iter() + .flat_map(move |v| v.iter().map(move |u| (*u, &self.symbols[*u]))) + } + + pub fn by_name(&self, name: &str) -> Result> { + let mut iter = self.for_name(name); + let result = iter.next(); + if let Some((index, symbol)) = result { + if let Some((other_index, other_symbol)) = iter.next() { + bail!( + "Multiple symbols with name {}: {} {:?} {:#010X} and {} {:?} {:#010X}", + name, + index, + symbol.kind, + symbol.address, + other_index, + other_symbol.kind, + other_symbol.address + ); + } + } + Ok(result) + } + + pub fn by_kind( + &self, + kind: ObjSymbolKind, + ) -> impl DoubleEndedIterator { + self.symbols.iter().enumerate().filter(move |(_, sym)| sym.kind == kind) + 
} + + pub fn replace(&mut self, index: SymbolIndex, symbol: ObjSymbol) -> Result<()> { + let symbol_ref = &mut self.symbols[index]; + ensure!(symbol_ref.address == symbol.address, "Can't modify address with replace_symbol"); + ensure!(symbol_ref.section == symbol.section, "Can't modify section with replace_symbol"); + if symbol_ref.name != symbol.name { + if !symbol_ref.name.is_empty() { + self.symbols_by_name.nested_remove(&symbol_ref.name, &index); + } + if !symbol.name.is_empty() { + self.symbols_by_name.nested_push(symbol.name.clone(), index); + } + } + *symbol_ref = symbol; + Ok(()) + } + + // Try to find a previous sized symbol that encompasses the target + pub fn for_relocation( + &self, + target_addr: u32, + reloc_kind: ObjRelocKind, + ) -> Result> { + ensure!(self.obj_kind == ObjKind::Executable); + let mut result = None; + for (_addr, symbol_idxs) in self.indexes_for_range(..=target_addr).rev() { + let mut symbols = symbol_idxs + .iter() + .map(|&idx| (idx, &self.symbols[idx])) + .filter(|(_, sym)| sym.referenced_by(reloc_kind)) + .collect_vec(); + let (symbol_idx, symbol) = if symbols.len() == 1 { + symbols.pop().unwrap() + } else { + symbols.sort_by_key(|&(_, symbol)| { + let mut rank = match symbol.kind { + ObjSymbolKind::Function | ObjSymbolKind::Object => match reloc_kind { + ObjRelocKind::PpcAddr16Hi + | ObjRelocKind::PpcAddr16Ha + | ObjRelocKind::PpcAddr16Lo => 1, + ObjRelocKind::Absolute + | ObjRelocKind::PpcRel24 + | ObjRelocKind::PpcRel14 + | ObjRelocKind::PpcEmbSda21 => 2, + }, + // Label + ObjSymbolKind::Unknown => match reloc_kind { + ObjRelocKind::PpcAddr16Hi + | ObjRelocKind::PpcAddr16Ha + | ObjRelocKind::PpcAddr16Lo + if !symbol.name.starts_with("..") => + { + 3 + } + _ => 1, + }, + ObjSymbolKind::Section => -1, + }; + if symbol.size > 0 { + rank += 1; + } + -rank + }); + match symbols.first() { + Some(&v) => v, + None => continue, + } + }; + if symbol.address == target_addr as u64 { + result = Some((symbol_idx, symbol)); + break; + } + if 
symbol.size > 0 { + if symbol.address + symbol.size > target_addr as u64 { + result = Some((symbol_idx, symbol)); + } + break; + } + } + Ok(result) + } + + #[inline] + pub fn flags(&mut self, idx: SymbolIndex) -> &mut ObjSymbolFlagSet { + &mut self.symbols[idx].flags + } +} + +impl Index for ObjSymbols { + type Output = ObjSymbol; + + fn index(&self, index: usize) -> &Self::Output { &self.symbols[index] } +} + +impl ObjSymbol { + /// Whether this symbol can be referenced by the given relocation kind. + pub fn referenced_by(&self, reloc_kind: ObjRelocKind) -> bool { + if is_linker_generated_label(&self.name) { + // Linker generated labels will only be referenced by @ha/@h/@l relocations + return matches!( + reloc_kind, + ObjRelocKind::PpcAddr16Ha | ObjRelocKind::PpcAddr16Hi | ObjRelocKind::PpcAddr16Lo + ); + } + + match self.kind { + ObjSymbolKind::Unknown => true, + ObjSymbolKind::Function => !matches!(reloc_kind, ObjRelocKind::PpcEmbSda21), + ObjSymbolKind::Object => { + !matches!(reloc_kind, ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24) + } + ObjSymbolKind::Section => { + matches!( + reloc_kind, + ObjRelocKind::PpcAddr16Ha + | ObjRelocKind::PpcAddr16Hi + | ObjRelocKind::PpcAddr16Lo + ) + } + } + } +} diff --git a/src/util/asm.rs b/src/util/asm.rs index 05e65a5..1fdb54c 100644 --- a/src/util/asm.rs +++ b/src/util/asm.rs @@ -46,10 +46,10 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { let mut symbols: Vec = obj.symbols.iter().cloned().collect(); let mut section_entries: Vec>> = vec![]; let mut section_relocations: Vec> = vec![]; - for (section_idx, section) in obj.sections.iter().enumerate() { + for (section_idx, section) in obj.sections.iter() { // Build symbol start/end entries let mut entries = BTreeMap::>::new(); - for (symbol_index, symbol) in obj.symbols.for_section(section) { + for (symbol_index, symbol) in obj.symbols.for_section(section_idx) { entries.nested_push(symbol.address as u32, SymbolEntry { index: symbol_index, kind: 
SymbolEntryKind::Start, @@ -110,6 +110,7 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { address: ins.addr as u64, target_symbol: symbol_idx, addend: 0, + module: None, }); } } @@ -121,11 +122,11 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { } // Generate labels for jump tables & relative data relocations - for section in &obj.sections { - if !matches!(section.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData) { - continue; - } - + for (_section_index, section) in obj + .sections + .iter() + .filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData)) + { for reloc in §ion.relocations { if reloc.addend == 0 { continue; @@ -135,7 +136,9 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { Some(v) => v, None => continue, }; - let target_section = &obj.sections[target_section_idx]; + let target_section = obj.sections.get(target_section_idx).ok_or_else(|| { + anyhow!("Invalid relocation target section: {:#010X} {:?}", reloc.address, target) + })?; let address = (target.address as i64 + reloc.addend) as u64; let vec = match section_entries[target_section_idx].entry(address as u32) { btree_map::Entry::Occupied(e) => e.into_mut(), @@ -177,14 +180,14 @@ pub fn write_asm(w: &mut W, obj: &ObjInfo) -> Result<()> { } } - for section in &obj.sections { - let entries = §ion_entries[section.index]; - let relocations = §ion_relocations[section.index]; + for (section_index, section) in obj.sections.iter() { + let entries = §ion_entries[section_index]; + let relocations = §ion_relocations[section_index]; let mut current_address = section.address as u32; let section_end = (section.address + section.size) as u32; let subsection = - obj.sections.iter().take(section.index).filter(|s| s.name == section.name).count(); + obj.sections.iter().take(section_index).filter(|(_, s)| s.name == section.name).count(); loop { if current_address >= section_end { diff --git a/src/util/config.rs b/src/util/config.rs index 
ee7cb8b..99aea62 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -12,13 +12,10 @@ use regex::{Captures, Regex}; use crate::{ obj::{ - ObjDataKind, ObjInfo, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, - ObjUnit, - }, - util::{ - file::{buf_writer, map_file, map_reader}, - nested::NestedVec, + ObjDataKind, ObjInfo, ObjKind, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, + ObjSymbolKind, ObjUnit, }, + util::file::{buf_writer, map_file, map_reader}, }; fn parse_hex(s: &str) -> Result { @@ -56,15 +53,18 @@ pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result Result bool { +pub fn is_skip_symbol(symbol: &ObjSymbol) -> bool { let _ = symbol; // symbol.name.starts_with("lbl_") // || symbol.name.starts_with("func_") @@ -142,6 +142,8 @@ fn is_skip_symbol(symbol: &ObjSymbol) -> bool { false } +pub fn is_auto_symbol(name: &str) -> bool { name.starts_with("lbl_") || name.starts_with("fn_") } + #[inline] pub fn write_symbols_file>(path: P, obj: &ObjInfo) -> Result<()> { let mut w = buf_writer(path)?; @@ -152,11 +154,7 @@ pub fn write_symbols_file>(path: P, obj: &ObjInfo) -> Result<()> pub fn write_symbols(w: &mut W, obj: &ObjInfo) -> Result<()> { for (_, symbol) in obj.symbols.iter_ordered() { - if symbol.kind == ObjSymbolKind::Section - // Ignore absolute symbols for now (usually linker-generated) - || symbol.section.is_none() - || is_skip_symbol(symbol) - { + if symbol.kind == ObjSymbolKind::Section || is_skip_symbol(symbol) { continue; } write_symbol(w, obj, symbol)?; @@ -311,17 +309,16 @@ pub fn write_splits(w: &mut W, obj: &ObjInfo) -> Result<()> { write!(w, " comment:{}", comment_version)?; } writeln!(w)?; - let mut split_iter = obj.splits_for_range(..).peekable(); - while let Some((addr, split)) = split_iter.next() { + let mut split_iter = obj.sections.all_splits().peekable(); + while let Some((_section_index, section, addr, split)) = split_iter.next() { if split.unit != unit.name { continue; } let end = if 
split.end > 0 { split.end } else { - split_iter.peek().map(|&(addr, _)| addr).unwrap_or(0) + split_iter.peek().map(|&(_, _, addr, _)| addr).unwrap_or(0) }; - let section = obj.section_at(addr)?; write!(w, "\t{:<11} start:{:#010X} end:{:#010X}", section.name, addr, end)?; // if let Some(align) = split.align { // write!(w, " align:{}", align)?; @@ -347,6 +344,7 @@ struct SplitSection { align: Option, /// Whether this is a part of common BSS. common: bool, + rename: Option, } struct SplitUnit { @@ -403,6 +401,7 @@ fn parse_section_line(captures: Captures) -> Result { end: 0, align: None, common: false, + rename: None, }; for attr in captures["attrs"].split(' ').filter(|&s| !s.is_empty()) { @@ -411,7 +410,7 @@ fn parse_section_line(captures: Captures) -> Result { "start" => section.start = parse_hex(value)?, "end" => section.end = parse_hex(value)?, "align" => section.align = Some(u32::from_str(value)?), - "rename" => section.name = value.to_string(), + "rename" => section.rename = Some(value.to_string()), _ => bail!("Unknown split attribute '{attr}'"), } } else { @@ -462,16 +461,38 @@ pub fn apply_splits(r: R, obj: &mut ObjInfo) -> Result<()> { } ( SplitState::Unit(unit), - SplitLine::Section(SplitSection { name, start, end, align, common }), + SplitLine::Section(SplitSection { name, start, end, align, common, rename }), ) => { - obj.splits.nested_push(start, ObjSplit { + let (section_index, _) = match obj.sections.by_name(&name)? 
{ + Some(v) => Ok(v), + None => { + if obj.kind == ObjKind::Executable { + obj.sections.with_range(start..end) + } else { + Err(anyhow!("Section {} not found", name)) + } + } + }?; + let section = obj.sections.get_mut(section_index).unwrap(); + ensure!( + section.contains_range(start..end), + "Section {} ({:#010X}..{:#010X}) does not contain range {:#010X}..{:#010X}", + name, + section.address, + section.address + section.size, + start, + end + ); + section.splits.push(start, ObjSplit { unit: unit.clone(), end, align, common, autogenerated: false, }); - obj.named_sections.insert(start, name); + if let Some(name) = rename { + obj.named_sections.insert(start, name); + } } _ => {} } diff --git a/src/util/dol.rs b/src/util/dol.rs index a1d66f3..e3e9322 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -33,8 +33,6 @@ pub fn process_dol>(path: P) -> Result { let mmap = map_file(path)?; Dol::read_from(map_reader(&mmap))? }; - let mut obj = ObjInfo::new(ObjKind::Executable, ObjArchitecture::PowerPc, name, vec![], vec![]); - obj.entry = dol.header.entry_point as u64; // Locate _rom_copy_info let first_rom_section = dol @@ -227,6 +225,7 @@ pub fn process_dol>(path: P) -> Result { } // Add text and data sections + let mut sections = vec![]; for dol_section in dol.header.sections.iter().filter(|section| section.kind != DolSectionType::Bss) { @@ -263,19 +262,19 @@ pub fn process_dol>(path: P) -> Result { } }; - obj.sections.push(ObjSection { + sections.push(ObjSection { name, kind, address: dol_section.target as u64, size: size as u64, data: dol.virtual_data_at(dol_section.target, size)?.to_vec(), align: 0, - index: 0, elf_index: 0, relocations: vec![], original_address: 0, file_offset: dol_section.offset as u64, section_known: known, + splits: Default::default(), }); } @@ -292,28 +291,28 @@ pub fn process_dol>(path: P) -> Result { bss_section.target + bss_section.size ); - obj.sections.push(ObjSection { + sections.push(ObjSection { name: format!(".bss{}", idx), kind: 
ObjSectionKind::Bss, address: addr as u64, size: size as u64, data: vec![], align: 0, - index: 0, elf_index: 0, relocations: vec![], original_address: 0, file_offset: 0, section_known: false, + splits: Default::default(), }); } // Sort sections by address ascending - obj.sections.sort_by_key(|s| s.address); + sections.sort_by_key(|s| s.address); // Apply section indices let mut init_section_index = None; - for (idx, section) in obj.sections.iter_mut().enumerate() { + for (idx, section) in sections.iter_mut().enumerate() { match section.name.as_str() { ".init" => { init_section_index = Some(idx); @@ -326,12 +325,16 @@ pub fn process_dol>(path: P) -> Result { } _ => {} } - section.index = idx; // Assume the original ELF section index is +1 // ELF files start with a NULL section section.elf_index = idx + 1; } + // Create object + let mut obj = + ObjInfo::new(ObjKind::Executable, ObjArchitecture::PowerPc, name, vec![], sections); + obj.entry = dol.header.entry_point as u64; + // Generate _rom_copy_info symbol if let (Some(rom_copy_info_addr), Some(rom_copy_info_end)) = (rom_copy_info_addr, rom_copy_info_end) @@ -394,13 +397,10 @@ pub fn process_dol>(path: P) -> Result { } // Generate symbols for extab & extabindex entries - if let (Some(extabindex_section_idx), Some(extab_section_idx)) = + if let (Some(extabindex_section_index), Some(extab_section_index)) = (extabindex_section, extab_section) { - let extabindex_section = &obj.sections[extabindex_section_idx]; - let extabindex_section_index = extabindex_section.index; - let extab_section = &obj.sections[extab_section_idx]; - let extab_section_index = extab_section.index; + let extab_section = &obj.sections[extab_section_index]; let extab_section_address = extab_section.address; let extab_section_size = extab_section.size; diff --git a/src/util/elf.rs b/src/util/elf.rs index 44902ef..f9032e8 100644 --- a/src/util/elf.rs +++ b/src/util/elf.rs @@ -1,5 +1,5 @@ use std::{ - collections::{hash_map, BTreeMap, HashMap}, + 
collections::{hash_map, HashMap}, io::Cursor, path::Path, }; @@ -27,7 +27,6 @@ use crate::{ util::{ comment::{read_comment_sym, write_comment_sym, CommentSym, MWComment}, file::map_file, - nested::NestedVec, }, }; @@ -90,12 +89,12 @@ pub fn process_elf>(path: P) -> Result { size: section.size(), data: section.uncompressed_data()?.to_vec(), align: section.align(), - index: sections.len(), elf_index: section.index().0, relocations: vec![], original_address: 0, // TODO load from abs symbol file_offset: section.file_range().map(|(v, _)| v).unwrap_or_default(), section_known: true, + splits: Default::default(), }); } @@ -273,7 +272,6 @@ pub fn process_elf>(path: P) -> Result { } let mut link_order = Vec::::new(); - let mut splits = BTreeMap::>::new(); if kind == ObjKind::Executable { // Link order is trivially deduced for file_name in section_starts.keys() { @@ -285,9 +283,17 @@ pub fn process_elf>(path: P) -> Result { } // Create a map of address -> file splits - for (file_name, sections) in section_starts { - for (address, _) in sections { - splits.nested_push(address as u32, ObjSplit { + for (file_name, section_addrs) in section_starts { + for (address, _) in section_addrs { + let section = + sections.iter_mut().find(|s| s.contains(address as u32)).ok_or_else(|| { + anyhow!( + "Failed to find section containing address {:#010X} in file {}", + address, + file_name + ) + })?; + section.splits.push(address as u32, ObjSplit { unit: file_name.clone(), end: 0, // TODO align: None, @@ -326,7 +332,6 @@ pub fn process_elf>(path: P) -> Result { obj.db_stack_addr = db_stack_addr; obj.arena_lo = arena_lo; obj.arena_hi = arena_hi; - obj.splits = splits; obj.link_order = link_order; Ok(obj) } @@ -350,8 +355,8 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { } writer.reserve_null_section_index(); - let mut out_sections: Vec = Vec::with_capacity(obj.sections.len()); - for section in &obj.sections { + let mut out_sections: Vec = Vec::with_capacity(obj.sections.count()); + for (_, 
section) in obj.sections.iter() { let name = writer.add_section_name(section.name.as_bytes()); let index = writer.reserve_section_index(); out_sections.push(OutSection { @@ -364,8 +369,8 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { }); } - let mut rela_names: Vec = vec![Default::default(); obj.sections.len()]; - for ((section, out_section), rela_name) in + let mut rela_names: Vec = vec![Default::default(); obj.sections.count()]; + for (((_, section), out_section), rela_name) in obj.sections.iter().zip(&mut out_sections).zip(&mut rela_names) { if section.relocations.is_empty() { @@ -449,12 +454,12 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { // Add section symbols for relocatable objects if obj.kind == ObjKind::Relocatable { - for section in &obj.sections { - let section_index = out_sections.get(section.index).map(|s| s.index); - let index = writer.reserve_symbol_index(section_index); + for (section_index, section) in obj.sections.iter() { + let out_section_index = out_sections.get(section_index).map(|s| s.index); + let index = writer.reserve_symbol_index(out_section_index); let sym = object::write::elf::Sym { name: None, - section: section_index, + section: out_section_index, st_info: (elf::STB_LOCAL << 4) + elf::STT_SECTION, st_other: elf::STV_DEFAULT, st_shndx: 0, @@ -535,10 +540,10 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { writer.reserve_file_header(); if obj.kind == ObjKind::Executable { - writer.reserve_program_headers(obj.sections.len() as u32); + writer.reserve_program_headers(obj.sections.count() as u32); } - for (section, out_section) in obj.sections.iter().zip(&mut out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&mut out_sections) { if section.kind == ObjSectionKind::Bss { continue; } @@ -553,7 +558,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { } } - for (section, out_section) in obj.sections.iter().zip(&mut out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&mut out_sections) { if 
section.relocations.is_empty() { continue; } @@ -586,7 +591,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { if obj.kind == ObjKind::Executable { writer.write_align_program_headers(); - for (section, out_section) in obj.sections.iter().zip(&out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&out_sections) { writer.write_program_header(&ProgramHeader { p_type: elf::PT_LOAD, p_flags: match section.kind { @@ -607,7 +612,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { } } - for (section, out_section) in obj.sections.iter().zip(&out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&out_sections) { if section.kind == ObjSectionKind::Bss { continue; } @@ -616,7 +621,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { writer.write(§ion.data); } - for (section, out_section) in obj.sections.iter().zip(&out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&out_sections) { if section.relocations.is_empty() { continue; } @@ -680,7 +685,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { } writer.write_null_section_header(); - for (section, out_section) in obj.sections.iter().zip(&out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&out_sections) { writer.write_section_header(&SectionHeader { name: Some(out_section.name), sh_type: match section.kind { @@ -703,7 +708,7 @@ pub fn write_elf(obj: &ObjInfo) -> Result> { sh_entsize: 0, // TODO? 
}); } - for (section, out_section) in obj.sections.iter().zip(&out_sections) { + for ((_, section), out_section) in obj.sections.iter().zip(&out_sections) { let Some(rela_name) = out_section.rela_name else { continue; }; @@ -853,6 +858,6 @@ fn to_obj_reloc( _ => Err(anyhow!("Unhandled relocation symbol type {:?}", symbol.kind())), }?; let address = address & !3; // TODO hack: round down for instruction - let reloc_data = ObjReloc { kind: reloc_kind, address, target_symbol, addend }; + let reloc_data = ObjReloc { kind: reloc_kind, address, target_symbol, addend, module: None }; Ok(Some(reloc_data)) } diff --git a/src/util/file.rs b/src/util/file.rs index c0f0fbc..a3819ed 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -1,5 +1,5 @@ use std::{ - fs::{File, OpenOptions}, + fs::{DirBuilder, File, OpenOptions}, io::{BufRead, BufReader, BufWriter, Cursor, Read}, path::{Path, PathBuf}, }; @@ -35,6 +35,9 @@ pub fn buf_reader>(path: P) -> Result> { /// Creates a buffered writer around a file (not memory mapped). 
pub fn buf_writer>(path: P) -> Result> { + if let Some(parent) = path.as_ref().parent() { + DirBuilder::new().recursive(true).create(parent)?; + } let file = File::create(&path) .with_context(|| format!("Failed to create file '{}'", path.as_ref().display()))?; Ok(BufWriter::new(file)) diff --git a/src/util/lcf.rs b/src/util/lcf.rs index 93b9ebe..e037124 100644 --- a/src/util/lcf.rs +++ b/src/util/lcf.rs @@ -9,16 +9,16 @@ use crate::obj::ObjInfo; const fn align_up(value: u32, align: u32) -> u32 { (value + (align - 1)) & !(align - 1) } pub fn generate_ldscript(obj: &ObjInfo, auto_force_files: bool) -> Result { - let origin = obj.sections.iter().map(|s| s.address).min().unwrap(); + let origin = obj.sections.iter().map(|(_, s)| s.address).min().unwrap(); let stack_size = match (obj.stack_address, obj.stack_end) { (Some(stack_address), Some(stack_end)) => stack_address - stack_end, _ => 65535, // default }; // Guess section alignment - let mut alignments = Vec::with_capacity(obj.sections.len()); + let mut alignments = Vec::with_capacity(obj.sections.count()); let mut last_section_end = origin as u32; - for section in &obj.sections { + for (_, section) in obj.sections.iter() { let section_start = section.address as u32; let mut align = 0x20; while align_up(last_section_end, align) < section_start { @@ -40,7 +40,7 @@ pub fn generate_ldscript(obj: &ObjInfo, auto_force_files: bool) -> Result Result>(path: P, obj: &mut ObjInfo) -> Result<()> } pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { - for section in &mut obj.sections { + for (_section_index, section) in obj.sections.iter_mut() { if let Some(info) = result.sections.get(&(section.address as u32)) { let kind = section_kind_for_section(&info.name)?; if section.section_known { @@ -702,11 +699,9 @@ pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { } // Add section symbols for (section_name, symbol_map) in &result.section_symbols { - let section_index = obj + let (section_index, _) 
= obj .sections - .iter() - .find(|s| &s.name == section_name) - .map(|s| s.index) + .by_name(section_name)? .ok_or_else(|| anyhow!("Failed to locate section {section_name} from map"))?; for symbol_entry in symbol_map.values().flatten() { add_symbol(obj, symbol_entry, Some(section_index))?; @@ -721,19 +716,19 @@ pub fn apply_map(result: &MapInfo, obj: &mut ObjInfo) -> Result<()> { for (section, unit_order) in &result.section_units { let mut units = Vec::new(); let mut existing = HashSet::new(); - for (addr, unit) in unit_order { + for (_addr, unit) in unit_order { let unit = unit.clone(); if !existing.contains(&unit) { units.push(unit.clone()); existing.insert(unit.clone()); } - obj.splits.nested_push(*addr, ObjSplit { - unit, - end: 0, // TODO? - align: None, - common: false, // TODO? - autogenerated: false, - }); + // obj.splits.nested_push(*addr, ObjSplit { + // unit, + // end: 0, // TODO? + // align: None, + // common: false, // TODO? + // autogenerated: false, + // }); } section_order.push((section.clone(), units)); } diff --git a/src/util/mod.rs b/src/util/mod.rs index 1d40707..5dd1897 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -12,6 +12,8 @@ pub mod nested; pub mod rarc; pub mod rel; pub mod rso; +pub mod signatures; +pub mod split; pub mod yaz0; /// Creates a fixed-size array reference from a slice. 
diff --git a/src/util/rel.rs b/src/util/rel.rs index 14faab6..e279463 100644 --- a/src/util/rel.rs +++ b/src/util/rel.rs @@ -79,7 +79,6 @@ pub fn process_rel(mut reader: Reader) -> Result { // println!("Section {} offset {:#X} size {:#X}", idx, offset, size); - let index = sections.len(); sections.push(ObjSection { name: format!(".section{}", idx), kind: if offset == 0 { @@ -97,12 +96,12 @@ pub fn process_rel(mut reader: Reader) -> Result { _ => align, } .unwrap_or_default() as u64, - index, elf_index: idx as usize, relocations: vec![], original_address: 0, file_offset: offset as u64, section_known: false, + splits: Default::default(), }); if offset == 0 { total_bss_size += size; @@ -116,18 +115,19 @@ pub fn process_rel(mut reader: Reader) -> Result { ); let mut symbols = Vec::new(); - let mut add_symbol = |section_idx: u8, offset: u32, name: &str| -> Result<()> { - if section_idx > 0 { - let section = sections + let mut add_symbol = |rel_section_idx: u8, offset: u32, name: &str| -> Result<()> { + if rel_section_idx > 0 { + let (section_index, _) = sections .iter() - .find(|section| section.elf_index == section_idx as usize) - .ok_or_else(|| anyhow!("Failed to locate {name} section {section_idx}"))?; - log::debug!("Adding {name} section {section_idx} offset {offset:#X}"); + .enumerate() + .find(|&(_, section)| section.elf_index == rel_section_idx as usize) + .ok_or_else(|| anyhow!("Failed to locate {name} section {rel_section_idx}"))?; + log::debug!("Adding {name} section {rel_section_idx} offset {offset:#X}"); symbols.push(ObjSymbol { name: name.to_string(), demangled_name: None, address: offset as u64, - section: Some(section.index), + section: Some(section_index), size: 0, size_known: false, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), diff --git a/src/util/rso.rs b/src/util/rso.rs index 44101a3..bdbba61 100644 --- a/src/util/rso.rs +++ b/src/util/rso.rs @@ -89,7 +89,6 @@ pub fn process_rso>(path: P) -> Result { // println!("Section {} offset {:#X} 
size {:#X}", idx, offset, size); - let index = sections.len(); sections.push(ObjSection { name: format!(".section{}", idx), kind: if offset == 0 { @@ -103,12 +102,12 @@ pub fn process_rso>(path: P) -> Result { size: size as u64, data, align: 0, - index, elf_index: idx as usize, relocations: vec![], original_address: 0, file_offset: offset as u64, section_known: false, + splits: Default::default(), }); if offset == 0 { total_bss_size += size; @@ -122,18 +121,19 @@ pub fn process_rso>(path: P) -> Result { ); let mut symbols = Vec::new(); - let mut add_symbol = |section_idx: u8, offset: u32, name: &str| -> Result<()> { - if section_idx > 0 { - let section = sections + let mut add_symbol = |rel_section_idx: u8, offset: u32, name: &str| -> Result<()> { + if rel_section_idx > 0 { + let (section_index, _) = sections .iter() - .find(|section| section.elf_index == section_idx as usize) - .ok_or_else(|| anyhow!("Failed to locate {name} section {section_idx}"))?; - log::debug!("Adding {name} section {section_idx} offset {offset:#X}"); + .enumerate() + .find(|&(_, section)| section.elf_index == rel_section_idx as usize) + .ok_or_else(|| anyhow!("Failed to locate {name} section {rel_section_idx}"))?; + log::debug!("Adding {name} section {rel_section_idx} offset {offset:#X}"); symbols.push(ObjSymbol { name: name.to_string(), demangled_name: None, address: offset as u64, - section: Some(section.index), + section: Some(section_index), size: 0, size_known: false, flags: ObjSymbolFlagSet(ObjSymbolFlags::Global.into()), @@ -182,8 +182,9 @@ pub fn process_rso>(path: P) -> Result { let demangled_name = demangle(&name, &DemangleOptions::default()); let section = sections .iter() - .find(|section| section.elf_index == section_idx as usize) - .map(|section| section.index) + .enumerate() + .find(|&(_, section)| section.elf_index == section_idx as usize) + .map(|(idx, _)| idx) // HACK: selfiles won't have any sections .unwrap_or(section_idx as usize); log::debug!( diff --git 
a/src/obj/signatures.rs b/src/util/signatures.rs similarity index 93% rename from src/obj/signatures.rs rename to src/util/signatures.rs index 7c5b420..b0fd12e 100644 --- a/src/obj/signatures.rs +++ b/src/util/signatures.rs @@ -12,7 +12,9 @@ use sha1::{Digest, Sha1}; use crate::{ analysis::tracker::{Relocation, Tracker}, array_ref, - obj::{ObjInfo, ObjReloc, ObjRelocKind, ObjSymbol, ObjSymbolFlagSet, ObjSymbolKind}, + obj::{ + ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSymbol, ObjSymbolFlagSet, ObjSymbolKind, + }, util::elf::process_elf, }; @@ -69,19 +71,19 @@ pub fn parse_signatures(sig_str: &str) -> Result> { } pub fn check_signatures_str( - obj: &ObjInfo, + section: &ObjSection, addr: u32, sig_str: &str, ) -> Result> { - check_signatures(obj, addr, &parse_signatures(sig_str)?) + check_signatures(section, addr, &parse_signatures(sig_str)?) } pub fn check_signatures( - obj: &ObjInfo, + section: &ObjSection, addr: u32, signatures: &Vec, ) -> Result> { - let (_, data) = obj.section_data(addr, 0)?; + let data = section.data_range(addr, 0)?; let mut name = None; for signature in signatures { if name.is_none() { @@ -104,7 +106,7 @@ pub fn check_signatures( } pub fn apply_symbol(obj: &mut ObjInfo, target: u32, sig_symbol: &OutSymbol) -> Result { - let mut target_section_index = obj.section_at(target).ok().map(|section| section.index); + let mut target_section_index = obj.sections.at_address(target).ok().map(|(idx, _)| idx); if let Some(target_section_index) = target_section_index { let target_section = &mut obj.sections[target_section_index]; if !target_section.section_known { @@ -138,15 +140,19 @@ pub fn apply_symbol(obj: &mut ObjInfo, target: u32, sig_symbol: &OutSymbol) -> R Ok(target_symbol_idx) } -pub fn apply_signature(obj: &mut ObjInfo, addr: u32, signature: &FunctionSignature) -> Result<()> { - let section_index = obj.section_at(addr)?.index; +pub fn apply_signature( + obj: &mut ObjInfo, + section_index: usize, + addr: u32, + signature: &FunctionSignature, 
+) -> Result<()> { let in_symbol = &signature.symbols[signature.symbol]; let symbol_idx = apply_symbol(obj, addr, in_symbol)?; let mut tracker = Tracker::new(obj); for reloc in &signature.relocations { tracker.known_relocations.insert(addr + reloc.offset); } - tracker.process_function(obj, obj.symbols.at(symbol_idx))?; + tracker.process_function(obj, &obj.symbols[symbol_idx])?; for (&reloc_addr, reloc) in &tracker.relocations { if reloc_addr < addr || reloc_addr >= addr + in_symbol.size { continue; @@ -175,6 +181,7 @@ pub fn apply_signature(obj: &mut ObjInfo, addr: u32, signature: &FunctionSignatu address: reloc_addr as u64, target_symbol: target_symbol_idx, addend: sig_reloc.addend as i64, + module: None, }; // log::info!("Applying relocation {:#010X?}", obj_reloc); obj.sections[section_index].relocations.push(obj_reloc); @@ -296,7 +303,7 @@ pub fn generate_signature>( let reloc = &section.relocations[reloc_idx]; let symbol_idx = match symbol_map.entry(reloc.target_symbol) { btree_map::Entry::Vacant(e) => { - let target = obj.symbols.at(reloc.target_symbol); + let target = &obj.symbols[reloc.target_symbol]; let symbol_idx = out_symbols.len(); e.insert(symbol_idx); out_symbols.push(OutSymbol { @@ -308,7 +315,10 @@ pub fn generate_signature>( target.size as u32 }, flags: target.flags, - section: target.section.map(|idx| obj.sections[idx].name.clone()), + section: target + .section + .and_then(|idx| obj.sections.get(idx)) + .map(|section| section.name.clone()), }); symbol_idx } diff --git a/src/obj/split.rs b/src/util/split.rs similarity index 77% rename from src/obj/split.rs rename to src/util/split.rs index 7f308ba..c3209ea 100644 --- a/src/obj/split.rs +++ b/src/util/split.rs @@ -8,6 +8,7 @@ use itertools::Itertools; use petgraph::{graph::NodeIndex, Graph}; use crate::{ + array_ref, obj::{ ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjSection, ObjSectionKind, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, ObjUnit, @@ -16,25 
+17,35 @@ use crate::{ }; /// Create splits for function pointers in the given section. -fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> Result<()> { - let mut new_splits = BTreeMap::new(); - let mut current_address = section_start; +fn split_ctors_dtors( + obj: &mut ObjInfo, + ctors_section_index: usize, + start: u32, + end: u32, +) -> Result<()> { + let ctors_section = &obj.sections[ctors_section_index]; + let mut new_splits = BTreeMap::<u32, (usize, ObjSplit)>::new(); + let mut current_address = start; let mut referenced_symbols = vec![]; - while current_address < section_end { - let (section, chunk) = obj.section_data(current_address, current_address + 4)?; - let function_addr = u32::from_be_bytes(chunk[0..4].try_into().unwrap()); - log::debug!("Found {} entry: {:#010X}", section.name, function_addr); + while current_address < end { + let chunk = ctors_section.data_range(current_address, current_address + 4)?; + let function_addr = u32::from_be_bytes(*array_ref!(chunk, 0, 4)); + log::debug!("Found {} entry: {:#010X}", ctors_section.name, function_addr); - let Some((function_symbol_idx, function_symbol)) = - obj.symbols.kind_at_address(function_addr, ObjSymbolKind::Function)? + let (text_section_index, text_section) = obj.sections.at_address(function_addr)?; + let Some((function_symbol_idx, function_symbol)) = obj.symbols.kind_at_section_address( + text_section_index, + function_addr, + ObjSymbolKind::Function, + )? 
else { bail!("Failed to find function symbol @ {:#010X}", function_addr); }; referenced_symbols.push(function_symbol_idx); - let ctors_split = obj.split_for(current_address); - let function_split = obj.split_for(function_addr); + let ctors_split = ctors_section.splits.for_address(current_address); + let function_split = text_section.splits.for_address(function_addr); let mut expected_unit = None; if let Some((_, ctors_split)) = ctors_split { @@ -45,7 +56,7 @@ fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> ensure!( unit == &function_split.unit, "Mismatched splits for {} {:#010X} ({}) and function {:#010X} ({})", - section.name, + ctors_section.name, current_address, unit, function_addr, @@ -67,32 +78,42 @@ fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> log::debug!("Adding splits to unit {}", unit); if ctors_split.is_none() { - log::debug!("Adding split for {} entry @ {:#010X}", section.name, current_address); - new_splits.insert(current_address, ObjSplit { - unit: unit.clone(), - end: current_address + 4, - align: None, - common: false, - autogenerated: true, - }); + log::debug!( + "Adding split for {} entry @ {:#010X}", + ctors_section.name, + current_address + ); + new_splits.insert( + current_address, + (ctors_section_index, ObjSplit { + unit: unit.clone(), + end: current_address + 4, + align: None, + common: false, + autogenerated: true, + }), + ); } if function_split.is_none() { log::debug!("Adding split for function @ {:#010X}", function_addr); - new_splits.insert(function_addr, ObjSplit { - unit, - end: function_addr + function_symbol.size as u32, - align: None, - common: false, - autogenerated: true, - }); + new_splits.insert( + function_addr, + (text_section_index, ObjSplit { + unit, + end: function_addr + function_symbol.size as u32, + align: None, + common: false, + autogenerated: true, + }), + ); } } current_address += 4; } - for (addr, split) in new_splits { - obj.add_split(addr, 
split)?; + for (addr, (section_index, split)) in new_splits { + obj.add_split(section_index, addr, split)?; } // Hack to avoid deadstripping @@ -104,8 +125,9 @@ fn split_ctors_dtors(obj: &mut ObjInfo, section_start: u32, section_end: u32) -> } /// Create splits for extabindex + extab entries. -fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) -> Result<()> { - let mut new_splits = BTreeMap::new(); +fn split_extabindex(obj: &mut ObjInfo, section_index: usize, start: u32) -> Result<()> { + let section = &obj.sections[section_index]; + let mut new_splits = BTreeMap::<u32, (usize, ObjSplit)>::new(); let (_, eti_init_info) = obj .symbols .by_name("_eti_init_info")? @@ -116,13 +138,17 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) eti_init_info.section, section_index ); - let mut current_address = section_start; + + let (extab_section_index, extab_section) = + obj.sections.by_name("extab")?.ok_or_else(|| anyhow!("Failed to find extab section"))?; + + let mut current_address = start; let section_end = eti_init_info.address as u32; while current_address < section_end { - let (_eti_section, chunk) = obj.section_data(current_address, current_address + 12)?; - let function_addr = u32::from_be_bytes(chunk[0..4].try_into().unwrap()); - let function_size = u32::from_be_bytes(chunk[4..8].try_into().unwrap()); - let extab_addr = u32::from_be_bytes(chunk[8..12].try_into().unwrap()); + let chunk = section.data_range(current_address, current_address + 12)?; + let function_addr = u32::from_be_bytes(*array_ref!(chunk, 0, 4)); + let function_size = u32::from_be_bytes(*array_ref!(chunk, 4, 4)); + let extab_addr = u32::from_be_bytes(*array_ref!(chunk, 8, 4)); log::debug!( "Found extabindex entry: {:#010X} size {:#010X} extab {:#010X}", function_addr, @@ -130,8 +156,11 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) extab_addr ); - let Some((_, eti_symbol)) = - obj.symbols.kind_at_address(current_address, 
ObjSymbolKind::Object)? + let Some((_, eti_symbol)) = obj.symbols.kind_at_section_address( + section_index, + current_address, + ObjSymbolKind::Object, + )? else { bail!("Failed to find extabindex symbol @ {:#010X}", current_address); }; @@ -143,8 +172,12 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) 12 ); - let Some((_, function_symbol)) = - obj.symbols.kind_at_address(function_addr, ObjSymbolKind::Function)? + let (text_section_index, text_section) = obj.sections.at_address(function_addr)?; + let Some((_, function_symbol)) = obj.symbols.kind_at_section_address( + text_section_index, + function_addr, + ObjSymbolKind::Function, + )? else { bail!("Failed to find function symbol @ {:#010X}", function_addr); }; @@ -156,8 +189,11 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) function_size ); - let Some((_, extab_symbol)) = - obj.symbols.kind_at_address(extab_addr, ObjSymbolKind::Object)? + let Some((_, extab_symbol)) = obj.symbols.kind_at_section_address( + extab_section_index, + extab_addr, + ObjSymbolKind::Object, + )? 
else { bail!("Failed to find extab symbol @ {:#010X}", extab_addr); }; @@ -167,9 +203,9 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) extab_symbol.name ); - let extabindex_split = obj.split_for(current_address); - let extab_split = obj.split_for(extab_addr); - let function_split = obj.split_for(function_addr); + let extabindex_split = section.splits.for_address(current_address); + let extab_split = extab_section.splits.for_address(extab_addr); + let function_split = text_section.splits.for_address(function_addr); let mut expected_unit = None; if let Some((_, extabindex_split)) = extabindex_split { @@ -221,43 +257,52 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) current_address, end ); - new_splits.insert(current_address, ObjSplit { - unit: unit.clone(), - end, - align: None, - common: false, - autogenerated: true, - }); + new_splits.insert( + current_address, + (section_index, ObjSplit { + unit: unit.clone(), + end, + align: None, + common: false, + autogenerated: true, + }), + ); } if extab_split.is_none() { let end = extab_addr + extab_symbol.size as u32; log::debug!("Adding split for extab @ {:#010X}-{:#010X}", extab_addr, end); - new_splits.insert(extab_addr, ObjSplit { - unit: unit.clone(), - end, - align: None, - common: false, - autogenerated: true, - }); + new_splits.insert( + extab_addr, + (extab_section_index, ObjSplit { + unit: unit.clone(), + end, + align: None, + common: false, + autogenerated: true, + }), + ); } if function_split.is_none() { let end = function_addr + function_symbol.size as u32; log::debug!("Adding split for function @ {:#010X}-{:#010X}", function_addr, end); - new_splits.insert(function_addr, ObjSplit { - unit, - end, - align: None, - common: false, - autogenerated: true, - }); + new_splits.insert( + function_addr, + (text_section_index, ObjSplit { + unit, + end, + align: None, + common: false, + autogenerated: true, + }), + ); } } current_address += 12; } - 
for (addr, split) in new_splits { - obj.add_split(addr, split)?; + for (addr, (section_index, split)) in new_splits { + obj.add_split(section_index, addr, split)?; } Ok(()) @@ -265,12 +310,12 @@ fn split_extabindex(obj: &mut ObjInfo, section_index: usize, section_start: u32) /// Create splits for gaps between existing splits. fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { - let mut new_splits = BTreeMap::new(); + let mut new_splits = BTreeMap::<u32, (usize, ObjSplit)>::new(); - for (section_idx, section) in obj.sections.iter().enumerate() { + for (section_index, section) in obj.sections.iter() { let mut current_address = section.address as u32; - let section_end = end_for_section(obj, section_idx)?; - let mut file_iter = obj.splits_for_range(current_address..section_end).peekable(); + let section_end = end_for_section(obj, section_index)?; + let mut file_iter = section.splits.for_range(..section_end).peekable(); log::debug!( "Checking splits for section {} ({:#010X}..{:#010X})", @@ -300,16 +345,12 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { if split_start > current_address { // Find any duplicate symbols in this range let mut new_split_end = split_start; - let symbols = obj.symbols.for_range(current_address..split_start).collect_vec(); + let symbols = obj + .symbols + .for_section_range(section_index, current_address..split_start) + .collect_vec(); let mut existing_symbols = HashSet::new(); for (_, symbol) in symbols { - // Sanity check? Maybe not required? 
- ensure!( - symbol.section == Some(section_idx), - "Expected symbol {} to be in section {}", - symbol.name, - section_idx - ); if !existing_symbols.insert(symbol.name.clone()) { log::debug!( "Found duplicate symbol {} at {:#010X}", @@ -328,13 +369,16 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { ); let unit = format!("{:08X}_{}", current_address, section.name.trim_start_matches('.')); - new_splits.insert(current_address, ObjSplit { - unit: unit.clone(), - end: new_split_end, - align: None, - common: false, - autogenerated: true, - }); + new_splits.insert( + current_address, + (section_index, ObjSplit { + unit: unit.clone(), + end: new_split_end, + align: None, + common: false, + autogenerated: true, + }), + ); current_address = new_split_end; continue; } @@ -353,8 +397,8 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { } // Add new splits - for (addr, split) in new_splits { - obj.add_split(addr, split)?; + for (addr, (section_index, split)) in new_splits { + obj.add_split(section_index, addr, split)?; } Ok(()) @@ -362,25 +406,20 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { /// Ensures that all .bss splits following a common split are also marked as common. fn update_common_splits(obj: &mut ObjInfo) -> Result<()> { - let Some(bss_section) = obj.sections.iter().find(|s| s.name == ".bss") else { + let Some((bss_section_index, bss_section)) = obj.sections.by_name(".bss")? 
else { return Ok(()); }; - let bss_section_start = bss_section.address as u32; - let bss_section_end = (bss_section.address + bss_section.size) as u32; - let Some(common_bss_start) = obj - .splits_for_range(bss_section_start..bss_section_end) - .find(|(_, split)| split.common) - .map(|(addr, _)| addr) + let Some(common_bss_start) = + bss_section.splits.iter().find(|(_, split)| split.common).map(|(addr, _)| addr) else { return Ok(()); }; log::debug!("Found common BSS start at {:#010X}", common_bss_start); - for (addr, vec) in obj.splits.range_mut(common_bss_start..bss_section_end) { - for split in vec { - if !split.common { - split.common = true; - log::debug!("Added common flag to split {} at {:#010X}", split.unit, addr); - } + let bss_section = &mut obj.sections[bss_section_index]; + for (addr, split) in bss_section.splits.for_range_mut(common_bss_start..) { + if !split.common { + split.common = true; + log::debug!("Added common flag to split {} at {:#010X}", split.unit, addr); } } Ok(()) @@ -389,8 +428,7 @@ fn update_common_splits(obj: &mut ObjInfo) -> Result<()> { /// Final validation of splits. fn validate_splits(obj: &ObjInfo) -> Result<()> { let mut last_split_end = 0; - for (addr, split) in obj.splits_for_range(..) 
{ - let section = obj.section_at(addr)?; + for (section_index, section, addr, split) in obj.sections.all_splits() { ensure!( addr >= last_split_end, "Split {} {} {:#010X}..{:#010X} overlaps with previous split", @@ -409,8 +447,11 @@ fn validate_splits(obj: &ObjInfo) -> Result<()> { ); last_split_end = split.end; - if let Some((_, symbol)) = - obj.symbols.for_range(..addr).filter(|&(_, s)| s.size_known && s.size > 0).next_back() + if let Some((_, symbol)) = obj + .symbols + .for_section_range(section_index, ..addr) + .filter(|&(_, s)| s.size_known && s.size > 0) + .next_back() { ensure!( addr >= symbol.address as u32 + symbol.size as u32, @@ -427,7 +468,7 @@ fn validate_splits(obj: &ObjInfo) -> Result<()> { if let Some((_, symbol)) = obj .symbols - .for_range(..split.end) + .for_section_range(section_index, ..split.end) .filter(|&(_, s)| s.size_known && s.size > 0) .next_back() { @@ -455,22 +496,23 @@ fn validate_splits(obj: &ObjInfo) -> Result<()> { /// - Resolving a new object link order pub fn update_splits(obj: &mut ObjInfo) -> Result<()> { // Create splits for extab and extabindex entries - if let Some(section) = obj.sections.iter().find(|s| s.name == "extabindex") { - split_extabindex(obj, section.index, section.address as u32)?; + if let Some((section_index, section)) = obj.sections.by_name("extabindex")? { + let start = section.address as u32; + split_extabindex(obj, section_index, start)?; } // Create splits for .ctors entries - if let Some(section) = obj.sections.iter().find(|s| s.name == ".ctors") { - let section_start = section.address as u32; - let section_end = section.address as u32 + section.size as u32 - 4; - split_ctors_dtors(obj, section_start, section_end)?; + if let Some((section_index, section)) = obj.sections.by_name(".ctors")? 
{ + let start = section.address as u32; + let end = section.address as u32 + section.size as u32 - 4; + split_ctors_dtors(obj, section_index, start, end)?; } // Create splits for .dtors entries - if let Some(section) = obj.sections.iter().find(|s| s.name == ".dtors") { - let section_start = section.address as u32 + 4; // skip __destroy_global_chain_reference - let section_end = section.address as u32 + section.size as u32 - 4; - split_ctors_dtors(obj, section_start, section_end)?; + if let Some((section_index, section)) = obj.sections.by_name(".dtors")? { + let start = section.address as u32 + 4; // skip __destroy_global_chain_reference + let end = section.address as u32 + section.size as u32 - 4; + split_ctors_dtors(obj, section_index, start, end)?; } // Create gap splits @@ -502,17 +544,15 @@ fn resolve_link_order(obj: &ObjInfo) -> Result> { let mut graph = Graph::::new(); let mut unit_to_index_map = BTreeMap::::new(); - for (_, split) in obj.splits_for_range(..) { + for (_, _, _, split) in obj.sections.all_splits() { unit_to_index_map.insert(split.unit.clone(), NodeIndex::new(0)); } for (unit, index) in unit_to_index_map.iter_mut() { *index = graph.add_node(unit.clone()); } - for section in &obj.sections { - let mut iter = obj - .splits_for_range(section.address as u32..(section.address + section.size) as u32) - .peekable(); + for (_section_index, section) in obj.sections.iter() { + let mut iter = section.splits.iter().peekable(); if section.name == ".ctors" || section.name == ".dtors" { // Skip __init_cpp_exceptions.o let skipped = iter.next(); @@ -604,10 +644,10 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { objects.push(split_obj); } - for (section_idx, section) in obj.sections.iter().enumerate() { + for (section_idx, section) in obj.sections.iter() { let mut current_address = section.address as u32; let section_end = end_for_section(obj, section_idx)?; - let mut file_iter = obj.splits_for_range(current_address..section_end).peekable(); + let mut file_iter = 
section.splits.for_range(current_address..section_end).peekable(); // Build address to relocation / address to symbol maps let relocations = section.build_relocation_map()?; @@ -679,14 +719,19 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { address: o.address - current_address as u64, target_symbol: o.target_symbol, addend: o.addend, + module: o.module, } }) .collect(); // Add section symbols - let out_section_idx = file.sections.len(); + let out_section_idx = file.sections.next_section_index(); let mut comm_addr = current_address; - for (symbol_idx, symbol) in obj.symbols.for_range(current_address..file_end) { + for (symbol_idx, symbol) in + obj.symbols.for_section_range(section_idx, current_address..file_end).filter( + |&(_, s)| s.section == Some(section_idx) && !is_linker_generated_label(&s.name), + ) + { if symbol_idxs[symbol_idx].is_some() { continue; // should never happen? } @@ -708,8 +753,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { } comm_addr = (symbol.address + symbol.size) as u32; - symbol_idxs[symbol_idx] = Some(file.symbols.count()); - file.symbols.add_direct(ObjSymbol { + symbol_idxs[symbol_idx] = Some(file.symbols.add_direct(ObjSymbol { name: symbol.name.clone(), demangled_name: symbol.demangled_name.clone(), address: if split.common { 4 } else { symbol.address - current_address as u64 }, @@ -724,7 +768,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { kind: symbol.kind, align: if split.common { Some(4) } else { symbol.align }, data_kind: symbol.data_kind, - })?; + })?); } // For mwldeppc 2.7 and above, a .comment section is required to link without error @@ -752,12 +796,12 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { size: file_end as u64 - current_address as u64, data, align, - index: out_section_idx, elf_index: out_section_idx + 1, relocations: out_relocations, original_address: current_address as u64, file_offset: section.file_offset + (current_address as u64 - section.address), section_known: true, + splits: Default::default(), }); } @@ 
-769,7 +813,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { let mut globalize_symbols = vec![]; for (obj_idx, out_obj) in objects.iter_mut().enumerate() { let symbol_idxs = &mut object_symbols[obj_idx]; - for section in &mut out_obj.sections { + for (_section_index, section) in out_obj.sections.iter_mut() { for reloc in &mut section.relocations { match symbol_idxs[reloc.target_symbol] { Some(out_sym_idx) => { @@ -778,7 +822,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { None => { // Extern let out_sym_idx = out_obj.symbols.count(); - let target_sym = obj.symbols.at(reloc.target_symbol); + let target_sym = &obj.symbols[reloc.target_symbol]; // If the symbol is local, we'll upgrade the scope to global // and rename it to avoid conflicts @@ -802,14 +846,14 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { if section.name.as_str() == "extabindex" { let Some((target_addr, target_split)) = - obj.split_for(target_sym.address as u32) + section.splits.for_address(target_sym.address as u32) else { bail!( "Bad extabindex relocation @ {:#010X}", reloc.address + section.original_address ); }; - let target_section = &obj.section_at(target_addr)?.name; + let target_section = &obj.sections.at_address(target_addr)?.1.name; log::error!( "Bad extabindex relocation @ {:#010X}\n\ \tSource object: {}:{:#010X} ({})\n\ @@ -837,7 +881,7 @@ pub fn split_obj(obj: &ObjInfo) -> Result> { for (obj, symbol_map) in objects.iter_mut().zip(&object_symbols) { for (globalize_idx, new_name) in &globalize_symbols { if let Some(symbol_idx) = symbol_map[*globalize_idx] { - let mut symbol = obj.symbols.at(symbol_idx).clone(); + let mut symbol = obj.symbols[symbol_idx].clone(); symbol.name = new_name.clone(); if symbol.flags.is_local() { log::debug!("Globalizing {} in {}", symbol.name, obj.name); @@ -956,8 +1000,10 @@ pub fn is_linker_generated_object(name: &str) -> bool { /// Locate the end address of a section when excluding linker generated objects pub fn end_for_section(obj: &ObjInfo, section_index: 
usize) -> Result { - let section = &obj.sections[section_index]; - let section_start = section.address as u32; + let section = obj + .sections + .get(section_index) + .ok_or_else(|| anyhow!("Invalid section index: {}", section_index))?; let mut section_end = (section.address + section.size) as u32; // .ctors and .dtors end with a linker-generated null pointer, // adjust section size appropriately @@ -970,7 +1016,7 @@ pub fn end_for_section(obj: &ObjInfo, section_index: usize) -> Result { loop { let last_symbol = obj .symbols - .for_range(section_start..section_end) + .for_section_range(section_index, ..section_end) .filter(|(_, s)| s.kind == ObjSymbolKind::Object && s.size_known && s.size > 0) .next_back(); match last_symbol {