ARM: Fix "Combine text sections" confusing code and data (#195)

* ARM: Fix parsing of mapping symbols when "Combine text sections" is enabled

* Add test
This commit is contained in:
LagoLunatic 2025-04-26 13:14:16 -04:00 committed by GitHub
parent 39b1b49985
commit e1c51ac297
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 102 additions and 18 deletions

View File

@ -15,7 +15,7 @@ use crate::{
diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart}, diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart},
obj::{ obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation, InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
ScannedInstruction, SymbolFlag, SymbolFlagSet, SymbolKind, ScannedInstruction, Section, SectionKind, Symbol, SymbolFlag, SymbolFlagSet, SymbolKind,
}, },
}; };
@ -32,7 +32,8 @@ impl ArchArm {
let endianness = file.endianness(); let endianness = file.endianness();
match file { match file {
object::File::Elf32(_) => { object::File::Elf32(_) => {
let disasm_modes = Self::elf_get_mapping_symbols(file); // The disasm_modes mapping is populated later in the post_init step so that we have access to merged sections.
let disasm_modes = BTreeMap::new();
let detected_version = Self::elf_detect_arm_version(file)?; let detected_version = Self::elf_detect_arm_version(file)?;
Ok(Self { disasm_modes, detected_version, endianness }) Ok(Self { disasm_modes, detected_version, endianness })
} }
@ -73,18 +74,22 @@ impl ArchArm {
Ok(None) Ok(None)
} }
fn elf_get_mapping_symbols(file: &object::File) -> BTreeMap<usize, Vec<DisasmMode>> { fn get_mapping_symbols(
file.sections() sections: &[Section],
.filter(|s| s.kind() == object::SectionKind::Text) symbols: &[Symbol],
.map(|s| { ) -> BTreeMap<usize, Vec<DisasmMode>> {
let index = s.index(); sections
let mut mapping_symbols: Vec<_> = file .iter()
.symbols() .enumerate()
.filter(|s| s.section_index().map(|i| i == index).unwrap_or(false)) .filter(|(_, section)| section.kind == SectionKind::Code)
.filter_map(|s| DisasmMode::from_symbol(&s)) .map(|(index, _)| {
let mut mapping_symbols: Vec<_> = symbols
.iter()
.filter(|s| s.section.map(|i| i == index).unwrap_or(false))
.filter_map(DisasmMode::from_symbol)
.collect(); .collect();
mapping_symbols.sort_unstable_by_key(|x| x.address); mapping_symbols.sort_unstable_by_key(|x| x.address);
(s.index().0 - 1, mapping_symbols) (index, mapping_symbols)
}) })
.collect() .collect()
} }
@ -178,6 +183,10 @@ impl ArchArm {
} }
impl Arch for ArchArm { impl Arch for ArchArm {
/// Rebuilds the per-section disassembly mode table from the final section and symbol lists.
///
/// The table starts out empty when the arch is constructed and is filled in here, so that
/// it is keyed against whatever section layout the caller passes in.
fn post_init(&mut self, sections: &[Section], symbols: &[Symbol]) {
    let modes_by_section = Self::get_mapping_symbols(sections, symbols);
    self.disasm_modes = modes_by_section;
}
fn scan_instructions( fn scan_instructions(
&self, &self,
address: u64, address: u64,
@ -441,7 +450,7 @@ impl Arch for ArchArm {
fn extra_symbol_flags(&self, symbol: &object::Symbol) -> SymbolFlagSet { fn extra_symbol_flags(&self, symbol: &object::Symbol) -> SymbolFlagSet {
let mut flags = SymbolFlagSet::default(); let mut flags = SymbolFlagSet::default();
if DisasmMode::from_symbol(symbol).is_some() { if DisasmMode::from_object_symbol(symbol).is_some() {
flags |= SymbolFlag::Hidden; flags |= SymbolFlag::Hidden;
} }
flags flags
@ -455,12 +464,17 @@ struct DisasmMode {
} }
impl DisasmMode { impl DisasmMode {
fn from_symbol<'a>(sym: &object::Symbol<'a, '_, &'a [u8]>) -> Option<Self> { fn from_object_symbol<'a>(sym: &object::Symbol<'a, '_, &'a [u8]>) -> Option<Self> {
sym.name() sym.name()
.ok() .ok()
.and_then(unarm::ParseMode::from_mapping_symbol) .and_then(unarm::ParseMode::from_mapping_symbol)
.map(|mapping| DisasmMode { address: sym.address() as u32, mapping }) .map(|mapping| DisasmMode { address: sym.address() as u32, mapping })
} }
/// Builds a `DisasmMode` from an already-parsed [`Symbol`].
///
/// Returns `None` when `unarm::ParseMode::from_mapping_symbol` does not recognize the
/// symbol's name. The symbol address is narrowed to `u32` with a plain `as` cast.
fn from_symbol(sym: &Symbol) -> Option<Self> {
    let mapping = unarm::ParseMode::from_mapping_symbol(&sym.name)?;
    Some(DisasmMode { address: sym.address as u32, mapping })
}
} }
fn push_args( fn push_args(

View File

@ -12,7 +12,7 @@ use crate::{
}, },
obj::{ obj::{
InstructionArg, Object, ParsedInstruction, Relocation, RelocationFlags, InstructionArg, Object, ParsedInstruction, Relocation, RelocationFlags,
ResolvedInstructionRef, ScannedInstruction, Symbol, SymbolFlagSet, SymbolKind, ResolvedInstructionRef, ScannedInstruction, Section, Symbol, SymbolFlagSet, SymbolKind,
}, },
util::ReallySigned, util::ReallySigned,
}; };
@ -183,6 +183,9 @@ impl DataType {
} }
pub trait Arch: Send + Sync + Debug { pub trait Arch: Send + Sync + Debug {
/// Finishes arch-specific initialization that must be done after sections have been combined.
///
/// Receives the final section and symbol lists. The default implementation does nothing;
/// architectures that need this extra pass override it.
fn post_init(&mut self, _sections: &[Section], _symbols: &[Symbol]) {}
/// Generate a list of instructions references (offset, size, opcode) from the given code. /// Generate a list of instructions references (offset, size, opcode) from the given code.
/// ///
/// The opcode IDs are used to generate the initial diff. Implementations should do as little /// The opcode IDs are used to generate the initial diff. Implementations should do as little

View File

@ -843,7 +843,7 @@ pub fn read(obj_path: &std::path::Path, config: &DiffObjConfig) -> Result<Object
pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> { pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
let obj_file = object::File::parse(data)?; let obj_file = object::File::parse(data)?;
let arch = new_arch(&obj_file)?; let mut arch = new_arch(&obj_file)?;
let split_meta = parse_split_meta(&obj_file)?; let split_meta = parse_split_meta(&obj_file)?;
let (mut sections, section_indices) = let (mut sections, section_indices) =
map_sections(arch.as_ref(), &obj_file, split_meta.as_ref())?; map_sections(arch.as_ref(), &obj_file, split_meta.as_ref())?;
@ -857,6 +857,7 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
if config.combine_data_sections || config.combine_text_sections { if config.combine_data_sections || config.combine_text_sections {
combine_sections(&mut sections, &mut symbols, config)?; combine_sections(&mut sections, &mut symbols, config)?;
} }
arch.post_init(&sections, &symbols);
Ok(Object { Ok(Object {
arch, arch,
endianness: obj_file.endianness(), endianness: obj_file.endianness(),

View File

@ -5,7 +5,7 @@ mod common;
#[test] #[test]
#[cfg(feature = "arm")] #[cfg(feature = "arm")]
fn read_arm() { fn read_arm() {
let diff_config = diff::DiffObjConfig { mips_register_prefix: true, ..Default::default() }; let diff_config = diff::DiffObjConfig { ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/LinkStateItem.o"), &diff_config).unwrap(); let obj = obj::read::parse(include_object!("data/arm/LinkStateItem.o"), &diff_config).unwrap();
insta::assert_debug_snapshot!(obj); insta::assert_debug_snapshot!(obj);
let symbol_idx = let symbol_idx =
@ -19,7 +19,7 @@ fn read_arm() {
#[test] #[test]
#[cfg(feature = "arm")] #[cfg(feature = "arm")]
fn read_thumb() { fn read_thumb() {
let diff_config = diff::DiffObjConfig { mips_register_prefix: true, ..Default::default() }; let diff_config = diff::DiffObjConfig { ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/thumb.o"), &diff_config).unwrap(); let obj = obj::read::parse(include_object!("data/arm/thumb.o"), &diff_config).unwrap();
insta::assert_debug_snapshot!(obj); insta::assert_debug_snapshot!(obj);
let symbol_idx = obj let symbol_idx = obj
@ -32,3 +32,15 @@ fn read_thumb() {
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config); let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output); insta::assert_snapshot!(output);
} }
// Parses an ARM object with `combine_text_sections` enabled and snapshots both the raw
// instruction rows and the rendered display output for the `Enemy300Draw` symbol.
#[test]
#[cfg(feature = "arm")]
fn combine_text_sections() {
// Only `combine_text_sections` is overridden; every other option keeps its default.
let diff_config = diff::DiffObjConfig { combine_text_sections: true, ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/enemy300.o"), &diff_config).unwrap();
// Look the symbol up by name; the test panics here if the object does not define it.
let symbol_idx = obj.symbols.iter().position(|s| s.name == "Enemy300Draw").unwrap();
let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap();
// First snapshot: the scanned instruction rows (addresses, sizes, opcode ids).
insta::assert_debug_snapshot!(diff.instruction_rows);
// Second snapshot: the same rows as rendered by the shared test display helper.
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}

Binary file not shown.

View File

@ -0,0 +1,8 @@
---
source: objdiff-core/tests/arch_arm.rs
assertion_line: 45
expression: output
---
[(Line(90), Dim, 5), (Address(0), Normal, 5), (Spacing(4), Normal, 0), (Opcode("ldr", 32799), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(0)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(4), Normal, 5), (Spacing(4), Normal, 0), (Opcode("bx", 32779), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@ -0,0 +1,46 @@
---
source: objdiff-core/tests/arch_arm.rs
assertion_line: 43
expression: diff.instruction_rows
---
[
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 76,
size: 4,
opcode: 32799,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 80,
size: 4,
opcode: 32779,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 84,
size: 4,
opcode: 65535,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
]