ARM: Fix "Combine text sections" confusing code and data (#195)

* ARM: Fix parsing of mapping symbols when "Combine text sections" is enabled

* Add test
This commit is contained in:
LagoLunatic 2025-04-26 13:14:16 -04:00 committed by GitHub
parent 39b1b49985
commit e1c51ac297
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 102 additions and 18 deletions

View File

@ -15,7 +15,7 @@ use crate::{
diff::{ArmArchVersion, ArmR9Usage, DiffObjConfig, display::InstructionPart},
obj::{
InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, ResolvedRelocation,
ScannedInstruction, SymbolFlag, SymbolFlagSet, SymbolKind,
ScannedInstruction, Section, SectionKind, Symbol, SymbolFlag, SymbolFlagSet, SymbolKind,
},
};
@ -32,7 +32,8 @@ impl ArchArm {
let endianness = file.endianness();
match file {
object::File::Elf32(_) => {
let disasm_modes = Self::elf_get_mapping_symbols(file);
// The disasm_modes mapping is populated later in the post_init step so that we have access to merged sections.
let disasm_modes = BTreeMap::new();
let detected_version = Self::elf_detect_arm_version(file)?;
Ok(Self { disasm_modes, detected_version, endianness })
}
@ -73,18 +74,22 @@ impl ArchArm {
Ok(None)
}
fn elf_get_mapping_symbols(file: &object::File) -> BTreeMap<usize, Vec<DisasmMode>> {
file.sections()
.filter(|s| s.kind() == object::SectionKind::Text)
.map(|s| {
let index = s.index();
let mut mapping_symbols: Vec<_> = file
.symbols()
.filter(|s| s.section_index().map(|i| i == index).unwrap_or(false))
.filter_map(|s| DisasmMode::from_symbol(&s))
fn get_mapping_symbols(
sections: &[Section],
symbols: &[Symbol],
) -> BTreeMap<usize, Vec<DisasmMode>> {
sections
.iter()
.enumerate()
.filter(|(_, section)| section.kind == SectionKind::Code)
.map(|(index, _)| {
let mut mapping_symbols: Vec<_> = symbols
.iter()
.filter(|s| s.section.map(|i| i == index).unwrap_or(false))
.filter_map(DisasmMode::from_symbol)
.collect();
mapping_symbols.sort_unstable_by_key(|x| x.address);
(s.index().0 - 1, mapping_symbols)
(index, mapping_symbols)
})
.collect()
}
@ -178,6 +183,10 @@ impl ArchArm {
}
impl Arch for ArchArm {
/// Populates `disasm_modes` from the final section and symbol lists.
///
/// The table is built here rather than at construction time so that it is computed
/// from the sections as they exist after any section merging.
fn post_init(&mut self, sections: &[Section], symbols: &[Symbol]) {
    let modes_by_section = Self::get_mapping_symbols(sections, symbols);
    self.disasm_modes = modes_by_section;
}
fn scan_instructions(
&self,
address: u64,
@ -441,7 +450,7 @@ impl Arch for ArchArm {
fn extra_symbol_flags(&self, symbol: &object::Symbol) -> SymbolFlagSet {
let mut flags = SymbolFlagSet::default();
if DisasmMode::from_symbol(symbol).is_some() {
if DisasmMode::from_object_symbol(symbol).is_some() {
flags |= SymbolFlag::Hidden;
}
flags
@ -455,12 +464,17 @@ struct DisasmMode {
}
impl DisasmMode {
fn from_symbol<'a>(sym: &object::Symbol<'a, '_, &'a [u8]>) -> Option<Self> {
fn from_object_symbol<'a>(sym: &object::Symbol<'a, '_, &'a [u8]>) -> Option<Self> {
sym.name()
.ok()
.and_then(unarm::ParseMode::from_mapping_symbol)
.map(|mapping| DisasmMode { address: sym.address() as u32, mapping })
}
/// Builds a `DisasmMode` from an already-parsed `Symbol`.
///
/// Returns `None` when `unarm::ParseMode::from_mapping_symbol` does not recognize
/// the symbol's name.
fn from_symbol(sym: &Symbol) -> Option<Self> {
    let mapping = unarm::ParseMode::from_mapping_symbol(&sym.name)?;
    // Narrowed to `u32` exactly as `from_object_symbol` does.
    Some(DisasmMode { address: sym.address as u32, mapping })
}
}
fn push_args(

View File

@ -12,7 +12,7 @@ use crate::{
},
obj::{
InstructionArg, Object, ParsedInstruction, Relocation, RelocationFlags,
ResolvedInstructionRef, ScannedInstruction, Symbol, SymbolFlagSet, SymbolKind,
ResolvedInstructionRef, ScannedInstruction, Section, Symbol, SymbolFlagSet, SymbolKind,
},
util::ReallySigned,
};
@ -183,6 +183,9 @@ impl DataType {
}
pub trait Arch: Send + Sync + Debug {
/// Finishes arch-specific initialization that must be done after sections have been combined.
///
/// Receives the final section and symbol lists. The default implementation does nothing;
/// architectures that need to derive state from these lists override it.
fn post_init(&mut self, _sections: &[Section], _symbols: &[Symbol]) {}
/// Generate a list of instructions references (offset, size, opcode) from the given code.
///
/// The opcode IDs are used to generate the initial diff. Implementations should do as little

View File

@ -843,7 +843,7 @@ pub fn read(obj_path: &std::path::Path, config: &DiffObjConfig) -> Result<Object
pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
let obj_file = object::File::parse(data)?;
let arch = new_arch(&obj_file)?;
let mut arch = new_arch(&obj_file)?;
let split_meta = parse_split_meta(&obj_file)?;
let (mut sections, section_indices) =
map_sections(arch.as_ref(), &obj_file, split_meta.as_ref())?;
@ -857,6 +857,7 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
if config.combine_data_sections || config.combine_text_sections {
combine_sections(&mut sections, &mut symbols, config)?;
}
arch.post_init(&sections, &symbols);
Ok(Object {
arch,
endianness: obj_file.endianness(),

View File

@ -5,7 +5,7 @@ mod common;
#[test]
#[cfg(feature = "arm")]
fn read_arm() {
let diff_config = diff::DiffObjConfig { mips_register_prefix: true, ..Default::default() };
let diff_config = diff::DiffObjConfig { ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/LinkStateItem.o"), &diff_config).unwrap();
insta::assert_debug_snapshot!(obj);
let symbol_idx =
@ -19,7 +19,7 @@ fn read_arm() {
#[test]
#[cfg(feature = "arm")]
fn read_thumb() {
let diff_config = diff::DiffObjConfig { mips_register_prefix: true, ..Default::default() };
let diff_config = diff::DiffObjConfig { ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/thumb.o"), &diff_config).unwrap();
insta::assert_debug_snapshot!(obj);
let symbol_idx = obj
@ -32,3 +32,15 @@ fn read_thumb() {
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}
// Regression test for ARM mapping-symbol parsing when "Combine text sections" is enabled.
// Parses `enemy300.o` with `combine_text_sections: true` and snapshots the result for the
// `Enemy300Draw` symbol.
#[test]
#[cfg(feature = "arm")]
fn combine_text_sections() {
let diff_config = diff::DiffObjConfig { combine_text_sections: true, ..Default::default() };
let obj = obj::read::parse(include_object!("data/arm/enemy300.o"), &diff_config).unwrap();
// Look the symbol up by name; the test fails here if it is missing from the object.
let symbol_idx = obj.symbols.iter().position(|s| s.name == "Enemy300Draw").unwrap();
let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap();
// Snapshot both the raw instruction rows and the rendered display output.
insta::assert_debug_snapshot!(diff.instruction_rows);
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
insta::assert_snapshot!(output);
}

Binary file not shown.

View File

@ -0,0 +1,8 @@
---
source: objdiff-core/tests/arch_arm.rs
assertion_line: 45
expression: output
---
[(Line(90), Dim, 5), (Address(0), Normal, 5), (Spacing(4), Normal, 0), (Opcode("ldr", 32799), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(0)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(4), Normal, 5), (Spacing(4), Normal, 0), (Opcode("bx", 32779), Normal, 10), (Argument(Opaque("r12")), Normal, 0), (Eol, Normal, 0)]
[(Line(90), Dim, 5), (Address(8), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "esEnemyDraw", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

View File

@ -0,0 +1,46 @@
---
source: objdiff-core/tests/arch_arm.rs
assertion_line: 43
expression: diff.instruction_rows
---
[
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 76,
size: 4,
opcode: 32799,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 80,
size: 4,
opcode: 32779,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
InstructionDiffRow {
ins_ref: Some(
InstructionRef {
address: 84,
size: 4,
opcode: 65535,
},
),
kind: None,
branch_from: None,
branch_to: None,
arg_diff: [],
},
]