objdiff-cli diff & report changes, support .splitmeta object section

- Add `objdiff-cli report changes` for diffing two reports
- Unify some click-to-highlight logic between CLI and GUI
- Load .splitmeta section for extra object metadata (original virtual addr, etc)
- More work on objdiff-cli diff
This commit is contained in:
2024-02-28 21:44:53 -07:00
parent 28348606bf
commit 39a13f4d36
11 changed files with 1018 additions and 406 deletions

View File

@@ -28,6 +28,16 @@ pub enum DiffText<'a> {
Eol,
}
/// Describes which piece of diff text, if any, is selected for highlighting.
///
/// A value can be compared directly against a [`DiffText`] segment (see the
/// `PartialEq<DiffText>` impl) and can be built from one via `From<DiffText>`.
#[derive(Default, Clone, PartialEq, Eq)]
pub enum HighlightKind {
    /// Nothing is highlighted; compares unequal to every [`DiffText`].
    #[default]
    None,
    /// An opcode, identified by the numeric value carried in `DiffText::Opcode`.
    Opcode(u8),
    /// An instruction argument; matched against `DiffText::Argument` using `loose_eq`.
    Arg(ObjInsArgValue),
    /// A symbol, identified by name.
    Symbol(String),
    /// An address; matches both `DiffText::Address` and `DiffText::BranchTarget`.
    Address(u32),
}
pub fn display_diff<E>(
ins_diff: &ObjInsDiff,
base_addr: u32,
@@ -177,3 +187,31 @@ fn display_reloc<E>(
}
Ok(())
}
/// Decides whether a segment of diff text is the item described by a highlight.
impl PartialEq<DiffText<'_>> for HighlightKind {
    fn eq(&self, other: &DiffText) -> bool {
        match self {
            // With nothing highlighted, no text segment can match.
            Self::None => false,
            Self::Opcode(op) => matches!(other, DiffText::Opcode(_, text_op) if op == text_op),
            // Arguments are compared loosely rather than with strict equality.
            Self::Arg(arg) => {
                matches!(other, DiffText::Argument(text_arg, _) if arg.loose_eq(text_arg))
            }
            Self::Symbol(name) => matches!(other, DiffText::Symbol(sym) if name == &sym.name),
            // Plain addresses and branch targets are interchangeable for highlighting.
            Self::Address(addr) => matches!(
                other,
                DiffText::Address(text_addr) | DiffText::BranchTarget(text_addr)
                    if addr == text_addr
            ),
        }
    }
}
/// Mirror of `HighlightKind == DiffText` so the comparison can be written in either order.
impl PartialEq<HighlightKind> for DiffText<'_> {
    fn eq(&self, other: &HighlightKind) -> bool {
        // Delegate to the impl on `HighlightKind` so the matching rules live in one place.
        <HighlightKind as PartialEq<DiffText<'_>>>::eq(other, self)
    }
}
/// Builds the highlight that corresponds to a segment of diff text.
impl From<DiffText<'_>> for HighlightKind {
    fn from(value: DiffText<'_>) -> Self {
        match value {
            DiffText::Opcode(_, opcode) => Self::Opcode(opcode),
            DiffText::Argument(argument, _) => Self::Arg(argument.clone()),
            DiffText::Symbol(symbol) => Self::Symbol(symbol.name.to_string()),
            // Both address-like segments collapse into the same highlight kind.
            DiffText::Address(address) => Self::Address(address),
            DiffText::BranchTarget(address) => Self::Address(address),
            // Any other kind of text produces no highlight.
            _ => Self::None,
        }
    }
}

View File

@@ -1,15 +1,16 @@
use std::{borrow::Cow, collections::BTreeMap, fs, io::Cursor, path::Path};
use std::{collections::BTreeMap, fs, io::Cursor, path::Path};
use anyhow::{anyhow, bail, ensure, Context, Result};
use byteorder::{BigEndian, ReadBytesExt};
use filetime::FileTime;
use flagset::Flags;
use object::{
elf, Architecture, Endianness, File, Object, ObjectSection, ObjectSymbol, RelocationKind,
RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection,
elf, Architecture, File, Object, ObjectSection, ObjectSymbol, RelocationKind, RelocationTarget,
SectionIndex, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection,
};
use crate::obj::{
split_meta::{SplitMeta, SPLITMETA_SECTION},
ObjArchitecture, ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
ObjSymbolFlagSet, ObjSymbolFlags,
};
@@ -23,7 +24,12 @@ fn to_obj_section_kind(kind: SectionKind) -> Option<ObjSectionKind> {
}
}
fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> Result<ObjSymbol> {
fn to_obj_symbol(
obj_file: &File<'_>,
symbol: &Symbol<'_, '_>,
addend: i64,
split_meta: Option<&SplitMeta>,
) -> Result<ObjSymbol> {
let mut name = symbol.name().context("Failed to process symbol name")?;
if name.is_empty() {
log::warn!("Found empty sym: {symbol:?}");
@@ -57,6 +63,10 @@ fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> R
if obj_file.architecture() == Architecture::PowerPc {
demangled_name = cwdemangle::demangle(name, &Default::default());
}
// Find the virtual address for the symbol if available
let virtual_address = split_meta
.and_then(|m| m.virtual_addresses.as_ref())
.and_then(|v| v.get(symbol.index().0).cloned());
Ok(ObjSymbol {
name: name.to_string(),
demangled_name,
@@ -66,13 +76,14 @@ fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> R
size_known: symbol.size() != 0,
flags,
addend,
virtual_address,
diff_symbol: None,
instructions: vec![],
match_percent: None,
})
}
fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
fn filter_sections(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Result<Vec<ObjSection>> {
let mut result = Vec::<ObjSection>::new();
for section in obj_file.sections() {
if section.size() == 0 {
@@ -83,6 +94,17 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
};
let name = section.name().context("Failed to process section name")?;
let data = section.uncompressed_data().context("Failed to read section data")?;
// Find the virtual address for the section symbol if available
let section_symbol = obj_file.symbols().find(|s| {
s.kind() == SymbolKind::Section && s.section_index() == Some(section.index())
});
let virtual_address = section_symbol.and_then(|s| {
split_meta
.and_then(|m| m.virtual_addresses.as_ref())
.and_then(|v| v.get(s.index().0).cloned())
});
result.push(ObjSection {
name: name.to_string(),
kind,
@@ -92,6 +114,7 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
index: section.index().0,
symbols: Vec::new(),
relocations: Vec::new(),
virtual_address,
data_diff: vec![],
match_percent: 0.0,
});
@@ -100,7 +123,11 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
Ok(result)
}
fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<ObjSymbol>> {
fn symbols_by_section(
obj_file: &File<'_>,
section: &ObjSection,
split_meta: Option<&SplitMeta>,
) -> Result<Vec<ObjSymbol>> {
let mut result = Vec::<ObjSymbol>::new();
for symbol in obj_file.symbols() {
if symbol.kind() == SymbolKind::Section {
@@ -115,7 +142,7 @@ fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<O
continue;
}
}
result.push(to_obj_symbol(obj_file, &symbol, 0)?);
result.push(to_obj_symbol(obj_file, &symbol, 0, split_meta)?);
}
}
}
@@ -133,11 +160,11 @@ fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<O
Ok(result)
}
fn common_symbols(obj_file: &File<'_>) -> Result<Vec<ObjSymbol>> {
fn common_symbols(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Result<Vec<ObjSymbol>> {
obj_file
.symbols()
.filter(Symbol::is_common)
.map(|symbol| to_obj_symbol(obj_file, &symbol, 0))
.map(|symbol| to_obj_symbol(obj_file, &symbol, 0, split_meta))
.collect::<Result<Vec<ObjSymbol>>>()
}
@@ -145,6 +172,7 @@ fn find_section_symbol(
obj_file: &File<'_>,
target: &Symbol<'_, '_>,
address: u64,
split_meta: Option<&SplitMeta>,
) -> Result<ObjSymbol> {
let section_index =
target.section_index().ok_or_else(|| anyhow::Error::msg("Unknown section index"))?;
@@ -164,7 +192,7 @@ fn find_section_symbol(
}
continue;
}
return to_obj_symbol(obj_file, &symbol, 0);
return to_obj_symbol(obj_file, &symbol, 0, split_meta);
}
let (name, offset) = closest_symbol
.and_then(|s| s.name().map(|n| (n, s.address())).ok())
@@ -180,6 +208,7 @@ fn find_section_symbol(
size_known: false,
flags: Default::default(),
addend: offset_addr as i64,
virtual_address: None,
diff_symbol: None,
instructions: vec![],
match_percent: None,
@@ -190,6 +219,7 @@ fn relocations_by_section(
arch: ObjArchitecture,
obj_file: &File<'_>,
section: &ObjSection,
split_meta: Option<&SplitMeta>,
) -> Result<Vec<ObjReloc>> {
let obj_section = obj_file.section_by_index(SectionIndex(section.index))?;
let mut relocations = Vec::<ObjReloc>::new();
@@ -259,11 +289,11 @@ fn relocations_by_section(
// println!("Reloc: {reloc:?}, symbol: {symbol:?}, addend: {addend:#X}");
let target = match symbol.kind() {
SymbolKind::Text | SymbolKind::Data | SymbolKind::Label | SymbolKind::Unknown => {
to_obj_symbol(obj_file, &symbol, addend)
to_obj_symbol(obj_file, &symbol, addend, split_meta)
}
SymbolKind::Section => {
ensure!(addend >= 0, "Negative addend in reloc: {addend}");
find_section_symbol(obj_file, &symbol, addend as u64)
find_section_symbol(obj_file, &symbol, addend as u64, split_meta)
}
kind => Err(anyhow!("Unhandled relocation symbol type {kind:?}")),
}?;
@@ -298,6 +328,7 @@ fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
// DWARF 2+
#[cfg(feature = "dwarf")]
{
use std::borrow::Cow;
let dwarf_cow = gimli::Dwarf::load(|id| {
Ok::<_, gimli::Error>(
obj_file
@@ -307,8 +338,8 @@ fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
)
})?;
let endian = match obj_file.endianness() {
Endianness::Little => gimli::RunTimeEndian::Little,
Endianness::Big => gimli::RunTimeEndian::Big,
object::Endianness::Little => gimli::RunTimeEndian::Little,
object::Endianness::Big => gimli::RunTimeEndian::Big,
};
let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
let mut iter = dwarf.units();
@@ -344,17 +375,35 @@ pub fn read(obj_path: &Path) -> Result<ObjInfo> {
Architecture::Mips => ObjArchitecture::Mips,
_ => bail!("Unsupported architecture: {:?}", obj_file.architecture()),
};
let split_meta = split_meta(&obj_file)?;
let mut result = ObjInfo {
architecture,
path: obj_path.to_owned(),
timestamp,
sections: filter_sections(&obj_file)?,
common: common_symbols(&obj_file)?,
sections: filter_sections(&obj_file, split_meta.as_ref())?,
common: common_symbols(&obj_file, split_meta.as_ref())?,
line_info: line_info(&obj_file)?,
split_meta: None,
};
for section in &mut result.sections {
section.symbols = symbols_by_section(&obj_file, section)?;
section.relocations = relocations_by_section(architecture, &obj_file, section)?;
section.symbols = symbols_by_section(&obj_file, section, split_meta.as_ref())?;
section.relocations =
relocations_by_section(architecture, &obj_file, section, split_meta.as_ref())?;
}
result.split_meta = split_meta;
Ok(result)
}
/// Loads the split metadata stored in the object's `.splitmeta` section.
///
/// Returns `Ok(None)` when the section is absent or empty, and an error when
/// the section data cannot be read or parsed.
fn split_meta(obj_file: &File<'_>) -> Result<Option<SplitMeta>> {
    // A missing section and a zero-sized section both mean "no metadata".
    let section = match obj_file.section_by_name(SPLITMETA_SECTION) {
        Some(section) if section.size() != 0 => section,
        _ => return Ok(None),
    };
    let data = section.uncompressed_data()?;
    let mut reader = data.as_ref();
    let meta = SplitMeta::from_reader(&mut reader, obj_file.endianness(), obj_file.is_64())?;
    Ok(Some(meta))
}

View File

@@ -3,11 +3,13 @@ pub mod elf;
pub mod mips;
#[cfg(feature = "ppc")]
pub mod ppc;
pub mod split_meta;
use std::{collections::BTreeMap, fmt, path::PathBuf};
use filetime::FileTime;
use flagset::{flags, FlagSet};
use split_meta::SplitMeta;
use crate::util::ReallySigned;
@@ -39,6 +41,7 @@ pub struct ObjSection {
pub index: usize,
pub symbols: Vec<ObjSymbol>,
pub relocations: Vec<ObjReloc>,
pub virtual_address: Option<u64>,
// Diff
pub data_diff: Vec<ObjDataDiff>,
@@ -139,7 +142,7 @@ pub struct ObjIns {
pub args: Vec<ObjInsArg>,
pub reloc: Option<ObjReloc>,
pub branch_dest: Option<u32>,
/// Line info
/// Line number
pub line: Option<u64>,
/// Original (unsimplified) instruction
pub orig: Option<String>,
@@ -185,6 +188,8 @@ pub struct ObjSymbol {
pub size_known: bool,
pub flags: ObjSymbolFlagSet,
pub addend: i64,
/// Original virtual address (from .splitmeta section)
pub virtual_address: Option<u64>,
// Diff
pub diff_symbol: Option<String>,
@@ -206,8 +211,12 @@ pub struct ObjInfo {
pub path: PathBuf,
pub timestamp: FileTime,
pub sections: Vec<ObjSection>,
/// Common BSS symbols
pub common: Vec<ObjSymbol>,
/// Line number info (.line or .debug_line section)
pub line_info: Option<BTreeMap<u64, u64>>,
/// Split object metadata (.splitmeta section)
pub split_meta: Option<SplitMeta>,
}
#[derive(Debug, Eq, PartialEq, Copy, Clone)]

View File

@@ -0,0 +1,169 @@
use std::{
io,
io::{Read, Write},
};
use object::{elf::SHT_LOUSER, Endian};
/// Name of the object file section that holds the split metadata.
pub const SPLITMETA_SECTION: &str = ".splitmeta";
/// Section type value for the split metadata section.
///
/// Use the same section type as .mwcats.* so the linker ignores it
pub const SHT_SPLITMETA: u32 = SHT_LOUSER + 0x4A2A82C2;
/// This is used to store metadata about the source of an object file,
/// such as the original virtual addresses and the tool that wrote it.
///
/// Every field is optional: a field is `None` when the corresponding
/// section was not present in the parsed data, and is skipped when writing.
#[derive(Debug, Default, Clone)]
pub struct SplitMeta {
    /// The tool that generated the object. Informational only.
    pub generator: Option<String>,
    /// The name of the source module. (e.g. the DOL or REL name)
    pub module_name: Option<String>,
    /// The ID of the source module. (e.g. the DOL or REL ID)
    pub module_id: Option<u32>,
    /// Original virtual addresses of each symbol in the object.
    /// Index 0 is the ELF null symbol.
    ///
    /// Entries are held as `u64` regardless of the object's bitness; values
    /// read from 32-bit objects are zero-extended.
    pub virtual_addresses: Option<Vec<u64>>,
}
/**
 * .splitmeta section format:
 * - Magic: "SPMD"
 * - Section: Magic: 4 bytes, Data size: 4 bytes, Data: variable
 *     Section size can be used to skip unknown sections
 * - Repeat section until EOF
 * Endianness matches the object file
 *
 * Sections:
 * - Generator: Magic: "GENR", Data size: 4 bytes, Data: UTF-8 string (no null terminator)
 * - Module name: Magic: "MODN", Data size: 4 bytes, Data: UTF-8 string (no null terminator)
 * - Module ID: Magic: "MODI", Data size: 4 bytes, Data: u32
 * - Virtual addresses: Magic: "VIRT", Data size: 4 bytes, Data: array
 *     Data is u32 array for 32-bit objects, u64 array for 64-bit objects
 *     Count is size / 4 (32-bit) or size / 8 (64-bit)
 */
const SPLIT_META_MAGIC: [u8; 4] = *b"SPMD";
/// Section magic for the generator string.
const GENERATOR_MAGIC: [u8; 4] = *b"GENR";
/// Section magic for the module name string.
const MODULE_NAME_MAGIC: [u8; 4] = *b"MODN";
/// Section magic for the module ID.
const MODULE_ID_MAGIC: [u8; 4] = *b"MODI";
/// Section magic for the virtual address table.
const VIRTUAL_ADDRESS_MAGIC: [u8; 4] = *b"VIRT";
impl SplitMeta {
    /// Parses split metadata from the raw contents of a `.splitmeta` section.
    ///
    /// `e` is the endianness used for all multi-byte fields, and `is_64`
    /// selects the entry width of the virtual address table (8 bytes when
    /// `true`, 4 bytes otherwise). Sections with an unknown magic are skipped.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` if the leading magic is wrong, a string section
    /// is not valid UTF-8, the module ID is not exactly 4 bytes, or the
    /// virtual address table is not a whole number of entries. Returns
    /// `UnexpectedEof` if a section header or its data is cut short. Other
    /// I/O errors from `reader` are propagated unchanged.
    pub fn from_reader<E, R>(reader: &mut R, e: E, is_64: bool) -> io::Result<Self>
    where
        E: Endian,
        R: Read + ?Sized,
    {
        let mut magic = [0; 4];
        reader.read_exact(&mut magic)?;
        if magic != SPLIT_META_MAGIC {
            return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid split metadata magic"));
        }
        let mut result = SplitMeta::default();
        loop {
            // Running out of input where the next section magic would start is
            // the normal end-of-data condition, not an error.
            let mut magic = [0; 4];
            match reader.read_exact(&mut magic) {
                Ok(()) => {}
                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
                Err(err) => return Err(err),
            };
            let mut size_bytes = [0; 4];
            reader.read_exact(&mut size_bytes)?;
            let size = u64::from(e.read_u32_bytes(size_bytes));
            // The size field comes straight from the file. Read through a
            // length-limited adapter instead of pre-allocating `size` bytes so
            // that a corrupt header cannot force a multi-gigabyte allocation.
            let mut data = Vec::new();
            Read::take(&mut *reader, size).read_to_end(&mut data)?;
            if data.len() as u64 != size {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "Truncated split metadata section",
                ));
            }
            match magic {
                GENERATOR_MAGIC => {
                    let string = String::from_utf8(data)
                        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
                    result.generator = Some(string);
                }
                MODULE_NAME_MAGIC => {
                    let string = String::from_utf8(data)
                        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
                    result.module_name = Some(string);
                }
                MODULE_ID_MAGIC => {
                    let id = e.read_u32_bytes(data.as_slice().try_into().map_err(|_| {
                        io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size")
                    })?);
                    result.module_id = Some(id);
                }
                VIRTUAL_ADDRESS_MAGIC => {
                    let entry_size = if is_64 { 8 } else { 4 };
                    // Reject tables with a partial trailing entry rather than
                    // silently dropping the leftover bytes.
                    if data.len() % entry_size != 0 {
                        return Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            "Invalid virtual address table size",
                        ));
                    }
                    let vec: Vec<u64> = if is_64 {
                        data.chunks_exact(8)
                            .map(|chunk| {
                                e.read_u64_bytes(
                                    chunk.try_into().expect("chunks_exact(8) yields 8-byte chunks"),
                                )
                            })
                            .collect()
                    } else {
                        data.chunks_exact(4)
                            .map(|chunk| {
                                u64::from(e.read_u32_bytes(
                                    chunk.try_into().expect("chunks_exact(4) yields 4-byte chunks"),
                                ))
                            })
                            .collect()
                    };
                    result.virtual_addresses = Some(vec);
                }
                _ => {
                    // Ignore unknown sections
                }
            }
        }
        Ok(result)
    }

    /// Serializes the metadata in `.splitmeta` section format.
    ///
    /// Only fields that are `Some` are written. `e` is the endianness used for
    /// all multi-byte fields, and `is_64` selects the entry width of the
    /// virtual address table. For 32-bit output each address is truncated to
    /// its low 32 bits.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` if a section's data does not fit in the 32-bit
    /// size field, and propagates any error from `writer`.
    pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> io::Result<()>
    where
        E: Endian,
        W: Write + ?Sized,
    {
        writer.write_all(&SPLIT_META_MAGIC)?;
        if let Some(generator) = &self.generator {
            let size = Self::data_size(generator.len(), 1)?;
            writer.write_all(&GENERATOR_MAGIC)?;
            writer.write_all(&e.write_u32_bytes(size))?;
            writer.write_all(generator.as_bytes())?;
        }
        if let Some(module_name) = &self.module_name {
            let size = Self::data_size(module_name.len(), 1)?;
            writer.write_all(&MODULE_NAME_MAGIC)?;
            writer.write_all(&e.write_u32_bytes(size))?;
            writer.write_all(module_name.as_bytes())?;
        }
        if let Some(module_id) = self.module_id {
            writer.write_all(&MODULE_ID_MAGIC)?;
            writer.write_all(&e.write_u32_bytes(4))?;
            writer.write_all(&e.write_u32_bytes(module_id))?;
        }
        if let Some(virtual_addresses) = &self.virtual_addresses {
            let entry_size = if is_64 { 8 } else { 4 };
            let size = Self::data_size(virtual_addresses.len(), entry_size)?;
            writer.write_all(&VIRTUAL_ADDRESS_MAGIC)?;
            writer.write_all(&e.write_u32_bytes(size))?;
            if is_64 {
                for &addr in virtual_addresses {
                    writer.write_all(&e.write_u64_bytes(addr))?;
                }
            } else {
                for &addr in virtual_addresses {
                    // NOTE: intentionally keeps only the low 32 bits of the address.
                    writer.write_all(&e.write_u32_bytes(addr as u32))?;
                }
            }
        }
        Ok(())
    }

    /// Returns the number of bytes that [`SplitMeta::to_writer`] emits for this value.
    ///
    /// Counts the 4-byte leading magic plus, for every present field, an
    /// 8-byte section header (magic and size) followed by the field's data.
    pub fn write_size(&self, is_64: bool) -> usize {
        let entry_size = if is_64 { 8 } else { 4 };
        let mut size = 4;
        if let Some(generator) = self.generator.as_deref() {
            size += 8 + generator.len();
        }
        if let Some(module_name) = self.module_name.as_deref() {
            size += 8 + module_name.len();
        }
        if self.module_id.is_some() {
            size += 12;
        }
        if let Some(virtual_addresses) = self.virtual_addresses.as_deref() {
            size += 8 + entry_size * virtual_addresses.len();
        }
        size
    }

    /// Computes a section's data size field (`count * entry_size`), failing
    /// instead of wrapping when the result does not fit in 32 bits.
    fn data_size(count: usize, entry_size: usize) -> io::Result<u32> {
        count
            .checked_mul(entry_size)
            .and_then(|size| u32::try_from(size).ok())
            .ok_or_else(|| {
                io::Error::new(io::ErrorKind::InvalidInput, "Split metadata section too large")
            })
    }
}