objdiff-cli diff & report changes, support .splitmeta object section

- Add `objdiff-cli report changes` for diffing two reports
- Unify some click-to-highlight logic between CLI and GUI
- Load .splitmeta section for extra object metadata (original virtual address, etc.)
- More work on objdiff-cli diff
2025-12-11 06:27:55 +00:00 · 2024-02-28 21:44:53 -07:00
parent 28348606bf
commit 39a13f4d36
11 changed files with 1018 additions and 406 deletions
--- a/objdiff-core/src/diff/display.rs
+++ b/objdiff-core/src/diff/display.rs
@@ -28,6 +28,16 @@ pub enum DiffText<'a> {
    Eol,
 }

+/// Describes which piece of diff text, if any, is currently highlighted.
+///
+/// A value can be built from a [`DiffText`] via `From` and compared against
+/// other [`DiffText`] values via `PartialEq` to find matching text.
+#[derive(Default, Clone, PartialEq, Eq)]
+pub enum HighlightKind {
+    /// Nothing is highlighted; never compares equal to any `DiffText`.
+    #[default]
+    None,
+    /// Matches `DiffText::Opcode` entries carrying the same opcode value.
+    Opcode(u8),
+    /// Matches `DiffText::Argument` entries whose value is `loose_eq` to this one.
+    Arg(ObjInsArgValue),
+    /// Matches `DiffText::Symbol` entries whose symbol has this name.
+    Symbol(String),
+    /// Matches `DiffText::Address` and `DiffText::BranchTarget` entries with this address.
+    Address(u32),
+}
+
 pub fn display_diff<E>(
    ins_diff: &ObjInsDiff,
    base_addr: u32,
@@ -177,3 +187,31 @@ fn display_reloc<E>(
    }
    Ok(())
 }
+
+impl PartialEq<DiffText<'_>> for HighlightKind {
+    /// Returns `true` when `other` is a piece of diff text selected by this highlight.
+    ///
+    /// [`HighlightKind::None`] matches nothing; every other variant only matches the
+    /// `DiffText` variant(s) of the corresponding kind carrying an equal value.
+    fn eq(&self, other: &DiffText) -> bool {
+        match self {
+            HighlightKind::None => false,
+            HighlightKind::Opcode(op) => {
+                matches!(other, DiffText::Opcode(_, candidate) if op == candidate)
+            }
+            HighlightKind::Arg(arg) => {
+                matches!(other, DiffText::Argument(candidate, _) if arg.loose_eq(candidate))
+            }
+            HighlightKind::Symbol(name) => {
+                matches!(other, DiffText::Symbol(symbol) if name == &symbol.name)
+            }
+            HighlightKind::Address(addr) => matches!(
+                other,
+                DiffText::Address(target) | DiffText::BranchTarget(target) if addr == target
+            ),
+        }
+    }
+}
+
+impl PartialEq<HighlightKind> for DiffText<'_> {
+    /// Mirror of `HighlightKind == DiffText`, so the comparison works in either order.
+    fn eq(&self, highlight: &HighlightKind) -> bool {
+        highlight == self
+    }
+}
+
+impl From<DiffText<'_>> for HighlightKind {
+    /// Derives the highlight corresponding to a piece of diff text.
+    ///
+    /// Addresses and branch targets both become [`HighlightKind::Address`]; any text
+    /// that is not an opcode, argument, symbol or address yields [`HighlightKind::None`].
+    fn from(value: DiffText<'_>) -> Self {
+        match value {
+            DiffText::Address(target) | DiffText::BranchTarget(target) => Self::Address(target),
+            DiffText::Symbol(symbol) => Self::Symbol(symbol.name.to_string()),
+            DiffText::Argument(argument, _) => Self::Arg(argument.clone()),
+            DiffText::Opcode(_, opcode) => Self::Opcode(opcode),
+            _ => Self::None,
+        }
+    }
+}
--- a/objdiff-core/src/obj/elf.rs
+++ b/objdiff-core/src/obj/elf.rs
@@ -1,15 +1,16 @@
-use std::{borrow::Cow, collections::BTreeMap, fs, io::Cursor, path::Path};
+use std::{collections::BTreeMap, fs, io::Cursor, path::Path};

 use anyhow::{anyhow, bail, ensure, Context, Result};
 use byteorder::{BigEndian, ReadBytesExt};
 use filetime::FileTime;
 use flagset::Flags;
 use object::{
-    elf, Architecture, Endianness, File, Object, ObjectSection, ObjectSymbol, RelocationKind,
-    RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection,
+    elf, Architecture, File, Object, ObjectSection, ObjectSymbol, RelocationKind, RelocationTarget,
+    SectionIndex, SectionKind, Symbol, SymbolKind, SymbolScope, SymbolSection,
 };

 use crate::obj::{
+    split_meta::{SplitMeta, SPLITMETA_SECTION},
    ObjArchitecture, ObjInfo, ObjReloc, ObjRelocKind, ObjSection, ObjSectionKind, ObjSymbol,
    ObjSymbolFlagSet, ObjSymbolFlags,
 };
@@ -23,7 +24,12 @@ fn to_obj_section_kind(kind: SectionKind) -> Option<ObjSectionKind> {
    }
 }

-fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> Result<ObjSymbol> {
+fn to_obj_symbol(
+    obj_file: &File<'_>,
+    symbol: &Symbol<'_, '_>,
+    addend: i64,
+    split_meta: Option<&SplitMeta>,
+) -> Result<ObjSymbol> {
    let mut name = symbol.name().context("Failed to process symbol name")?;
    if name.is_empty() {
        log::warn!("Found empty sym: {symbol:?}");
@@ -57,6 +63,10 @@ fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> R
    if obj_file.architecture() == Architecture::PowerPc {
        demangled_name = cwdemangle::demangle(name, &Default::default());
    }
+    // Find the virtual address for the symbol if available
+    let virtual_address = split_meta
+        .and_then(|m| m.virtual_addresses.as_ref())
+        .and_then(|v| v.get(symbol.index().0).cloned());
    Ok(ObjSymbol {
        name: name.to_string(),
        demangled_name,
@@ -66,13 +76,14 @@ fn to_obj_symbol(obj_file: &File<'_>, symbol: &Symbol<'_, '_>, addend: i64) -> R
        size_known: symbol.size() != 0,
        flags,
        addend,
+        virtual_address,
        diff_symbol: None,
        instructions: vec![],
        match_percent: None,
    })
 }

-fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
+fn filter_sections(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Result<Vec<ObjSection>> {
    let mut result = Vec::<ObjSection>::new();
    for section in obj_file.sections() {
        if section.size() == 0 {
@@ -83,6 +94,17 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
        };
        let name = section.name().context("Failed to process section name")?;
        let data = section.uncompressed_data().context("Failed to read section data")?;
+
+        // Find the virtual address for the section symbol if available
+        let section_symbol = obj_file.symbols().find(|s| {
+            s.kind() == SymbolKind::Section && s.section_index() == Some(section.index())
+        });
+        let virtual_address = section_symbol.and_then(|s| {
+            split_meta
+                .and_then(|m| m.virtual_addresses.as_ref())
+                .and_then(|v| v.get(s.index().0).cloned())
+        });
+
        result.push(ObjSection {
            name: name.to_string(),
            kind,
@@ -92,6 +114,7 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
            index: section.index().0,
            symbols: Vec::new(),
            relocations: Vec::new(),
+            virtual_address,
            data_diff: vec![],
            match_percent: 0.0,
        });
@@ -100,7 +123,11 @@ fn filter_sections(obj_file: &File<'_>) -> Result<Vec<ObjSection>> {
    Ok(result)
 }

-fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<ObjSymbol>> {
+fn symbols_by_section(
+    obj_file: &File<'_>,
+    section: &ObjSection,
+    split_meta: Option<&SplitMeta>,
+) -> Result<Vec<ObjSymbol>> {
    let mut result = Vec::<ObjSymbol>::new();
    for symbol in obj_file.symbols() {
        if symbol.kind() == SymbolKind::Section {
@@ -115,7 +142,7 @@ fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<O
                        continue;
                    }
                }
-                result.push(to_obj_symbol(obj_file, &symbol, 0)?);
+                result.push(to_obj_symbol(obj_file, &symbol, 0, split_meta)?);
            }
        }
    }
@@ -133,11 +160,11 @@ fn symbols_by_section(obj_file: &File<'_>, section: &ObjSection) -> Result<Vec<O
    Ok(result)
 }

-fn common_symbols(obj_file: &File<'_>) -> Result<Vec<ObjSymbol>> {
+fn common_symbols(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Result<Vec<ObjSymbol>> {
    obj_file
        .symbols()
        .filter(Symbol::is_common)
-        .map(|symbol| to_obj_symbol(obj_file, &symbol, 0))
+        .map(|symbol| to_obj_symbol(obj_file, &symbol, 0, split_meta))
        .collect::<Result<Vec<ObjSymbol>>>()
 }

@@ -145,6 +172,7 @@ fn find_section_symbol(
    obj_file: &File<'_>,
    target: &Symbol<'_, '_>,
    address: u64,
+    split_meta: Option<&SplitMeta>,
 ) -> Result<ObjSymbol> {
    let section_index =
        target.section_index().ok_or_else(|| anyhow::Error::msg("Unknown section index"))?;
@@ -164,7 +192,7 @@ fn find_section_symbol(
            }
            continue;
        }
-        return to_obj_symbol(obj_file, &symbol, 0);
+        return to_obj_symbol(obj_file, &symbol, 0, split_meta);
    }
    let (name, offset) = closest_symbol
        .and_then(|s| s.name().map(|n| (n, s.address())).ok())
@@ -180,6 +208,7 @@ fn find_section_symbol(
        size_known: false,
        flags: Default::default(),
        addend: offset_addr as i64,
+        virtual_address: None,
        diff_symbol: None,
        instructions: vec![],
        match_percent: None,
@@ -190,6 +219,7 @@ fn relocations_by_section(
    arch: ObjArchitecture,
    obj_file: &File<'_>,
    section: &ObjSection,
+    split_meta: Option<&SplitMeta>,
 ) -> Result<Vec<ObjReloc>> {
    let obj_section = obj_file.section_by_index(SectionIndex(section.index))?;
    let mut relocations = Vec::<ObjReloc>::new();
@@ -259,11 +289,11 @@ fn relocations_by_section(
        // println!("Reloc: {reloc:?}, symbol: {symbol:?}, addend: {addend:#X}");
        let target = match symbol.kind() {
            SymbolKind::Text | SymbolKind::Data | SymbolKind::Label | SymbolKind::Unknown => {
-                to_obj_symbol(obj_file, &symbol, addend)
+                to_obj_symbol(obj_file, &symbol, addend, split_meta)
            }
            SymbolKind::Section => {
                ensure!(addend >= 0, "Negative addend in reloc: {addend}");
-                find_section_symbol(obj_file, &symbol, addend as u64)
+                find_section_symbol(obj_file, &symbol, addend as u64, split_meta)
            }
            kind => Err(anyhow!("Unhandled relocation symbol type {kind:?}")),
        }?;
@@ -298,6 +328,7 @@ fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
    // DWARF 2+
    #[cfg(feature = "dwarf")]
    {
+        use std::borrow::Cow;
        let dwarf_cow = gimli::Dwarf::load(|id| {
            Ok::<_, gimli::Error>(
                obj_file
@@ -307,8 +338,8 @@ fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
            )
        })?;
        let endian = match obj_file.endianness() {
-            Endianness::Little => gimli::RunTimeEndian::Little,
-            Endianness::Big => gimli::RunTimeEndian::Big,
+            object::Endianness::Little => gimli::RunTimeEndian::Little,
+            object::Endianness::Big => gimli::RunTimeEndian::Big,
        };
        let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
        let mut iter = dwarf.units();
@@ -344,17 +375,35 @@ pub fn read(obj_path: &Path) -> Result<ObjInfo> {
        Architecture::Mips => ObjArchitecture::Mips,
        _ => bail!("Unsupported architecture: {:?}", obj_file.architecture()),
    };
+    let split_meta = split_meta(&obj_file)?;
    let mut result = ObjInfo {
        architecture,
        path: obj_path.to_owned(),
        timestamp,
-        sections: filter_sections(&obj_file)?,
-        common: common_symbols(&obj_file)?,
+        sections: filter_sections(&obj_file, split_meta.as_ref())?,
+        common: common_symbols(&obj_file, split_meta.as_ref())?,
        line_info: line_info(&obj_file)?,
+        split_meta: None,
    };
    for section in &mut result.sections {
-        section.symbols = symbols_by_section(&obj_file, section)?;
-        section.relocations = relocations_by_section(architecture, &obj_file, section)?;
+        section.symbols = symbols_by_section(&obj_file, section, split_meta.as_ref())?;
+        section.relocations =
+            relocations_by_section(architecture, &obj_file, section, split_meta.as_ref())?;
    }
+    result.split_meta = split_meta;
    Ok(result)
 }
+
+/// Loads the optional `.splitmeta` section of `obj_file`.
+///
+/// Returns `Ok(None)` when the object has no such section or the section is empty.
+///
+/// # Errors
+///
+/// Fails when the section data cannot be read or does not parse as split metadata.
+/// Both failures carry context naming the section, matching the other section
+/// readers in this file.
+fn split_meta(obj_file: &File<'_>) -> Result<Option<SplitMeta>> {
+    let section = match obj_file.section_by_name(SPLITMETA_SECTION) {
+        Some(section) if section.size() != 0 => section,
+        _ => return Ok(None),
+    };
+    let data = section.uncompressed_data().context("Failed to read .splitmeta section data")?;
+    let mut reader = data.as_ref();
+    let meta = SplitMeta::from_reader(&mut reader, obj_file.endianness(), obj_file.is_64())
+        .context("Failed to parse .splitmeta section")?;
+    Ok(Some(meta))
+}
--- a/objdiff-core/src/obj/mod.rs
+++ b/objdiff-core/src/obj/mod.rs
@@ -3,11 +3,13 @@ pub mod elf;
 pub mod mips;
 #[cfg(feature = "ppc")]
 pub mod ppc;
+pub mod split_meta;

 use std::{collections::BTreeMap, fmt, path::PathBuf};

 use filetime::FileTime;
 use flagset::{flags, FlagSet};
+use split_meta::SplitMeta;

 use crate::util::ReallySigned;

@@ -39,6 +41,7 @@ pub struct ObjSection {
    pub index: usize,
    pub symbols: Vec<ObjSymbol>,
    pub relocations: Vec<ObjReloc>,
+    pub virtual_address: Option<u64>,

    // Diff
    pub data_diff: Vec<ObjDataDiff>,
@@ -139,7 +142,7 @@ pub struct ObjIns {
    pub args: Vec<ObjInsArg>,
    pub reloc: Option<ObjReloc>,
    pub branch_dest: Option<u32>,
-    /// Line info
+    /// Line number
    pub line: Option<u64>,
    /// Original (unsimplified) instruction
    pub orig: Option<String>,
@@ -185,6 +188,8 @@ pub struct ObjSymbol {
    pub size_known: bool,
    pub flags: ObjSymbolFlagSet,
    pub addend: i64,
+    /// Original virtual address (from .splitmeta section)
+    pub virtual_address: Option<u64>,

    // Diff
    pub diff_symbol: Option<String>,
@@ -206,8 +211,12 @@ pub struct ObjInfo {
    pub path: PathBuf,
    pub timestamp: FileTime,
    pub sections: Vec<ObjSection>,
+    /// Common BSS symbols
    pub common: Vec<ObjSymbol>,
+    /// Line number info (.line or .debug_line section)
    pub line_info: Option<BTreeMap<u64, u64>>,
+    /// Split object metadata (.splitmeta section)
+    pub split_meta: Option<SplitMeta>,
 }

 #[derive(Debug, Eq, PartialEq, Copy, Clone)]
--- a/objdiff-core/src/obj/split_meta.rs
+++ b/objdiff-core/src/obj/split_meta.rs
@@ -0,0 +1,169 @@
+use std::{
+    io,
+    io::{Read, Write},
+};
+
+use object::{elf::SHT_LOUSER, Endian};
+
+/// Name of the object section that holds split metadata.
+pub const SPLITMETA_SECTION: &str = ".splitmeta";
+/// ELF section type used for the split metadata section.
+// Use the same section type as .mwcats.* so the linker ignores it
+pub const SHT_SPLITMETA: u32 = SHT_LOUSER + 0x4A2A82C2;
+
+/// This is used to store metadata about the source of an object file,
+/// such as the original virtual addresses and the tool that wrote it.
+///
+/// Every field is optional: a field is `None` when the corresponding chunk
+/// is absent from the `.splitmeta` section.
+#[derive(Debug, Default, Clone)]
+pub struct SplitMeta {
+    /// The tool that generated the object. Informational only.
+    pub generator: Option<String>,
+    /// The name of the source module. (e.g. the DOL or REL name)
+    pub module_name: Option<String>,
+    /// The ID of the source module. (e.g. the DOL or REL ID)
+    pub module_id: Option<u32>,
+    /// Original virtual addresses of each symbol in the object.
+    /// Index 0 is the ELF null symbol.
+    /// Values are held as `u64` even for 32-bit objects, whose on-disk
+    /// entries are `u32`.
+    pub virtual_addresses: Option<Vec<u64>>,
+}
+
+/**
+ * .splitmeta section format:
+ * - Magic: "SPMD"
+ * - Section: Magic: 4 bytes, Data size: 4 bytes, Data: variable
+ *     Section size can be used to skip unknown sections
+ * - Repeat section until EOF
+ * Endianness matches the object file
+ *
+ * Sections:
+ * - Generator: Magic: "GENR", Data size: 4 bytes, Data: UTF-8 string (no null terminator)
+ * - Module name: Magic: "MODN", Data size: 4 bytes, Data: UTF-8 string (no null terminator)
+ * - Module ID: Magic: "MODI", Data size: 4 bytes, Data: u32
+ * - Virtual addresses: Magic: "VIRT", Data size: 4 bytes, Data: array
+ *     Data is u32 array for 32-bit objects, u64 array for 64-bit objects
+ *     Count is size / 4 (32-bit) or size / 8 (64-bit)
+ */
+
+// Magic that must open the section data.
+const SPLIT_META_MAGIC: [u8; 4] = *b"SPMD";
+// Magics identifying the individual chunks that follow the opening magic.
+// Generator tool name (UTF-8 string).
+const GENERATOR_MAGIC: [u8; 4] = *b"GENR";
+// Source module name (UTF-8 string).
+const MODULE_NAME_MAGIC: [u8; 4] = *b"MODN";
+// Source module ID (u32).
+const MODULE_ID_MAGIC: [u8; 4] = *b"MODI";
+// Per-symbol original virtual addresses (u32 or u64 array).
+const VIRTUAL_ADDRESS_MAGIC: [u8; 4] = *b"VIRT";
+
+impl SplitMeta {
+    /// Parses the contents of a `.splitmeta` section from `reader`.
+    ///
+    /// `e` is the endianness used to decode every integer. `is_64` selects the element
+    /// width of the `VIRT` chunk: `u64` when true, `u32` (widened to `u64`) otherwise;
+    /// trailing bytes that do not form a whole element are ignored. Chunks with an
+    /// unrecognized magic are skipped, and input that ends where the next chunk magic
+    /// would be read terminates parsing normally.
+    ///
+    /// # Errors
+    ///
+    /// - [`io::ErrorKind::InvalidData`] if the data does not start with `SPMD`, a `GENR`
+    ///   or `MODN` chunk is not valid UTF-8, or a `MODI` chunk is not exactly 4 bytes.
+    /// - [`io::ErrorKind::UnexpectedEof`] if the input ends inside a chunk's size or data.
+    /// - Any other error returned by `reader`.
+    pub fn from_reader<E, R>(reader: &mut R, e: E, is_64: bool) -> io::Result<Self>
+    where
+        E: Endian,
+        R: Read + ?Sized,
+    {
+        let mut magic = [0; 4];
+        reader.read_exact(&mut magic)?;
+        if magic != SPLIT_META_MAGIC {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid split metadata magic"));
+        }
+        let mut result = SplitMeta::default();
+        loop {
+            let mut magic = [0; 4];
+            match reader.read_exact(&mut magic) {
+                Ok(()) => {}
+                // No further chunk magic available: the metadata ends here.
+                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
+                Err(err) => return Err(err),
+            }
+            let mut size_bytes = [0; 4];
+            reader.read_exact(&mut size_bytes)?;
+            let data = Self::read_chunk_data(reader, e.read_u32_bytes(size_bytes))?;
+            match magic {
+                GENERATOR_MAGIC => result.generator = Some(Self::chunk_string(data)?),
+                MODULE_NAME_MAGIC => result.module_name = Some(Self::chunk_string(data)?),
+                MODULE_ID_MAGIC => {
+                    let bytes: [u8; 4] = data.as_slice().try_into().map_err(|_| {
+                        io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size")
+                    })?;
+                    result.module_id = Some(e.read_u32_bytes(bytes));
+                }
+                VIRTUAL_ADDRESS_MAGIC => {
+                    let addresses: Vec<u64> = if is_64 {
+                        data.chunks_exact(8)
+                            .map(|chunk| {
+                                e.read_u64_bytes(
+                                    chunk.try_into().expect("chunks_exact(8) yields 8 bytes"),
+                                )
+                            })
+                            .collect()
+                    } else {
+                        data.chunks_exact(4)
+                            .map(|chunk| {
+                                u64::from(e.read_u32_bytes(
+                                    chunk.try_into().expect("chunks_exact(4) yields 4 bytes"),
+                                ))
+                            })
+                            .collect()
+                    };
+                    result.virtual_addresses = Some(addresses);
+                }
+                _ => {
+                    // Ignore unknown sections
+                }
+            }
+        }
+        Ok(result)
+    }
+
+    /// Reads exactly `size` bytes of chunk data.
+    ///
+    /// The bytes are pulled through [`Read::take`] so the buffer only grows as data
+    /// actually arrives; a corrupt size field cannot trigger a huge up-front allocation.
+    fn read_chunk_data<R>(reader: &mut R, size: u32) -> io::Result<Vec<u8>>
+    where
+        R: Read + ?Sized,
+    {
+        let expected = u64::from(size);
+        let mut data = Vec::new();
+        Read::take(&mut *reader, expected).read_to_end(&mut data)?;
+        if data.len() as u64 != expected {
+            return Err(io::Error::new(
+                io::ErrorKind::UnexpectedEof,
+                "Truncated split metadata chunk",
+            ));
+        }
+        Ok(data)
+    }
+
+    /// Interprets chunk data as a UTF-8 string.
+    fn chunk_string(data: Vec<u8>) -> io::Result<String> {
+        String::from_utf8(data).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
+    }
+
+    /// Writes a chunk header: the 4-byte magic followed by the data size as `u32`.
+    ///
+    /// Fails with [`io::ErrorKind::InvalidInput`] before writing anything when `len`
+    /// does not fit the 32-bit size field.
+    fn write_chunk_header<E, W>(writer: &mut W, e: E, magic: [u8; 4], len: usize) -> io::Result<()>
+    where
+        E: Endian,
+        W: Write + ?Sized,
+    {
+        let size = u32::try_from(len).map_err(|_| {
+            io::Error::new(io::ErrorKind::InvalidInput, "Split metadata chunk is too large")
+        })?;
+        writer.write_all(&magic)?;
+        writer.write_all(&e.write_u32_bytes(size))
+    }
+
+    /// Serializes the metadata in `.splitmeta` section format.
+    ///
+    /// Writes the `SPMD` magic followed by one chunk per field that is `Some`. `e` is the
+    /// endianness used to encode every integer and `is_64` selects whether virtual
+    /// addresses are written as `u64` or `u32` values.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`io::ErrorKind::InvalidInput`] if a chunk's data would exceed the 32-bit
+    /// size field, and propagates any error from `writer`.
+    pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> io::Result<()>
+    where
+        E: Endian,
+        W: Write + ?Sized,
+    {
+        writer.write_all(&SPLIT_META_MAGIC)?;
+        if let Some(generator) = &self.generator {
+            Self::write_chunk_header(writer, e, GENERATOR_MAGIC, generator.len())?;
+            writer.write_all(generator.as_bytes())?;
+        }
+        if let Some(module_name) = &self.module_name {
+            Self::write_chunk_header(writer, e, MODULE_NAME_MAGIC, module_name.len())?;
+            writer.write_all(module_name.as_bytes())?;
+        }
+        if let Some(module_id) = self.module_id {
+            Self::write_chunk_header(writer, e, MODULE_ID_MAGIC, 4)?;
+            writer.write_all(&e.write_u32_bytes(module_id))?;
+        }
+        if let Some(virtual_addresses) = &self.virtual_addresses {
+            let element_size: usize = if is_64 { 8 } else { 4 };
+            // Checked so an oversized table is reported instead of wrapping the size field.
+            let data_size = virtual_addresses.len().checked_mul(element_size).ok_or_else(|| {
+                io::Error::new(io::ErrorKind::InvalidInput, "Split metadata chunk is too large")
+            })?;
+            Self::write_chunk_header(writer, e, VIRTUAL_ADDRESS_MAGIC, data_size)?;
+            for &addr in virtual_addresses {
+                if is_64 {
+                    writer.write_all(&e.write_u64_bytes(addr))?;
+                } else {
+                    // NOTE(review): keeps only the low 32 bits; presumably addresses of
+                    // 32-bit objects always fit -- confirm against callers.
+                    writer.write_all(&e.write_u32_bytes(addr as u32))?;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Returns the number of bytes [`SplitMeta::to_writer`] emits for the same `is_64`.
+    ///
+    /// Counts the 4-byte leading magic plus, for every field that is `Some`, an 8-byte
+    /// chunk header and the chunk's data.
+    pub fn write_size(&self, is_64: bool) -> usize {
+        let mut size = 4;
+        if let Some(generator) = self.generator.as_deref() {
+            size += 8 + generator.len();
+        }
+        if let Some(module_name) = self.module_name.as_deref() {
+            size += 8 + module_name.len();
+        }
+        if self.module_id.is_some() {
+            size += 12;
+        }
+        if let Some(virtual_addresses) = self.virtual_addresses.as_deref() {
+            size += 8 + if is_64 { 8 } else { 4 } * virtual_addresses.len();
+        }
+        size
+    }
+}