Make objdiff-core no_std + huge WASM rework

2025-02-07 00:10:49 -07:00
parent d938988d43
commit e8de35b78e
49 changed files with 1463 additions and 1046 deletions
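The pattern running through the diffs below: collections, strings, and formatting move from std to their alloc/core equivalents, and anything that actually needs the OS (file I/O, mmap, timestamps) sits behind a "std" cargo feature. A minimal sketch of that crate setup, with illustrative names rather than the crate's actual items:

// lib.rs of a hypothetical crate using the same setup.
#![cfg_attr(not(feature = "std"), no_std)]

extern crate alloc;

use alloc::{collections::BTreeMap, format, string::String, vec::Vec};

/// Uses only core + alloc, so it compiles in no_std builds.
pub fn describe(map: &BTreeMap<String, u32>) -> Vec<String> {
    map.iter().map(|(k, v)| format!("{k} = {v}")).collect()
}

/// OS access only exists when the "std" feature is enabled, so no_std
/// consumers never link std at all.
#[cfg(feature = "std")]
pub fn read_file(path: &std::path::Path) -> std::io::Result<Vec<u8>> {
    std::fs::read(path)
}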

View File

@@ -1,9 +1,9 @@
pub mod read;
pub mod split_meta;
use std::{borrow::Cow, collections::BTreeMap, fmt, path::PathBuf};
use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, string::String, vec::Vec};
use core::fmt;
use filetime::FileTime;
use flagset::{flags, FlagSet};
use object::RelocationFlags;
use split_meta::SplitMeta;
@@ -152,8 +152,9 @@ pub struct ObjSymbol {
pub struct ObjInfo {
pub arch: Box<dyn ObjArch>,
pub path: Option<PathBuf>,
pub timestamp: Option<FileTime>,
pub path: Option<String>,
#[cfg(feature = "std")]
pub timestamp: Option<filetime::FileTime>,
pub sections: Vec<ObjSection>,
/// Common BSS symbols
pub common: Vec<ObjSymbol>,

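ObjInfo now stores its path as a String (PathBuf lives in std) and keeps the timestamp only in std builds. Because #[cfg] is also accepted on struct-literal fields, a single constructor expression covers both configurations, which is how the parse() change further down fills the struct. A small sketch of the idea, inside a crate set up as in the sketch above (hypothetical type; u64 stands in for filetime::FileTime):

use alloc::string::String;

pub struct Record {
    pub path: Option<String>,
    // Only present in std builds; the crate's real field holds a filetime::FileTime.
    #[cfg(feature = "std")]
    pub timestamp: Option<u64>,
}

pub fn empty_record() -> Record {
    Record {
        path: None,
        // #[cfg] on a struct-literal field is stable, so no second
        // constructor is needed for no_std builds.
        #[cfg(feature = "std")]
        timestamp: None,
    }
}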
View File

@@ -1,13 +1,12 @@
use std::{
collections::{HashMap, HashSet},
fs,
io::Cursor,
mem::size_of,
path::Path,
use alloc::{
collections::{BTreeMap, BTreeSet},
format,
string::{String, ToString},
vec,
vec::Vec,
};
use anyhow::{anyhow, bail, ensure, Context, Result};
use filetime::FileTime;
use flagset::Flags;
use object::{
endian::LittleEndian as LE,
@@ -160,7 +159,7 @@ fn symbols_by_section(
section: &ObjSection,
section_symbols: &[Symbol<'_, '_>],
split_meta: Option<&SplitMeta>,
name_counts: &mut HashMap<String, u32>,
name_counts: &mut BTreeMap<String, u32>,
) -> Result<Vec<ObjSymbol>> {
let mut result = Vec::<ObjSymbol>::new();
for symbol in section_symbols {
@@ -377,33 +376,37 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
// DWARF 1.1
if let Some(section) = obj_file.section_by_name(".line") {
let data = section.uncompressed_data()?;
let mut reader = Cursor::new(data.as_ref());
let mut reader: &[u8] = data.as_ref();
let mut text_sections = obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
while reader.position() < data.len() as u64 {
while !reader.is_empty() {
let text_section_index = text_sections
.next()
.ok_or_else(|| anyhow!("Next text section not found for line info"))?
.index()
.0;
let start = reader.position();
let size = read_u32(obj_file, &mut reader)?;
let base_address = read_u32(obj_file, &mut reader)? as u64;
let mut section_data = &reader[..];
let size = read_u32(obj_file, &mut section_data)? as usize;
if size > reader.len() {
bail!("Line info size {size} exceeds remaining size {}", reader.len());
}
(section_data, reader) = reader.split_at(size);
// Skip the size field that was already read from the peek above
section_data = &section_data[4..];
let base_address = read_u32(obj_file, &mut section_data)? as u64;
let Some(out_section) =
sections.iter_mut().find(|s| s.orig_index == text_section_index)
else {
// Skip line info for sections we filtered out
reader.set_position(start + size as u64);
continue;
};
let end = start + size as u64;
while reader.position() < end {
let line_number = read_u32(obj_file, &mut reader)?;
let statement_pos = read_u16(obj_file, &mut reader)?;
while !section_data.is_empty() {
let line_number = read_u32(obj_file, &mut section_data)?;
let statement_pos = read_u16(obj_file, &mut section_data)?;
if statement_pos != 0xFFFF {
log::warn!("Unhandled statement pos {}", statement_pos);
}
let address_delta = read_u32(obj_file, &mut reader)? as u64;
let address_delta = read_u32(obj_file, &mut section_data)? as u64;
out_section.line_info.insert(base_address + address_delta, line_number);
log::debug!("Line: {:#x} -> {}", base_address + address_delta, line_number);
}
@@ -413,22 +416,24 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
// DWARF 2+
#[cfg(feature = "dwarf")]
{
fn gimli_error(e: gimli::Error) -> anyhow::Error { anyhow::anyhow!("DWARF error: {e:?}") }
let dwarf_cow = gimli::DwarfSections::load(|id| {
Ok::<_, gimli::Error>(
obj_file
.section_by_name(id.name())
.and_then(|section| section.uncompressed_data().ok())
.unwrap_or(std::borrow::Cow::Borrowed(&[][..])),
.unwrap_or(alloc::borrow::Cow::Borrowed(&[][..])),
)
})?;
})
.map_err(gimli_error)?;
let endian = match obj_file.endianness() {
object::Endianness::Little => gimli::RunTimeEndian::Little,
object::Endianness::Big => gimli::RunTimeEndian::Big,
};
let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
let mut iter = dwarf.units();
if let Some(header) = iter.next()? {
let unit = dwarf.unit(header)?;
if let Some(header) = iter.next().map_err(gimli_error)? {
let unit = dwarf.unit(header).map_err(gimli_error)?;
if let Some(program) = unit.line_program.clone() {
let mut text_sections =
obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
@@ -438,7 +443,7 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
.map(|s| &mut s.line_info);
let mut rows = program.rows();
while let Some((_header, row)) = rows.next_row()? {
while let Some((_header, row)) = rows.next_row().map_err(gimli_error)? {
if let (Some(line), Some(lines)) = (row.line(), &mut lines) {
lines.insert(row.address(), line.get() as u32);
}
@@ -453,7 +458,7 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
}
}
}
if iter.next()?.is_some() {
if iter.next().map_err(gimli_error)?.is_some() {
log::warn!("Multiple units found in DWARF data, only processing the first");
}
}
@@ -638,7 +643,7 @@ fn combine_sections(section: ObjSection, combine: ObjSection) -> Result<ObjSecti
}
fn combine_data_sections(sections: &mut Vec<ObjSection>) -> Result<()> {
let names_to_combine: HashSet<_> = sections
let names_to_combine: BTreeSet<_> = sections
.iter()
.filter(|s| s.kind == ObjSectionKind::Data)
.map(|s| s.name.clone())
@@ -677,14 +682,15 @@ fn combine_data_sections(sections: &mut Vec<ObjSection>) -> Result<()> {
Ok(())
}
pub fn read(obj_path: &Path, config: &DiffObjConfig) -> Result<ObjInfo> {
#[cfg(feature = "std")]
pub fn read(obj_path: &std::path::Path, config: &DiffObjConfig) -> Result<ObjInfo> {
let (data, timestamp) = {
let file = fs::File::open(obj_path)?;
let timestamp = FileTime::from_last_modification_time(&file.metadata()?);
let file = std::fs::File::open(obj_path)?;
let timestamp = filetime::FileTime::from_last_modification_time(&file.metadata()?);
(unsafe { memmap2::Mmap::map(&file) }?, timestamp)
};
let mut obj = parse(&data, config)?;
obj.path = Some(obj_path.to_owned());
obj.path = Some(obj_path.to_string_lossy().into_owned());
obj.timestamp = Some(timestamp);
Ok(obj)
}
@@ -710,7 +716,7 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<ObjInfo> {
}
let mut sections = filter_sections(&obj_file, split_meta.as_ref())?;
let mut section_name_counts: HashMap<String, u32> = HashMap::new();
let mut section_name_counts: BTreeMap<String, u32> = BTreeMap::new();
for section in &mut sections {
section.symbols = symbols_by_section(
arch.as_ref(),
@@ -733,12 +739,21 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<ObjInfo> {
}
line_info(&obj_file, &mut sections, data)?;
let common = common_symbols(arch.as_ref(), &obj_file, split_meta.as_ref())?;
Ok(ObjInfo { arch, path: None, timestamp: None, sections, common, split_meta })
Ok(ObjInfo {
arch,
path: None,
#[cfg(feature = "std")]
timestamp: None,
sections,
common,
split_meta,
})
}
pub fn has_function(obj_path: &Path, symbol_name: &str) -> Result<bool> {
#[cfg(feature = "std")]
pub fn has_function(obj_path: &std::path::Path, symbol_name: &str) -> Result<bool> {
let data = {
let file = fs::File::open(obj_path)?;
let file = std::fs::File::open(obj_path)?;
unsafe { memmap2::Mmap::map(&file) }?
};
Ok(File::parse(&*data)?

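The DWARF 1.1 loop above also drops std::io::Cursor, since core has no io module: the reader is just a &[u8] that is re-sliced as values are consumed, and each length-prefixed block is carved off with split_at. A standalone sketch of that cursor-free style, with a little-endian read_u32 standing in for the crate's endian-aware helper:

use anyhow::{bail, Result};

/// Reads a u32 from the front of `data` and advances the slice past it.
fn read_u32(data: &mut &[u8]) -> Result<u32> {
    if data.len() < 4 {
        bail!("unexpected end of data");
    }
    let (head, rest) = data.split_at(4);
    *data = rest;
    Ok(u32::from_le_bytes(head.try_into().unwrap()))
}

/// Walks length-prefixed blocks: peek the size, bounds-check it, split the
/// whole block off, then parse inside the block while the outer slice already
/// points at the next one.
fn walk_blocks(mut reader: &[u8]) -> Result<()> {
    while !reader.is_empty() {
        // Slices are Copy, so this is a cheap second cursor for peeking.
        let mut peek = reader;
        let size = read_u32(&mut peek)? as usize;
        if size < 8 || size > reader.len() {
            bail!("block size {size} out of range (remaining {})", reader.len());
        }
        // `size` counts from the start of the size field itself, so splitting
        // `reader` there yields this block and leaves `reader` at the next one.
        let (block, rest) = reader.split_at(size);
        reader = rest;
        // Skip the 4-byte size prefix, then read the block's base address.
        let mut body = &block[4..];
        let _base_address = read_u32(&mut body)? as u64;
        // ... per-entry parsing would continue on `body` here ...
    }
    Ok(())
}

The HashMap/HashSet to BTreeMap/BTreeSet swap in the same file follows from the same constraint: the B-tree collections live in alloc and need no hasher.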
View File

@@ -1,5 +1,6 @@
use std::{io, io::Write};
use alloc::{string::String, vec, vec::Vec};
use anyhow::{anyhow, Result};
use object::{elf::SHT_NOTE, Endian, ObjectSection};
pub const SPLITMETA_SECTION: &str = ".note.split";
@@ -27,10 +28,10 @@ const NT_SPLIT_MODULE_ID: u32 = u32::from_be_bytes(*b"MODI");
const NT_SPLIT_VIRTUAL_ADDRESSES: u32 = u32::from_be_bytes(*b"VIRT");
impl SplitMeta {
pub fn from_section<E>(section: object::Section, e: E, is_64: bool) -> io::Result<Self>
pub fn from_section<E>(section: object::Section, e: E, is_64: bool) -> Result<Self>
where E: Endian {
let mut result = SplitMeta::default();
let data = section.uncompressed_data().map_err(object_io_error)?;
let data = section.uncompressed_data().map_err(object_error)?;
let mut iter = NoteIterator::new(data.as_ref(), section.align(), e, is_64)?;
while let Some(note) = iter.next(e)? {
if note.name != ELF_NOTE_SPLIT {
@@ -39,19 +40,18 @@ impl SplitMeta {
match note.n_type {
NT_SPLIT_GENERATOR => {
let string = String::from_utf8(note.desc.to_vec())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
.map_err(|e| anyhow::Error::from(e))?;
result.generator = Some(string);
}
NT_SPLIT_MODULE_NAME => {
let string = String::from_utf8(note.desc.to_vec())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
.map_err(|e| anyhow::Error::from(e))?;
result.module_name = Some(string);
}
NT_SPLIT_MODULE_ID => {
result.module_id =
Some(e.read_u32_bytes(note.desc.try_into().map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size")
})?));
result.module_id = Some(e.read_u32_bytes(
note.desc.try_into().map_err(|_| anyhow!("Invalid module ID size"))?,
));
}
NT_SPLIT_VIRTUAL_ADDRESSES => {
let vec = if is_64 {
@@ -79,10 +79,11 @@ impl SplitMeta {
Ok(result)
}
pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> io::Result<()>
#[cfg(feature = "std")]
pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> std::io::Result<()>
where
E: Endian,
W: Write + ?Sized,
W: std::io::Write + ?Sized,
{
if let Some(generator) = &self.generator {
write_note_header(writer, e, NT_SPLIT_GENERATOR, generator.len())?;
@@ -137,10 +138,9 @@ impl SplitMeta {
}
}
/// Convert an object::read::Error to an io::Error.
fn object_io_error(err: object::read::Error) -> io::Error {
io::Error::new(io::ErrorKind::InvalidData, err)
}
/// Convert an object::read::Error to an anyhow::Error.
#[inline]
fn object_error(err: object::read::Error) -> anyhow::Error { anyhow::Error::new(err) }
/// An ELF note entry.
struct Note<'data> {
@@ -161,27 +161,27 @@ where E: Endian
impl<'data, E> NoteIterator<'data, E>
where E: Endian
{
fn new(data: &'data [u8], align: u64, e: E, is_64: bool) -> io::Result<Self> {
fn new(data: &'data [u8], align: u64, e: E, is_64: bool) -> Result<Self> {
Ok(if is_64 {
NoteIterator::B64(
object::read::elf::NoteIterator::new(e, align, data).map_err(object_io_error)?,
object::read::elf::NoteIterator::new(e, align, data).map_err(object_error)?,
)
} else {
NoteIterator::B32(
object::read::elf::NoteIterator::new(e, align as u32, data)
.map_err(object_io_error)?,
.map_err(object_error)?,
)
})
}
fn next(&mut self, e: E) -> io::Result<Option<Note<'data>>> {
fn next(&mut self, e: E) -> Result<Option<Note<'data>>> {
match self {
NoteIterator::B32(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note {
NoteIterator::B32(iter) => Ok(iter.next().map_err(object_error)?.map(|note| Note {
n_type: note.n_type(e),
name: note.name(),
desc: note.desc(),
})),
NoteIterator::B64(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note {
NoteIterator::B64(iter) => Ok(iter.next().map_err(object_error)?.map(|note| Note {
n_type: note.n_type(e),
name: note.name(),
desc: note.desc(),
@@ -192,7 +192,8 @@ where E: Endian
fn align_size_to_4(size: usize) -> usize { (size + 3) & !3 }
fn align_data_to_4<W: Write + ?Sized>(writer: &mut W, len: usize) -> io::Result<()> {
#[cfg(feature = "std")]
fn align_data_to_4<W: std::io::Write + ?Sized>(writer: &mut W, len: usize) -> std::io::Result<()> {
const ALIGN_BYTES: &[u8] = &[0; 4];
if len % 4 != 0 {
writer.write_all(&ALIGN_BYTES[..4 - len % 4])?;
@@ -208,10 +209,11 @@ fn align_data_to_4<W: Write + ?Sized>(writer: &mut W, len: usize) -> io::Result<
// Desc | variable size, padded to a 4 byte boundary
const NOTE_HEADER_SIZE: usize = 12 + ((ELF_NOTE_SPLIT.len() + 4) & !3);
fn write_note_header<E, W>(writer: &mut W, e: E, kind: u32, desc_len: usize) -> io::Result<()>
#[cfg(feature = "std")]
fn write_note_header<E, W>(writer: &mut W, e: E, kind: u32, desc_len: usize) -> std::io::Result<()>
where
E: Endian,
W: Write + ?Sized,
W: std::io::Write + ?Sized,
{
writer.write_all(&e.write_u32_bytes(ELF_NOTE_SPLIT.len() as u32 + 1))?; // Name Size
writer.write_all(&e.write_u32_bytes(desc_len as u32))?; // Desc Size
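The split_meta changes follow the same division: the read path reports errors through anyhow, which supports no_std once its default "std" feature is disabled, while serialization keeps std::io::Write and is simply compiled out of no_std builds. A compressed sketch of that read/write split, with hypothetical names and fixed little-endian byte order in place of the Endian parameter:

use alloc::string::String;
use anyhow::{bail, Result};

pub struct Meta {
    pub generator: Option<String>,
    pub module_id: Option<u32>,
}

impl Meta {
    /// no_std-friendly parsing: input is a plain byte slice, errors are anyhow::Error.
    pub fn parse_module_id(desc: &[u8]) -> Result<u32> {
        if desc.len() != 4 {
            bail!("Invalid module ID size");
        }
        Ok(u32::from_le_bytes(desc.try_into().unwrap()))
    }

    /// std-only serialization: std::io::Write has no core/alloc equivalent.
    #[cfg(feature = "std")]
    pub fn to_writer<W: std::io::Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
        if let Some(id) = self.module_id {
            writer.write_all(&id.to_le_bytes())?;
        }
        Ok(())
    }
}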