Make objdiff-core no_std + huge WASM rework

2025-02-07 00:10:49 -07:00
parent d938988d43
commit e8de35b78e
49 changed files with 1463 additions and 1046 deletions
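The pattern running through the diffs below: collections, strings, and formatting move from std to their alloc/core equivalents, and anything that actually needs the OS (file I/O, mmap, timestamps) sits behind a "std" cargo feature. A minimal sketch of that crate setup, with illustrative names rather than the crate's actual items:

// lib.rs of a hypothetical crate using the same setup.
#![cfg_attr(not(feature = "std"), no_std)]

extern crate alloc;

use alloc::{collections::BTreeMap, format, string::String, vec::Vec};

/// Uses only core + alloc, so it compiles in no_std builds.
pub fn describe(map: &BTreeMap<String, u32>) -> Vec<String> {
    map.iter().map(|(k, v)| format!("{k} = {v}")).collect()
}

/// OS access only exists when the "std" feature is enabled, so no_std
/// consumers never link std at all.
#[cfg(feature = "std")]
pub fn read_file(path: &std::path::Path) -> std::io::Result<Vec<u8>> {
    std::fs::read(path)
}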

View File

@@ -1,9 +1,9 @@
pub mod read;
pub mod split_meta;
use std::{borrow::Cow, collections::BTreeMap, fmt, path::PathBuf};
use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, string::String, vec::Vec};
use core::fmt;
use filetime::FileTime;
use flagset::{flags, FlagSet};
use object::RelocationFlags;
use split_meta::SplitMeta;
@@ -152,8 +152,9 @@ pub struct ObjSymbol {
pub struct ObjInfo {
pub arch: Box<dyn ObjArch>,
pub path: Option<PathBuf>,
pub timestamp: Option<FileTime>,
pub path: Option<String>,
#[cfg(feature = "std")]
pub timestamp: Option<filetime::FileTime>,
pub sections: Vec<ObjSection>,
/// Common BSS symbols
pub common: Vec<ObjSymbol>,

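ObjInfo now stores its path as a String (PathBuf lives in std) and keeps the timestamp only in std builds. Because #[cfg] is also accepted on struct-literal fields, a single constructor expression covers both configurations, which is how the parse() change further down fills the struct. A small sketch of the idea, inside a crate set up as in the sketch above (hypothetical type; u64 stands in for filetime::FileTime):

use alloc::string::String;

pub struct Record {
    pub path: Option<String>,
    // Only present in std builds; the crate's real field holds a filetime::FileTime.
    #[cfg(feature = "std")]
    pub timestamp: Option<u64>,
}

pub fn empty_record() -> Record {
    Record {
        path: None,
        // #[cfg] on a struct-literal field is stable, so no second
        // constructor is needed for no_std builds.
        #[cfg(feature = "std")]
        timestamp: None,
    }
}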
View File

@@ -1,13 +1,12 @@
use std::{
collections::{HashMap, HashSet},
fs,
io::Cursor,
mem::size_of,
path::Path,
use alloc::{
collections::{BTreeMap, BTreeSet},
format,
string::{String, ToString},
vec,
vec::Vec,
};
use anyhow::{anyhow, bail, ensure, Context, Result};
use filetime::FileTime;
use flagset::Flags;
use object::{
endian::LittleEndian as LE,
@@ -160,7 +159,7 @@ fn symbols_by_section(
section: &ObjSection,
section_symbols: &[Symbol<'_, '_>],
split_meta: Option<&SplitMeta>,
name_counts: &mut HashMap<String, u32>,
name_counts: &mut BTreeMap<String, u32>,
) -> Result<Vec<ObjSymbol>> {
let mut result = Vec::<ObjSymbol>::new();
for symbol in section_symbols {
@@ -377,33 +376,37 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
// DWARF 1.1
if let Some(section) = obj_file.section_by_name(".line") {
let data = section.uncompressed_data()?;
let mut reader = Cursor::new(data.as_ref());
let mut reader: &[u8] = data.as_ref();
let mut text_sections = obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
while reader.position() < data.len() as u64 {
while !reader.is_empty() {
let text_section_index = text_sections
.next()
.ok_or_else(|| anyhow!("Next text section not found for line info"))?
.index()
.0;
let start = reader.position();
let size = read_u32(obj_file, &mut reader)?;
let base_address = read_u32(obj_file, &mut reader)? as u64;
let mut section_data = &reader[..];
let size = read_u32(obj_file, &mut section_data)? as usize;
if size > reader.len() {
bail!("Line info size {size} exceeds remaining size {}", reader.len());
}
(section_data, reader) = reader.split_at(size);
// Skip the size field that was already read from the peek above
section_data = &section_data[4..];
let base_address = read_u32(obj_file, &mut section_data)? as u64;
let Some(out_section) =
sections.iter_mut().find(|s| s.orig_index == text_section_index)
else {
// Skip line info for sections we filtered out
reader.set_position(start + size as u64);
continue;
};
let end = start + size as u64;
while reader.position() < end {
let line_number = read_u32(obj_file, &mut reader)?;
let statement_pos = read_u16(obj_file, &mut reader)?;
while !section_data.is_empty() {
let line_number = read_u32(obj_file, &mut section_data)?;
let statement_pos = read_u16(obj_file, &mut section_data)?;
if statement_pos != 0xFFFF {
log::warn!("Unhandled statement pos {}", statement_pos);
}
let address_delta = read_u32(obj_file, &mut reader)? as u64;
let address_delta = read_u32(obj_file, &mut section_data)? as u64;
out_section.line_info.insert(base_address + address_delta, line_number);
log::debug!("Line: {:#x} -> {}", base_address + address_delta, line_number);
}
@@ -413,22 +416,24 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
// DWARF 2+
#[cfg(feature = "dwarf")]
{
fn gimli_error(e: gimli::Error) -> anyhow::Error { anyhow::anyhow!("DWARF error: {e:?}") }
let dwarf_cow = gimli::DwarfSections::load(|id| {
Ok::<_, gimli::Error>(
obj_file
.section_by_name(id.name())
.and_then(|section| section.uncompressed_data().ok())
.unwrap_or(std::borrow::Cow::Borrowed(&[][..])),
.unwrap_or(alloc::borrow::Cow::Borrowed(&[][..])),
)
})?;
})
.map_err(gimli_error)?;
let endian = match obj_file.endianness() {
object::Endianness::Little => gimli::RunTimeEndian::Little,
object::Endianness::Big => gimli::RunTimeEndian::Big,
};
let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
let mut iter = dwarf.units();
if let Some(header) = iter.next()? {
let unit = dwarf.unit(header)?;
if let Some(header) = iter.next().map_err(gimli_error)? {
let unit = dwarf.unit(header).map_err(gimli_error)?;
if let Some(program) = unit.line_program.clone() {
let mut text_sections =
obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
@@ -438,7 +443,7 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
.map(|s| &mut s.line_info);
let mut rows = program.rows();
while let Some((_header, row)) = rows.next_row()? {
while let Some((_header, row)) = rows.next_row().map_err(gimli_error)? {
if let (Some(line), Some(lines)) = (row.line(), &mut lines) {
lines.insert(row.address(), line.get() as u32);
}
@@ -453,7 +458,7 @@ fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection], obj_data: &[u8])
}
}
}
if iter.next()?.is_some() {
if iter.next().map_err(gimli_error)?.is_some() {
log::warn!("Multiple units found in DWARF data, only processing the first");
}
}
@@ -638,7 +643,7 @@ fn combine_sections(section: ObjSection, combine: ObjSection) -> Result<ObjSecti
}
fn combine_data_sections(sections: &mut Vec<ObjSection>) -> Result<()> {
let names_to_combine: HashSet<_> = sections
let names_to_combine: BTreeSet<_> = sections
.iter()
.filter(|s| s.kind == ObjSectionKind::Data)
.map(|s| s.name.clone())
@@ -677,14 +682,15 @@ fn combine_data_sections(sections: &mut Vec<ObjSection>) -> Result<()> {
Ok(())
}
pub fn read(obj_path: &Path, config: &DiffObjConfig) -> Result<ObjInfo> {
#[cfg(feature = "std")]
pub fn read(obj_path: &std::path::Path, config: &DiffObjConfig) -> Result<ObjInfo> {
let (data, timestamp) = {
let file = fs::File::open(obj_path)?;
let timestamp = FileTime::from_last_modification_time(&file.metadata()?);
let file = std::fs::File::open(obj_path)?;
let timestamp = filetime::FileTime::from_last_modification_time(&file.metadata()?);
(unsafe { memmap2::Mmap::map(&file) }?, timestamp)
};
let mut obj = parse(&data, config)?;
obj.path = Some(obj_path.to_owned());
obj.path = Some(obj_path.to_string_lossy().into_owned());
obj.timestamp = Some(timestamp);
Ok(obj)
}
@@ -710,7 +716,7 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<ObjInfo> {
}
let mut sections = filter_sections(&obj_file, split_meta.as_ref())?;
let mut section_name_counts: HashMap<String, u32> = HashMap::new();
let mut section_name_counts: BTreeMap<String, u32> = BTreeMap::new();
for section in &mut sections {
section.symbols = symbols_by_section(
arch.as_ref(),
@@ -733,12 +739,21 @@ pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<ObjInfo> {
}
line_info(&obj_file, &mut sections, data)?;
let common = common_symbols(arch.as_ref(), &obj_file, split_meta.as_ref())?;
Ok(ObjInfo { arch, path: None, timestamp: None, sections, common, split_meta })
Ok(ObjInfo {
arch,
path: None,
#[cfg(feature = "std")]
timestamp: None,
sections,
common,
split_meta,
})
}
pub fn has_function(obj_path: &Path, symbol_name: &str) -> Result<bool> {
#[cfg(feature = "std")]
pub fn has_function(obj_path: &std::path::Path, symbol_name: &str) -> Result<bool> {
let data = {
let file = fs::File::open(obj_path)?;
let file = std::fs::File::open(obj_path)?;
unsafe { memmap2::Mmap::map(&file) }?
};
Ok(File::parse(&*data)?

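The DWARF 1.1 loop above also drops std::io::Cursor, since core has no io module: the reader is just a &[u8] that is re-sliced as values are consumed, and each length-prefixed block is carved off with split_at. A standalone sketch of that cursor-free style, with a little-endian read_u32 standing in for the crate's endian-aware helper:

use anyhow::{bail, Result};

/// Reads a u32 from the front of `data` and advances the slice past it.
fn read_u32(data: &mut &[u8]) -> Result<u32> {
    if data.len() < 4 {
        bail!("unexpected end of data");
    }
    let (head, rest) = data.split_at(4);
    *data = rest;
    Ok(u32::from_le_bytes(head.try_into().unwrap()))
}

/// Walks length-prefixed blocks: peek the size, bounds-check it, split the
/// whole block off, then parse inside the block while the outer slice already
/// points at the next one.
fn walk_blocks(mut reader: &[u8]) -> Result<()> {
    while !reader.is_empty() {
        // Slices are Copy, so this is a cheap second cursor for peeking.
        let mut peek = reader;
        let size = read_u32(&mut peek)? as usize;
        if size < 8 || size > reader.len() {
            bail!("block size {size} out of range (remaining {})", reader.len());
        }
        // `size` counts from the start of the size field itself, so splitting
        // `reader` there yields this block and leaves `reader` at the next one.
        let (block, rest) = reader.split_at(size);
        reader = rest;
        // Skip the 4-byte size prefix, then read the block's base address.
        let mut body = &block[4..];
        let _base_address = read_u32(&mut body)? as u64;
        // ... per-entry parsing would continue on `body` here ...
    }
    Ok(())
}

The HashMap/HashSet to BTreeMap/BTreeSet swap in the same file follows from the same constraint: the B-tree collections live in alloc and need no hasher.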
View File

@@ -1,5 +1,6 @@
use std::{io, io::Write};
use alloc::{string::String, vec, vec::Vec};
use anyhow::{anyhow, Result};
use object::{elf::SHT_NOTE, Endian, ObjectSection};
pub const SPLITMETA_SECTION: &str = ".note.split";
@@ -27,10 +28,10 @@ const NT_SPLIT_MODULE_ID: u32 = u32::from_be_bytes(*b"MODI");
const NT_SPLIT_VIRTUAL_ADDRESSES: u32 = u32::from_be_bytes(*b"VIRT");
impl SplitMeta {
pub fn from_section<E>(section: object::Section, e: E, is_64: bool) -> io::Result<Self>
pub fn from_section<E>(section: object::Section, e: E, is_64: bool) -> Result<Self>
where E: Endian {
let mut result = SplitMeta::default();
let data = section.uncompressed_data().map_err(object_io_error)?;
let data = section.uncompressed_data().map_err(object_error)?;
let mut iter = NoteIterator::new(data.as_ref(), section.align(), e, is_64)?;
while let Some(note) = iter.next(e)? {
if note.name != ELF_NOTE_SPLIT {
@@ -39,19 +40,18 @@ impl SplitMeta {
match note.n_type {
NT_SPLIT_GENERATOR => {
let string = String::from_utf8(note.desc.to_vec())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
.map_err(|e| anyhow::Error::from(e))?;
result.generator = Some(string);
}
NT_SPLIT_MODULE_NAME => {
let string = String::from_utf8(note.desc.to_vec())
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
.map_err(|e| anyhow::Error::from(e))?;
result.module_name = Some(string);
}
NT_SPLIT_MODULE_ID => {
result.module_id =
Some(e.read_u32_bytes(note.desc.try_into().map_err(|_| {
io::Error::new(io::ErrorKind::InvalidData, "Invalid module ID size")
})?));
result.module_id = Some(e.read_u32_bytes(
note.desc.try_into().map_err(|_| anyhow!("Invalid module ID size"))?,
));
}
NT_SPLIT_VIRTUAL_ADDRESSES => {
let vec = if is_64 {
@@ -79,10 +79,11 @@ impl SplitMeta {
Ok(result)
}
pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> io::Result<()>
#[cfg(feature = "std")]
pub fn to_writer<E, W>(&self, writer: &mut W, e: E, is_64: bool) -> std::io::Result<()>
where
E: Endian,
W: Write + ?Sized,
W: std::io::Write + ?Sized,
{
if let Some(generator) = &self.generator {
write_note_header(writer, e, NT_SPLIT_GENERATOR, generator.len())?;
@@ -137,10 +138,9 @@ impl SplitMeta {
}
}
/// Convert an object::read::Error to an io::Error.
fn object_io_error(err: object::read::Error) -> io::Error {
io::Error::new(io::ErrorKind::InvalidData, err)
}
/// Convert an object::read::Error to an anyhow::Error.
#[inline]
fn object_error(err: object::read::Error) -> anyhow::Error { anyhow::Error::new(err) }
/// An ELF note entry.
struct Note<'data> {
@@ -161,27 +161,27 @@ where E: Endian
impl<'data, E> NoteIterator<'data, E>
where E: Endian
{
fn new(data: &'data [u8], align: u64, e: E, is_64: bool) -> io::Result<Self> {
fn new(data: &'data [u8], align: u64, e: E, is_64: bool) -> Result<Self> {
Ok(if is_64 {
NoteIterator::B64(
object::read::elf::NoteIterator::new(e, align, data).map_err(object_io_error)?,
object::read::elf::NoteIterator::new(e, align, data).map_err(object_error)?,
)
} else {
NoteIterator::B32(
object::read::elf::NoteIterator::new(e, align as u32, data)
.map_err(object_io_error)?,
.map_err(object_error)?,
)
})
}
fn next(&mut self, e: E) -> io::Result<Option<Note<'data>>> {
fn next(&mut self, e: E) -> Result<Option<Note<'data>>> {
match self {
NoteIterator::B32(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note {
NoteIterator::B32(iter) => Ok(iter.next().map_err(object_error)?.map(|note| Note {
n_type: note.n_type(e),
name: note.name(),
desc: note.desc(),
})),
NoteIterator::B64(iter) => Ok(iter.next().map_err(object_io_error)?.map(|note| Note {
NoteIterator::B64(iter) => Ok(iter.next().map_err(object_error)?.map(|note| Note {
n_type: note.n_type(e),
name: note.name(),
desc: note.desc(),
@@ -192,7 +192,8 @@ where E: Endian
fn align_size_to_4(size: usize) -> usize { (size + 3) & !3 }
fn align_data_to_4<W: Write + ?Sized>(writer: &mut W, len: usize) -> io::Result<()> {
#[cfg(feature = "std")]
fn align_data_to_4<W: std::io::Write + ?Sized>(writer: &mut W, len: usize) -> std::io::Result<()> {
const ALIGN_BYTES: &[u8] = &[0; 4];
if len % 4 != 0 {
writer.write_all(&ALIGN_BYTES[..4 - len % 4])?;
@@ -208,10 +209,11 @@ fn align_data_to_4<W: Write + ?Sized>(writer: &mut W, len: usize) -> io::Result<
// Desc | variable size, padded to a 4 byte boundary
const NOTE_HEADER_SIZE: usize = 12 + ((ELF_NOTE_SPLIT.len() + 4) & !3);
fn write_note_header<E, W>(writer: &mut W, e: E, kind: u32, desc_len: usize) -> io::Result<()>
#[cfg(feature = "std")]
fn write_note_header<E, W>(writer: &mut W, e: E, kind: u32, desc_len: usize) -> std::io::Result<()>
where
E: Endian,
W: Write + ?Sized,
W: std::io::Write + ?Sized,
{
writer.write_all(&e.write_u32_bytes(ELF_NOTE_SPLIT.len() as u32 + 1))?; // Name Size
writer.write_all(&e.write_u32_bytes(desc_len as u32))?; // Desc Size
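The split_meta changes follow the same division: the read path reports errors through anyhow, which supports no_std once its default "std" feature is disabled, while serialization keeps std::io::Write and is simply compiled out of no_std builds. A compressed sketch of that read/write split, with hypothetical names and fixed little-endian byte order in place of the Endian parameter:

use alloc::string::String;
use anyhow::{bail, Result};

pub struct Meta {
    pub generator: Option<String>,
    pub module_id: Option<u32>,
}

impl Meta {
    /// no_std-friendly parsing: input is a plain byte slice, errors are anyhow::Error.
    pub fn parse_module_id(desc: &[u8]) -> Result<u32> {
        if desc.len() != 4 {
            bail!("Invalid module ID size");
        }
        Ok(u32::from_le_bytes(desc.try_into().unwrap()))
    }

    /// std-only serialization: std::io::Write has no core/alloc equivalent.
    #[cfg(feature = "std")]
    pub fn to_writer<W: std::io::Write + ?Sized>(&self, writer: &mut W) -> std::io::Result<()> {
        if let Some(id) = self.module_id {
            writer.write_all(&id.to_le_bytes())?;
        }
        Ok(())
    }
}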