Reconstruct "erased" tags from DWARF debugging info (#51)

* Read erased tags

* cargo +nightly fmt

* .filter(...).next() -> find(...)

* Plumb both data and address endians through

* Rename has_erased_parent -> is_erased_root and comment
This commit is contained in:
cadmic 2024-05-15 22:06:30 -07:00 committed by GitHub
parent d3c2e8192c
commit 876b78bfa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 200 additions and 64 deletions

View File

@ -1,6 +1,7 @@
use std::{
collections::{btree_map, BTreeMap},
io::{stdout, Cursor, Read, Write},
ops::Bound::{Excluded, Unbounded},
path::PathBuf,
str::from_utf8,
};
@ -50,6 +51,10 @@ pub struct DumpArgs {
#[argp(switch)]
/// Disable color output.
no_color: bool,
#[argp(switch)]
/// Attempt to reconstruct tags that have been removed by the linker, e.g.
/// tags from unused functions or functions that have been inlined away.
include_erased: bool,
}
pub fn run(args: Args) -> Result<()> {
@ -97,15 +102,15 @@ fn dump(args: DumpArgs) -> Result<()> {
let name = name.rsplit_once('/').map(|(_, b)| b).unwrap_or(&name);
let file_path = out_path.join(format!("{}.txt", name));
let mut file = buf_writer(file_path)?;
dump_debug_section(&mut file, &obj_file, debug_section)?;
dump_debug_section(&args, &mut file, &obj_file, debug_section)?;
file.flush()?;
} else if args.no_color {
println!("\n// File {}:", name);
dump_debug_section(&mut stdout(), &obj_file, debug_section)?;
dump_debug_section(&args, &mut stdout(), &obj_file, debug_section)?;
} else {
let mut writer = HighlightWriter::new(syntax_set.clone(), syntax.clone(), theme);
writeln!(writer, "\n// File {}:", name)?;
dump_debug_section(&mut writer, &obj_file, debug_section)?;
dump_debug_section(&args, &mut writer, &obj_file, debug_section)?;
}
}
} else {
@ -115,19 +120,20 @@ fn dump(args: DumpArgs) -> Result<()> {
.ok_or_else(|| anyhow!("Failed to locate .debug section"))?;
if let Some(out_path) = &args.out {
let mut file = buf_writer(out_path)?;
dump_debug_section(&mut file, &obj_file, debug_section)?;
dump_debug_section(&args, &mut file, &obj_file, debug_section)?;
file.flush()?;
} else if args.no_color {
dump_debug_section(&mut stdout(), &obj_file, debug_section)?;
dump_debug_section(&args, &mut stdout(), &obj_file, debug_section)?;
} else {
let mut writer = HighlightWriter::new(syntax_set, syntax, theme);
dump_debug_section(&mut writer, &obj_file, debug_section)?;
dump_debug_section(&args, &mut writer, &obj_file, debug_section)?;
}
}
Ok(())
}
fn dump_debug_section<W>(
args: &DumpArgs,
w: &mut W,
obj_file: &object::File<'_>,
debug_section: Section,
@ -156,7 +162,7 @@ where
}
let mut reader = Cursor::new(&*data);
let info = read_debug_section(&mut reader, obj_file.endianness().into())?;
let info = read_debug_section(&mut reader, obj_file.endianness().into(), args.include_erased)?;
for (&addr, tag) in &info.tags {
log::debug!("{}: {:?}", addr, tag);
@ -222,41 +228,54 @@ where
}
writeln!(w, "*/")?;
let children = tag.children(&info.tags);
let mut children = tag.children(&info.tags);
// merge in erased tags
let range = match tag.next_sibling(&info.tags) {
Some(next) => (Excluded(tag.key), Excluded(next.key)),
None => (Excluded(tag.key), Unbounded),
};
for (_, child) in info.tags.range(range) {
if child.is_erased_root {
children.push(child);
}
}
children.sort_by_key(|x| x.key);
let mut typedefs = BTreeMap::<u32, Vec<u32>>::new();
for child in children {
let tag_type = match process_cu_tag(&info, child) {
Ok(tag_type) => tag_type,
Err(e) => {
log::error!(
"Failed to process tag {} (unit {}): {}",
"Failed to process tag {:X} (unit {}): {}",
child.key,
unit.name,
e
);
writeln!(
w,
"// ERROR: Failed to process tag {} ({:?})",
"// ERROR: Failed to process tag {:X} ({:?})",
child.key, child.kind
)?;
continue;
}
};
if should_skip_tag(&tag_type) {
if should_skip_tag(&tag_type, child.is_erased) {
continue;
}
match tag_type_string(&info, &typedefs, &tag_type) {
match tag_type_string(&info, &typedefs, &tag_type, child.is_erased) {
Ok(s) => writeln!(w, "{}", s)?,
Err(e) => {
log::error!(
"Failed to emit tag {} (unit {}): {}",
"Failed to emit tag {:X} (unit {}): {}",
child.key,
unit.name,
e
);
writeln!(
w,
"// ERROR: Failed to emit tag {} ({:?})",
"// ERROR: Failed to emit tag {:X} ({:?})",
child.key, child.kind
)?;
continue;

View File

@ -8,7 +8,7 @@ use std::{
use anyhow::{anyhow, bail, ensure, Context, Result};
use indent::indent_all_by;
use num_enum::{IntoPrimitive, TryFromPrimitive};
use num_enum::{IntoPrimitive, TryFromPrimitive, TryFromPrimitiveError};
use crate::{
array_ref,
@ -150,11 +150,21 @@ impl FundType {
FundType::Vec2x32Float => "__vec2x32float__",
})
}
/// Converts a raw 16-bit fundamental type ID into a `FundType`.
///
/// When the high byte of `value` is `0x01` (a form that can appear in erased
/// tags), only the low byte is used for the lookup; otherwise the value is
/// looked up unchanged.
///
/// # Errors
/// Returns the `TryFromPrimitiveError` produced by `try_from` when the
/// resulting ID does not correspond to a known variant.
pub fn parse_int(value: u16) -> Result<Self, TryFromPrimitiveError<Self>> {
    let id = match value >> 8 {
        // Can appear in erased tags: keep only the low byte.
        0x1 => value & 0xFF,
        _ => value,
    };
    Self::try_from(id)
}
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)]
#[repr(u8)]
pub enum Modifier {
MwPointerTo = 0x00, // Used in erased tags
PointerTo = 0x01,
ReferenceTo = 0x02,
Const = 0x03,
@ -162,6 +172,12 @@ pub enum Modifier {
// User types
}
impl Modifier {
    /// Converts a raw modifier byte into a `Modifier`, ignoring its high bit.
    ///
    /// # Errors
    /// Returns the `TryFromPrimitiveError` produced by `try_from` when the
    /// low seven bits do not correspond to a known variant.
    pub fn parse_int(value: u8) -> Result<Self, TryFromPrimitiveError<Self>> {
        // High bit can appear in erased tags, so clear it before the lookup.
        let low_bits = value & 0x7F;
        Self::try_from(low_bits)
    }
}
#[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)]
#[repr(u8)]
pub enum SubscriptFormat {
@ -338,6 +354,8 @@ pub struct Attribute {
/// A single tag read from the DWARF debug section, together with its attributes.
pub struct Tag {
/// Stream position at which the tag starts; also used as its key in the tag map.
pub key: u32,
/// The kind of tag (e.g. padding, subroutine, local variable).
pub kind: TagKind,
/// Tag was deleted but has been reconstructed.
pub is_erased: bool,
/// Tag is erased and is the root of a tree of erased tags.
pub is_erased_root: bool,
/// Attributes read for this tag, in the order they were read.
pub attributes: Vec<Attribute>,
}
@ -420,7 +438,7 @@ impl Tag {
pub fn children<'a>(&self, tags: &'a TagMap) -> Vec<&'a Tag> {
let sibling = self.next_sibling(tags);
let mut children = Vec::new();
let mut child = match self.next_tag(tags) {
let mut child = match self.next_tag(tags, self.is_erased) {
Some(child) => child,
None => return children,
};
@ -446,17 +464,19 @@ impl Tag {
if let Some(key) = self.reference_attribute(AttributeKind::Sibling) {
tags.get(&key)
} else {
self.next_tag(tags)
self.next_tag(tags, self.is_erased)
}
}
/// Returns the next tag sequentially, if any
pub fn next_tag<'a>(&self, tags: &'a TagMap) -> Option<&'a Tag> {
tags.range(self.key + 1..).next().map(|(_, tag)| tag)
/// Returns the next tag sequentially, if any (skipping erased tags unless `include_erased` is true)
pub fn next_tag<'a>(&self, tags: &'a TagMap, include_erased: bool) -> Option<&'a Tag> {
tags.range(self.key + 1..)
.find(|(_, tag)| include_erased || !tag.is_erased)
.map(|(_, tag)| tag)
}
}
pub fn read_debug_section<R>(reader: &mut R, e: Endian) -> Result<DwarfInfo>
pub fn read_debug_section<R>(reader: &mut R, e: Endian, include_erased: bool) -> Result<DwarfInfo>
where R: BufRead + Seek + ?Sized {
let len = {
let old_pos = reader.stream_position()?;
@ -471,8 +491,10 @@ where R: BufRead + Seek + ?Sized {
if position >= len {
break;
}
let tag = read_tag(reader, e)?;
info.tags.insert(position as u32, tag);
let tags = read_tags(reader, e, e, include_erased, false)?;
for tag in tags {
info.tags.insert(tag.key, tag);
}
}
Ok(info)
}
@ -507,29 +529,95 @@ where R: BufRead + Seek + ?Sized {
Ok(())
}
fn read_tag<R>(reader: &mut R, e: Endian) -> Result<Tag>
where R: BufRead + Seek + ?Sized {
fn read_tags<R>(
reader: &mut R,
data_endian: Endian,
addr_endian: Endian,
include_erased: bool,
is_erased: bool,
) -> Result<Vec<Tag>>
where
R: BufRead + Seek + ?Sized,
{
let mut tags = Vec::new();
let position = reader.stream_position()?;
let size = u32::from_reader(reader, e)?;
let size = u32::from_reader(reader, data_endian)?;
if size < 8 {
// Null entry
if size > 4 {
reader.seek(SeekFrom::Current(size as i64 - 4))?;
}
return Ok(Tag { key: position as u32, kind: TagKind::Padding, attributes: vec![] });
tags.push(Tag {
key: position as u32,
kind: TagKind::Padding,
is_erased,
is_erased_root: false,
attributes: Vec::new(),
});
return Ok(tags);
}
let tag_num = u16::from_reader(reader, e)?;
let tag_num = u16::from_reader(reader, data_endian)?;
let tag = TagKind::try_from(tag_num).context("Unknown DWARF tag type")?;
let mut attributes = Vec::new();
if tag == TagKind::Padding {
reader.seek(SeekFrom::Start(position + size as u64))?; // Skip padding
} else {
while reader.stream_position()? < position + size as u64 {
attributes.push(read_attribute(reader, e)?);
if include_erased {
// Erased entries that have become padding are little-endian, and we
// have to guess the length and tag of the first entry. We assume
// the entry is either a variable or a function, and read until we
// find the high_pc attribute. Only MwGlobalRef will follow, and
// these are unlikely to be confused with the length of the next
// entry.
let mut attributes = Vec::new();
let mut is_function = false;
while reader.stream_position()? < position + size as u64 {
// Peek next two bytes
let mut buf = [0u8; 2];
reader.read_exact(&mut buf)?;
let attr_tag = u16::from_reader(&mut Cursor::new(&buf), Endian::Little)?;
reader.seek(SeekFrom::Current(-2))?;
if is_function && attr_tag != AttributeKind::MwGlobalRef as u16 {
break;
}
let attr = read_attribute(reader, Endian::Little, addr_endian)?;
if attr.kind == AttributeKind::HighPc {
is_function = true;
}
attributes.push(attr);
}
let kind = if is_function { TagKind::Subroutine } else { TagKind::LocalVariable };
tags.push(Tag {
key: position as u32,
kind,
is_erased: true,
is_erased_root: true,
attributes,
});
// Read the rest of the tags
while reader.stream_position()? < position + size as u64 {
for tag in read_tags(reader, Endian::Little, addr_endian, include_erased, true)? {
tags.push(tag);
}
}
} else {
reader.seek(SeekFrom::Start(position + size as u64))?; // Skip padding
}
} else {
let mut attributes = Vec::new();
while reader.stream_position()? < position + size as u64 {
attributes.push(read_attribute(reader, data_endian, addr_endian)?);
}
tags.push(Tag {
key: position as u32,
kind: tag,
is_erased,
is_erased_root: false,
attributes,
});
}
Ok(Tag { key: position as u32, kind: tag, attributes })
Ok(tags)
}
// TODO Shift-JIS?
@ -547,29 +635,35 @@ where R: BufRead + ?Sized {
Ok(str)
}
fn read_attribute<R>(reader: &mut R, e: Endian) -> Result<Attribute>
where R: BufRead + Seek + ?Sized {
let attr_type = u16::from_reader(reader, e)?;
fn read_attribute<R>(
reader: &mut R,
data_endian: Endian,
addr_endian: Endian,
) -> Result<Attribute>
where
R: BufRead + Seek + ?Sized,
{
let attr_type = u16::from_reader(reader, data_endian)?;
let attr = AttributeKind::try_from(attr_type).context("Unknown DWARF attribute type")?;
let form = FormKind::try_from(attr_type & FORM_MASK).context("Unknown DWARF form type")?;
let value = match form {
FormKind::Addr => AttributeValue::Address(u32::from_reader(reader, e)?),
FormKind::Ref => AttributeValue::Reference(u32::from_reader(reader, e)?),
FormKind::Addr => AttributeValue::Address(u32::from_reader(reader, addr_endian)?),
FormKind::Ref => AttributeValue::Reference(u32::from_reader(reader, addr_endian)?),
FormKind::Block2 => {
let size = u16::from_reader(reader, e)?;
let size = u16::from_reader(reader, data_endian)?;
let mut data = vec![0u8; size as usize];
reader.read_exact(&mut data)?;
AttributeValue::Block(data)
}
FormKind::Block4 => {
let size = u32::from_reader(reader, e)?;
let size = u32::from_reader(reader, data_endian)?;
let mut data = vec![0u8; size as usize];
reader.read_exact(&mut data)?;
AttributeValue::Block(data)
}
FormKind::Data2 => AttributeValue::Data2(u16::from_reader(reader, e)?),
FormKind::Data4 => AttributeValue::Data4(u32::from_reader(reader, e)?),
FormKind::Data8 => AttributeValue::Data8(u64::from_reader(reader, e)?),
FormKind::Data2 => AttributeValue::Data2(u16::from_reader(reader, data_endian)?),
FormKind::Data4 => AttributeValue::Data4(u32::from_reader(reader, data_endian)?),
FormKind::Data8 => AttributeValue::Data8(u64::from_reader(reader, data_endian)?),
FormKind::String => AttributeValue::String(read_string(reader)?),
};
Ok(Attribute { kind: attr, value })
@ -879,7 +973,9 @@ pub struct Type {
impl Type {
pub fn size(&self, info: &DwarfInfo) -> Result<u32> {
if self.modifiers.iter().any(|m| matches!(m, Modifier::PointerTo | Modifier::ReferenceTo)) {
if self.modifiers.iter().any(|m| {
matches!(m, Modifier::MwPointerTo | Modifier::PointerTo | Modifier::ReferenceTo)
}) {
return Ok(4);
}
match self.kind {
@ -900,7 +996,7 @@ pub fn apply_modifiers(mut str: TypeString, modifiers: &[Modifier]) -> Result<Ty
let mut has_pointer = false;
for &modifier in modifiers.iter().rev() {
match modifier {
Modifier::PointerTo => {
Modifier::MwPointerTo | Modifier::PointerTo => {
if !has_pointer && !str.suffix.is_empty() {
if str.member.is_empty() {
str.prefix.push_str(" (*");
@ -1166,13 +1262,18 @@ fn ptr_to_member_type_string(
})
}
pub fn ud_type_def(info: &DwarfInfo, typedefs: &TypedefMap, t: &UserDefinedType) -> Result<String> {
pub fn ud_type_def(
info: &DwarfInfo,
typedefs: &TypedefMap,
t: &UserDefinedType,
is_erased: bool,
) -> Result<String> {
match t {
UserDefinedType::Array(t) => {
let ts = array_type_string(info, typedefs, t, false)?;
Ok(format!("// Array: {}{}", ts.prefix, ts.suffix))
}
UserDefinedType::Subroutine(t) => Ok(subroutine_def_string(info, typedefs, t)?),
UserDefinedType::Subroutine(t) => Ok(subroutine_def_string(info, typedefs, t, is_erased)?),
UserDefinedType::Structure(t) => Ok(struct_def_string(info, typedefs, t)?),
UserDefinedType::Enumeration(t) => Ok(enum_def_string(t)?),
UserDefinedType::Union(t) => Ok(union_def_string(info, typedefs, t)?),
@ -1233,9 +1334,12 @@ pub fn subroutine_def_string(
info: &DwarfInfo,
typedefs: &TypedefMap,
t: &SubroutineType,
is_erased: bool,
) -> Result<String> {
let mut out = String::new();
if let (Some(start), Some(end)) = (t.start_address, t.end_address) {
if is_erased {
out.push_str("// Erased\n");
} else if let (Some(start), Some(end)) = (t.start_address, t.end_address) {
writeln!(out, "// Range: {:#X} -> {:#X}", start, end)?;
}
let rt = type_string(info, typedefs, &t.return_type, true)?;
@ -1922,9 +2026,9 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result<ArrayType> {
(AttributeKind::Sibling, _) => {}
(AttributeKind::SubscrData, AttributeValue::Block(data)) => {
subscr_data =
Some(process_array_subscript_data(data, info.e).with_context(|| {
format!("Failed to process SubscrData for tag: {:?}", tag)
})?)
Some(process_array_subscript_data(data, info.e, tag.is_erased).with_context(
|| format!("Failed to process SubscrData for tag: {:?}", tag),
)?)
}
(AttributeKind::Ordering, val) => match val {
AttributeValue::Data2(d2) => {
@ -1950,7 +2054,11 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result<ArrayType> {
Ok(ArrayType { element_type: Box::from(element_type), dimensions })
}
fn process_array_subscript_data(data: &[u8], e: Endian) -> Result<(Type, Vec<ArrayDimension>)> {
fn process_array_subscript_data(
data: &[u8],
e: Endian,
is_erased: bool,
) -> Result<(Type, Vec<ArrayDimension>)> {
let mut element_type = None;
let mut dimensions = Vec::new();
let mut data = data;
@ -1990,7 +2098,9 @@ fn process_array_subscript_data(data: &[u8], e: Endian) -> Result<(Type, Vec<Arr
}
SubscriptFormat::ElementType => {
let mut cursor = Cursor::new(data);
let type_attr = read_attribute(&mut cursor, e)?;
// TODO: is this the right endianness to use for erased tags?
let type_attr =
read_attribute(&mut cursor, if is_erased { Endian::Little } else { e }, e)?;
element_type = Some(process_type(&type_attr, e)?);
data = &data[cursor.position() as usize..];
}
@ -2366,7 +2476,10 @@ fn process_subroutine_parameter_tag(info: &DwarfInfo, tag: &Tag) -> Result<Subro
_,
) => kind = Some(process_type(attr, info.e)?),
(AttributeKind::Location, AttributeValue::Block(block)) => {
location = Some(process_variable_location(block, info.e)?)
location = Some(process_variable_location(
block,
if tag.is_erased { Endian::Little } else { info.e },
)?)
}
(AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => {
// TODO?
@ -2416,7 +2529,10 @@ fn process_local_variable_tag(info: &DwarfInfo, tag: &Tag) -> Result<SubroutineV
) => kind = Some(process_type(attr, info.e)?),
(AttributeKind::Location, AttributeValue::Block(block)) => {
if !block.is_empty() {
location = Some(process_variable_location(block, info.e)?);
location = Some(process_variable_location(
block,
if tag.is_erased { Endian::Little } else { info.e },
)?);
}
}
(AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => {
@ -2505,7 +2621,7 @@ pub fn ud_type(info: &DwarfInfo, tag: &Tag) -> Result<UserDefinedType> {
pub fn process_modifiers(block: &[u8]) -> Result<Vec<Modifier>> {
let mut out = Vec::with_capacity(block.len());
for &b in block {
out.push(Modifier::try_from(b)?);
out.push(Modifier::parse_int(b)?);
}
Ok(out)
}
@ -2513,14 +2629,14 @@ pub fn process_modifiers(block: &[u8]) -> Result<Vec<Modifier>> {
pub fn process_type(attr: &Attribute, e: Endian) -> Result<Type> {
match (attr.kind, &attr.value) {
(AttributeKind::FundType, &AttributeValue::Data2(type_id)) => {
let fund_type = FundType::try_from(type_id)
.with_context(|| format!("Invalid fundamental type ID '{}'", type_id))?;
let fund_type = FundType::parse_int(type_id)
.with_context(|| format!("Invalid fundamental type ID '{:04X}'", type_id))?;
Ok(Type { kind: TypeKind::Fundamental(fund_type), modifiers: vec![] })
}
(AttributeKind::ModFundType, AttributeValue::Block(ops)) => {
let type_id = u16::from_bytes(ops[ops.len() - 2..].try_into()?, e);
let fund_type = FundType::try_from(type_id)
.with_context(|| format!("Invalid fundamental type ID '{}'", type_id))?;
let fund_type = FundType::parse_int(type_id)
.with_context(|| format!("Invalid fundamental type ID '{:04X}'", type_id))?;
let modifiers = process_modifiers(&ops[..ops.len() - 2])?;
Ok(Type { kind: TypeKind::Fundamental(fund_type), modifiers })
}
@ -2639,9 +2755,9 @@ pub fn process_cu_tag(info: &DwarfInfo, tag: &Tag) -> Result<TagType> {
}
/// Logic to skip uninteresting tags
pub fn should_skip_tag(tag_type: &TagType) -> bool {
pub fn should_skip_tag(tag_type: &TagType, is_erased: bool) -> bool {
match tag_type {
TagType::Variable(_) => false,
TagType::Variable(_) => is_erased,
TagType::Typedef(_) => false,
TagType::UserDefined(t) => !t.is_definition(),
}
@ -2651,12 +2767,13 @@ pub fn tag_type_string(
info: &DwarfInfo,
typedefs: &TypedefMap,
tag_type: &TagType,
is_erased: bool,
) -> Result<String> {
match tag_type {
TagType::Typedef(t) => typedef_string(info, typedefs, t),
TagType::Variable(v) => variable_string(info, typedefs, v, true),
TagType::UserDefined(ud) => {
let ud_str = ud_type_def(info, typedefs, ud)?;
let ud_str = ud_type_def(info, typedefs, ud, is_erased)?;
match ud {
UserDefinedType::Structure(_)
| UserDefinedType::Enumeration(_)