From 876b78bfa6bd5478c9be33fe2a6c6b78fcf4d200 Mon Sep 17 00:00:00 2001 From: cadmic Date: Wed, 15 May 2024 22:06:30 -0700 Subject: [PATCH] Reconstruct "erased" tags from DWARF debugging info (#51) * Read erased tags * cargo +nightly fmt * .filter(...).next() -> find(...) * Plumb both data and address endians through * Rename has_erased_parent -> is_erased_root and comment --- src/cmd/dwarf.rs | 47 +++++++--- src/util/dwarf.rs | 217 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 200 insertions(+), 64 deletions(-) diff --git a/src/cmd/dwarf.rs b/src/cmd/dwarf.rs index 506bf7a..4311024 100644 --- a/src/cmd/dwarf.rs +++ b/src/cmd/dwarf.rs @@ -1,6 +1,7 @@ use std::{ collections::{btree_map, BTreeMap}, io::{stdout, Cursor, Read, Write}, + ops::Bound::{Excluded, Unbounded}, path::PathBuf, str::from_utf8, }; @@ -50,6 +51,10 @@ pub struct DumpArgs { #[argp(switch)] /// Disable color output. no_color: bool, + #[argp(switch)] + /// Attempt to reconstruct tags that have been removed by the linker, e.g. + /// tags from unused functions or functions that have been inlined away. + include_erased: bool, } pub fn run(args: Args) -> Result<()> { @@ -97,15 +102,15 @@ fn dump(args: DumpArgs) -> Result<()> { let name = name.rsplit_once('/').map(|(_, b)| b).unwrap_or(&name); let file_path = out_path.join(format!("{}.txt", name)); let mut file = buf_writer(file_path)?; - dump_debug_section(&mut file, &obj_file, debug_section)?; + dump_debug_section(&args, &mut file, &obj_file, debug_section)?; file.flush()?; } else if args.no_color { println!("\n// File {}:", name); - dump_debug_section(&mut stdout(), &obj_file, debug_section)?; + dump_debug_section(&args, &mut stdout(), &obj_file, debug_section)?; } else { let mut writer = HighlightWriter::new(syntax_set.clone(), syntax.clone(), theme); writeln!(writer, "\n// File {}:", name)?; - dump_debug_section(&mut writer, &obj_file, debug_section)?; + dump_debug_section(&args, &mut writer, &obj_file, debug_section)?; } } } else { @@ -115,19 +120,20 @@ fn dump(args: DumpArgs) -> Result<()> { .ok_or_else(|| anyhow!("Failed to locate .debug section"))?; if let Some(out_path) = &args.out { let mut file = buf_writer(out_path)?; - dump_debug_section(&mut file, &obj_file, debug_section)?; + dump_debug_section(&args, &mut file, &obj_file, debug_section)?; file.flush()?; } else if args.no_color { - dump_debug_section(&mut stdout(), &obj_file, debug_section)?; + dump_debug_section(&args, &mut stdout(), &obj_file, debug_section)?; } else { let mut writer = HighlightWriter::new(syntax_set, syntax, theme); - dump_debug_section(&mut writer, &obj_file, debug_section)?; + dump_debug_section(&args, &mut writer, &obj_file, debug_section)?; } } Ok(()) } fn dump_debug_section( + args: &DumpArgs, w: &mut W, obj_file: &object::File<'_>, debug_section: Section, @@ -156,7 +162,7 @@ where } let mut reader = Cursor::new(&*data); - let info = read_debug_section(&mut reader, obj_file.endianness().into())?; + let info = read_debug_section(&mut reader, obj_file.endianness().into(), args.include_erased)?; for (&addr, tag) in &info.tags { log::debug!("{}: {:?}", addr, tag); @@ -222,41 +228,54 @@ where } writeln!(w, "*/")?; - let children = tag.children(&info.tags); + let mut children = tag.children(&info.tags); + + // merge in erased tags + let range = match tag.next_sibling(&info.tags) { + Some(next) => (Excluded(tag.key), Excluded(next.key)), + None => (Excluded(tag.key), Unbounded), + }; + for (_, child) in info.tags.range(range) { + if child.is_erased_root { + children.push(child); + } + } + children.sort_by_key(|x| x.key); + let mut typedefs = BTreeMap::>::new(); for child in children { let tag_type = match process_cu_tag(&info, child) { Ok(tag_type) => tag_type, Err(e) => { log::error!( - "Failed to process tag {} (unit {}): {}", + "Failed to process tag {:X} (unit {}): {}", child.key, unit.name, e ); writeln!( w, - "// ERROR: Failed to process tag {} ({:?})", + "// ERROR: Failed to process tag {:X} ({:?})", child.key, child.kind )?; continue; } }; - if should_skip_tag(&tag_type) { + if should_skip_tag(&tag_type, child.is_erased) { continue; } - match tag_type_string(&info, &typedefs, &tag_type) { + match tag_type_string(&info, &typedefs, &tag_type, child.is_erased) { Ok(s) => writeln!(w, "{}", s)?, Err(e) => { log::error!( - "Failed to emit tag {} (unit {}): {}", + "Failed to emit tag {:X} (unit {}): {}", child.key, unit.name, e ); writeln!( w, - "// ERROR: Failed to emit tag {} ({:?})", + "// ERROR: Failed to emit tag {:X} ({:?})", child.key, child.kind )?; continue; diff --git a/src/util/dwarf.rs b/src/util/dwarf.rs index 70e53e9..77eef50 100644 --- a/src/util/dwarf.rs +++ b/src/util/dwarf.rs @@ -8,7 +8,7 @@ use std::{ use anyhow::{anyhow, bail, ensure, Context, Result}; use indent::indent_all_by; -use num_enum::{IntoPrimitive, TryFromPrimitive}; +use num_enum::{IntoPrimitive, TryFromPrimitive, TryFromPrimitiveError}; use crate::{ array_ref, @@ -150,11 +150,21 @@ impl FundType { FundType::Vec2x32Float => "__vec2x32float__", }) } + + pub fn parse_int(value: u16) -> Result> { + if value >> 8 == 0x1 { + // Can appear in erased tags + Self::try_from(value & 0xFF) + } else { + Self::try_from(value) + } + } } #[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)] #[repr(u8)] pub enum Modifier { + MwPointerTo = 0x00, // Used in erased tags PointerTo = 0x01, ReferenceTo = 0x02, Const = 0x03, @@ -162,6 +172,12 @@ pub enum Modifier { // User types } +impl Modifier { + pub fn parse_int(value: u8) -> Result> { + Self::try_from(value & 0x7F) // High bit can appear in erased tags + } +} + #[derive(Debug, Eq, PartialEq, Copy, Clone, IntoPrimitive, TryFromPrimitive)] #[repr(u8)] pub enum SubscriptFormat { @@ -338,6 +354,8 @@ pub struct Attribute { pub struct Tag { pub key: u32, pub kind: TagKind, + pub is_erased: bool, // Tag was deleted but has been reconstructed + pub is_erased_root: bool, // Tag is erased and is the root of a tree of erased tags pub attributes: Vec, } @@ -420,7 +438,7 @@ impl Tag { pub fn children<'a>(&self, tags: &'a TagMap) -> Vec<&'a Tag> { let sibling = self.next_sibling(tags); let mut children = Vec::new(); - let mut child = match self.next_tag(tags) { + let mut child = match self.next_tag(tags, self.is_erased) { Some(child) => child, None => return children, }; @@ -446,17 +464,19 @@ impl Tag { if let Some(key) = self.reference_attribute(AttributeKind::Sibling) { tags.get(&key) } else { - self.next_tag(tags) + self.next_tag(tags, self.is_erased) } } - /// Returns the next tag sequentially, if any - pub fn next_tag<'a>(&self, tags: &'a TagMap) -> Option<&'a Tag> { - tags.range(self.key + 1..).next().map(|(_, tag)| tag) + /// Returns the next tag sequentially, if any (skipping erased tags) + pub fn next_tag<'a>(&self, tags: &'a TagMap, include_erased: bool) -> Option<&'a Tag> { + tags.range(self.key + 1..) + .find(|(_, tag)| include_erased || !tag.is_erased) + .map(|(_, tag)| tag) } } -pub fn read_debug_section(reader: &mut R, e: Endian) -> Result +pub fn read_debug_section(reader: &mut R, e: Endian, include_erased: bool) -> Result where R: BufRead + Seek + ?Sized { let len = { let old_pos = reader.stream_position()?; @@ -471,8 +491,10 @@ where R: BufRead + Seek + ?Sized { if position >= len { break; } - let tag = read_tag(reader, e)?; - info.tags.insert(position as u32, tag); + let tags = read_tags(reader, e, e, include_erased, false)?; + for tag in tags { + info.tags.insert(tag.key, tag); + } } Ok(info) } @@ -507,29 +529,95 @@ where R: BufRead + Seek + ?Sized { Ok(()) } -fn read_tag(reader: &mut R, e: Endian) -> Result -where R: BufRead + Seek + ?Sized { +fn read_tags( + reader: &mut R, + data_endian: Endian, + addr_endian: Endian, + include_erased: bool, + is_erased: bool, +) -> Result> +where + R: BufRead + Seek + ?Sized, +{ + let mut tags = Vec::new(); let position = reader.stream_position()?; - let size = u32::from_reader(reader, e)?; + let size = u32::from_reader(reader, data_endian)?; if size < 8 { // Null entry if size > 4 { reader.seek(SeekFrom::Current(size as i64 - 4))?; } - return Ok(Tag { key: position as u32, kind: TagKind::Padding, attributes: vec![] }); + tags.push(Tag { + key: position as u32, + kind: TagKind::Padding, + is_erased, + is_erased_root: false, + attributes: Vec::new(), + }); + return Ok(tags); } - let tag_num = u16::from_reader(reader, e)?; + let tag_num = u16::from_reader(reader, data_endian)?; let tag = TagKind::try_from(tag_num).context("Unknown DWARF tag type")?; - let mut attributes = Vec::new(); if tag == TagKind::Padding { - reader.seek(SeekFrom::Start(position + size as u64))?; // Skip padding - } else { - while reader.stream_position()? < position + size as u64 { - attributes.push(read_attribute(reader, e)?); + if include_erased { + // Erased entries that have become padding are little-endian, and we + // have to guess the length and tag of the first entry. We assume + // the entry is either a variable or a function, and read until we + // find the high_pc attribute. Only MwGlobalRef will follow, and + // these are unlikely to be confused with the length of the next + // entry. + let mut attributes = Vec::new(); + let mut is_function = false; + while reader.stream_position()? < position + size as u64 { + // Peek next two bytes + let mut buf = [0u8; 2]; + reader.read_exact(&mut buf)?; + let attr_tag = u16::from_reader(&mut Cursor::new(&buf), Endian::Little)?; + reader.seek(SeekFrom::Current(-2))?; + + if is_function && attr_tag != AttributeKind::MwGlobalRef as u16 { + break; + } + + let attr = read_attribute(reader, Endian::Little, addr_endian)?; + if attr.kind == AttributeKind::HighPc { + is_function = true; + } + attributes.push(attr); + } + let kind = if is_function { TagKind::Subroutine } else { TagKind::LocalVariable }; + tags.push(Tag { + key: position as u32, + kind, + is_erased: true, + is_erased_root: true, + attributes, + }); + + // Read the rest of the tags + while reader.stream_position()? < position + size as u64 { + for tag in read_tags(reader, Endian::Little, addr_endian, include_erased, true)? { + tags.push(tag); + } + } + } else { + reader.seek(SeekFrom::Start(position + size as u64))?; // Skip padding } + } else { + let mut attributes = Vec::new(); + while reader.stream_position()? < position + size as u64 { + attributes.push(read_attribute(reader, data_endian, addr_endian)?); + } + tags.push(Tag { + key: position as u32, + kind: tag, + is_erased, + is_erased_root: false, + attributes, + }); } - Ok(Tag { key: position as u32, kind: tag, attributes }) + Ok(tags) } // TODO Shift-JIS? @@ -547,29 +635,35 @@ where R: BufRead + ?Sized { Ok(str) } -fn read_attribute(reader: &mut R, e: Endian) -> Result -where R: BufRead + Seek + ?Sized { - let attr_type = u16::from_reader(reader, e)?; +fn read_attribute( + reader: &mut R, + data_endian: Endian, + addr_endian: Endian, +) -> Result +where + R: BufRead + Seek + ?Sized, +{ + let attr_type = u16::from_reader(reader, data_endian)?; let attr = AttributeKind::try_from(attr_type).context("Unknown DWARF attribute type")?; let form = FormKind::try_from(attr_type & FORM_MASK).context("Unknown DWARF form type")?; let value = match form { - FormKind::Addr => AttributeValue::Address(u32::from_reader(reader, e)?), - FormKind::Ref => AttributeValue::Reference(u32::from_reader(reader, e)?), + FormKind::Addr => AttributeValue::Address(u32::from_reader(reader, addr_endian)?), + FormKind::Ref => AttributeValue::Reference(u32::from_reader(reader, addr_endian)?), FormKind::Block2 => { - let size = u16::from_reader(reader, e)?; + let size = u16::from_reader(reader, data_endian)?; let mut data = vec![0u8; size as usize]; reader.read_exact(&mut data)?; AttributeValue::Block(data) } FormKind::Block4 => { - let size = u32::from_reader(reader, e)?; + let size = u32::from_reader(reader, data_endian)?; let mut data = vec![0u8; size as usize]; reader.read_exact(&mut data)?; AttributeValue::Block(data) } - FormKind::Data2 => AttributeValue::Data2(u16::from_reader(reader, e)?), - FormKind::Data4 => AttributeValue::Data4(u32::from_reader(reader, e)?), - FormKind::Data8 => AttributeValue::Data8(u64::from_reader(reader, e)?), + FormKind::Data2 => AttributeValue::Data2(u16::from_reader(reader, data_endian)?), + FormKind::Data4 => AttributeValue::Data4(u32::from_reader(reader, data_endian)?), + FormKind::Data8 => AttributeValue::Data8(u64::from_reader(reader, data_endian)?), FormKind::String => AttributeValue::String(read_string(reader)?), }; Ok(Attribute { kind: attr, value }) @@ -879,7 +973,9 @@ pub struct Type { impl Type { pub fn size(&self, info: &DwarfInfo) -> Result { - if self.modifiers.iter().any(|m| matches!(m, Modifier::PointerTo | Modifier::ReferenceTo)) { + if self.modifiers.iter().any(|m| { + matches!(m, Modifier::MwPointerTo | Modifier::PointerTo | Modifier::ReferenceTo) + }) { return Ok(4); } match self.kind { @@ -900,7 +996,7 @@ pub fn apply_modifiers(mut str: TypeString, modifiers: &[Modifier]) -> Result { + Modifier::MwPointerTo | Modifier::PointerTo => { if !has_pointer && !str.suffix.is_empty() { if str.member.is_empty() { str.prefix.push_str(" (*"); @@ -1166,13 +1262,18 @@ fn ptr_to_member_type_string( }) } -pub fn ud_type_def(info: &DwarfInfo, typedefs: &TypedefMap, t: &UserDefinedType) -> Result { +pub fn ud_type_def( + info: &DwarfInfo, + typedefs: &TypedefMap, + t: &UserDefinedType, + is_erased: bool, +) -> Result { match t { UserDefinedType::Array(t) => { let ts = array_type_string(info, typedefs, t, false)?; Ok(format!("// Array: {}{}", ts.prefix, ts.suffix)) } - UserDefinedType::Subroutine(t) => Ok(subroutine_def_string(info, typedefs, t)?), + UserDefinedType::Subroutine(t) => Ok(subroutine_def_string(info, typedefs, t, is_erased)?), UserDefinedType::Structure(t) => Ok(struct_def_string(info, typedefs, t)?), UserDefinedType::Enumeration(t) => Ok(enum_def_string(t)?), UserDefinedType::Union(t) => Ok(union_def_string(info, typedefs, t)?), @@ -1233,9 +1334,12 @@ pub fn subroutine_def_string( info: &DwarfInfo, typedefs: &TypedefMap, t: &SubroutineType, + is_erased: bool, ) -> Result { let mut out = String::new(); - if let (Some(start), Some(end)) = (t.start_address, t.end_address) { + if is_erased { + out.push_str("// Erased\n"); + } else if let (Some(start), Some(end)) = (t.start_address, t.end_address) { writeln!(out, "// Range: {:#X} -> {:#X}", start, end)?; } let rt = type_string(info, typedefs, &t.return_type, true)?; @@ -1922,9 +2026,9 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result { (AttributeKind::Sibling, _) => {} (AttributeKind::SubscrData, AttributeValue::Block(data)) => { subscr_data = - Some(process_array_subscript_data(data, info.e).with_context(|| { - format!("Failed to process SubscrData for tag: {:?}", tag) - })?) + Some(process_array_subscript_data(data, info.e, tag.is_erased).with_context( + || format!("Failed to process SubscrData for tag: {:?}", tag), + )?) } (AttributeKind::Ordering, val) => match val { AttributeValue::Data2(d2) => { @@ -1950,7 +2054,11 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result { Ok(ArrayType { element_type: Box::from(element_type), dimensions }) } -fn process_array_subscript_data(data: &[u8], e: Endian) -> Result<(Type, Vec)> { +fn process_array_subscript_data( + data: &[u8], + e: Endian, + is_erased: bool, +) -> Result<(Type, Vec)> { let mut element_type = None; let mut dimensions = Vec::new(); let mut data = data; @@ -1990,7 +2098,9 @@ fn process_array_subscript_data(data: &[u8], e: Endian) -> Result<(Type, Vec { let mut cursor = Cursor::new(data); - let type_attr = read_attribute(&mut cursor, e)?; + // TODO: is this the right endianness to use for erased tags? + let type_attr = + read_attribute(&mut cursor, if is_erased { Endian::Little } else { e }, e)?; element_type = Some(process_type(&type_attr, e)?); data = &data[cursor.position() as usize..]; } @@ -2366,7 +2476,10 @@ fn process_subroutine_parameter_tag(info: &DwarfInfo, tag: &Tag) -> Result kind = Some(process_type(attr, info.e)?), (AttributeKind::Location, AttributeValue::Block(block)) => { - location = Some(process_variable_location(block, info.e)?) + location = Some(process_variable_location( + block, + if tag.is_erased { Endian::Little } else { info.e }, + )?) } (AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => { // TODO? @@ -2416,7 +2529,10 @@ fn process_local_variable_tag(info: &DwarfInfo, tag: &Tag) -> Result kind = Some(process_type(attr, info.e)?), (AttributeKind::Location, AttributeValue::Block(block)) => { if !block.is_empty() { - location = Some(process_variable_location(block, info.e)?); + location = Some(process_variable_location( + block, + if tag.is_erased { Endian::Little } else { info.e }, + )?); } } (AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => { @@ -2505,7 +2621,7 @@ pub fn ud_type(info: &DwarfInfo, tag: &Tag) -> Result { pub fn process_modifiers(block: &[u8]) -> Result> { let mut out = Vec::with_capacity(block.len()); for &b in block { - out.push(Modifier::try_from(b)?); + out.push(Modifier::parse_int(b)?); } Ok(out) } @@ -2513,14 +2629,14 @@ pub fn process_modifiers(block: &[u8]) -> Result> { pub fn process_type(attr: &Attribute, e: Endian) -> Result { match (attr.kind, &attr.value) { (AttributeKind::FundType, &AttributeValue::Data2(type_id)) => { - let fund_type = FundType::try_from(type_id) - .with_context(|| format!("Invalid fundamental type ID '{}'", type_id))?; + let fund_type = FundType::parse_int(type_id) + .with_context(|| format!("Invalid fundamental type ID '{:04X}'", type_id))?; Ok(Type { kind: TypeKind::Fundamental(fund_type), modifiers: vec![] }) } (AttributeKind::ModFundType, AttributeValue::Block(ops)) => { let type_id = u16::from_bytes(ops[ops.len() - 2..].try_into()?, e); - let fund_type = FundType::try_from(type_id) - .with_context(|| format!("Invalid fundamental type ID '{}'", type_id))?; + let fund_type = FundType::parse_int(type_id) + .with_context(|| format!("Invalid fundamental type ID '{:04X}'", type_id))?; let modifiers = process_modifiers(&ops[..ops.len() - 2])?; Ok(Type { kind: TypeKind::Fundamental(fund_type), modifiers }) } @@ -2639,9 +2755,9 @@ pub fn process_cu_tag(info: &DwarfInfo, tag: &Tag) -> Result { } /// Logic to skip uninteresting tags -pub fn should_skip_tag(tag_type: &TagType) -> bool { +pub fn should_skip_tag(tag_type: &TagType, is_erased: bool) -> bool { match tag_type { - TagType::Variable(_) => false, + TagType::Variable(_) => is_erased, TagType::Typedef(_) => false, TagType::UserDefined(t) => !t.is_definition(), } @@ -2651,12 +2767,13 @@ pub fn tag_type_string( info: &DwarfInfo, typedefs: &TypedefMap, tag_type: &TagType, + is_erased: bool, ) -> Result { match tag_type { TagType::Typedef(t) => typedef_string(info, typedefs, t), TagType::Variable(v) => variable_string(info, typedefs, v, true), TagType::UserDefined(ud) => { - let ud_str = ud_type_def(info, typedefs, ud)?; + let ud_str = ud_type_def(info, typedefs, ud, is_erased)?; match ud { UserDefinedType::Structure(_) | UserDefinedType::Enumeration(_)