From d969819b78e8febb2dabb2edc30bb56f64f17946 Mon Sep 17 00:00:00 2001 From: cadmic Date: Mon, 9 Jun 2025 21:44:39 -0700 Subject: [PATCH] Guess endianness of "erased" DWARF info (#104) --- README.md | 2 ++ src/util/dwarf.rs | 63 ++++++++++++++++++++++++++-------------------- src/util/reader.rs | 9 +++++++ 3 files changed, 47 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 0922424..043d354 100644 --- a/README.md +++ b/README.md @@ -297,6 +297,8 @@ Dumps DWARF 1.1 information from an ELF file. (Does **not** support DWARF 2+) ```shell $ dtk dwarf dump input.elf +# or, to include data that was stripped by MWLD +$ dtk dwarf dump input.elf --include-erased ``` ### elf disasm diff --git a/src/util/dwarf.rs b/src/util/dwarf.rs index 69d6b85..e82816e 100644 --- a/src/util/dwarf.rs +++ b/src/util/dwarf.rs @@ -358,6 +358,7 @@ pub struct Tag { pub kind: TagKind, pub is_erased: bool, // Tag was deleted but has been reconstructed pub is_erased_root: bool, // Tag is erased and is the root of a tree of erased tags + pub data_endian: Endian, // Endianness of the tag data (could be different from the address endianness for erased tags) pub attributes: Vec, } @@ -554,6 +555,7 @@ where kind: TagKind::Padding, is_erased, is_erased_root: false, + data_endian, attributes: Vec::new(), }); return Ok(tags); @@ -563,26 +565,42 @@ where let tag = TagKind::try_from(tag_num).context("Unknown DWARF tag type")?; if tag == TagKind::Padding { if include_erased { - // Erased entries that have become padding are little-endian, and we - // have to guess the length and tag of the first entry. We assume - // the entry is either a variable or a function, and read until we - // find the high_pc attribute. Only MwGlobalRef will follow, and - // these are unlikely to be confused with the length of the next - // entry. + // Erased entries that have become padding could be either + // little-endian or big-endian, and we have to guess the length and + // tag of the first entry. We assume the entry is either a variable + // or a function, and read until we find the high_pc attribute. Only + // MwGlobalRef will follow, and these are unlikely to be confused + // with the length of the next entry. let mut attributes = Vec::new(); let mut is_function = false; + + // Guess endianness based on first attribute + let data_endian = if is_erased { + data_endian + } else { + // Peek next two bytes + let mut buf = [0u8; 2]; + reader.read_exact(&mut buf)?; + let attr_tag = u16::from_reader(&mut Cursor::new(&buf), data_endian)?; + reader.seek(SeekFrom::Current(-2))?; + match AttributeKind::try_from(attr_tag) { + Ok(_) => data_endian, + Err(_) => data_endian.flip(), + } + }; + while reader.stream_position()? < position + size as u64 { // Peek next two bytes let mut buf = [0u8; 2]; reader.read_exact(&mut buf)?; - let attr_tag = u16::from_reader(&mut Cursor::new(&buf), Endian::Little)?; + let attr_tag = u16::from_reader(&mut Cursor::new(&buf), data_endian)?; reader.seek(SeekFrom::Current(-2))?; if is_function && attr_tag != AttributeKind::MwGlobalRef as u16 { break; } - let attr = read_attribute(reader, Endian::Little, addr_endian)?; + let attr = read_attribute(reader, data_endian, addr_endian)?; if attr.kind == AttributeKind::HighPc { is_function = true; } @@ -594,12 +612,13 @@ where kind, is_erased: true, is_erased_root: true, + data_endian, attributes, }); // Read the rest of the tags while reader.stream_position()? < position + size as u64 { - for tag in read_tags(reader, Endian::Little, addr_endian, include_erased, true)? { + for tag in read_tags(reader, data_endian, addr_endian, include_erased, true)? { tags.push(tag); } } @@ -616,6 +635,7 @@ where kind: tag, is_erased, is_erased_root: false, + data_endian, attributes, }); } @@ -2028,9 +2048,9 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result { (AttributeKind::Sibling, _) => {} (AttributeKind::SubscrData, AttributeValue::Block(data)) => { subscr_data = - Some(process_array_subscript_data(data, info.e, tag.is_erased).with_context( - || format!("Failed to process SubscrData for tag: {tag:?}"), - )?) + Some(process_array_subscript_data(data, info.e).with_context(|| { + format!("Failed to process SubscrData for tag: {tag:?}") + })?) } (AttributeKind::Ordering, val) => match val { AttributeValue::Data2(d2) => { @@ -2056,11 +2076,7 @@ fn process_array_tag(info: &DwarfInfo, tag: &Tag) -> Result { Ok(ArrayType { element_type: Box::from(element_type), dimensions }) } -fn process_array_subscript_data( - data: &[u8], - e: Endian, - is_erased: bool, -) -> Result<(Type, Vec)> { +fn process_array_subscript_data(data: &[u8], e: Endian) -> Result<(Type, Vec)> { let mut element_type = None; let mut dimensions = Vec::new(); let mut data = data; @@ -2101,8 +2117,7 @@ fn process_array_subscript_data( SubscriptFormat::ElementType => { let mut cursor = Cursor::new(data); // TODO: is this the right endianness to use for erased tags? - let type_attr = - read_attribute(&mut cursor, if is_erased { Endian::Little } else { e }, e)?; + let type_attr = read_attribute(&mut cursor, e, e)?; element_type = Some(process_type(&type_attr, e)?); data = &data[cursor.position() as usize..]; } @@ -2456,10 +2471,7 @@ fn process_subroutine_parameter_tag(info: &DwarfInfo, tag: &Tag) -> Result kind = Some(process_type(attr, info.e)?), (AttributeKind::Location, AttributeValue::Block(block)) => { if !block.is_empty() { - location = Some(process_variable_location( - block, - if tag.is_erased { Endian::Little } else { info.e }, - )?); + location = Some(process_variable_location(block, tag.data_endian)?); } } (AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => { @@ -2514,10 +2526,7 @@ fn process_local_variable_tag(info: &DwarfInfo, tag: &Tag) -> Result kind = Some(process_type(attr, info.e)?), (AttributeKind::Location, AttributeValue::Block(block)) => { if !block.is_empty() { - location = Some(process_variable_location( - block, - if tag.is_erased { Endian::Little } else { info.e }, - )?); + location = Some(process_variable_location(block, tag.data_endian)?); } } (AttributeKind::MwDwarf2Location, AttributeValue::Block(_block)) => { diff --git a/src/util/reader.rs b/src/util/reader.rs index ca3a61b..7d30c7d 100644 --- a/src/util/reader.rs +++ b/src/util/reader.rs @@ -20,6 +20,15 @@ impl From for Endian { } } +impl Endian { + pub fn flip(self) -> Self { + match self { + Endian::Big => Endian::Little, + Endian::Little => Endian::Big, + } + } +} + pub const DYNAMIC_SIZE: usize = 0; pub const fn struct_size(fields: [usize; N]) -> usize {