Luke Street b184fee73f Migrate SectionIndex/SymbolIndex to u32
This halves the size of structs like SectionAddress.
2024-10-04 20:40:50 -06:00

164 lines
6.5 KiB
Rust

use anyhow::Result;
use crate::{
obj::{ObjDataKind, ObjInfo, ObjSectionKind, ObjSymbolKind, SymbolIndex},
util::split::is_linker_generated_label,
};
pub fn detect_objects(obj: &mut ObjInfo) -> Result<()> {
for (section_index, section) in
obj.sections.iter_mut().filter(|(_, s)| s.kind != ObjSectionKind::Code)
{
let section_end = (section.address + section.size) as u32;
let mut replace_symbols = vec![];
for (idx, symbol) in obj.symbols.for_section(section_index) {
let mut symbol = symbol.clone();
if is_linker_generated_label(&symbol.name) || symbol.name.starts_with("..") {
continue;
}
let expected_size = match symbol.data_kind {
ObjDataKind::Byte => 1,
ObjDataKind::Byte2 | ObjDataKind::Short => 2,
ObjDataKind::Byte4 | ObjDataKind::Float | ObjDataKind::Int => 4,
ObjDataKind::Byte8 | ObjDataKind::Double => 8,
_ => 0,
};
if !symbol.size_known {
let next_addr = obj
.symbols
.for_section_range(section_index, symbol.address as u32 + 1..)
.next()
.map_or(section_end, |(_, symbol)| symbol.address as u32);
let new_size = next_addr - symbol.address as u32;
log::debug!("Guessed {} size {:#X}", symbol.name, new_size);
symbol.size = match (new_size, expected_size) {
(..=4, 1) => expected_size,
(2 | 4, 2) => expected_size,
(..=8, 1 | 2 | 4) => {
// alignment to double
if obj.symbols.at_section_address(section_index, next_addr).any(|(_, sym)| sym.data_kind == ObjDataKind::Double)
// If we're at a TU boundary, we can assume it's just padding
|| section.splits.has_split_at(symbol.address as u32 + new_size)
{
expected_size
} else {
new_size
}
}
_ => new_size,
} as u64;
symbol.size_known = true;
}
symbol.kind = ObjSymbolKind::Object;
if expected_size > 1 && symbol.size as u32 % expected_size != 0 {
symbol.data_kind = ObjDataKind::Unknown;
}
replace_symbols.push((idx, symbol));
}
for (idx, symbol) in replace_symbols {
obj.symbols.replace(idx, symbol)?;
}
}
Ok(())
}
pub fn detect_strings(obj: &mut ObjInfo) -> Result<()> {
let mut symbols_set = Vec::<(SymbolIndex, ObjDataKind, usize)>::new();
for (section_index, section) in obj
.sections
.iter()
.filter(|(_, s)| matches!(s.kind, ObjSectionKind::Data | ObjSectionKind::ReadOnlyData))
{
enum StringResult {
None,
String { length: usize, terminated: bool },
WString { length: usize, str: String },
}
pub const fn trim_zeroes_end(mut bytes: &[u8]) -> &[u8] {
while let [rest @ .., last] = bytes {
if *last == 0 {
bytes = rest;
} else {
break;
}
}
bytes
}
fn is_string(data: &[u8]) -> StringResult {
let bytes = trim_zeroes_end(data);
if bytes.is_empty() {
return StringResult::None;
}
if bytes.iter().all(|&c| c.is_ascii_graphic() || c.is_ascii_whitespace()) {
return StringResult::String {
length: bytes.len(),
terminated: data.len() > bytes.len(),
};
}
if bytes.len() % 2 == 0 && data.len() >= bytes.len() + 2 {
// Found at least 2 bytes of trailing 0s, check UTF-16
let mut ok = true;
let mut str = String::new();
for n in std::char::decode_utf16(
bytes.chunks_exact(2).map(|c| u16::from_be_bytes(c.try_into().unwrap())),
) {
match n {
Ok(c) if c.is_ascii_graphic() || c.is_ascii_whitespace() => {
str.push(c);
}
_ => {
ok = false;
break;
}
}
}
if ok {
return StringResult::WString { length: bytes.len(), str };
}
}
StringResult::None
}
for (symbol_idx, symbol) in obj
.symbols
.for_section(section_index)
.filter(|(_, sym)| sym.data_kind == ObjDataKind::Unknown)
{
if symbol.name.starts_with("@stringBase") {
symbols_set.push((symbol_idx, ObjDataKind::StringTable, symbol.size as usize));
continue;
}
let data = section.symbol_data(symbol)?;
match is_string(data) {
StringResult::None => {}
StringResult::String { length, terminated } => {
let size = if terminated { length + 1 } else { length };
if !symbol.size_known || symbol.size == size as u64 {
let str = String::from_utf8_lossy(&data[..length]);
log::debug!("Found string '{}' @ {}", str, symbol.name);
symbols_set.push((symbol_idx, ObjDataKind::String, size));
}
}
StringResult::WString { length, str } => {
let size = length + 2;
if !symbol.size_known || symbol.size == size as u64 {
log::debug!("Found wide string '{}' @ {}", str, symbol.name);
symbols_set.push((symbol_idx, ObjDataKind::String16, size));
}
}
}
}
}
for (symbol_idx, data_kind, size) in symbols_set {
let mut symbol = obj.symbols[symbol_idx].clone();
log::debug!("Setting {} ({:#010X}) to size {:#X}", symbol.name, symbol.address, size);
symbol.data_kind = data_kind;
symbol.size = size as u64;
symbol.size_known = true;
obj.symbols.replace(symbol_idx, symbol)?;
}
Ok(())
}