mirror of
https://github.com/encounter/decomp-toolkit.git
synced 2025-06-10 00:23:28 +00:00
364 lines
12 KiB
Rust
364 lines
12 KiB
Rust
use std::{
|
|
io::{BufRead, Write},
|
|
num::ParseIntError,
|
|
str::FromStr,
|
|
};
|
|
|
|
use anyhow::{anyhow, bail, ensure, Result};
|
|
use cwdemangle::{demangle, DemangleOptions};
|
|
use once_cell::sync::Lazy;
|
|
use regex::Regex;
|
|
|
|
use crate::{
|
|
obj::{
|
|
ObjDataKind, ObjInfo, ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind,
|
|
},
|
|
util::nested::NestedVec,
|
|
};
|
|
|
|
/// Parses a hexadecimal `u32`, with or without a leading `0x` / `0X` prefix.
///
/// At most one prefix is removed, so malformed input such as `0x0x10` is rejected
/// instead of being silently accepted.
///
/// # Errors
///
/// Returns the underlying [`ParseIntError`] when the remaining text is empty, contains
/// non-hexadecimal characters, or does not fit in a `u32`.
fn parse_hex(s: &str) -> Result<u32, ParseIntError> {
    // Accept either prefix case: the symbol address pattern allows both `x` and `X`.
    let digits = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")).unwrap_or(s);
    u32::from_str_radix(digits, 16)
}
|
|
|
|
pub fn parse_symbol_line(line: &str, obj: &mut ObjInfo) -> Result<Option<ObjSymbol>> {
|
|
static SYMBOL_LINE: Lazy<Regex> = Lazy::new(|| {
|
|
Regex::new(
|
|
"^\\s*(?P<name>[^\\s=]+)\\s*=\\s*(?:(?P<section>[A-Za-z0-9.]+):)?(?P<addr>[0-9A-Fa-fXx]+);(?:\\s*//\\s*(?P<attrs>.*))?$",
|
|
)
|
|
.unwrap()
|
|
});
|
|
static COMMENT_LINE: Lazy<Regex> = Lazy::new(|| Regex::new("^\\s*(?://|#).*$").unwrap());
|
|
|
|
if let Some(captures) = SYMBOL_LINE.captures(line) {
|
|
let name = captures["name"].to_string();
|
|
let addr = parse_hex(&captures["addr"])?;
|
|
let demangled_name = demangle(&name, &DemangleOptions::default());
|
|
let mut symbol = ObjSymbol {
|
|
name,
|
|
demangled_name,
|
|
address: addr as u64,
|
|
section: obj.section_at(addr).ok().map(|section| section.index),
|
|
size: 0,
|
|
size_known: false,
|
|
flags: Default::default(),
|
|
kind: ObjSymbolKind::Unknown,
|
|
align: None,
|
|
data_kind: Default::default(),
|
|
};
|
|
let attrs = captures["attrs"].split(' ');
|
|
for attr in attrs {
|
|
if let Some((name, value)) = attr.split_once(':') {
|
|
match name {
|
|
"type" => {
|
|
symbol.kind = symbol_kind_from_str(value)
|
|
.ok_or_else(|| anyhow!("Unknown symbol type '{}'", value))?;
|
|
}
|
|
"size" => {
|
|
symbol.size = parse_hex(value)? as u64;
|
|
symbol.size_known = true;
|
|
}
|
|
"scope" => {
|
|
symbol.flags.0 |= symbol_flags_from_str(value)
|
|
.ok_or_else(|| anyhow!("Unknown symbol scope '{}'", value))?;
|
|
}
|
|
"align" => {
|
|
symbol.align = Some(parse_hex(value)?);
|
|
}
|
|
"data" => {
|
|
symbol.data_kind = symbol_data_kind_from_str(value)
|
|
.ok_or_else(|| anyhow!("Unknown symbol data type '{}'", value))?;
|
|
}
|
|
_ => bail!("Unknown symbol attribute '{name}'"),
|
|
}
|
|
} else {
|
|
match attr {
|
|
"hidden" => {
|
|
symbol.flags.0 |= ObjSymbolFlags::Hidden;
|
|
}
|
|
"noreloc" => {
|
|
ensure!(
|
|
symbol.size != 0,
|
|
"Symbol {} requires size != 0 with noreloc",
|
|
symbol.name
|
|
);
|
|
obj.blocked_ranges.insert(addr, addr + symbol.size as u32);
|
|
}
|
|
_ => bail!("Unknown symbol attribute '{attr}'"),
|
|
}
|
|
}
|
|
}
|
|
Ok(Some(symbol))
|
|
} else if line.is_empty() || COMMENT_LINE.is_match(line) {
|
|
Ok(None)
|
|
} else {
|
|
Err(anyhow!("Failed to parse symbol line '{line}'"))
|
|
}
|
|
}
|
|
|
|
fn is_skip_symbol(symbol: &ObjSymbol) -> bool {
|
|
let _ = symbol;
|
|
// symbol.name.starts_with("lbl_")
|
|
// || symbol.name.starts_with("func_")
|
|
// || symbol.name.starts_with("switch_")
|
|
// || symbol.name.starts_with("float_")
|
|
// || symbol.name.starts_with("double_")
|
|
false
|
|
}
|
|
|
|
pub fn write_symbols<W: Write>(w: &mut W, obj: &ObjInfo) -> Result<()> {
|
|
for (_, symbol) in obj.symbols.iter_ordered() {
|
|
if symbol.kind == ObjSymbolKind::Section
|
|
// Ignore absolute symbols for now (usually linker-generated)
|
|
|| symbol.section.is_none()
|
|
|| is_skip_symbol(symbol)
|
|
{
|
|
continue;
|
|
}
|
|
write_symbol(w, obj, symbol)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn write_symbol<W: Write>(w: &mut W, obj: &ObjInfo, symbol: &ObjSymbol) -> Result<()> {
|
|
// if let Some(demangled_name) = &symbol.demangled_name {
|
|
// writeln!(w, "// {demangled_name}")?;
|
|
// }
|
|
write!(w, "{} = ", symbol.name)?;
|
|
let section = symbol.section.and_then(|idx| obj.sections.get(idx));
|
|
if let Some(section) = section {
|
|
write!(w, "{}:", section.name)?;
|
|
}
|
|
write!(w, "{:#010X}; //", symbol.address)?;
|
|
write!(w, " type:{}", symbol_kind_to_str(symbol.kind))?;
|
|
// if let Some(section) = section {
|
|
// match section.kind {
|
|
// ObjSectionKind::Code => {
|
|
// write!(w, " type:function")?;
|
|
// }
|
|
// ObjSectionKind::Data | ObjSectionKind::ReadOnlyData | ObjSectionKind::Bss => {
|
|
// write!(w, " type:object")?;
|
|
// }
|
|
// }
|
|
// }
|
|
if symbol.size_known && symbol.size > 0 {
|
|
write!(w, " size:{:#X}", symbol.size)?;
|
|
}
|
|
if let Some(scope) = symbol_flags_to_str(symbol.flags) {
|
|
write!(w, " scope:{scope}")?;
|
|
}
|
|
if let Some(align) = symbol.align {
|
|
write!(w, " align:{align:#X}")?;
|
|
}
|
|
if let Some(kind) = symbol_data_kind_to_str(symbol.data_kind) {
|
|
write!(w, " data:{kind}")?;
|
|
}
|
|
if symbol.flags.is_hidden() {
|
|
write!(w, " hidden")?;
|
|
}
|
|
if obj.blocked_ranges.contains_key(&(symbol.address as u32)) {
|
|
write!(w, " noreloc")?;
|
|
}
|
|
writeln!(w)?;
|
|
Ok(())
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_kind_to_str(kind: ObjSymbolKind) -> &'static str {
|
|
match kind {
|
|
ObjSymbolKind::Unknown => "label",
|
|
ObjSymbolKind::Function => "function",
|
|
ObjSymbolKind::Object => "object",
|
|
ObjSymbolKind::Section => "section",
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_data_kind_to_str(kind: ObjDataKind) -> Option<&'static str> {
|
|
match kind {
|
|
ObjDataKind::Unknown => None,
|
|
ObjDataKind::Byte => Some("byte"),
|
|
ObjDataKind::Byte2 => Some("2byte"),
|
|
ObjDataKind::Byte4 => Some("4byte"),
|
|
ObjDataKind::Byte8 => Some("8byte"),
|
|
ObjDataKind::Float => Some("float"),
|
|
ObjDataKind::Double => Some("double"),
|
|
ObjDataKind::String => Some("string"),
|
|
ObjDataKind::String16 => Some("wstring"),
|
|
ObjDataKind::StringTable => Some("string_table"),
|
|
ObjDataKind::String16Table => Some("wstring_table"),
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_kind_from_str(s: &str) -> Option<ObjSymbolKind> {
|
|
match s {
|
|
"label" => Some(ObjSymbolKind::Unknown),
|
|
"function" => Some(ObjSymbolKind::Function),
|
|
"object" => Some(ObjSymbolKind::Object),
|
|
"section" => Some(ObjSymbolKind::Section),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_flags_to_str(flags: ObjSymbolFlagSet) -> Option<&'static str> {
|
|
if flags.0.contains(ObjSymbolFlags::Common) {
|
|
Some("common")
|
|
} else if flags.0.contains(ObjSymbolFlags::Weak) {
|
|
Some("weak")
|
|
} else if flags.0.contains(ObjSymbolFlags::Global) {
|
|
Some("global")
|
|
} else if flags.0.contains(ObjSymbolFlags::Local) {
|
|
Some("local")
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_flags_from_str(s: &str) -> Option<ObjSymbolFlags> {
|
|
match s {
|
|
"common" => Some(ObjSymbolFlags::Common),
|
|
"weak" => Some(ObjSymbolFlags::Weak),
|
|
"global" => Some(ObjSymbolFlags::Global),
|
|
"local" => Some(ObjSymbolFlags::Local),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
#[inline]
|
|
fn symbol_data_kind_from_str(s: &str) -> Option<ObjDataKind> {
|
|
match s {
|
|
"byte" => Some(ObjDataKind::Byte),
|
|
"2byte" => Some(ObjDataKind::Byte2),
|
|
"4byte" => Some(ObjDataKind::Byte4),
|
|
"8byte" => Some(ObjDataKind::Byte8),
|
|
"float" => Some(ObjDataKind::Float),
|
|
"double" => Some(ObjDataKind::Double),
|
|
"string" => Some(ObjDataKind::String),
|
|
"wstring" => Some(ObjDataKind::String16),
|
|
"string_table" => Some(ObjDataKind::StringTable),
|
|
"wstring_table" => Some(ObjDataKind::String16Table),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Writes the splits of `obj` to `w`, grouped by unit in `obj.link_order` order.
///
/// Each unit is written as a `unit:` header, followed by one tab-indented
/// `section start:0x... end:0x...` line per split belonging to that unit, and then a
/// blank line. A split whose `end` is 0 is written with the start address of the next
/// split in iteration order (regardless of that split's unit), or 0 if it is the last.
///
/// # Errors
///
/// Fails on a write error or when `obj.section_at` cannot resolve a split's start
/// address.
pub fn write_splits<W: Write>(w: &mut W, obj: &ObjInfo) -> Result<()> {
    for unit in &obj.link_order {
        writeln!(w, "{}:", unit)?;

        // Every unit scans the full split list; peeking supplies an implicit end.
        let mut remaining = obj.splits_for_range(..).peekable();
        while let Some((start, split)) = remaining.next() {
            if &split.unit != unit {
                continue;
            }

            let end = if split.end > 0 {
                split.end
            } else {
                match remaining.peek() {
                    Some(&(next_start, _)) => next_start,
                    None => 0,
                }
            };
            let section = obj.section_at(start)?;
            writeln!(w, "\t{:<11} start:{:#010X} end:{:#010X}", section.name, start, end)?;
        }

        writeln!(w)?;
    }
    Ok(())
}
|
|
|
|
/// The result of parsing one line of a splits file.
enum SplitLine {
    /// A unit header line of the form `name:`.
    Unit {
        /// Unit name, without the trailing colon.
        name: String,
    },
    /// A section range line belonging to the most recent unit header.
    Section {
        /// Section name: the first token of the line, or the `rename:` value if given.
        name: String,
        /// Value of the `start:` attribute.
        start: u32,
        /// Value of the `end:` attribute.
        end: u32,
        /// Value of the `align:` attribute, if any.
        align: Option<u32>,
        /// Whether the bare `common` attribute was present.
        common: bool,
    },
    /// An empty line or a comment line; carries no data.
    None,
}
|
|
|
|
fn parse_split_line(line: &str) -> Result<SplitLine> {
|
|
static UNIT_LINE: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new("^\\s*(?P<name>[^\\s:]+)\\s*:\\s*$").unwrap());
|
|
static SECTION_LINE: Lazy<Regex> =
|
|
Lazy::new(|| Regex::new("^\\s*(?P<name>\\S+)\\s*(?P<attrs>.*)$").unwrap());
|
|
static COMMENT_LINE: Lazy<Regex> = Lazy::new(|| Regex::new("^\\s*(?://|#).*$").unwrap());
|
|
|
|
if line.is_empty() || COMMENT_LINE.is_match(line) {
|
|
Ok(SplitLine::None)
|
|
} else if let Some(captures) = UNIT_LINE.captures(line) {
|
|
let name = captures["name"].to_string();
|
|
Ok(SplitLine::Unit { name })
|
|
} else if let Some(captures) = SECTION_LINE.captures(line) {
|
|
let mut name = captures["name"].to_string();
|
|
let mut start: Option<u32> = None;
|
|
let mut end: Option<u32> = None;
|
|
let mut align: Option<u32> = None;
|
|
let mut common = false;
|
|
|
|
let attrs = captures["attrs"].split(' ');
|
|
for attr in attrs {
|
|
if let Some((attr, value)) = attr.split_once(':') {
|
|
match attr {
|
|
"start" => {
|
|
start = Some(parse_hex(value)?);
|
|
}
|
|
"end" => {
|
|
end = Some(parse_hex(value)?);
|
|
}
|
|
"align" => align = Some(u32::from_str(value)?),
|
|
"rename" => name = value.to_string(),
|
|
_ => bail!("Unknown split attribute '{name}'"),
|
|
}
|
|
} else {
|
|
match attr {
|
|
"common" => {
|
|
common = true;
|
|
if align.is_none() {
|
|
align = Some(4);
|
|
}
|
|
}
|
|
_ => bail!("Unknown split attribute '{attr}'"),
|
|
}
|
|
}
|
|
}
|
|
if let (Some(start), Some(end)) = (start, end) {
|
|
Ok(SplitLine::Section { name, start, end, align, common })
|
|
} else {
|
|
Err(anyhow!("Missing split attribute: '{line}'"))
|
|
}
|
|
} else {
|
|
Err(anyhow!("Failed to parse split line: '{line}'"))
|
|
}
|
|
}
|
|
|
|
pub fn apply_splits<R: BufRead>(r: R, obj: &mut ObjInfo) -> Result<()> {
|
|
enum SplitState {
|
|
None,
|
|
Unit(String),
|
|
}
|
|
let mut state = SplitState::None;
|
|
for result in r.lines() {
|
|
let line = match result {
|
|
Ok(line) => line,
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
let split_line = parse_split_line(&line)?;
|
|
match (&mut state, split_line) {
|
|
(SplitState::None | SplitState::Unit(_), SplitLine::Unit { name }) => {
|
|
obj.link_order.push(name.clone());
|
|
state = SplitState::Unit(name);
|
|
}
|
|
(SplitState::None, SplitLine::Section { name, .. }) => {
|
|
bail!("Section {} defined outside of unit", name);
|
|
}
|
|
(SplitState::Unit(unit), SplitLine::Section { name, start, end, align, common }) => {
|
|
obj.splits.nested_push(start, ObjSplit {
|
|
unit: unit.clone(),
|
|
end,
|
|
align,
|
|
common,
|
|
autogenerated: false,
|
|
});
|
|
obj.named_sections.insert(start, name);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|