Luke Street d9e1ae2777 MW 2.7 LD compatibility fixes & more
- More robust .comment section handling
- Auto-create .comment section for objects with common symbols (MW 2.7+ hack)
- Support loading REL modules in `dol split` (currently only for references)
- Add `dol diff` for quick diffing between linked ELF and expected symbols
- Add `dol apply` for applying linked ELF symbols to symbol config file
2023-08-08 23:34:00 -04:00

788 lines
27 KiB
Rust

use std::{
collections::{btree_map::Entry, hash_map, BTreeMap, HashMap},
fs,
fs::{DirBuilder, File},
io::Write,
path::{Path, PathBuf},
};
use anyhow::{anyhow, bail, Context, Result};
use argp::FromArgs;
use serde::{Deserialize, Serialize};
use crate::{
analysis::{
cfa::AnalyzerState,
objects::{detect_object_boundaries, detect_strings},
pass::{AnalysisPass, FindSaveRestSleds, FindTRKInterruptVectorTable},
signatures::{apply_signatures, apply_signatures_post},
tracker::Tracker,
},
obj::{
split::{is_linker_generated_object, split_obj, update_splits},
ObjDataKind, ObjInfo, ObjRelocKind, ObjSectionKind, ObjSymbol, ObjSymbolFlagSet,
ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, SymbolIndex,
},
util::{
asm::write_asm,
comment::MWComment,
config::{apply_splits, apply_symbols_file, write_splits_file, write_symbols_file},
dep::DepFile,
dol::process_dol,
elf::{process_elf, write_elf},
file::{buf_writer, map_file, map_reader, touch},
lcf::{asm_path_for_unit, generate_ldscript, obj_path_for_unit},
map::apply_map_file,
rel::process_rel,
},
};
// Top-level arguments of the `dol` command; wraps the selected subcommand.
// NOTE(review): the `///` line below is presumably surfaced by argp as help
// text, so it is treated as runtime text and left verbatim.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing DOL files.
#[argp(subcommand, name = "dol")]
pub struct Args {
#[argp(subcommand)]
// Subcommand to execute; `run` matches on this value.
command: SubCommand,
}
// Subcommands of `dol`. Each variant carries the argument struct of one
// subcommand and is dispatched by `run` to the function of the same name.
#[derive(FromArgs, PartialEq, Debug)]
#[argp(subcommand)]
enum SubCommand {
// `dol info`: handled by `info`.
Info(InfoArgs),
// `dol split`: handled by `split`.
Split(SplitArgs),
// `dol diff`: handled by `diff`.
Diff(DiffArgs),
// `dol apply`: handled by `apply`.
Apply(ApplyArgs),
}
// Arguments of `dol info`.
// NOTE(review): the `///` lines are presumably used by argp as help text and
// are therefore kept verbatim; explanatory notes use `//` instead.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Views DOL file information.
#[argp(subcommand, name = "info")]
pub struct InfoArgs {
#[argp(positional)]
/// DOL file
// Path handed to `process_dol` by `info`.
dol_file: PathBuf,
}
// Arguments of `dol split`.
// NOTE(review): the `///` lines are presumably used by argp as help text and
// are therefore kept verbatim; explanatory notes use `//` instead.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Splits a DOL into relocatable objects.
#[argp(subcommand, name = "split")]
pub struct SplitArgs {
#[argp(positional)]
/// input configuration file
// Opened and deserialized as YAML into `ProjectConfig` by `split`.
config: PathBuf,
#[argp(positional)]
/// output directory
// `split` writes `config.json`, `ldscript.lcf`, `dep` and the `asm`,
// `include` and `obj` subdirectories beneath this path.
out_dir: PathBuf,
#[argp(switch)]
/// skip updating splits & symbol files (for build systems)
// When set, `split` does not rewrite the symbols/splits files named in the config.
no_update: bool,
}
// Arguments of `dol diff`.
// NOTE(review): the `///` lines are presumably used by argp as help text and
// are therefore kept verbatim; explanatory notes use `//` instead.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Diffs symbols in a linked ELF.
#[argp(subcommand, name = "diff")]
pub struct DiffArgs {
#[argp(positional)]
/// input configuration file
// Opened and deserialized as YAML into `ProjectConfig` by `diff`.
config: PathBuf,
#[argp(positional)]
/// linked ELF
// Loaded with `process_elf`; its symbols are compared against the expected ones.
elf_file: PathBuf,
#[argp(positional)]
/// map file
// Applied to the loaded ELF object through `apply_map_file`.
map_file: PathBuf,
}
// Arguments of `dol apply`.
// NOTE(review): the `///` lines are presumably used by argp as help text and
// are therefore kept verbatim; explanatory notes use `//` instead.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Applies updated symbols from a linked ELF to the project configuration.
#[argp(subcommand, name = "apply")]
pub struct ApplyArgs {
#[argp(positional)]
/// input configuration file
// Opened and deserialized as YAML into `ProjectConfig` by `apply`.
config: PathBuf,
#[argp(positional)]
/// linked ELF
// Loaded with `process_elf`; source of the updated symbol names, sizes and scopes.
elf_file: PathBuf,
#[argp(positional)]
/// map file
// Applied to the loaded ELF object through `apply_map_file`.
map_file: PathBuf,
}
/// Always yields `true`.
///
/// Referenced by name from `#[serde(default = "bool_true")]` so that boolean
/// options missing from a config file are treated as enabled.
#[inline]
fn bool_true() -> bool {
    true
}
/// Project configuration, deserialized from the YAML config file passed to the
/// `split`, `diff` and `apply` subcommands.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ProjectConfig {
    /// Path to the main DOL file (loaded with `process_dol`).
    pub object: PathBuf,
    /// Optional path to the splits file.
    pub splits: Option<PathBuf>,
    /// Optional path to the symbols file. `apply` requires it to be set.
    pub symbols: Option<PathBuf>,
    /// Version of the MW `.comment` section format.
    /// If not present, no `.comment` sections will be written.
    pub mw_comment_version: Option<u8>,
    /// REL modules to load alongside the DOL.
    ///
    /// Defaults to an empty list, so configuration files that do not mention
    /// `modules` at all still deserialize.
    #[serde(default)]
    pub modules: Vec<ModuleConfig>,
    // Analysis options
    /// Run object boundary detection during `split`. Defaults to `true`.
    #[serde(default = "bool_true")]
    pub detect_objects: bool,
    /// Run string detection during `split`. Defaults to `true`.
    #[serde(default = "bool_true")]
    pub detect_strings: bool,
    /// Intended to control whether disassembly is written. Defaults to `true`.
    #[serde(default = "bool_true")]
    pub write_asm: bool,
    /// Forwarded to `generate_ldscript` when the linker script is generated.
    /// Defaults to `true`.
    #[serde(default = "bool_true")]
    pub auto_force_files: bool,
}
/// Configuration entry for one additional module listed under
/// `ProjectConfig::modules`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ModuleConfig {
/// Path to the module file; `split` maps it and parses it with `process_rel`.
pub object: PathBuf,
}
/// One entry of the output configuration written by `split`, describing a
/// single object file that was produced.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputUnit {
/// Path the object file was written to.
pub object: PathBuf,
/// Name of the unit, taken from the link order.
pub name: String,
/// Copied from the link-order unit's `autogenerated` flag.
pub autogenerated: bool,
}
/// Output configuration that `split` serializes as pretty-printed JSON to
/// `config.json` in the output directory.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct OutputConfig {
/// Produced units, pushed in link order.
pub units: Vec<OutputUnit>,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Info(c_args) => info(c_args),
SubCommand::Split(c_args) => split(c_args),
SubCommand::Diff(c_args) => diff(c_args),
SubCommand::Apply(c_args) => apply(c_args),
}
}
/// Handler for `dol info`.
///
/// Loads the DOL, runs signature and function analysis on it, then prints the
/// entry point, the section table and the discovered symbols to stdout.
/// Symbols whose name starts with `@` or `fn_` are left out of the listing.
fn info(args: InfoArgs) -> Result<()> {
    let mut dol = process_dol(&args.dol_file)?;
    apply_signatures(&mut dol)?;

    // Function discovery plus the extra analysis passes.
    let mut analyzer = AnalyzerState::default();
    analyzer.detect_functions(&dol)?;
    log::info!("Discovered {} functions", analyzer.function_slices.len());
    FindTRKInterruptVectorTable::execute(&mut analyzer, &dol)?;
    FindSaveRestSleds::execute(&mut analyzer, &dol)?;
    analyzer.apply(&mut dol)?;
    apply_signatures_post(&mut dol)?;

    println!("{}:", dol.name);
    println!("Entry point: {:#010X}", dol.entry);

    println!("\nSections:");
    println!("\t{: >10} | {: <10} | {: <10} | {: <10}", "Name", "Address", "Size", "File Off");
    for sect in &dol.sections {
        println!(
            "\t{: >10} | {:#010X} | {: <#10X} | {: <#10X}",
            sect.name, sect.address, sect.size, sect.file_offset
        );
    }

    println!("\nDiscovered symbols:");
    println!("\t{: >23} | {: <10} | {: <10}", "Name", "Address", "Size");
    let listed = dol
        .symbols
        .iter_ordered()
        .map(|(_, sym)| sym)
        .filter(|sym| !(sym.name.starts_with('@') || sym.name.starts_with("fn_")));
    for sym in listed {
        if !sym.size_known {
            // Without a known size, show whether the symbol is absolute (no section).
            let size_label = if sym.section.is_some() { "?" } else { "ABS" };
            println!("\t{: >23} | {:#010X} | {: <10}", sym.name, sym.address, size_label);
            continue;
        }
        println!("\t{: >23} | {:#010X} | {: <#10X}", sym.name, sym.address, sym.size);
    }

    println!("\n{} discovered functions from exception table", dol.known_functions.len());
    Ok(())
}
/// Handler for `dol split`: splits a DOL into relocatable objects.
///
/// Steps, in order:
/// 1. Load the YAML project config, the DOL and every module listed in it.
/// 2. Apply the splits file (if it exists) and the symbols file, then run
///    signature analysis and mark DOL symbols referenced by module relocations.
/// 3. Run function, relocation, object-boundary and string analysis and
///    adjust the splits.
/// 4. Unless `no_update` is set, write the symbols and splits files back.
/// 5. Write the object files, `config.json`, `ldscript.lcf`, the disassembly
///    (only when `write_asm` is enabled) and the `dep` file to the output directory.
///
/// # Errors
/// Fails on any I/O, parsing or analysis error, when two modules share a
/// module ID, when a module relocation targets the middle of a known symbol,
/// or when two units in the link order share a name.
fn split(args: SplitArgs) -> Result<()> {
    log::info!("Loading {}", args.config.display());
    let mut config_file = File::open(&args.config)
        .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?;
    let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?;

    let out_config_path = args.out_dir.join("config.json");
    let mut dep = DepFile::new(out_config_path.clone());

    log::info!("Loading {}", config.object.display());
    let mut obj = process_dol(&config.object)?;
    dep.push(config.object.clone());

    if let Some(comment_version) = config.mw_comment_version {
        obj.mw_comment = Some(MWComment::new(comment_version)?);
    }

    let mut modules = BTreeMap::<u32, ObjInfo>::new();
    for module_config in &config.modules {
        log::info!("Loading {}", module_config.object.display());
        let map = map_file(&module_config.object)?;
        let rel_obj = process_rel(map_reader(&map))?;
        match modules.entry(rel_obj.module_id) {
            Entry::Vacant(e) => e.insert(rel_obj),
            // Report the ID that actually collided (the map key), not the
            // module ID of the main DOL object.
            Entry::Occupied(e) => bail!("Duplicate module ID {}", e.key()),
        };
        dep.push(module_config.object.clone());
    }

    if let Some(splits_path) = &config.splits {
        dep.push(splits_path.clone());
        if splits_path.is_file() {
            let map = map_file(splits_path)?;
            apply_splits(map_reader(&map), &mut obj)?;
        }
    }

    let mut state = AnalyzerState::default();

    if let Some(symbols_path) = &config.symbols {
        dep.push(symbols_path.clone());
        apply_symbols_file(symbols_path, &mut obj)?;
    }

    // TODO move before symbols?
    log::info!("Performing signature analysis");
    apply_signatures(&mut obj)?;

    if !modules.is_empty() {
        log::info!("Applying module relocations");
        for (module_id, module_obj) in modules {
            for rel_reloc in &module_obj.unresolved_relocations {
                // TODO also apply inter-module relocations
                // Only relocations whose target module ID is 0 are handled here.
                if rel_reloc.module_id != 0 {
                    continue;
                }
                let target = rel_reloc.addend;
                if let Some((symbol_index, symbol)) =
                    obj.symbols.for_relocation(target, rel_reloc.kind)?
                {
                    let addend = target as i64 - symbol.address as i64;
                    if addend != 0 {
                        bail!(
                            "Module {} relocation to {:#010X} for symbol {} has non-zero addend {:#010X}",
                            module_id,
                            symbol.address,
                            symbol.name,
                            addend
                        );
                    }
                    obj.symbols.set_externally_referenced(symbol_index, true);
                } else {
                    // Add label
                    let target_section = obj.section_at(target)?;
                    obj.symbols.add_direct(ObjSymbol {
                        name: format!("lbl_{:08X}", target),
                        demangled_name: None,
                        address: target as u64,
                        section: Some(target_section.index),
                        size: 0,
                        size_known: false,
                        flags: ObjSymbolFlagSet(ObjSymbolFlags::ForceActive.into()),
                        kind: Default::default(),
                        align: None,
                        data_kind: ObjDataKind::Unknown,
                    })?;
                }
            }
        }
    }

    log::info!("Detecting function boundaries");
    state.detect_functions(&obj)?;
    log::info!("Discovered {} functions", state.function_slices.len());

    FindTRKInterruptVectorTable::execute(&mut state, &obj)?;
    FindSaveRestSleds::execute(&mut state, &obj)?;
    state.apply(&mut obj)?;

    apply_signatures_post(&mut obj)?;

    log::info!("Performing relocation analysis");
    let mut tracker = Tracker::new(&obj);
    tracker.process(&obj)?;

    log::info!("Applying relocations");
    tracker.apply(&mut obj, false)?;

    if config.detect_objects {
        log::info!("Detecting object boundaries");
        detect_object_boundaries(&mut obj)?;
    }

    if config.detect_strings {
        log::info!("Detecting strings");
        detect_strings(&mut obj)?;
    }

    log::info!("Adjusting splits");
    update_splits(&mut obj)?;

    if !args.no_update {
        if let Some(symbols_path) = &config.symbols {
            write_symbols_file(symbols_path, &obj)?;
        }

        if let Some(splits_path) = &config.splits {
            write_splits_file(splits_path, &obj)?;
        }
    }

    log::info!("Splitting {} objects", obj.link_order.len());
    let split_objs = split_obj(&obj)?;

    // Create out dirs
    touch(&args.out_dir)?;
    let include_dir = args.out_dir.join("include");
    let obj_dir = args.out_dir.join("obj");
    DirBuilder::new().recursive(true).create(&include_dir)?;
    fs::write(include_dir.join("macros.inc"), include_str!("../../assets/macros.inc"))?;

    log::info!("Writing object files");
    // Build every ELF in memory first so that duplicate unit names are
    // rejected before anything is written to disk.
    let mut file_map = HashMap::<String, Vec<u8>>::new();
    for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) {
        let out_obj = write_elf(split_obj)?;
        match file_map.entry(unit.name.clone()) {
            hash_map::Entry::Vacant(e) => e.insert(out_obj),
            hash_map::Entry::Occupied(_) => bail!("Duplicate file {}", unit.name),
        };
    }

    let mut out_config = OutputConfig::default();
    for unit in &obj.link_order {
        let object = file_map
            .get(&unit.name)
            .ok_or_else(|| anyhow!("Failed to find object file for unit '{}'", unit.name))?;
        let out_path = obj_dir.join(obj_path_for_unit(&unit.name));
        out_config.units.push(OutputUnit {
            object: out_path.clone(),
            name: unit.name.clone(),
            autogenerated: unit.autogenerated,
        });
        if let Some(parent) = out_path.parent() {
            DirBuilder::new().recursive(true).create(parent)?;
        }
        fs::write(&out_path, object)
            .with_context(|| format!("Failed to write '{}'", out_path.display()))?;
    }
    {
        let mut out_file = buf_writer(&out_config_path)?;
        serde_json::to_writer_pretty(&mut out_file, &out_config)?;
        out_file.flush()?;
    }

    // Generate ldscript.lcf
    fs::write(
        args.out_dir.join("ldscript.lcf"),
        generate_ldscript(&obj, config.auto_force_files)?,
    )?;

    // Honor the `write_asm` config option (defaults to true).
    if config.write_asm {
        log::info!("Writing disassembly");
        let asm_dir = args.out_dir.join("asm");
        for (unit, split_obj) in obj.link_order.iter().zip(&split_objs) {
            let out_path = asm_dir.join(asm_path_for_unit(&unit.name));

            if let Some(parent) = out_path.parent() {
                DirBuilder::new().recursive(true).create(parent)?;
            }
            let mut w = buf_writer(&out_path)?;
            write_asm(&mut w, split_obj)?;
            w.flush()?;
        }
    }

    // Write dep file
    {
        let dep_path = args.out_dir.join("dep");
        let mut dep_file = buf_writer(dep_path)?;
        dep.write(&mut dep_file)?;
        dep_file.flush()?;
    }

    // (debugging) validate against ELF
    // if let Some(file) = &args.elf_file {
    //     validate(&obj, file, &state)?;
    // }
    Ok(())
}
/// Debugging aid: compares the analysed object against a reference ELF and
/// logs every discrepancy that is found.
///
/// Checks performed:
/// - section kinds and names, matched by section index;
/// - function starts and ends in `state.function_bounds` against the ELF's
///   symbols in code sections;
/// - relocations, in both directions (missing ones and ones that are not real).
///
/// Mismatches are only logged. An error is returned solely when the ELF
/// cannot be loaded or a relocation map cannot be built.
// Not called at the moment; the call site in `split` is commented out.
#[allow(dead_code)]
fn validate<P: AsRef<Path>>(obj: &ObjInfo, elf_file: P, state: &AnalyzerState) -> Result<()> {
    let real_obj = process_elf(elf_file)?;
    for real_section in &real_obj.sections {
        let obj_section = match obj.sections.get(real_section.index) {
            Some(v) => v,
            None => {
                log::error!(
                    "Section {} {} doesn't exist in DOL",
                    real_section.index,
                    real_section.name
                );
                continue;
            }
        };
        if obj_section.kind != real_section.kind || obj_section.name != real_section.name {
            log::warn!(
                "Section mismatch: {} {:?} ({}) should be {} {:?}",
                obj_section.name,
                obj_section.kind,
                obj_section.index,
                real_section.name,
                real_section.kind
            );
        }
    }

    // Start address -> name for every symbol in the ELF's code sections.
    let mut real_functions = BTreeMap::<u32, String>::new();
    for section in &real_obj.sections {
        if section.kind != ObjSectionKind::Code {
            continue;
        }
        for (_symbol_idx, symbol) in real_obj.symbols.for_section(section) {
            real_functions.insert(symbol.address as u32, symbol.name.clone());
            match state.function_bounds.get(&(symbol.address as u32)) {
                Some(&end) => {
                    if symbol.size > 0 && end != (symbol.address + symbol.size) as u32 {
                        log::warn!(
                            "Function {:#010X} ({}) ends at {:#010X}, expected {:#010X}",
                            symbol.address,
                            symbol.name,
                            end,
                            symbol.address + symbol.size
                        );
                    }
                }
                None => {
                    log::warn!(
                        "Function {:#010X} ({}) not discovered!",
                        symbol.address,
                        symbol.name
                    );
                }
            }
        }
    }

    for (&start, &end) in &state.function_bounds {
        // Entries with an end of 0 are skipped.
        if end == 0 {
            continue;
        }
        if real_functions.contains_key(&start) {
            continue;
        }
        // The closest real function starting before `start`. There may be
        // none at all, which must not abort the whole validation run.
        match real_functions.range(..start).next_back() {
            Some((real_addr, real_name)) => {
                log::warn!(
                    "Function {:#010X} not real (actually a part of {} @ {:#010X})",
                    start,
                    real_name,
                    real_addr
                );
            }
            None => {
                log::warn!("Function {:#010X} not real (no preceding function in ELF)", start);
            }
        }
    }

    // return Ok(()); // TODO

    for real_section in &real_obj.sections {
        let obj_section = match obj.sections.get(real_section.index) {
            Some(v) => v,
            None => continue,
        };
        let real_map = real_section.build_relocation_map()?;
        let obj_map = obj_section.build_relocation_map()?;

        // Every relocation of the ELF must have a matching one in the analysed object.
        for (&real_addr, &real_reloc_idx) in &real_map {
            let real_reloc = &real_section.relocations[real_reloc_idx];
            let real_symbol = real_obj.symbols.at(real_reloc.target_symbol);
            let obj_reloc = match obj_map.get(&real_addr) {
                Some(v) => &obj_section.relocations[*v],
                None => {
                    // Ignore GCC local jump branches
                    if real_symbol.kind == ObjSymbolKind::Section
                        && real_section.kind == ObjSectionKind::Code
                        && real_reloc.addend != 0
                        && matches!(
                            real_reloc.kind,
                            ObjRelocKind::PpcRel14 | ObjRelocKind::PpcRel24
                        )
                    {
                        continue;
                    }
                    log::warn!(
                        "Relocation not found @ {:#010X} {:?} to {:#010X}+{:X} ({})",
                        real_addr,
                        real_reloc.kind,
                        real_symbol.address,
                        real_reloc.addend,
                        real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name)
                    );
                    continue;
                }
            };
            let obj_symbol = obj.symbols.at(obj_reloc.target_symbol);
            if real_reloc.kind != obj_reloc.kind {
                log::warn!(
                    "Relocation type mismatch @ {:#010X}: {:?} != {:?}",
                    real_addr,
                    obj_reloc.kind,
                    real_reloc.kind
                );
                continue;
            }
            // Compare the final target (symbol address + addend) of both relocations.
            if real_symbol.address as i64 + real_reloc.addend
                != obj_symbol.address as i64 + obj_reloc.addend
            {
                log::warn!(
                    "Relocation target mismatch @ {:#010X} {:?}: {:#010X}+{:X} != {:#010X}+{:X} ({})",
                    real_addr,
                    real_reloc.kind,
                    obj_symbol.address,
                    obj_reloc.addend,
                    real_symbol.address,
                    real_reloc.addend,
                    real_symbol.demangled_name.as_ref().unwrap_or(&real_symbol.name)
                );
                continue;
            }
        }

        // Conversely, report relocations that exist only in the analysed object.
        for (&obj_addr, &obj_reloc_idx) in &obj_map {
            let obj_reloc = &obj_section.relocations[obj_reloc_idx];
            let obj_symbol = obj.symbols.at(obj_reloc.target_symbol);
            if !real_map.contains_key(&obj_addr) {
                log::warn!(
                    "Relocation not real @ {:#010X} {:?} to {:#010X}+{:X} ({})",
                    obj_addr,
                    obj_reloc.kind,
                    obj_symbol.address,
                    obj_reloc.addend,
                    obj_symbol.demangled_name.as_ref().unwrap_or(&obj_symbol.name)
                );
                continue;
            }
        }
    }
    Ok(())
}
/// Handler for `dol diff`.
///
/// Loads the expected symbols (DOL plus symbols file, when configured) and the
/// linked ELF with its map file, then checks that each expected symbol has a
/// counterpart at the same address in the linked ELF. A counterpart matches
/// when its name starts with the expected name (a size difference is logged
/// as an error), or, failing that, when kind, size and the underlying bytes
/// are all equal. Details are logged for the first symbol without a match and
/// the comparison stops there.
fn diff(args: DiffArgs) -> Result<()> {
    log::info!("Loading {}", args.config.display());
    let mut config_file = File::open(&args.config)
        .with_context(|| format!("Failed to open config file '{}'", args.config.display()))?;
    let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?;

    log::info!("Loading {}", config.object.display());
    let mut obj = process_dol(&config.object)?;

    if let Some(symbols_path) = &config.symbols {
        apply_symbols_file(symbols_path, &mut obj)?;
    }

    log::info!("Loading {}", args.elf_file.display());
    let mut linked_obj = process_elf(&args.elf_file)?;

    log::info!("Loading {}", args.map_file.display());
    apply_map_file(&args.map_file, &mut linked_obj)?;

    for expected in obj.symbols.iter() {
        // Prefer a symbol with the same name at this address and fall back to
        // one of the same kind.
        let by_name = linked_obj
            .symbols
            .at_address(expected.address as u32)
            .find(|(_, sym)| sym.name == expected.name);
        let candidate = match by_name {
            Some(hit) => Some(hit),
            None => linked_obj
                .symbols
                .at_address(expected.address as u32)
                .find(|(_, sym)| sym.kind == expected.kind),
        };

        let matched = match candidate {
            None => false,
            Some((_, actual)) if actual.name.starts_with(&expected.name) => {
                if actual.size != expected.size {
                    log::error!(
                        "Expected {} (type {:?}) to have size {:#X}, but found {:#X}",
                        expected.name,
                        expected.kind,
                        expected.size,
                        actual.size
                    );
                }
                true
            }
            Some((_, actual)) if actual.kind == expected.kind && actual.size == expected.size => {
                // Fuzzy match
                let expected_bytes = obj
                    .section_data(
                        expected.address as u32,
                        expected.address as u32 + expected.size as u32,
                    )?
                    .1;
                let actual_bytes = linked_obj
                    .section_data(
                        actual.address as u32,
                        actual.address as u32 + actual.size as u32,
                    )?
                    .1;
                expected_bytes == actual_bytes
            }
            Some(_) => false,
        };
        if matched {
            continue;
        }

        log::error!(
            "Expected to find symbol {} (type {:?}, size {:#X}) at {:#010X}",
            expected.name,
            expected.kind,
            expected.size,
            expected.address
        );
        for (_, other) in linked_obj.symbols.at_address(expected.address as u32) {
            log::error!(
                "At {:#010X}, found: {} (type {:?}, size {:#X})",
                other.address,
                other.name,
                other.kind,
                other.size,
            );
        }
        for (_, other) in linked_obj.symbols.for_name(&expected.name) {
            log::error!(
                "Instead, found {} (type {:?}, size {:#X}) at {:#010X}",
                other.name,
                other.kind,
                other.size,
                other.address,
            );
        }
        // Only the first mismatch is reported.
        break;
    }

    Ok(())
}
fn apply(args: ApplyArgs) -> Result<()> {
log::info!("Loading {}", args.config.display());
let mut config_file = File::open(&args.config)
.with_context(|| format!("Failed to open config file '{}'", args.config.display()))?;
let config: ProjectConfig = serde_yaml::from_reader(&mut config_file)?;
log::info!("Loading {}", config.object.display());
let mut obj = process_dol(&config.object)?;
if let Some(symbols_path) = &config.symbols {
if !apply_symbols_file(symbols_path, &mut obj)? {
bail!("Symbols file '{}' does not exist", symbols_path.display());
}
} else {
bail!("No symbols file specified in config");
}
log::info!("Loading {}", args.elf_file.display());
let mut linked_obj = process_elf(&args.elf_file)?;
log::info!("Loading {}", args.map_file.display());
apply_map_file(&args.map_file, &mut linked_obj)?;
let mut replacements: Vec<(SymbolIndex, Option<ObjSymbol>)> = vec![];
for (orig_idx, orig_sym) in obj.symbols.iter().enumerate() {
let linked_sym = linked_obj
.symbols
.at_address(orig_sym.address as u32)
.find(|(_, sym)| sym.name == orig_sym.name)
.or_else(|| {
linked_obj
.symbols
.at_address(orig_sym.address as u32)
.find(|(_, sym)| sym.kind == orig_sym.kind)
});
if let Some((_, linked_sym)) = linked_sym {
let mut updated_sym = orig_sym.clone();
let is_globalized = linked_sym.name.ends_with(&format!("_{:08X}", linked_sym.address));
if (is_globalized && !linked_sym.name.starts_with(&orig_sym.name))
|| (!is_globalized && linked_sym.name != orig_sym.name)
{
log::info!(
"Changing name of {} (type {:?}) to {}",
orig_sym.name,
orig_sym.kind,
linked_sym.name
);
updated_sym.name = linked_sym.name.clone();
}
if linked_sym.size != orig_sym.size {
log::info!(
"Changing size of {} (type {:?}) from {:#X} to {:#X}",
orig_sym.name,
orig_sym.kind,
orig_sym.size,
linked_sym.size
);
updated_sym.size = linked_sym.size;
}
let linked_scope = linked_sym.flags.scope();
if linked_scope != ObjSymbolScope::Unknown
&& !is_globalized
&& linked_scope != orig_sym.flags.scope()
{
log::info!(
"Changing scope of {} (type {:?}) from {:?} to {:?}",
orig_sym.name,
orig_sym.kind,
orig_sym.flags.scope(),
linked_scope
);
updated_sym.flags.set_scope(linked_scope);
}
if updated_sym != *orig_sym {
replacements.push((orig_idx, Some(updated_sym)));
}
} else {
log::warn!(
"Symbol not in linked ELF: {} (type {:?}, size {:#X}) at {:#010X}",
orig_sym.name,
orig_sym.kind,
orig_sym.size,
orig_sym.address
);
// TODO
// replacements.push((orig_idx, None));
}
}
// Add symbols from the linked object that aren't in the original
for linked_sym in linked_obj.symbols.iter() {
if matches!(linked_sym.kind, ObjSymbolKind::Section)
|| is_linker_generated_object(&linked_sym.name)
{
continue;
}
let orig_sym = obj
.symbols
.at_address(linked_sym.address as u32)
.find(|(_, sym)| sym.name == linked_sym.name)
.or_else(|| {
linked_obj
.symbols
.at_address(linked_sym.address as u32)
.find(|(_, sym)| sym.kind == linked_sym.kind)
});
if orig_sym.is_none() {
log::info!(
"Adding symbol {} (type {:?}, size {:#X}) at {:#010X}",
linked_sym.name,
linked_sym.kind,
linked_sym.size,
linked_sym.address
);
obj.symbols.add_direct(linked_sym.clone())?;
}
}
// Apply replacements
for (idx, replacement) in replacements {
if let Some(replacement) = replacement {
obj.symbols.replace(idx, replacement)?;
} else {
// TODO
// obj.symbols.remove(idx)?;
}
}
write_symbols_file(config.symbols.as_ref().unwrap(), &obj)?;
Ok(())
}