Initial commit

This commit is contained in:
2022-11-27 01:37:29 -05:00
commit 636cbea59c
21 changed files with 2402 additions and 0 deletions

65
src/argh_version.rs Normal file
View File

@@ -0,0 +1,65 @@
// From https://gist.github.com/suluke/e0c672492126be0a4f3b4f0e1115d77c
//! Extend `argh` to be better integrated with the `cargo` ecosystem
//!
//! For now, this only adds a --version/-V option which causes early-exit.
use argh::{FromArgs, TopLevelCommand};
/// Newtype around a `FromArgs` type `T`. Its own `FromArgs` implementation
/// additionally recognizes a `--version`/`-V` switch before delegating to `T`.
struct ArgsOrVersion<T: FromArgs>(T);
// Marker impl: lets the wrapper be used with `argh::from_env`, as `from_env` in this file does.
impl<T> TopLevelCommand for ArgsOrVersion<T> where T: FromArgs {}
impl<T> FromArgs for ArgsOrVersion<T>
where T: FromArgs
{
    /// Parses `args`, intercepting `--version`/`-V` before delegating to `T`.
    ///
    /// * `--version`/`-V` alone: early-exits successfully with
    ///   `<command> <VERGEN_BUILD_SEMVER> <VERGEN_GIT_SHA>` as output.
    /// * `--help`: early-exits successfully with `T`'s help text plus a line
    ///   describing the version switch.
    /// * Anything else: the result of `T::from_args` is returned unchanged.
    fn from_args(command_name: &[&str], args: &[&str]) -> Result<Self, argh::EarlyExit> {
        /// Also use argh for catching `--version`-only invocations
        #[derive(FromArgs)]
        struct Version {
            /// print version information and exit
            #[argh(switch, short = 'V')]
            pub version: bool,
        }

        let exit = match Version::from_args(command_name, args) {
            Ok(Version { version: true }) => {
                return Err(argh::EarlyExit {
                    output: format!(
                        "{} {} {}",
                        command_name.first().unwrap_or(&""),
                        env!("VERGEN_BUILD_SEMVER"),
                        env!("VERGEN_GIT_SHA"),
                    ),
                    status: Ok(()),
                });
            }
            // Parsed as `Version` but the switch is not set: seems args are empty.
            Ok(_) => return T::from_args(command_name, args).map(Self),
            Err(exit) => exit,
        };

        // A failed early exit means the arguments are not understood by
        // `Version`; they belong to `T`.
        if exit.status.is_err() {
            return T::from_args(command_name, args).map(Self);
        }

        // A successful early exit must have been `--help`: fetch the real help from `T`.
        let help = match T::from_args(command_name, &["--help"]) {
            Ok(_) => unreachable!(),
            Err(exit) => exit.output,
        };
        Err(argh::EarlyExit {
            // Follow argh's option layout (two-space indent, description at
            // column 20) so the extra line aligns with the generated options.
            output: format!("{}  -V, --version     print version information and exit", help),
            status: Ok(()),
        })
    }
}
/// Create a `FromArgs` type from the current processs `env::args`.
///
/// This function will exit early from the current process if argument parsing was unsuccessful or if information like `--help` was requested.
/// Error messages will be printed to stderr, and `--help` output to stdout.
pub fn from_env<T>() -> T
where T: TopLevelCommand {
argh::from_env::<ArgsOrVersion<T>>().0
}

26
src/cmd/demangle.rs Normal file
View File

@@ -0,0 +1,26 @@
use anyhow::{Error, Result};
use argh::FromArgs;
use cwdemangle::{demangle, DemangleOptions};
// Command-line arguments of the `demangle` subcommand.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Demangle a CodeWarrior C++ symbol.
#[argh(subcommand, name = "demangle")]
pub struct Args {
#[argh(positional)]
/// symbol to demangle
symbol: String,
// Inverted into `DemangleOptions::omit_empty_parameters` by `run`.
#[argh(switch)]
/// disable replacing `(void)` with `()`
keep_void: bool,
}
pub fn run(args: Args) -> Result<()> {
let options = DemangleOptions { omit_empty_parameters: !args.keep_void };
match demangle(args.symbol.as_str(), &options) {
Some(symbol) => {
println!("{}", symbol);
Ok(())
}
None => Err(Error::msg("Failed to demangle symbol")),
}
}

173
src/cmd/elf2dol.rs Normal file
View File

@@ -0,0 +1,173 @@
use std::{
fs::File,
io::{BufWriter, Seek, SeekFrom, Write},
};
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use memmap2::MmapOptions;
use object::{Architecture, Object, ObjectKind, ObjectSection, SectionKind};
// Command-line arguments of the `elf2dol` subcommand.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Converts an ELF file to a DOL file.
#[argh(subcommand, name = "elf2dol")]
pub struct Args {
#[argh(positional)]
/// path to input ELF
elf_file: String,
#[argh(positional)]
/// path to output DOL
dol_file: String,
}
/// One text or data section entry recorded for the DOL header.
#[derive(Debug, Clone, Default)]
pub struct DolSection {
/// File offset in the output DOL at which the section data is written.
pub offset: u32,
/// Section address taken from the ELF, truncated to 32 bits.
pub address: u32,
/// Section size rounded up to a multiple of 32 bytes.
pub size: u32,
}
/// In-memory form of the DOL header that `run` fills in and then serializes.
#[derive(Debug, Clone, Default)]
pub struct DolHeader {
/// ELF sections of kind `Text`, in ELF section order.
pub text_sections: Vec<DolSection>,
/// ELF sections of kind `Data` or `ReadOnlyData`, in ELF section order.
pub data_sections: Vec<DolSection>,
/// Address of the first uninitialized-data section encountered (0 if none).
pub bss_address: u32,
/// Distance from `bss_address` to the end of the last uninitialized-data section.
pub bss_size: u32,
/// ELF entry point, truncated to 32 bits.
pub entry_point: u32,
}
/// Maximum number of text sections `run` accepts (the header tables written
/// by `run` leave room for 7 four-byte text entries before the data entries).
const MAX_TEXT_SECTIONS: usize = 7;
/// Maximum number of data sections `run` accepts (11 four-byte data entries per table).
const MAX_DATA_SECTIONS: usize = 11;
/// Zero bytes used by `write_aligned` to pad section data to a 32-byte boundary.
const ZERO_BUF: [u8; 32] = [0u8; 32];
/// Converts the ELF at `args.elf_file` into a DOL file at `args.dol_file`.
///
/// The input must be a big-endian PowerPC executable. Section data is written
/// starting at file offset `0x100`, each section padded to 32 bytes, and the
/// header tables (offsets, addresses, sizes, BSS range, entry point) are
/// written afterwards at the start of the file.
///
/// # Errors
///
/// Fails if either file cannot be opened/created, the ELF cannot be parsed or
/// has an unexpected architecture/endianness/kind, there are more text or data
/// sections than the header can hold, or any write (including the final
/// flush) fails.
pub fn run(args: Args) -> Result<()> {
    let elf_file = File::open(&args.elf_file)
        .with_context(|| format!("Failed to open ELF file '{}'", args.elf_file))?;
    // SAFETY: the mapping is only read; this assumes the input file is not
    // modified or truncated by another process while it is mapped.
    let map = unsafe { MmapOptions::new().map(&elf_file) }
        .with_context(|| format!("Failed to mmap binary: '{}'", args.elf_file))?;
    let obj_file = object::read::File::parse(&*map)?;
    match obj_file.architecture() {
        Architecture::PowerPc => {}
        arch => return Err(Error::msg(format!("Unexpected architecture: {:?}", arch))),
    };
    if obj_file.is_little_endian() {
        return Err(Error::msg("Expected big endian"));
    }
    match obj_file.kind() {
        ObjectKind::Executable => {}
        kind => return Err(Error::msg(format!("Unexpected ELF type: {:?}", kind))),
    }

    let mut out = BufWriter::new(
        File::create(&args.dol_file)
            .with_context(|| format!("Failed to create DOL file '{}'", args.dol_file))?,
    );
    let mut header = DolHeader { entry_point: obj_file.entry() as u32, ..Default::default() };
    // Section data starts right after the 0x100-byte header area.
    let mut offset = 0x100u32;

    // Text sections
    for section in obj_file.sections() {
        if section.kind() != SectionKind::Text {
            continue;
        }
        let address = section.address() as u32;
        let size = align32(section.size() as u32);
        header.text_sections.push(DolSection { offset, address, size });
        out.seek(SeekFrom::Start(offset as u64))?;
        write_aligned(&mut out, section.data()?)?;
        offset += size;
    }

    // Data sections
    for section in obj_file.sections() {
        if section.kind() != SectionKind::Data && section.kind() != SectionKind::ReadOnlyData {
            continue;
        }
        let address = section.address() as u32;
        let size = align32(section.size() as u32);
        header.data_sections.push(DolSection { offset, address, size });
        out.seek(SeekFrom::Start(offset as u64))?;
        write_aligned(&mut out, section.data()?)?;
        offset += size;
    }

    // BSS sections: all uninitialized-data sections are merged into one range
    // that starts at the first one seen and ends at the end of the last one.
    for section in obj_file.sections() {
        if section.kind() != SectionKind::UninitializedData {
            continue;
        }
        let address = section.address() as u32;
        let size = section.size() as u32;
        if header.bss_address == 0 {
            header.bss_address = address;
        }
        header.bss_size = (address + size) - header.bss_address;
    }

    if header.text_sections.len() > MAX_TEXT_SECTIONS {
        return Err(Error::msg(format!(
            "Too many text sections: {} / {}",
            header.text_sections.len(),
            MAX_TEXT_SECTIONS
        )));
    }
    if header.data_sections.len() > MAX_DATA_SECTIONS {
        return Err(Error::msg(format!(
            "Too many data sections: {} / {}",
            header.data_sections.len(),
            MAX_DATA_SECTIONS
        )));
    }

    // Offsets
    out.rewind()?;
    for section in &header.text_sections {
        out.write_all(&section.offset.to_be_bytes())?;
    }
    out.seek(SeekFrom::Start(0x1c))?;
    for section in &header.data_sections {
        out.write_all(&section.offset.to_be_bytes())?;
    }

    // Addresses
    out.seek(SeekFrom::Start(0x48))?;
    for section in &header.text_sections {
        out.write_all(&section.address.to_be_bytes())?;
    }
    out.seek(SeekFrom::Start(0x64))?;
    for section in &header.data_sections {
        out.write_all(&section.address.to_be_bytes())?;
    }

    // Sizes
    out.seek(SeekFrom::Start(0x90))?;
    for section in &header.text_sections {
        out.write_all(&section.size.to_be_bytes())?;
    }
    out.seek(SeekFrom::Start(0xac))?;
    for section in &header.data_sections {
        out.write_all(&section.size.to_be_bytes())?;
    }

    // BSS + entry
    out.seek(SeekFrom::Start(0xd8))?;
    out.write_all(&header.bss_address.to_be_bytes())?;
    out.write_all(&header.bss_size.to_be_bytes())?;
    out.write_all(&header.entry_point.to_be_bytes())?;

    // Flush explicitly: `BufWriter`'s drop ignores write errors, which would
    // otherwise turn a failed final write into a silent success.
    out.flush().with_context(|| format!("Failed to write DOL file '{}'", args.dol_file))?;
    Ok(())
}
/// Rounds `x` up to the next multiple of 32.
#[inline]
fn align32(x: u32) -> u32 {
    const ALIGN_MASK: u32 = 32 - 1;
    (x + ALIGN_MASK) & !ALIGN_MASK
}
/// Writes `bytes` to `out`, followed by zero bytes up to the next 32-byte boundary.
#[inline]
fn write_aligned<T: Write>(out: &mut T, bytes: &[u8]) -> std::io::Result<()> {
    let len = bytes.len() as u32;
    // Always < 32, so it fits in `ZERO_BUF`.
    let padding = (align32(len) - len) as usize;
    out.write_all(bytes)?;
    if padding == 0 {
        return Ok(());
    }
    out.write_all(&ZERO_BUF[..padding])
}

140
src/cmd/map.rs Normal file
View File

@@ -0,0 +1,140 @@
use std::{fs::File, io::BufReader};
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use crate::util::map::{process_map, SymbolEntry, SymbolRef};
// Command-line arguments of the `map` subcommand; the actual work is selected
// by the nested subcommand.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Debug)]
/// Commands for processing CodeWarrior maps.
#[argh(subcommand, name = "map")]
pub struct Args {
#[argh(subcommand)]
command: SubCommand,
}
// Nested subcommands of `map`; dispatched by `run`.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
// `map entries`: handled by `entries`.
Entries(EntriesArgs),
// `map symbol`: handled by `symbol`.
Symbol(SymbolArgs),
}
// Arguments of `map entries`.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Displays all entries for a particular TU.
#[argh(subcommand, name = "entries")]
pub struct EntriesArgs {
#[argh(positional)]
/// path to input map
map_file: String,
// Looked up verbatim in `MapEntries::unit_entries`.
#[argh(positional)]
/// TU to display entries for
unit: String,
}
// Arguments of `map symbol`.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Displays all references to a symbol.
#[argh(subcommand, name = "symbol")]
pub struct SymbolArgs {
#[argh(positional)]
/// path to input map
map_file: String,
// Compared against `SymbolRef::name` (the name as it appears in the map).
#[argh(positional)]
/// symbol to display references for
symbol: String,
}
pub fn run(args: Args) -> Result<()> {
match args.command {
SubCommand::Entries(c_args) => entries(c_args),
SubCommand::Symbol(c_args) => symbol(c_args),
}
}
fn entries(args: EntriesArgs) -> Result<()> {
let reader = BufReader::new(
File::open(&args.map_file)
.with_context(|| format!("Failed to open file '{}'", args.map_file))?,
);
let entries = process_map(reader)?;
match entries.unit_entries.get_vec(&args.unit) {
Some(vec) => {
for symbol_ref in vec {
if symbol_ref.name.starts_with('@') {
continue;
}
if let Some(symbol) = entries.symbols.get(symbol_ref) {
println!("{}", symbol.demangled.as_ref().unwrap_or(&symbol.name));
} else {
println!("Symbol not found: {}", symbol_ref.name);
}
}
}
None => {
return Err(Error::msg(format!(
"Failed to find entries for TU '{}' in map",
args.unit
)));
}
}
Ok(())
}
fn symbol(args: SymbolArgs) -> Result<()> {
let reader = BufReader::new(
File::open(&args.map_file)
.with_context(|| format!("Failed to open file '{}'", args.map_file))?,
);
let entries = process_map(reader)?;
let mut opt_ref: Option<(SymbolRef, SymbolEntry)> = None;
for (symbol_ref, entry) in &entries.symbols {
if symbol_ref.name == args.symbol {
if opt_ref.is_some() {
return Err(Error::msg(format!("Symbol '{}' found in multiple TUs", args.symbol)));
}
opt_ref = Some((symbol_ref.clone(), entry.clone()));
}
}
match opt_ref {
Some((symbol_ref, symbol)) => {
println!("Located symbol {}", symbol.demangled.as_ref().unwrap_or(&symbol.name));
if let Some(vec) = entries.entry_references.get_vec(&symbol_ref) {
println!("\nReferences:");
for x in vec {
if let Some(reference) = entries.symbols.get(x) {
println!(
">>> {} ({:?},{:?}) [{}]",
reference.demangled.as_ref().unwrap_or(&reference.name),
reference.kind,
reference.visibility,
reference.unit
);
} else {
println!(">>> {} (NOT FOUND)", x.name);
}
}
}
if let Some(vec) = entries.entry_referenced_from.get_vec(&symbol_ref) {
println!("\nReferenced from:");
for x in vec {
if let Some(reference) = entries.symbols.get(x) {
println!(
">>> {} ({:?}, {:?}) [{}]",
reference.demangled.as_ref().unwrap_or(&reference.name),
reference.kind,
reference.visibility,
reference.unit
);
} else {
println!(">>> {} (NOT FOUND)", x.name);
}
}
}
println!("\n");
}
None => {
return Err(Error::msg(format!("Failed to find symbol '{}' in map", args.symbol)));
}
}
Ok(())
}

View File

@@ -0,0 +1,47 @@
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use memchr::memmem;
use memmap2::MmapOptions;
// Command-line arguments of the `metroidbuildinfo` subcommand.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Sets the MetroidBuildInfo tag value in a given binary.
#[argh(subcommand, name = "metroidbuildinfo")]
pub struct Args {
// Opened read/write and modified in place by `run`.
#[argh(positional)]
/// path to source binary
binary: String,
// Path of a text file whose contents (trailing whitespace trimmed) are the build string.
#[argh(positional)]
/// path to build info string
build_info: String,
}
/// Maximum length in bytes of the (trimmed) build string that `run` accepts.
const BUILD_STRING_MAX: usize = 35;
/// Marker searched for in the binary; the build string is written directly after it.
const BUILD_STRING_TAG: &str = "!#$MetroidBuildInfo!#$";
/// Writes the build string read from `args.build_info` into `args.binary`.
///
/// The string (trailing whitespace trimmed) is written, followed by a NUL
/// byte, directly after the first occurrence of `BUILD_STRING_TAG` in the
/// binary. The binary is modified in place through a memory mapping.
///
/// # Errors
///
/// Fails if either file cannot be read/opened/mapped, the build string is
/// longer than `BUILD_STRING_MAX` bytes, the tag is not found, the binary is
/// too short to hold the string and its terminator after the tag, or the
/// changes cannot be flushed.
pub fn run(args: Args) -> Result<()> {
    let build_string = std::fs::read_to_string(&args.build_info)
        .with_context(|| format!("Failed to read build info string from '{}'", args.build_info))?;
    let build_string_trim = build_string.trim_end();
    let build_bytes = build_string_trim.as_bytes();
    if build_bytes.len() > BUILD_STRING_MAX {
        return Err(Error::msg(format!(
            "Build string '{}' is greater than maximum size of {}",
            build_string_trim, BUILD_STRING_MAX
        )));
    }

    let binary_file = std::fs::File::options()
        .read(true)
        .write(true)
        .open(&args.binary)
        .with_context(|| format!("Failed to open binary for writing: '{}'", args.binary))?;
    // SAFETY: this assumes no other process modifies or truncates the binary
    // while it is mapped.
    let mut map = unsafe { MmapOptions::new().map_mut(&binary_file) }
        .with_context(|| format!("Failed to mmap binary: '{}'", args.binary))?;

    let start = match memmem::find(&map, BUILD_STRING_TAG.as_bytes()) {
        Some(idx) => idx + BUILD_STRING_TAG.as_bytes().len(),
        None => return Err(Error::msg("Failed to find build string tag in binary")),
    };
    let end = start + build_bytes.len();
    // `end` is the index of the NUL terminator, so it must be inside the file.
    // Without this check a tag near the end of the file would panic below.
    if end >= map.len() {
        return Err(Error::msg(format!(
            "Not enough space after build string tag in binary: need {} bytes, have {}",
            build_bytes.len() + 1,
            map.len() - start
        )));
    }
    map[start..end].copy_from_slice(build_bytes);
    map[end] = 0;
    // Flush explicitly so write-back failures are reported instead of ignored.
    map.flush().with_context(|| format!("Failed to flush changes to binary: '{}'", args.binary))?;
    Ok(())
}

5
src/cmd/mod.rs Normal file
View File

@@ -0,0 +1,5 @@
// One module per CLI subcommand; each exposes an `Args` type and a `run` function.
pub(crate) mod demangle;
pub(crate) mod elf2dol;
pub(crate) mod map;
pub(crate) mod metroidbuildinfo;
pub(crate) mod shasum;

88
src/cmd/shasum.rs Normal file
View File

@@ -0,0 +1,88 @@
use std::{
fs::File,
io::{BufRead, BufReader, Read},
};
use anyhow::{Context, Error, Result};
use argh::FromArgs;
use sha1::{Digest, Sha1};
// Command-line arguments of the `shasum` subcommand.
// NOTE: the `///` comments below are consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Eq, Debug)]
/// Print or check SHA1 (160-bit) checksums.
#[argh(subcommand, name = "shasum")]
pub struct Args {
// When set, `file` is read as a checksum list instead of being hashed itself.
#[argh(switch, short = 'c')]
/// check SHA sums against given list
check: bool,
#[argh(positional)]
/// path to file
file: String,
}
/// Size in bytes of the read buffer used by `file_sha1`.
const DEFAULT_BUF_SIZE: usize = 8192;
/// Opens `args.file` and either verifies it as a checksum list (`--check`)
/// or prints its SHA-1 hash.
pub fn run(args: Args) -> Result<()> {
    let file =
        File::open(&args.file).with_context(|| format!("Failed to open file '{}'", args.file))?;
    match args.check {
        true => check(args, file),
        false => hash(args, file),
    }
}
fn check(_args: Args, file: File) -> Result<()> {
let reader = BufReader::new(file);
let mut mismatches = 0usize;
for line in reader.lines() {
let line = match line {
Ok(line) => line,
Err(e) => return Err(Error::msg(format!("File read failed: {}", e))),
};
let (hash, file_name) =
line.split_once(' ').ok_or_else(|| Error::msg(format!("Invalid line: {}", line)))?;
let file_name = match file_name.chars().next() {
Some(' ') | Some('*') => &file_name[1..],
_ => return Err(Error::msg(format!("Invalid line: {}", line))),
};
let mut hash_bytes = [0u8; 20];
hex::decode_to_slice(hash, &mut hash_bytes)
.with_context(|| format!("Invalid line: {}", line))?;
let file = File::open(file_name)
.with_context(|| format!("Failed to open file '{}'", file_name))?;
let found_hash = file_sha1(file)?;
if hash_bytes == found_hash.as_ref() {
println!("{}: OK", file_name);
} else {
println!("{}: FAILED", file_name);
mismatches += 1;
}
}
if mismatches != 0 {
eprintln!("WARNING: {} computed checksum did NOT match", mismatches);
std::process::exit(1);
}
Ok(())
}
/// Prints the SHA-1 of `file` as `<lowercase hex>  <args.file>`.
///
/// The hash and the name are separated by two spaces (the second one being
/// the text-mode marker) so that the output is a valid input line for `check`,
/// which requires a space followed by `' '` or `'*'` before the file name.
fn hash(args: Args, file: File) -> Result<()> {
    let hash = file_sha1(file)?;
    // A SHA-1 digest is 20 bytes, i.e. 40 hex characters.
    let mut hash_buf = [0u8; 40];
    let hash_str = base16ct::lower::encode_str(&hash, &mut hash_buf)
        .map_err(|e| Error::msg(format!("Failed to encode hash: {}", e)))?;
    println!("{}  {}", hash_str, args.file);
    Ok(())
}
/// Computes the SHA-1 digest of everything readable from `file`, reading in
/// chunks of `DEFAULT_BUF_SIZE` bytes.
fn file_sha1(mut file: File) -> Result<sha1::digest::Output<Sha1>> {
    let mut hasher = Sha1::new();
    let mut chunk = [0u8; DEFAULT_BUF_SIZE];
    loop {
        match file.read(&mut chunk).context("File read failed")? {
            // A zero-length read marks end of file.
            0 => return Ok(hasher.finalize()),
            n => hasher.update(&chunk[0..n]),
        }
    }
}

39
src/main.rs Normal file
View File

@@ -0,0 +1,39 @@
extern crate core;
use argh::FromArgs;
mod argh_version;
mod cmd;
mod util;
// Top-level command line: only selects a subcommand.
// NOTE: the `///` comment below is consumed by argh's derive as help text.
#[derive(FromArgs, PartialEq, Debug)]
/// GameCube/Wii decompilation project tools.
struct TopLevel {
#[argh(subcommand)]
command: SubCommand,
}
// All available subcommands; each variant wraps the `Args` type of the
// matching `cmd` module and is dispatched to that module's `run` in `main`.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand)]
enum SubCommand {
Demangle(cmd::demangle::Args),
Elf2Dol(cmd::elf2dol::Args),
Map(cmd::map::Args),
MetroidBuildInfo(cmd::metroidbuildinfo::Args),
Shasum(cmd::shasum::Args),
}
/// Parses the command line, runs the selected subcommand, and exits with
/// status 1 (after printing the error to stderr) if it fails.
fn main() {
    let command = argh_version::from_env::<TopLevel>().command;
    let outcome = match command {
        SubCommand::Demangle(sub_args) => cmd::demangle::run(sub_args),
        SubCommand::Elf2Dol(sub_args) => cmd::elf2dol::run(sub_args),
        SubCommand::Map(sub_args) => cmd::map::run(sub_args),
        SubCommand::MetroidBuildInfo(sub_args) => cmd::metroidbuildinfo::run(sub_args),
        SubCommand::Shasum(sub_args) => cmd::shasum::run(sub_args),
    };
    if let Err(e) = outcome {
        eprintln!("{:?}", e);
        std::process::exit(1);
    }
}

485
src/util/map.rs Normal file
View File

@@ -0,0 +1,485 @@
use std::{
collections::{btree_map::Entry, BTreeMap, HashMap},
io::BufRead,
ops::Range,
};
use anyhow::{Error, Result};
use cwdemangle::{demangle, DemangleOptions};
use lazy_static::lazy_static;
use multimap::MultiMap;
use regex::Regex;
use topological_sort::TopologicalSort;
/// Symbol type as given in a link map entry (`func`, `object`, `section`, `notype`).
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SymbolKind {
/// `func` in the map.
Function,
/// `object` in the map.
Object,
/// `section` in the map.
Section,
/// `notype` in the map; also used for symbols whose type is unknown.
NoType,
}
/// Symbol visibility as given in a link map entry (`global`, `local`, `weak`).
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SymbolVisibility {
Global,
Local,
Weak,
}
/// Everything known about one symbol of the map.
#[derive(Debug, Clone)]
pub struct SymbolEntry {
/// Symbol name exactly as it appears in the map.
pub name: String,
/// Demangled name, when demangling was attempted and succeeded.
pub demangled: Option<String>,
pub kind: SymbolKind,
pub visibility: SymbolVisibility,
/// Translation unit (object) the symbol was found in; `[generated]` for
/// linker generated symbols. May be renamed when resolving TU name conflicts.
pub unit: String,
/// Address from the section layout; 0 until the symbol is seen there.
pub address: u32,
/// Size from the section layout; 0 when unknown (may be guessed later).
pub size: u32,
/// Name of the section the symbol was laid out in; empty until known.
pub section: String,
}
/// Key identifying a symbol: its name together with the TU it was found in.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct SymbolRef {
pub name: String,
pub unit: String,
}
/// Result of `resolve_section_order`.
#[derive(Default)]
struct SectionOrder {
/// Symbols in ascending address order (skipped local text labels excluded).
symbol_order: Vec<SymbolRef>,
/// For each section, in order of appearance: `(section name, TU names in address order)`.
unit_order: Vec<(String, Vec<String>)>,
}
/// Returns `true` for the sections treated as code: `.text` and `.init`.
fn is_code_section(section: &str) -> bool { matches!(section, ".text" | ".init") }
/// Iterate over the BTreeMap and generate an ordered list of symbols and TUs by address.
///
/// Besides building the ordering, this updates `symbol_entries` in place:
/// `NoType` symbols get a guessed kind, symbols of a TU that was renamed due
/// to a name conflict get the new unit name, and symbols with size 0 get a
/// size guessed from the address of the next symbol in the ordering.
///
/// Returns an error if an address maps to a symbol that has no entry.
fn resolve_section_order(
address_to_symbol: &BTreeMap<u32, SymbolRef>,
symbol_entries: &mut HashMap<SymbolRef, SymbolEntry>,
) -> Result<SectionOrder> {
let mut ordering = SectionOrder::default();
// State carried from one symbol to the next while walking in address order.
let mut last_unit = String::new();
// Non-empty while the symbols being visited belong to a TU that was renamed.
let mut unit_override = String::new();
let mut last_section = String::new();
// Index into `ordering.unit_order` of the section currently being filled.
let mut section_unit_idx = 0usize;
for symbol_ref in address_to_symbol.values() {
if let Some(symbol) = symbol_entries.get_mut(symbol_ref) {
// A change of TU name marks the start of a new TU (and possibly a new section).
if last_unit != symbol.unit {
unit_override.clear();
if last_section != symbol.section {
ordering.unit_order.push((symbol.section.clone(), vec![]));
section_unit_idx = ordering.unit_order.len() - 1;
last_section = symbol.section.clone();
}
let unit_order = &mut ordering.unit_order[section_unit_idx];
if unit_order.1.contains(&symbol.unit) {
// With -common on, .bss is split into two parts. The TU order repeats
// at the end with all globally-deduplicated BSS symbols. Once we detect
// a duplicate inside of .bss, we create a new section and start again.
// TODO the first entry in .comm *could* be a TU without regular .bss
if symbol.section == ".bss" {
log::debug!(".comm section detected, duplicate {}", symbol.unit);
ordering.unit_order.push((".comm".to_string(), vec![symbol.unit.clone()]));
section_unit_idx = ordering.unit_order.len() - 1;
} else {
// Since the map doesn't contain file paths, it's likely that
// a TU name conflict is simply a separate file.
// TODO need to resolve and split unit in other sections as well
unit_override =
format!("{}_{}_{:X}", symbol.unit, symbol.section, symbol.address);
log::warn!(
"TU order conflict: {} exists multiple times in {}. Renaming to {}.",
symbol.unit,
symbol.section,
unit_override,
);
unit_order.1.push(unit_override.clone());
}
} else {
unit_order.1.push(symbol.unit.clone());
}
// Remember the original (not renamed) TU name for the comparison above.
last_unit = symbol.unit.clone();
}
// For ASM-generated objects, notype,local symbols in .text
// are usually local jump labels, and should be ignored.
if is_code_section(&symbol.section)
&& symbol.size == 0
&& symbol.kind == SymbolKind::NoType
&& symbol.visibility == SymbolVisibility::Local
{
// Being named something other than lbl_* could indicate
// that it's actually a local function, but let's just
// make the user resolve that if necessary.
if !symbol.name.starts_with("lbl_") {
log::warn!("Skipping local text symbol {}", symbol.name);
}
continue;
}
// Guess the symbol type if necessary.
if symbol.kind == SymbolKind::NoType {
if is_code_section(&symbol.section) {
symbol.kind = SymbolKind::Function;
} else {
symbol.kind = SymbolKind::Object;
}
}
// If we're renaming this TU, replace it in the symbol.
if !unit_override.is_empty() {
symbol.unit = unit_override.clone();
}
ordering.symbol_order.push(symbol_ref.clone());
} else {
return Err(Error::msg(format!("Symbol has address but no entry: {:?}", symbol_ref)));
}
}
// Second pass over adjacent pairs of the final order; every ref in
// `symbol_order` was found in `symbol_entries` above, so the unwraps hold.
for iter in ordering.symbol_order.windows(2) {
let next_address = symbol_entries.get(&iter[1]).unwrap().address;
let symbol = symbol_entries.get_mut(&iter[0]).unwrap();
// For ASM-generated objects, we need to guess the symbol size.
if symbol.size == 0 {
symbol.size = next_address - symbol.address;
}
}
Ok(ordering)
}
/// The ordering of TUs inside of each section represents a directed edge in a DAG.
/// We can use a topological sort to determine a valid global TU order.
/// There can be ambiguities, but any solution that satisfies the link order
/// constraints is considered valid.
// TODO account for library ordering
#[allow(dead_code)]
pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Result<Vec<String>> {
let mut global_unit_order = Vec::<String>::new();
let mut t_sort = TopologicalSort::<String>::new();
for (section, order) in section_unit_order {
let mut order: &[String] = order;
if (section == ".ctors" || section == ".dtors") && order.len() > 1 {
// __init_cpp_exceptions.o has symbols that get ordered to the beginning of
// .ctors and .dtors, so our topological sort would fail if we added them.
// Always skip the first TU of .ctors and .dtors.
order = &order[1..];
}
for iter in order.windows(2) {
t_sort.add_dependency(iter[0].clone(), iter[1].clone());
}
}
for unit in &mut t_sort {
global_unit_order.push(unit);
}
// An incomplete topological sort indicates that a cyclic dependency was encountered.
if !t_sort.is_empty() {
return Err(Error::msg("Cyclic dependency encountered!"));
}
// Sanity check, did we get all TUs in the final order?
for (_, order) in section_unit_order {
for unit in order {
if !global_unit_order.contains(unit) {
return Err(Error::msg(format!("Failed to find an order for {}", unit)));
}
}
}
Ok(global_unit_order)
}
// Line patterns of the map format; `process_map` tries them against each line.
lazy_static! {
// Start of the link map ("Link map of <entry point>").
static ref LINK_MAP_START: Regex = Regex::new("^Link map of (.*)$").unwrap();
// Link map entry: depth, symbol, (type,visibility) and the TU it was found in.
static ref LINK_MAP_ENTRY: Regex = Regex::new(
"^\\s*(?P<depth>\\d+)] (?P<sym>.*) \\((?P<type>.*),(?P<vis>.*)\\) found in (?P<tu>.*)$",
)
.unwrap();
// Link map entry for a linker generated symbol (no type, visibility or TU).
static ref LINK_MAP_ENTRY_GENERATED: Regex =
Regex::new("^\\s*(?P<depth>\\d+)] (?P<sym>.*) found as linker generated symbol$").unwrap();
// Marker line for an unreferenced duplicate; ignored by `process_map`.
static ref LINK_MAP_ENTRY_DUPLICATE: Regex =
Regex::new("^\\s*(?P<depth>\\d+)] >>> UNREFERENCED DUPLICATE (?P<sym>.*)$").unwrap();
// Start of a section layout ("<section> section layout").
static ref SECTION_LAYOUT_START: Regex = Regex::new("^(?P<section>.*) section layout$").unwrap();
// Section layout row: first address column (or UNUSED), size, second address
// column (or eight dots), optional alignment, symbol, optional "(entry of ...)", and TU.
static ref SECTION_LAYOUT_SYMBOL: Regex = Regex::new(
"^\\s*(?P<rom_addr>[0-9A-Fa-f]+|UNUSED)\\s+(?P<size>[0-9A-Fa-f]+)\\s+(?P<addr>[0-9A-Fa-f]+|\\.{8})\\s+(?P<align>\\d+)?\\s*(?P<sym>.*?)(?:\\s+\\(entry of (?P<entry_of>.*?)\\))?\\s+(?P<tu>.*)$",
)
.unwrap();
// Column header and separator lines of a section layout; ignored by `process_map`.
static ref SECTION_LAYOUT_HEADER: Regex = Regex::new(
"^(\\s*Starting\\s+Virtual\\s*|\\s*address\\s+Size\\s+address\\s*|\\s*-----------------------\\s*)$",
)
.unwrap();
// Start of the memory map; `process_map` stops reading here.
static ref MEMORY_MAP_HEADER: Regex = Regex::new("^\\s*Memory map:\\s*$").unwrap();
// "SYMBOL NOT FOUND" line; ignored by `process_map`.
static ref EXTERN_SYMBOL: Regex = Regex::new("^\\s*>>> SYMBOL NOT FOUND: (.*)$").unwrap();
}
/// Everything `process_map` extracts from a map file.
#[derive(Default)]
pub struct MapEntries {
/// All known symbols, keyed by (name, TU).
pub symbols: HashMap<SymbolRef, SymbolEntry>,
/// TU name -> symbols listed for that TU in the link map.
pub unit_entries: MultiMap<String, SymbolRef>,
/// Symbol -> symbols nested directly below it in the link map (what it references).
pub entry_references: MultiMap<SymbolRef, SymbolRef>,
/// Symbol -> symbols it was nested directly below in the link map (what references it).
pub entry_referenced_from: MultiMap<SymbolRef, SymbolRef>,
/// Address -> first symbol laid out at that address.
pub address_to_symbol: BTreeMap<u32, SymbolRef>,
/// TU name -> address range of a section of that TU. Keyed by TU name only,
/// so a later section's range replaces an earlier one for the same TU.
pub unit_section_ranges: HashMap<String, Range<u32>>,
/// Symbols in address order, as computed by `resolve_section_order`.
pub symbol_order: Vec<SymbolRef>,
/// Per-section TU order, as computed by `resolve_section_order`.
pub unit_order: Vec<(String, Vec<String>)>,
}
pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
let mut entries = MapEntries::default();
let mut symbol_stack = Vec::<SymbolRef>::new();
let mut current_section = String::new();
let mut last_name = String::new();
let mut last_unit = String::new();
let mut has_link_map = false;
let mut relative_offset = 0u32;
let mut last_section_end = 0u32;
for result in reader.lines() {
match result {
Ok(line) => {
if let Some(captures) = LINK_MAP_START.captures(&line) {
log::debug!("Entry point: {}", &captures[1]);
has_link_map = true;
} else if let Some(captures) = LINK_MAP_ENTRY.captures(&line) {
if captures["sym"].starts_with('.') {
last_name.clear();
continue;
}
let is_duplicate = &captures["sym"] == ">>>";
let unit = captures["tu"].trim().to_string();
let name = if is_duplicate {
if last_name.is_empty() {
return Err(Error::msg("Last name empty?"));
}
last_name.clone()
} else {
captures["sym"].to_string()
};
let symbol_ref = SymbolRef { name: name.clone(), unit: unit.clone() };
let depth: usize = captures["depth"].parse()?;
if depth > symbol_stack.len() {
symbol_stack.push(symbol_ref.clone());
} else if depth <= symbol_stack.len() {
symbol_stack.truncate(depth - 1);
symbol_stack.push(symbol_ref.clone());
}
// println!("Entry: {} ({})", name, tu);
let kind = match &captures["type"] {
"func" => SymbolKind::Function,
"object" => SymbolKind::Object,
"section" => SymbolKind::Section,
"notype" => SymbolKind::NoType,
_ => {
return Err(Error::msg(format!(
"Unknown symbol type: {}",
&captures["type"],
)));
}
};
let visibility = match &captures["vis"] {
"global" => SymbolVisibility::Global,
"local" => SymbolVisibility::Local,
"weak" => SymbolVisibility::Weak,
_ => {
return Err(Error::msg(format!(
"Unknown symbol visibility: {}",
&captures["vis"],
)));
}
};
if !is_duplicate && symbol_stack.len() > 1 {
let from = &symbol_stack[symbol_stack.len() - 2];
entries.entry_referenced_from.insert(symbol_ref.clone(), from.clone());
entries.entry_references.insert(from.clone(), symbol_ref.clone());
}
let mut should_insert = true;
if let Some(symbol) = entries.symbols.get(&symbol_ref) {
if symbol.kind != kind {
log::warn!(
"Kind mismatch for {}: was {:?}, now {:?}",
symbol.name,
symbol.kind,
kind
);
}
if symbol.visibility != visibility {
log::warn!(
"Visibility mismatch for {}: was {:?}, now {:?}",
symbol.name,
symbol.visibility,
visibility
);
}
entries.unit_entries.insert(unit.clone(), symbol_ref.clone());
should_insert = false;
}
if should_insert {
let demangled =
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
name: name.clone(),
demangled,
kind,
visibility,
unit: unit.clone(),
address: 0,
size: 0,
section: String::new(),
});
last_name = name.clone();
entries.unit_entries.insert(unit, symbol_ref.clone());
}
} else if let Some(captures) = LINK_MAP_ENTRY_GENERATED.captures(&line) {
let name = captures["sym"].to_string();
let demangled =
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
let symbol_ref =
SymbolRef { name: name.clone(), unit: "[generated]".to_string() };
entries.symbols.insert(symbol_ref, SymbolEntry {
name,
demangled,
kind: SymbolKind::NoType,
visibility: SymbolVisibility::Global,
unit: "[generated]".to_string(),
address: 0,
size: 0,
section: String::new(),
});
} else if line.trim().is_empty()
|| LINK_MAP_ENTRY_DUPLICATE.is_match(&line)
|| SECTION_LAYOUT_HEADER.is_match(&line)
|| EXTERN_SYMBOL.is_match(&line)
{
// Ignore
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
current_section = captures["section"].trim().to_string();
last_unit.clear();
log::debug!("Processing section layout for {}", current_section);
} else if let Some(captures) = SECTION_LAYOUT_SYMBOL.captures(&line) {
if captures["rom_addr"].trim() == "UNUSED" {
continue;
}
let sym_name = captures["sym"].trim();
let tu = captures["tu"].trim();
let mut address = u32::from_str_radix(captures["addr"].trim(), 16)?;
let mut size = u32::from_str_radix(captures["size"].trim(), 16)?;
// For RELs, the each section starts at address 0. For our purposes
// we'll create "fake" addresses by simply starting at the end of the
// previous section.
if last_unit.is_empty() {
if address == 0 {
relative_offset = last_section_end;
} else {
relative_offset = 0;
}
}
address += relative_offset;
// Section symbol (i.e. ".data") indicates section size for a TU
if sym_name == current_section {
// Skip empty sections
if size == 0 {
continue;
}
let end = address + size;
entries.unit_section_ranges.insert(tu.to_string(), address..end);
last_unit = tu.to_string();
last_section_end = end;
continue;
}
// Otherwise, for ASM-generated objects, the first section symbol in a TU
// has the full size of the section.
if tu != last_unit {
if size == 0 {
return Err(Error::msg(format!(
"No section size for {} in {}",
sym_name, tu
)));
}
let end = address + size;
entries.unit_section_ranges.insert(tu.to_string(), address..end);
last_unit = tu.to_string();
last_section_end = end;
// Clear it, so that we guess the "real" symbol size later.
size = 0;
}
// Ignore ...data.0 and similar
if sym_name.starts_with("...") {
continue;
}
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: tu.to_string() };
if let Some(symbol) = entries.symbols.get_mut(&symbol_ref) {
symbol.address = address;
symbol.size = size;
symbol.section = current_section.clone();
match entries.address_to_symbol.entry(address) {
Entry::Vacant(entry) => {
entry.insert(symbol_ref.clone());
}
Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
symbol.address,
entry.get().name,
sym_name,
tu
);
}
}
} else {
let visibility = if has_link_map {
log::warn!(
"Symbol not in link map: {} ({}). Type and visibility unknown.",
sym_name,
tu,
);
SymbolVisibility::Local
} else {
SymbolVisibility::Global
};
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
name: sym_name.to_string(),
demangled: None,
kind: SymbolKind::NoType,
visibility,
unit: tu.to_string(),
address,
size,
section: current_section.clone(),
});
match entries.address_to_symbol.entry(address) {
Entry::Vacant(entry) => {
entry.insert(symbol_ref.clone());
}
Entry::Occupied(entry) => {
log::warn!(
"Symbol overridden @ {:X} from {} to {} in {}",
address,
entry.get().name,
sym_name,
tu
);
}
}
}
} else if MEMORY_MAP_HEADER.is_match(&line) {
// log::debug!("Done");
break;
} else {
todo!("{}", line);
}
}
Err(e) => {
return Err(Error::from(e));
}
}
}
let section_order = resolve_section_order(&entries.address_to_symbol, &mut entries.symbols)?;
entries.symbol_order = section_order.symbol_order;
entries.unit_order = section_order.unit_order;
Ok(entries)
}

1
src/util/mod.rs Normal file
View File

@@ -0,0 +1 @@
// Parsing of linker map files (`process_map` and related types).
pub(crate) mod map;