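//! Linker map file parsing: reads the link map (symbol reference tree) and the
//! per-section layouts, then derives the symbol order and translation unit order.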
use std::{
    collections::{btree_map::Entry, BTreeMap, HashMap, HashSet},
    io::BufRead,
    ops::Range,
};

use anyhow::{Error, Result};
use cwdemangle::{demangle, DemangleOptions};
use lazy_static::lazy_static;
use multimap::MultiMap;
use regex::Regex;
use topological_sort::TopologicalSort;
|
|
|
|
/// Type of a symbol, as given by the type tag of a link map entry.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SymbolKind {
    /// Link map type tag `func`.
    Function,
    /// Link map type tag `object`.
    Object,
    /// Link map type tag `section`.
    Section,
    /// Link map type tag `notype`; also the initial kind of linker generated symbols
    /// and of symbols that only appear in a section layout.
    NoType,
}
|
|
|
|
/// Visibility of a symbol, as given by the visibility tag of a link map entry.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SymbolVisibility {
    /// Link map visibility tag `global`.
    Global,
    /// Link map visibility tag `local`.
    Local,
    /// Link map visibility tag `weak`.
    Weak,
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SymbolEntry {
|
|
pub name: String,
|
|
pub demangled: Option<String>,
|
|
pub kind: SymbolKind,
|
|
pub visibility: SymbolVisibility,
|
|
pub unit: String,
|
|
pub address: u32,
|
|
pub size: u32,
|
|
pub section: String,
|
|
}
|
|
|
|
/// Key that identifies a symbol: its name together with the translation unit it was
/// found in. Usable as a hash map key.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct SymbolRef {
    /// Symbol name exactly as written in the map file.
    pub name: String,
    /// Translation unit name as written in the map file.
    pub unit: String,
}
|
|
|
|
#[derive(Default)]
|
|
struct SectionOrder {
|
|
symbol_order: Vec<SymbolRef>,
|
|
unit_order: Vec<(String, Vec<String>)>,
|
|
}
|
|
|
|
/// Returns `true` for the sections that are treated as code: `.text` and `.init`.
fn is_code_section(section: &str) -> bool { matches!(section, ".text" | ".init") }
|
|
|
|
/// Iterate over the BTreeMap and generate an ordered list of symbols and TUs by address.
|
|
fn resolve_section_order(
|
|
address_to_symbol: &BTreeMap<u32, SymbolRef>,
|
|
symbol_entries: &mut HashMap<SymbolRef, SymbolEntry>,
|
|
) -> Result<SectionOrder> {
|
|
let mut ordering = SectionOrder::default();
|
|
|
|
let mut last_unit = String::new();
|
|
let mut unit_override = String::new();
|
|
let mut last_section = String::new();
|
|
let mut section_unit_idx = 0usize;
|
|
for symbol_ref in address_to_symbol.values() {
|
|
if let Some(symbol) = symbol_entries.get_mut(symbol_ref) {
|
|
if last_unit != symbol.unit {
|
|
unit_override.clear();
|
|
|
|
if last_section != symbol.section {
|
|
ordering.unit_order.push((symbol.section.clone(), vec![]));
|
|
section_unit_idx = ordering.unit_order.len() - 1;
|
|
last_section = symbol.section.clone();
|
|
}
|
|
let unit_order = &mut ordering.unit_order[section_unit_idx];
|
|
if unit_order.1.contains(&symbol.unit) {
|
|
// With -common on, .bss is split into two parts. The TU order repeats
|
|
// at the end with all globally-deduplicated BSS symbols. Once we detect
|
|
// a duplicate inside of .bss, we create a new section and start again.
|
|
// TODO the first entry in .comm *could* be a TU without regular .bss
|
|
if symbol.section == ".bss" {
|
|
log::debug!(".comm section detected, duplicate {}", symbol.unit);
|
|
ordering.unit_order.push((".comm".to_string(), vec![symbol.unit.clone()]));
|
|
section_unit_idx = ordering.unit_order.len() - 1;
|
|
} else {
|
|
// Since the map doesn't contain file paths, it's likely that
|
|
// a TU name conflict is simply a separate file.
|
|
// TODO need to resolve and split unit in other sections as well
|
|
unit_override =
|
|
format!("{}_{}_{:X}", symbol.unit, symbol.section, symbol.address);
|
|
log::warn!(
|
|
"TU order conflict: {} exists multiple times in {}. Renaming to {}.",
|
|
symbol.unit,
|
|
symbol.section,
|
|
unit_override,
|
|
);
|
|
unit_order.1.push(unit_override.clone());
|
|
}
|
|
} else {
|
|
unit_order.1.push(symbol.unit.clone());
|
|
}
|
|
last_unit = symbol.unit.clone();
|
|
}
|
|
// For ASM-generated objects, notype,local symbols in .text
|
|
// are usually local jump labels, and should be ignored.
|
|
if is_code_section(&symbol.section)
|
|
&& symbol.size == 0
|
|
&& symbol.kind == SymbolKind::NoType
|
|
&& symbol.visibility == SymbolVisibility::Local
|
|
{
|
|
// Being named something other than lbl_* could indicate
|
|
// that it's actually a local function, but let's just
|
|
// make the user resolve that if necessary.
|
|
if !symbol.name.starts_with("lbl_") {
|
|
log::warn!("Skipping local text symbol {}", symbol.name);
|
|
}
|
|
continue;
|
|
}
|
|
// Guess the symbol type if necessary.
|
|
if symbol.kind == SymbolKind::NoType {
|
|
if is_code_section(&symbol.section) {
|
|
symbol.kind = SymbolKind::Function;
|
|
} else {
|
|
symbol.kind = SymbolKind::Object;
|
|
}
|
|
}
|
|
// If we're renaming this TU, replace it in the symbol.
|
|
if !unit_override.is_empty() {
|
|
symbol.unit = unit_override.clone();
|
|
}
|
|
ordering.symbol_order.push(symbol_ref.clone());
|
|
} else {
|
|
return Err(Error::msg(format!("Symbol has address but no entry: {:?}", symbol_ref)));
|
|
}
|
|
}
|
|
|
|
for iter in ordering.symbol_order.windows(2) {
|
|
let next_address = symbol_entries.get(&iter[1]).unwrap().address;
|
|
let symbol = symbol_entries.get_mut(&iter[0]).unwrap();
|
|
// For ASM-generated objects, we need to guess the symbol size.
|
|
if symbol.size == 0 {
|
|
symbol.size = next_address - symbol.address;
|
|
}
|
|
}
|
|
|
|
Ok(ordering)
|
|
}
|
|
|
|
/// The ordering of TUs inside of each section represents a directed edge in a DAG.
|
|
/// We can use a topological sort to determine a valid global TU order.
|
|
/// There can be ambiguities, but any solution that satisfies the link order
|
|
/// constraints is considered valid.
|
|
// TODO account for library ordering
|
|
#[allow(dead_code)]
|
|
pub fn resolve_link_order(section_unit_order: &[(String, Vec<String>)]) -> Result<Vec<String>> {
|
|
let mut global_unit_order = Vec::<String>::new();
|
|
let mut t_sort = TopologicalSort::<String>::new();
|
|
for (section, order) in section_unit_order {
|
|
let mut order: &[String] = order;
|
|
if (section == ".ctors" || section == ".dtors") && order.len() > 1 {
|
|
// __init_cpp_exceptions.o has symbols that get ordered to the beginning of
|
|
// .ctors and .dtors, so our topological sort would fail if we added them.
|
|
// Always skip the first TU of .ctors and .dtors.
|
|
order = &order[1..];
|
|
}
|
|
for iter in order.windows(2) {
|
|
t_sort.add_dependency(iter[0].clone(), iter[1].clone());
|
|
}
|
|
}
|
|
for unit in &mut t_sort {
|
|
global_unit_order.push(unit);
|
|
}
|
|
// An incomplete topological sort indicates that a cyclic dependency was encountered.
|
|
if !t_sort.is_empty() {
|
|
return Err(Error::msg("Cyclic dependency encountered!"));
|
|
}
|
|
// Sanity check, did we get all TUs in the final order?
|
|
for (_, order) in section_unit_order {
|
|
for unit in order {
|
|
if !global_unit_order.contains(unit) {
|
|
return Err(Error::msg(format!("Failed to find an order for {}", unit)));
|
|
}
|
|
}
|
|
}
|
|
Ok(global_unit_order)
|
|
}
|
|
|
|
lazy_static! {
    /// First line of the link map; capture 1 is the text after `Link map of `.
    static ref LINK_MAP_START: Regex = Regex::new("^Link map of (.*)$").unwrap();
    /// Link map entry: `<depth>] <sym> (<type>,<vis>) found in <tu>`.
    static ref LINK_MAP_ENTRY: Regex = Regex::new(
        "^\\s*(?P<depth>\\d+)] (?P<sym>.*) \\((?P<type>.*),(?P<vis>.*)\\) found in (?P<tu>.*)$",
    )
    .unwrap();
    /// Link map entry for a symbol that was generated by the linker.
    static ref LINK_MAP_ENTRY_GENERATED: Regex =
        Regex::new("^\\s*(?P<depth>\\d+)] (?P<sym>.*) found as linker generated symbol$").unwrap();
    /// Link map marker line `<depth>] >>> UNREFERENCED DUPLICATE <sym>`.
    static ref LINK_MAP_ENTRY_DUPLICATE: Regex =
        Regex::new("^\\s*(?P<depth>\\d+)] >>> UNREFERENCED DUPLICATE (?P<sym>.*)$").unwrap();
    /// Heading of a section layout: `<section> section layout`.
    static ref SECTION_LAYOUT_START: Regex = Regex::new("^(?P<section>.*) section layout$").unwrap();
    /// Row of a section layout: a hex value or `UNUSED`, a hex size, a hex address or
    /// eight dots, an optional alignment, the symbol name, an optional
    /// `(entry of <sym>)` note and the translation unit.
    static ref SECTION_LAYOUT_SYMBOL: Regex = Regex::new(
        "^\\s*(?P<rom_addr>[0-9A-Fa-f]+|UNUSED)\\s+(?P<size>[0-9A-Fa-f]+)\\s+(?P<addr>[0-9A-Fa-f]+|\\.{8})\\s+(?P<align>\\d+)?\\s*(?P<sym>.*?)(?:\\s+\\(entry of (?P<entry_of>.*?)\\))?\\s+(?P<tu>.*)$",
    )
    .unwrap();
    /// Column header and separator lines printed at the top of a section layout.
    static ref SECTION_LAYOUT_HEADER: Regex = Regex::new(
        "^(\\s*Starting\\s+Virtual\\s*|\\s*address\\s+Size\\s+address\\s*|\\s*-----------------------\\s*)$",
    )
    .unwrap();
    /// `Memory map:` heading; parsing stops when it is reached.
    static ref MEMORY_MAP_HEADER: Regex = Regex::new("^\\s*Memory map:\\s*$").unwrap();
    /// `>>> SYMBOL NOT FOUND: <sym>` line; such lines are ignored.
    static ref EXTERN_SYMBOL: Regex = Regex::new("^\\s*>>> SYMBOL NOT FOUND: (.*)$").unwrap();
}
|
|
|
|
#[derive(Default)]
|
|
pub struct MapEntries {
|
|
pub symbols: HashMap<SymbolRef, SymbolEntry>,
|
|
pub unit_entries: MultiMap<String, SymbolRef>,
|
|
pub entry_references: MultiMap<SymbolRef, SymbolRef>,
|
|
pub entry_referenced_from: MultiMap<SymbolRef, SymbolRef>,
|
|
pub address_to_symbol: BTreeMap<u32, SymbolRef>,
|
|
pub unit_section_ranges: HashMap<String, Range<u32>>,
|
|
pub symbol_order: Vec<SymbolRef>,
|
|
pub unit_order: Vec<(String, Vec<String>)>,
|
|
}
|
|
|
|
pub fn process_map<R: BufRead>(reader: R) -> Result<MapEntries> {
|
|
let mut entries = MapEntries::default();
|
|
|
|
let mut symbol_stack = Vec::<SymbolRef>::new();
|
|
let mut current_section = String::new();
|
|
let mut last_name = String::new();
|
|
let mut last_unit = String::new();
|
|
let mut has_link_map = false;
|
|
let mut relative_offset = 0u32;
|
|
let mut last_section_end = 0u32;
|
|
for result in reader.lines() {
|
|
match result {
|
|
Ok(line) => {
|
|
if let Some(captures) = LINK_MAP_START.captures(&line) {
|
|
log::debug!("Entry point: {}", &captures[1]);
|
|
has_link_map = true;
|
|
} else if let Some(captures) = LINK_MAP_ENTRY.captures(&line) {
|
|
if captures["sym"].starts_with('.') {
|
|
last_name.clear();
|
|
continue;
|
|
}
|
|
let is_duplicate = &captures["sym"] == ">>>";
|
|
let unit = captures["tu"].trim().to_string();
|
|
let name = if is_duplicate {
|
|
if last_name.is_empty() {
|
|
return Err(Error::msg("Last name empty?"));
|
|
}
|
|
last_name.clone()
|
|
} else {
|
|
captures["sym"].to_string()
|
|
};
|
|
let symbol_ref = SymbolRef { name: name.clone(), unit: unit.clone() };
|
|
let depth: usize = captures["depth"].parse()?;
|
|
if depth > symbol_stack.len() {
|
|
symbol_stack.push(symbol_ref.clone());
|
|
} else if depth <= symbol_stack.len() {
|
|
symbol_stack.truncate(depth - 1);
|
|
symbol_stack.push(symbol_ref.clone());
|
|
}
|
|
// println!("Entry: {} ({})", name, tu);
|
|
let kind = match &captures["type"] {
|
|
"func" => SymbolKind::Function,
|
|
"object" => SymbolKind::Object,
|
|
"section" => SymbolKind::Section,
|
|
"notype" => SymbolKind::NoType,
|
|
_ => {
|
|
return Err(Error::msg(format!(
|
|
"Unknown symbol type: {}",
|
|
&captures["type"],
|
|
)));
|
|
}
|
|
};
|
|
let visibility = match &captures["vis"] {
|
|
"global" => SymbolVisibility::Global,
|
|
"local" => SymbolVisibility::Local,
|
|
"weak" => SymbolVisibility::Weak,
|
|
_ => {
|
|
return Err(Error::msg(format!(
|
|
"Unknown symbol visibility: {}",
|
|
&captures["vis"],
|
|
)));
|
|
}
|
|
};
|
|
if !is_duplicate && symbol_stack.len() > 1 {
|
|
let from = &symbol_stack[symbol_stack.len() - 2];
|
|
entries.entry_referenced_from.insert(symbol_ref.clone(), from.clone());
|
|
entries.entry_references.insert(from.clone(), symbol_ref.clone());
|
|
}
|
|
let mut should_insert = true;
|
|
if let Some(symbol) = entries.symbols.get(&symbol_ref) {
|
|
if symbol.kind != kind {
|
|
log::warn!(
|
|
"Kind mismatch for {}: was {:?}, now {:?}",
|
|
symbol.name,
|
|
symbol.kind,
|
|
kind
|
|
);
|
|
}
|
|
if symbol.visibility != visibility {
|
|
log::warn!(
|
|
"Visibility mismatch for {}: was {:?}, now {:?}",
|
|
symbol.name,
|
|
symbol.visibility,
|
|
visibility
|
|
);
|
|
}
|
|
entries.unit_entries.insert(unit.clone(), symbol_ref.clone());
|
|
should_insert = false;
|
|
}
|
|
if should_insert {
|
|
let demangled =
|
|
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
|
|
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
|
|
name: name.clone(),
|
|
demangled,
|
|
kind,
|
|
visibility,
|
|
unit: unit.clone(),
|
|
address: 0,
|
|
size: 0,
|
|
section: String::new(),
|
|
});
|
|
last_name = name.clone();
|
|
entries.unit_entries.insert(unit, symbol_ref.clone());
|
|
}
|
|
} else if let Some(captures) = LINK_MAP_ENTRY_GENERATED.captures(&line) {
|
|
let name = captures["sym"].to_string();
|
|
let demangled =
|
|
demangle(&name, &DemangleOptions { omit_empty_parameters: true });
|
|
let symbol_ref =
|
|
SymbolRef { name: name.clone(), unit: "[generated]".to_string() };
|
|
entries.symbols.insert(symbol_ref, SymbolEntry {
|
|
name,
|
|
demangled,
|
|
kind: SymbolKind::NoType,
|
|
visibility: SymbolVisibility::Global,
|
|
unit: "[generated]".to_string(),
|
|
address: 0,
|
|
size: 0,
|
|
section: String::new(),
|
|
});
|
|
} else if line.trim().is_empty()
|
|
|| LINK_MAP_ENTRY_DUPLICATE.is_match(&line)
|
|
|| SECTION_LAYOUT_HEADER.is_match(&line)
|
|
|| EXTERN_SYMBOL.is_match(&line)
|
|
{
|
|
// Ignore
|
|
} else if let Some(captures) = SECTION_LAYOUT_START.captures(&line) {
|
|
current_section = captures["section"].trim().to_string();
|
|
last_unit.clear();
|
|
log::debug!("Processing section layout for {}", current_section);
|
|
} else if let Some(captures) = SECTION_LAYOUT_SYMBOL.captures(&line) {
|
|
if captures["rom_addr"].trim() == "UNUSED" {
|
|
continue;
|
|
}
|
|
let sym_name = captures["sym"].trim();
|
|
let tu = captures["tu"].trim();
|
|
let mut address = u32::from_str_radix(captures["addr"].trim(), 16)?;
|
|
let mut size = u32::from_str_radix(captures["size"].trim(), 16)?;
|
|
|
|
// For RELs, the each section starts at address 0. For our purposes
|
|
// we'll create "fake" addresses by simply starting at the end of the
|
|
// previous section.
|
|
if last_unit.is_empty() {
|
|
if address == 0 {
|
|
relative_offset = last_section_end;
|
|
} else {
|
|
relative_offset = 0;
|
|
}
|
|
}
|
|
address += relative_offset;
|
|
|
|
// Section symbol (i.e. ".data") indicates section size for a TU
|
|
if sym_name == current_section {
|
|
// Skip empty sections
|
|
if size == 0 {
|
|
continue;
|
|
}
|
|
let end = address + size;
|
|
entries.unit_section_ranges.insert(tu.to_string(), address..end);
|
|
last_unit = tu.to_string();
|
|
last_section_end = end;
|
|
continue;
|
|
}
|
|
|
|
// Otherwise, for ASM-generated objects, the first section symbol in a TU
|
|
// has the full size of the section.
|
|
if tu != last_unit {
|
|
if size == 0 {
|
|
return Err(Error::msg(format!(
|
|
"No section size for {} in {}",
|
|
sym_name, tu
|
|
)));
|
|
}
|
|
let end = address + size;
|
|
entries.unit_section_ranges.insert(tu.to_string(), address..end);
|
|
last_unit = tu.to_string();
|
|
last_section_end = end;
|
|
|
|
// Clear it, so that we guess the "real" symbol size later.
|
|
size = 0;
|
|
}
|
|
|
|
// Ignore ...data.0 and similar
|
|
if sym_name.starts_with("...") {
|
|
continue;
|
|
}
|
|
|
|
let symbol_ref = SymbolRef { name: sym_name.to_string(), unit: tu.to_string() };
|
|
if let Some(symbol) = entries.symbols.get_mut(&symbol_ref) {
|
|
symbol.address = address;
|
|
symbol.size = size;
|
|
symbol.section = current_section.clone();
|
|
match entries.address_to_symbol.entry(address) {
|
|
Entry::Vacant(entry) => {
|
|
entry.insert(symbol_ref.clone());
|
|
}
|
|
Entry::Occupied(entry) => {
|
|
log::warn!(
|
|
"Symbol overridden @ {:X} from {} to {} in {}",
|
|
symbol.address,
|
|
entry.get().name,
|
|
sym_name,
|
|
tu
|
|
);
|
|
}
|
|
}
|
|
} else {
|
|
let visibility = if has_link_map {
|
|
log::warn!(
|
|
"Symbol not in link map: {} ({}). Type and visibility unknown.",
|
|
sym_name,
|
|
tu,
|
|
);
|
|
SymbolVisibility::Local
|
|
} else {
|
|
SymbolVisibility::Global
|
|
};
|
|
entries.symbols.insert(symbol_ref.clone(), SymbolEntry {
|
|
name: sym_name.to_string(),
|
|
demangled: None,
|
|
kind: SymbolKind::NoType,
|
|
visibility,
|
|
unit: tu.to_string(),
|
|
address,
|
|
size,
|
|
section: current_section.clone(),
|
|
});
|
|
match entries.address_to_symbol.entry(address) {
|
|
Entry::Vacant(entry) => {
|
|
entry.insert(symbol_ref.clone());
|
|
}
|
|
Entry::Occupied(entry) => {
|
|
log::warn!(
|
|
"Symbol overridden @ {:X} from {} to {} in {}",
|
|
address,
|
|
entry.get().name,
|
|
sym_name,
|
|
tu
|
|
);
|
|
}
|
|
}
|
|
}
|
|
} else if MEMORY_MAP_HEADER.is_match(&line) {
|
|
// log::debug!("Done");
|
|
break;
|
|
} else {
|
|
todo!("{}", line);
|
|
}
|
|
}
|
|
Err(e) => {
|
|
return Err(Error::from(e));
|
|
}
|
|
}
|
|
}
|
|
|
|
let section_order = resolve_section_order(&entries.address_to_symbol, &mut entries.symbols)?;
|
|
entries.symbol_order = section_order.symbol_order;
|
|
entries.unit_order = section_order.unit_order;
|
|
|
|
Ok(entries)
|
|
}
|