1324 lines
50 KiB
Rust

use std::{
cmp::{max, min, Ordering},
collections::{BTreeMap, HashMap, HashSet},
};
use anyhow::{anyhow, bail, ensure, Context, Result};
use itertools::Itertools;
use petgraph::{graph::NodeIndex, Graph};
use tracing_attributes::instrument;
use crate::{
analysis::{cfa::SectionAddress, read_address, read_u32},
obj::{
ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjRelocations, ObjSection, ObjSectionKind,
ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
ObjUnit,
},
util::{align_up, comment::MWComment},
};
/// Create splits for function pointers in the given section.
fn split_ctors_dtors(obj: &mut ObjInfo, start: SectionAddress, end: SectionAddress) -> Result<()> {
let ctors_section = &obj.sections[start.section];
let mut new_splits = BTreeMap::<SectionAddress, ObjSplit>::new();
let mut current_address = start;
let mut referenced_symbols = vec![];
while current_address < end {
let function_addr = read_address(obj, ctors_section, current_address.address)?;
log::debug!("Found {} entry: {:#010X}", ctors_section.name, function_addr);
let text_section = &obj.sections[function_addr.section];
let Some((function_symbol_idx, function_symbol)) = obj.symbols.kind_at_section_address(
function_addr.section,
function_addr.address,
ObjSymbolKind::Function,
)?
else {
bail!("Failed to find function symbol @ {:#010X}", function_addr);
};
referenced_symbols.push(function_symbol_idx);
let ctors_split = ctors_section.splits.for_address(current_address.address);
let function_split = text_section.splits.for_address(function_addr.address);
let mut expected_unit = None;
if let Some((_, ctors_split)) = ctors_split {
expected_unit = Some(ctors_split.unit.clone());
}
if let Some((_, function_split)) = function_split {
if let Some(unit) = &expected_unit {
ensure!(
unit == &function_split.unit,
"Mismatched splits for {} {:#010X} ({}) and function {:#010X} ({})",
ctors_section.name,
current_address,
unit,
function_addr,
function_split.unit
);
} else {
expected_unit = Some(function_split.unit.clone());
}
}
if ctors_split.is_none() || function_split.is_none() {
let unit = expected_unit.unwrap_or_else(|| {
let section_name = function_symbol
.section
.and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
.unwrap_or_else(|| "unknown".to_string());
format!("auto_{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
});
log::debug!("Adding splits to unit {}", unit);
if ctors_split.is_none() {
log::debug!(
"Adding split for {} entry @ {:#010X}",
ctors_section.name,
current_address
);
new_splits.insert(current_address, ObjSplit {
unit: unit.clone(),
end: current_address.address + 4,
align: None,
common: false,
autogenerated: true,
skip: false,
rename: None,
});
}
if function_split.is_none() {
log::debug!("Adding split for function @ {:#010X}", function_addr);
new_splits.insert(function_addr, ObjSplit {
unit,
end: function_addr.address + function_symbol.size as u32,
align: None,
common: false,
autogenerated: true,
skip: false,
rename: None,
});
}
}
current_address += 4;
}
for (addr, split) in new_splits {
obj.add_split(addr.section, addr.address, split)?;
}
// Hack to avoid deadstripping
for symbol_idx in referenced_symbols {
obj.symbols.flags(symbol_idx).set_force_active(true);
}
Ok(())
}
/// Create splits for extabindex + extab entries.
fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> {
let section = &obj.sections[start.section];
let mut new_splits = BTreeMap::<SectionAddress, ObjSplit>::new();
let (_, eti_init_info) = obj
.symbols
.by_name("_eti_init_info")?
.ok_or_else(|| anyhow!("Failed to find _eti_init_info symbol"))?;
ensure!(
eti_init_info.section == Some(start.section),
"_eti_init_info symbol in the wrong section: {:?} != {}",
eti_init_info.section,
start.section
);
let (extab_section_index, extab_section) =
obj.sections.by_name("extab")?.ok_or_else(|| anyhow!("Failed to find extab section"))?;
let mut current_address = start;
let section_end = eti_init_info.address as u32;
while current_address.address < section_end {
let function_addr = read_address(obj, section, current_address.address)?;
let function_size = read_u32(section, current_address.address + 4).with_context(|| {
format!(
"Failed to read extabindex entry function size @ {:#010X}",
current_address.address + 4
)
})?;
let extab_addr = read_address(obj, section, current_address.address + 8)?;
ensure!(
extab_addr.section == extab_section_index,
"extabindex entry @ {:#010X} has invalid extab address {:#010X}",
current_address,
extab_addr
);
log::debug!(
"Found extabindex entry: {:#010X} size {:#010X} extab {:#010X}",
function_addr,
function_size,
extab_addr
);
let Some((_, eti_symbol)) = obj.symbols.kind_at_section_address(
current_address.section,
current_address.address,
ObjSymbolKind::Object,
)?
else {
bail!("Failed to find extabindex symbol @ {:#010X}", current_address);
};
ensure!(
eti_symbol.size_known && eti_symbol.size == 12,
"extabindex symbol {} has mismatched size ({:#X}, expected {:#X})",
eti_symbol.name,
eti_symbol.size,
12
);
let text_section = &obj.sections[function_addr.section];
let Some((_, function_symbol)) = obj.symbols.kind_at_section_address(
function_addr.section,
function_addr.address,
ObjSymbolKind::Function,
)?
else {
bail!("Failed to find function symbol @ {:#010X}", function_addr);
};
ensure!(
function_symbol.size_known && function_symbol.size == function_size as u64,
"Function symbol {} has mismatched size ({:#X}, expected {:#X})",
function_symbol.name,
function_symbol.size,
function_size
);
let Some((_, extab_symbol)) = obj.symbols.kind_at_section_address(
extab_addr.section,
extab_addr.address,
ObjSymbolKind::Object,
)?
else {
bail!("Failed to find extab symbol @ {:#010X}", extab_addr);
};
ensure!(
extab_symbol.size_known && extab_symbol.size > 0,
"extab symbol {} has unknown size",
extab_symbol.name
);
let extabindex_split = section.splits.for_address(current_address.address);
let extab_split = extab_section.splits.for_address(extab_addr.address);
let function_split = text_section.splits.for_address(function_addr.address);
let mut expected_unit = None;
if let Some((_, extabindex_split)) = extabindex_split {
expected_unit = Some(extabindex_split.unit.clone());
}
if let Some((_, extab_split)) = extab_split {
if let Some(unit) = &expected_unit {
ensure!(
unit == &extab_split.unit,
"Mismatched splits for extabindex {:#010X} ({}) and extab {:#010X} ({})",
current_address,
unit,
extab_addr,
extab_split.unit
);
} else {
expected_unit = Some(extab_split.unit.clone());
}
}
if let Some((_, function_split)) = function_split {
if let Some(unit) = &expected_unit {
ensure!(
unit == &function_split.unit,
"Mismatched splits for extabindex {:#010X} ({}) and function {:#010X} ({})",
current_address,
unit,
function_addr,
function_split.unit
);
} else {
expected_unit = Some(function_split.unit.clone());
}
}
if extabindex_split.is_none() || extab_split.is_none() || function_split.is_none() {
let unit = expected_unit.unwrap_or_else(|| {
let section_name = function_symbol
.section
.and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
.unwrap_or_else(|| "unknown".to_string());
format!("auto_{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
});
log::debug!("Adding splits to unit {}", unit);
if extabindex_split.is_none() {
let end = current_address + 12;
log::debug!(
"Adding split for extabindex entry @ {:#010X}-{:#010X}",
current_address,
end
);
new_splits.insert(current_address, ObjSplit {
unit: unit.clone(),
end: end.address,
align: None,
common: false,
autogenerated: true,
skip: false,
rename: None,
});
}
if extab_split.is_none() {
let end = extab_addr + extab_symbol.size as u32;
log::debug!("Adding split for extab @ {:#010X}-{:#010X}", extab_addr, end);
new_splits.insert(extab_addr, ObjSplit {
unit: unit.clone(),
end: end.address,
align: None,
common: false,
autogenerated: true,
skip: false,
rename: None,
});
}
if function_split.is_none() {
let end = function_addr + function_symbol.size as u32;
log::debug!("Adding split for function @ {:#010X}-{:#010X}", function_addr, end);
new_splits.insert(function_addr, ObjSplit {
unit,
end: end.address,
align: None,
common: false,
autogenerated: true,
skip: false,
rename: None,
});
}
}
current_address += 12;
}
for (addr, split) in new_splits {
obj.add_split(addr.section, addr.address, split)?;
}
Ok(())
}
/// Create splits for gaps between existing splits.
///
/// For every section, walks the existing splits in address order and inserts an
/// autogenerated split wherever the next split (or the section end) starts beyond the
/// aligned current position. A gap split is cut short at the first symbol whose name was
/// already seen inside the gap, so a single autogenerated unit never holds two symbols
/// with the same name.
///
/// # Errors
/// Fails if a split starts before the current position (overlap), if a duplicate symbol
/// sits at the very start of a gap, or if adding a split fails.
fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
    let mut pending_splits = BTreeMap::<SectionAddress, ObjSplit>::new();

    for (section_index, section) in obj.sections.iter() {
        let mut cursor = SectionAddress::new(section_index, section.address as u32);
        let section_end = end_for_section(obj, section_index)?;
        let mut split_iter = section
            .splits
            .for_range(..section_end.address)
            .map(|(addr, split)| (SectionAddress::new(section_index, addr), split))
            .peekable();

        log::debug!(
            "Checking splits for section {} ({:#010X}..{:#010X})",
            section.name,
            cursor,
            section_end
        );

        while cursor < section_end {
            // With no split left, the section end acts as the next boundary; an end
            // address of 0 marks "no explicit end".
            let (split_start, split_end, split_align) = if let Some(&(addr, split)) =
                split_iter.peek()
            {
                log::debug!("Found split {} ({:#010X}..{:#010X})", split.unit, addr, split.end);
                (
                    addr,
                    SectionAddress::new(section_index, split.end),
                    split.alignment(obj, section_index, section, addr.address),
                )
            } else {
                (
                    section_end,
                    SectionAddress::new(section_index, 0),
                    default_section_align(section) as u32,
                )
            };

            ensure!(
                split_start >= cursor,
                "Split {:#010X}..{:#010X} overlaps with previous split",
                split_start,
                split_end
            );

            let aligned_cursor = cursor.align_up(split_align);
            if split_start > aligned_cursor {
                log::debug!(
                    "Creating auto split: {} > {} (orig: {}, align {})",
                    split_start,
                    aligned_cursor,
                    cursor,
                    split_align
                );

                // Find any duplicate symbols in this range
                let gap_symbols = obj
                    .symbols
                    .for_section_range(section_index, cursor.address..split_start.address)
                    .collect_vec();
                let mut seen_names = HashSet::new();
                let mut gap_end = split_start;
                if let Some((_, duplicate)) =
                    gap_symbols.iter().find(|(_, symbol)| !seen_names.insert(symbol.name.clone()))
                {
                    log::debug!(
                        "Found duplicate symbol {} at {:#010X}",
                        duplicate.name,
                        duplicate.address
                    );
                    gap_end.address = duplicate.address as u32;
                }

                ensure!(
                    gap_end > cursor,
                    "Duplicate symbols at {:#010X}: {:?}",
                    cursor,
                    gap_symbols
                        .iter()
                        .filter(|(_, s)| s.address == cursor.address as u64)
                        .collect_vec(),
                );

                log::debug!("Creating split from {:#010X}..{:#010X}", cursor, gap_end);
                let unit = format!(
                    "auto_{:02}_{:08X}_{}",
                    cursor.section,
                    cursor.address,
                    section.name.trim_start_matches('.')
                );
                pending_splits.insert(cursor, ObjSplit {
                    unit,
                    end: gap_end.address,
                    align: None,
                    common: false,
                    autogenerated: true,
                    skip: false,
                    rename: None,
                });
                // Re-examine the same upcoming split from the new position.
                cursor = gap_end;
                continue;
            }

            // No gap: consume the split and move past it.
            split_iter.next();
            cursor = if split_end.address > 0 {
                split_end
            } else {
                // No explicit end: the split runs up to the next split or the section end.
                match split_iter.peek() {
                    Some(&(next_addr, _)) => min(next_addr, section_end),
                    None => section_end,
                }
            };
        }
    }

    // Add new splits
    for (addr, split) in pending_splits {
        obj.add_split(addr.section, addr.address, split)?;
    }
    Ok(())
}
/// Ensures that all .bss splits following a common split are also marked as common.
///
/// The starting address is `common_start` when given, otherwise the address of the first
/// `.bss` split already flagged as common. Nothing happens when there is no `.bss` section
/// or no starting address can be determined.
fn update_common_splits(obj: &mut ObjInfo, common_start: Option<u32>) -> Result<()> {
    let Some((bss_section_index, bss_section)) = obj.sections.by_name(".bss")? else {
        return Ok(());
    };

    let common_bss_start = match common_start {
        Some(addr) => addr,
        None => match bss_section.splits.iter().find(|(_, split)| split.common) {
            Some((addr, _)) => addr,
            None => return Ok(()),
        },
    };
    log::debug!("Found common BSS start at {:#010X}", common_bss_start);

    let bss_splits = &mut obj.sections[bss_section_index].splits;
    for (addr, split) in bss_splits.for_range_mut(common_bss_start..) {
        if split.common {
            continue;
        }
        split.common = true;
        log::debug!("Added common flag to split {} at {:#010X}", split.unit, addr);
    }
    Ok(())
}
/// Final validation of splits.
///
/// Checks, for every split in iteration order, that it does not start before the end of
/// the previous split, that its end is non-zero and not before its start, and that neither
/// its start nor its end falls inside a sized symbol.
fn validate_splits(obj: &ObjInfo) -> Result<()> {
    let mut previous_end = SectionAddress::new(0, 0);
    for (section_index, section, addr, split) in obj.sections.all_splits() {
        let split_start = SectionAddress::new(section_index, addr);
        ensure!(
            split_start >= previous_end,
            "Split {} {} {:#010X}..{:#010X} overlaps with previous split",
            split.unit,
            section.name,
            addr,
            split.end
        );
        ensure!(
            split.end > 0 && split.end >= addr,
            "Invalid split end {} {} {:#010X}..{:#010X}",
            split.unit,
            section.name,
            addr,
            split.end
        );
        previous_end = SectionAddress::new(section_index, split.end);

        // The last sized symbol starting before the split must end at or before its start.
        let symbol_before_start = obj
            .symbols
            .for_section_range(section_index, ..addr)
            .rfind(|&(_, s)| s.size_known && s.size > 0);
        if let Some((_, symbol)) = symbol_before_start {
            ensure!(
                addr >= symbol.address as u32 + symbol.size as u32,
                "Split {} {} {:#010X}..{:#010X} overlaps symbol '{}' {:#010X}..{:#010X}",
                split.unit,
                section.name,
                addr,
                split.end,
                symbol.name,
                symbol.address,
                symbol.address + symbol.size
            );
        }

        // The last sized symbol starting before the split end must not extend past it.
        let symbol_before_end = obj
            .symbols
            .for_section_range(section_index, ..split.end)
            .rfind(|&(_, s)| s.size_known && s.size > 0);
        if let Some((_, symbol)) = symbol_before_end {
            ensure!(
                split.end >= symbol.address as u32 + symbol.size as u32,
                "Split {} {} ({:#010X}..{:#010X}) ends within symbol '{}' ({:#010X}..{:#010X})",
                split.unit,
                section.name,
                addr,
                split.end,
                symbol.name,
                symbol.address,
                symbol.address + symbol.size
            );
        }
    }
    Ok(())
}
/// Add padding symbols to fill in gaps between splits and symbols.
fn add_padding_symbols(obj: &mut ObjInfo) -> Result<()> {
for (section_index, section, addr, _split) in obj.sections.all_splits() {
if section.name == ".ctors" || section.name == ".dtors" {
continue;
}
if obj
.symbols
.kind_at_section_address(section_index, addr, match section.kind {
ObjSectionKind::Code => ObjSymbolKind::Function,
ObjSectionKind::Data => ObjSymbolKind::Object,
ObjSectionKind::ReadOnlyData => ObjSymbolKind::Object,
ObjSectionKind::Bss => ObjSymbolKind::Object,
})?
.is_none()
{
let next_symbol_address = obj
.symbols
.for_section_range(section_index, addr + 1..)
.find(|&(_, s)| s.size_known && s.size > 0)
.map(|(_, s)| s.address)
.unwrap_or(section.address + section.size);
let symbol_name = format!(
"pad_{:02}_{:08X}_{}",
section_index,
addr,
section.name.trim_start_matches('.')
);
log::debug!("Adding padding symbol {} at {:#010X}", symbol_name, addr);
obj.symbols.add_direct(ObjSymbol {
name: symbol_name,
address: addr as u64,
section: Some(section_index),
size: next_symbol_address - addr as u64,
size_known: true,
flags: ObjSymbolFlagSet(
ObjSymbolFlags::Local | ObjSymbolFlags::ForceActive | ObjSymbolFlags::NoWrite,
),
kind: match section.kind {
ObjSectionKind::Code => ObjSymbolKind::Function,
ObjSectionKind::Data | ObjSectionKind::ReadOnlyData | ObjSectionKind::Bss => {
ObjSymbolKind::Object
}
},
..Default::default()
})?;
}
}
// Add padding symbols for gaps between symbols
for (section_index, section) in obj.sections.iter() {
if section.name == ".ctors" || section.name == ".dtors" {
continue;
}
let mut to_add = vec![];
let mut iter = obj
.symbols
.for_section(section_index)
.filter(|(_, s)| s.size_known && s.size > 0)
.peekable();
while let (Some((_, symbol)), Some(&(_, next_symbol))) = (iter.next(), iter.peek()) {
let aligned_end =
align_up((symbol.address + symbol.size) as u32, next_symbol.align.unwrap_or(1));
match aligned_end.cmp(&(next_symbol.address as u32)) {
Ordering::Less => {
let symbol_name = format!(
"gap_{:02}_{:08X}_{}",
section_index,
aligned_end,
section.name.trim_start_matches('.')
);
log::debug!("Adding gap symbol {} at {:#010X}", symbol_name, aligned_end);
to_add.push(ObjSymbol {
name: symbol_name,
address: aligned_end as u64,
section: Some(section_index),
size: next_symbol.address - aligned_end as u64,
size_known: true,
flags: ObjSymbolFlagSet(
ObjSymbolFlags::Global
| ObjSymbolFlags::Hidden
| ObjSymbolFlags::ForceActive
| ObjSymbolFlags::NoWrite,
),
kind: match section.kind {
ObjSectionKind::Code => ObjSymbolKind::Function,
ObjSectionKind::Data
| ObjSectionKind::ReadOnlyData
| ObjSectionKind::Bss => ObjSymbolKind::Object,
},
..Default::default()
});
}
Ordering::Equal => {}
Ordering::Greater => {
bail!(
"Symbol {} ({:#010X}..{:#010X}) overlaps with symbol {} ({:#010X}..{:#010X}, align {})",
symbol.name,
symbol.address,
symbol.address + symbol.size,
next_symbol.name,
next_symbol.address,
next_symbol.address + next_symbol.size,
next_symbol.align.unwrap_or(1)
);
}
}
}
drop(iter);
for symbol in to_add {
obj.symbols.add_direct(symbol)?;
}
}
Ok(())
}
/// Shrink each split so that it ends at the (aligned) end of its last sized symbol.
///
/// Currently unused; the caller keeps the invocation commented out, hence `dead_code`.
///
/// For every split, the end of the last sized symbol inside the split is aligned up to the
/// split's own alignment. If that is smaller than the recorded split end, the split is
/// trimmed. A warning is logged when trimming would leave the next split in the same
/// section starting beyond the aligned trimmed end; the trim is still applied.
#[allow(dead_code)]
fn trim_split_alignment(obj: &mut ObjInfo) -> Result<()> {
    // For each split, set the end of split to the end of the last symbol in the split.
    let mut split_updates = vec![];
    let mut iter = obj.sections.all_splits().peekable();
    while let Some((section_index, section, addr, split)) = iter.next() {
        // Only a following split within the same section is relevant.
        let next_split = iter
            .peek()
            .filter(|&&(idx, _, _, _)| section_index == idx)
            .map(|&(_, _, addr, split)| (addr, split));
        let mut split_end = split.end;
        if let Some((_, symbol)) = obj
            .symbols
            .for_section_range(section_index, addr..split.end)
            .filter(|&(_, s)| s.size_known && s.size > 0)
            .next_back()
        {
            split_end = symbol.address as u32 + symbol.size as u32;
        }
        split_end = align_up(split_end, split.alignment(obj, section_index, section, addr));
        if split_end < split.end {
            if let Some((next_addr, next_split)) = next_split {
                // The next split's alignment must be evaluated at its own start address,
                // not at the start of the split being trimmed.
                let next_split_align =
                    next_split.alignment(obj, section_index, section, next_addr);
                if align_up(split_end, next_split_align) < next_addr {
                    log::warn!(
                        "Tried to trim {} split {} {:#010X}..{:#010X} to {:#010X}, but next split {} starts at {:#010X} with alignment {}",
                        section.name,
                        split.unit,
                        addr,
                        split.end,
                        split_end,
                        next_split.unit,
                        next_addr,
                        next_split_align
                    );
                }
            }
            log::info!(
                "Trimming {} split {} {:#010X}..{:#010X} to {:#010X}",
                section.name,
                split.unit,
                addr,
                split.end,
                split_end
            );
            split_updates.push((section_index, addr, split_end));
        }
    }
    // Release the shared borrow of the sections before mutating the splits.
    drop(iter);
    for (section_index, addr, split_end) in split_updates {
        obj.sections[section_index]
            .splits
            .at_mut(addr)
            .expect("split recorded during the scan must still exist")
            .end = split_end;
    }
    Ok(())
}
/// Trim splits if they contain linker generated symbols.
///
/// For each section whose effective end (as reported by `end_for_section`) differs from
/// its raw end, the last split is shortened to the effective end if it extends past it.
fn trim_linker_generated_symbols(obj: &mut ObjInfo) -> Result<()> {
    for section_index in 0..obj.sections.count() {
        let trimmed_end = end_for_section(obj, section_index)?.address;
        let section = &mut obj.sections[section_index];
        let raw_end = section.address as u32 + section.size as u32;
        if raw_end == trimmed_end {
            // Nothing was excluded from this section.
            continue;
        }
        let Some((addr, split)) = section.splits.iter_mut().next_back() else {
            continue;
        };
        if split.end <= trimmed_end {
            continue;
        }
        log::debug!(
            "Trimming split {} {:#010X}..{:#010X} to {:#010X}",
            split.unit,
            addr,
            split.end,
            trimmed_end
        );
        split.end = trimmed_end;
    }
    Ok(())
}
/// Perform any necessary adjustments to allow relinking.
/// This includes:
/// - Ensuring .ctors & .dtors entries are split with their associated function
/// - Ensuring extab & extabindex entries are split with their associated function
/// - Creating splits for gaps between existing splits
/// - Resolving a new object link order
///
/// `common_start` optionally gives the address where common BSS begins; `fill_gaps`
/// controls whether padding symbols are added once the splits have been validated.
#[instrument(level = "debug", skip(obj))]
pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bool) -> Result<()> {
    // Create splits for extab and extabindex entries
    if let Some((section_index, section)) = obj.sections.by_name("extabindex")? {
        let start = SectionAddress::new(section_index, section.address as u32);
        split_extabindex(obj, start)?;
    }

    // Create splits for .ctors entries, then for .dtors entries
    for table_name in [".ctors", ".dtors"] {
        if let Some((section_index, section)) = obj.sections.by_name(table_name)? {
            let mut start = SectionAddress::new(section_index, section.address as u32);
            // The final 4 bytes of the table are not scanned.
            let end = start + (section.size as u32 - 4);
            if table_name == ".dtors" && obj.kind == ObjKind::Executable {
                // Skip __destroy_global_chain_reference
                start += 4;
            }
            split_ctors_dtors(obj, start, end)?;
        }
    }

    // Remove linker generated symbols from splits
    trim_linker_generated_symbols(obj)?;

    // Create gap splits
    create_gap_splits(obj)?;

    // Update common BSS splits
    update_common_splits(obj, common_start)?;

    // Ensure splits don't overlap symbols or each other
    validate_splits(obj)?;

    // Trim alignment from splits
    // TODO figure out mwld pooled data alignment
    // trim_split_alignment(obj)?;

    if fill_gaps {
        // Add symbols to beginning of any split that doesn't start with a symbol
        add_padding_symbols(obj)?;
    }

    // Resolve link order
    obj.link_order = resolve_link_order(obj)?;
    Ok(())
}
/// The ordering of TUs inside of each section represents a directed edge in a DAG.
/// We can use a topological sort to determine a valid global TU order.
/// There can be ambiguities, but any solution that satisfies the link order
/// constraints is considered valid.
///
/// Units that already appear in `obj.link_order` are reused as-is; any other unit gets a
/// fresh `ObjUnit` entry.
///
/// # Errors
/// Fails when the ordering constraints contain a cycle.
#[instrument(level = "debug", skip(obj))]
fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
    #[allow(dead_code)]
    #[derive(Debug, Copy, Clone)]
    struct SplitEdge {
        from: u32,
        to: u32,
    }

    let mut graph = Graph::<String, SplitEdge>::new();

    // One node per distinct unit, created in sorted unit-name order.
    let unit_to_index_map: BTreeMap<String, NodeIndex> = obj
        .sections
        .all_splits()
        .map(|(_, _, _, split)| split.unit.clone())
        .sorted()
        .dedup()
        .map(|unit| {
            let node = graph.add_node(unit.clone());
            (unit, node)
        })
        .collect();

    for (_section_index, section) in obj.sections.iter() {
        let mut iter = section.splits.iter().peekable();
        if matches!(section.name.as_str(), ".ctors" | ".dtors") {
            // Skip __init_cpp_exceptions.o
            let skipped = iter.next();
            log::debug!("Skipping split {:?} (next: {:?})", skipped, iter.peek());
        }
        while let (Some((a_addr, a)), Some(&(b_addr, b))) = (iter.next(), iter.peek()) {
            // A non-common -> common transition marks the beginning of the common BSS
            // section and adds no constraint; neither do neighbours within the same unit.
            if (!a.common && b.common) || a.unit == b.unit {
                continue;
            }
            log::debug!(
                "Adding dependency {} ({:#010X}) -> {} ({:#010X})",
                a.unit,
                a_addr,
                b.unit,
                b_addr
            );
            let a_index = unit_to_index_map[&a.unit];
            let b_index = unit_to_index_map[&b.unit];
            graph.add_edge(a_index, b_index, SplitEdge { from: a_addr, to: b_addr });
        }
    }

    // use petgraph::{
    //     dot::{Config, Dot},
    //     graph::EdgeReference,
    // };
    // let get_edge_attributes = |_, e: EdgeReference<SplitEdge>| {
    //     let &SplitEdge { from, to } = e.weight();
    //     let section_name = &obj.section_at(from).unwrap().name;
    //     format!("label=\"{} {:#010X} -> {:#010X}\"", section_name, from, to)
    // };
    // let dot = Dot::with_attr_getters(
    //     &graph,
    //     &[Config::EdgeNoLabel, Config::NodeNoLabel],
    //     &get_edge_attributes,
    //     &|_, (_, s)| format!("label=\"{}\"", s),
    // );
    // println!("{:?}", dot);

    let sorted_nodes = petgraph::algo::toposort(&graph, None).map_err(|e| {
        anyhow!(
            "Cyclic dependency (involving {}) encountered while resolving link order",
            graph[e.node_id()]
        )
    })?;

    Ok(sorted_nodes
        .into_iter()
        .map(|idx| {
            let name = &graph[idx];
            obj.link_order.iter().find(|u| &u.name == name).cloned().unwrap_or_else(|| ObjUnit {
                name: name.clone(),
                autogenerated: obj.is_unit_autogenerated(name),
                comment_version: None,
            })
        })
        .collect())
}
/// Split an object into multiple relocatable objects.
#[instrument(level = "debug", skip(obj))]
pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
let mut objects: Vec<ObjInfo> = vec![];
let mut object_symbols: Vec<Vec<Option<usize>>> = vec![];
let mut name_to_obj: HashMap<String, usize> = HashMap::new();
for unit in &obj.link_order {
name_to_obj.insert(unit.name.clone(), objects.len());
object_symbols.push(vec![None; obj.symbols.count()]);
let mut split_obj = ObjInfo::new(
ObjKind::Relocatable,
ObjArchitecture::PowerPc,
unit.name.clone(),
vec![],
vec![],
);
if let Some(comment_version) = unit.comment_version {
if comment_version > 0 {
split_obj.mw_comment = Some(MWComment::new(comment_version)?);
}
} else {
split_obj.mw_comment = obj.mw_comment.clone();
}
objects.push(split_obj);
}
for (section_index, section) in obj.sections.iter() {
let mut current_address = SectionAddress::new(section_index, section.address as u32);
let section_end = end_for_section(obj, section_index)?;
let mut split_iter = section
.splits
.for_range(current_address.address..section_end.address)
.map(|(addr, split)| (SectionAddress::new(section_index, addr), split))
.peekable();
loop {
if current_address >= section_end {
break;
}
let (split_addr, split) = match split_iter.next() {
Some((addr, split)) => (addr, split),
None => bail!("No split found"),
};
ensure!(
split_addr == current_address,
"Split @ {} {} not found",
section.name,
current_address
);
let split_end = SectionAddress::new(section_index, split.end);
let next_addr = split_iter.peek().map(|&(addr, _)| addr).unwrap_or(section_end);
if next_addr > split_end
&& section.data_range(split_end.address, next_addr.address)?.iter().any(|&b| b != 0)
{
bail!(
"Unsplit data in {} from {} {} to next split {}",
section.name,
split.unit,
split_end,
next_addr
);
}
// Skip over this data
if split.skip {
current_address = next_addr;
continue;
}
let split_obj = name_to_obj
.get(&split.unit)
.and_then(|&idx| objects.get_mut(idx))
.ok_or_else(|| anyhow!("Unit '{}' not in link order", split.unit))?;
let symbol_idxs = name_to_obj
.get(&split.unit)
.and_then(|&idx| object_symbols.get_mut(idx))
.ok_or_else(|| anyhow!("Unit '{}' not in link order", split.unit))?;
// Calculate & verify section alignment
let mut align = split.align.unwrap_or_else(|| {
let default_align = default_section_align(section) as u32;
max(
// Maximum alignment of any symbol in this split
obj.symbols
.for_section_range(
section_index,
current_address.address..split_end.address,
)
.filter(|&(_, s)| s.size_known && s.size > 0)
.filter_map(|(_, s)| s.align)
.max()
.unwrap_or(default_align),
default_align,
)
}) as u64;
if current_address & (align as u32 - 1) != 0 {
log::warn!(
"Alignment for {} {} expected {}, but starts at {:#010X}",
split.unit,
section.name,
align,
current_address
);
while align > 4 {
align /= 2;
if current_address & (align as u32 - 1) == 0 {
break;
}
}
}
ensure!(
current_address & (align as u32 - 1) == 0,
"Invalid alignment for split: {} {} {:#010X}",
split.unit,
section.name,
current_address
);
// Collect relocations; target_symbol will be updated later
let out_relocations = section
.relocations
.range(current_address.address..split_end.address)
.map(|(addr, o)| {
(addr - current_address.address, ObjReloc {
kind: o.kind,
target_symbol: o.target_symbol,
addend: o.addend,
module: o.module,
})
})
.collect_vec();
// Add section symbols
let out_section_idx = split_obj.sections.next_section_index();
for (symbol_idx, symbol) in obj
.symbols
.for_section_range(section_index, current_address.address..=split_end.address)
.filter(|&(_, s)| {
s.section == Some(section_index) && !is_linker_generated_label(&s.name)
})
{
if symbol_idxs[symbol_idx].is_some() {
continue; // should never happen?
}
// TODO hack for gTRKInterruptVectorTableEnd
if (symbol.address == split_end.address as u64
&& symbol.name != "gTRKInterruptVectorTableEnd")
|| (symbol.address == current_address.address as u64
&& symbol.name == "gTRKInterruptVectorTableEnd")
{
continue;
}
symbol_idxs[symbol_idx] = Some(split_obj.symbols.add_direct(ObjSymbol {
name: symbol.name.clone(),
demangled_name: symbol.demangled_name.clone(),
address: if split.common {
symbol.align.unwrap_or(4) as u64
} else {
symbol.address - current_address.address as u64
},
section: if split.common { None } else { Some(out_section_idx) },
size: symbol.size,
size_known: symbol.size_known,
flags: if split.common {
ObjSymbolFlagSet(ObjSymbolFlags::Common.into())
} else {
symbol.flags
},
kind: symbol.kind,
align: symbol.align,
data_kind: symbol.data_kind,
name_hash: symbol.name_hash,
demangled_name_hash: symbol.demangled_name_hash,
})?);
}
// For mwldeppc 2.7 and above, a .comment section is required to link without error
// when common symbols are present. Automatically add one if needed.
if split.common && split_obj.mw_comment.is_none() {
split_obj.mw_comment = Some(MWComment::new(8)?);
}
if !split.common {
let data = match section.kind {
ObjSectionKind::Bss => vec![],
_ => section.data[(current_address.address as u64 - section.address) as usize
..(split_end.address as u64 - section.address) as usize]
.to_vec(),
};
split_obj.sections.push(ObjSection {
name: split.rename.as_ref().unwrap_or(&section.name).clone(),
kind: section.kind,
address: 0,
size: split_end.address as u64 - current_address.address as u64,
data,
align,
elf_index: out_section_idx + 1,
relocations: ObjRelocations::new(out_relocations)?,
original_address: current_address.address as u64,
file_offset: section.file_offset
+ (current_address.address as u64 - section.address),
section_known: true,
splits: Default::default(),
});
}
current_address = next_addr;
}
}
// Update relocations
let mut globalize_symbols = vec![];
for (obj_idx, out_obj) in objects.iter_mut().enumerate() {
let symbol_idxs = &mut object_symbols[obj_idx];
for (_section_index, section) in out_obj.sections.iter_mut() {
for (reloc_address, reloc) in section.relocations.iter_mut() {
match symbol_idxs[reloc.target_symbol] {
Some(out_sym_idx) => {
reloc.target_symbol = out_sym_idx;
}
None => {
// Extern
let out_sym_idx = out_obj.symbols.count();
let target_sym = &obj.symbols[reloc.target_symbol];
// If the symbol is local, we'll upgrade the scope to global
// and rename it to avoid conflicts
if target_sym.flags.is_local() {
let address_str = if obj.module_id == 0 {
format!("{:08X}", target_sym.address)
} else if let Some(section_index) = target_sym.section {
let target_section = &obj.sections[section_index];
format!(
"{}_{}_{:X}",
obj.module_id,
target_section.name.trim_start_matches('.'),
target_sym.address
)
} else {
bail!("Local symbol {} has no section", target_sym.name);
};
let new_name = if target_sym.name.ends_with(&address_str) {
target_sym.name.clone()
} else {
format!("{}_{}", target_sym.name, address_str)
};
globalize_symbols.push((reloc.target_symbol, new_name));
}
symbol_idxs[reloc.target_symbol] = Some(out_sym_idx);
out_obj.symbols.add_direct(ObjSymbol {
name: target_sym.name.clone(),
demangled_name: target_sym.demangled_name.clone(),
..Default::default()
})?;
reloc.target_symbol = out_sym_idx;
if section.name.as_str() == "extabindex" {
let Some((target_addr, target_split)) =
section.splits.for_address(target_sym.address as u32)
else {
bail!(
"Bad extabindex relocation @ {:#010X}",
reloc_address as u64 + section.original_address
);
};
let target_section = &obj.sections.at_address(target_addr)?.1.name;
log::error!(
"Bad extabindex relocation @ {:#010X}\n\
\tSource object: {}:{:#010X} ({})\n\
\tTarget object: {}:{:#010X} ({})\n\
\tTarget symbol: {:#010X} ({})\n\
This will cause the linker to crash.\n",
reloc_address as u64 + section.original_address,
section.name,
section.original_address,
out_obj.name,
target_section,
target_addr,
target_split.unit,
target_sym.address,
target_sym.demangled_name.as_deref().unwrap_or(&target_sym.name),
);
}
}
}
}
}
}
// Upgrade local symbols to global if necessary
for (obj, symbol_map) in objects.iter_mut().zip(&object_symbols) {
for (globalize_idx, new_name) in &globalize_symbols {
if let Some(symbol_idx) = symbol_map[*globalize_idx] {
let mut symbol = obj.symbols[symbol_idx].clone();
symbol.name = new_name.clone();
if symbol.flags.is_local() {
log::debug!("Globalizing {} in {}", symbol.name, obj.name);
symbol.flags.set_scope(ObjSymbolScope::Global);
}
obj.symbols.replace(symbol_idx, symbol)?;
}
}
}
// Extern linker generated symbols
for obj in &mut objects {
let mut replace_symbols = vec![];
for (symbol_idx, symbol) in obj.symbols.iter().enumerate() {
if is_linker_generated_label(&symbol.name) && symbol.section.is_some() {
log::debug!("Externing {:?} in {}", symbol, obj.name);
replace_symbols.push((symbol_idx, ObjSymbol {
name: symbol.name.clone(),
demangled_name: symbol.demangled_name.clone(),
..Default::default()
}));
}
}
for (symbol_idx, symbol) in replace_symbols {
obj.symbols.replace(symbol_idx, symbol)?;
}
}
Ok(objects)
}
/// mwld doesn't preserve the original section alignment values
///
/// Returns 4 for code sections and for `.ctors`, `.dtors`, `extab` and `extabindex`;
/// every other section (including `.sbss`) defaults to 8.
pub fn default_section_align(section: &ObjSection) -> u64 {
    let word_aligned = matches!(section.kind, ObjSectionKind::Code)
        || matches!(section.name.as_str(), ".ctors" | ".dtors" | "extab" | "extabindex");
    if word_aligned {
        4
    } else {
        8
    }
}
/// Linker-generated symbols to extern
///
/// Returns `true` when `name` exactly matches one of the known linker-generated labels.
#[inline]
pub fn is_linker_generated_label(name: &str) -> bool {
    const LINKER_LABELS: &[&str] = &[
        "_ctors",
        "_dtors",
        "_f_init",
        "_f_init_rom",
        "_e_init",
        "_fextab",
        "_fextab_rom",
        "_eextab",
        "_fextabindex",
        "_fextabindex_rom",
        "_eextabindex",
        "_f_text",
        "_f_text_rom",
        "_e_text",
        "_f_ctors",
        "_f_ctors_rom",
        "_e_ctors",
        "_f_dtors",
        "_f_dtors_rom",
        "_e_dtors",
        "_f_rodata",
        "_f_rodata_rom",
        "_e_rodata",
        "_f_data",
        "_f_data_rom",
        "_e_data",
        "_f_sdata",
        "_f_sdata_rom",
        "_e_sdata",
        "_f_sbss",
        "_f_sbss_rom",
        "_e_sbss",
        "_f_sdata2",
        "_f_sdata2_rom",
        "_e_sdata2",
        "_f_sbss2",
        "_f_sbss2_rom",
        "_e_sbss2",
        "_f_bss",
        "_f_bss_rom",
        "_e_bss",
        "_f_stack",
        "_f_stack_rom",
        "_e_stack",
        "_stack_addr",
        "_stack_end",
        "_db_stack_addr",
        "_db_stack_end",
        "_heap_addr",
        "_heap_end",
        "_nbfunctions",
        "SIZEOF_HEADERS",
        "_SDA_BASE_",
        "_SDA2_BASE_",
        "_ABS_SDA_BASE_",
        "_ABS_SDA2_BASE_",
    ];
    LINKER_LABELS.contains(&name)
}
/// Linker generated objects to strip entirely
///
/// Returns `true` when `name` exactly matches one of the known linker-generated objects.
#[inline]
pub fn is_linker_generated_object(name: &str) -> bool {
    match name {
        "_eti_init_info" | "_rom_copy_info" | "_bss_init_info" | "_ctors$99" | "_dtors$99" => true,
        _ => false,
    }
}
/// Locate the end address of a section when excluding linker generated objects
///
/// Starts from `section.address + section.size`. For `.ctors`/`.dtors` the trailing
/// null pointer is dropped when the last four data bytes are zero. The end is then moved
/// back repeatedly for as long as the last sized object symbol below it is a linker
/// generated object (see `is_linker_generated_object`).
///
/// # Errors
/// Fails if `section_index` does not refer to an existing section.
pub fn end_for_section(obj: &ObjInfo, section_index: usize) -> Result<SectionAddress> {
    let section = obj
        .sections
        .get(section_index)
        .ok_or_else(|| anyhow!("Invalid section index: {}", section_index))?;
    let mut section_end = (section.address + section.size) as u32;
    // .ctors and .dtors end with a linker-generated null pointer,
    // adjust section size appropriately
    // `ends_with` is simply false for sections holding fewer than 4 bytes, where slicing
    // `data[len - 4..]` would underflow.
    if matches!(section.name.as_str(), ".ctors" | ".dtors")
        && section.data.ends_with(&[0u8; 4])
    {
        section_end -= 4;
    }
    loop {
        let last_symbol = obj
            .symbols
            .for_section_range(section_index, ..section_end)
            .filter(|(_, s)| s.kind == ObjSymbolKind::Object && s.size_known && s.size > 0)
            .next_back();
        match last_symbol {
            Some((_, symbol)) if is_linker_generated_object(&symbol.name) => {
                log::debug!(
                    "Found {}, adjusting section {} end {:#010X} -> {:#010X}",
                    symbol.name,
                    section.name,
                    section_end,
                    symbol.address
                );
                // The next lookup only considers symbols below the new end, so each
                // iteration strictly moves towards the section start.
                section_end = symbol.address as u32;
            }
            _ => break,
        }
    }
    Ok(SectionAddress::new(section_index, section_end))
}