decomp-toolkit/src/util/split.rs

use std::{
    cmp::{max, min, Ordering},
    collections::{BTreeMap, HashMap, HashSet},
};

use anyhow::{anyhow, bail, ensure, Context, Result};
use itertools::Itertools;
use petgraph::{graph::NodeIndex, Graph};
use tracing_attributes::instrument;

use crate::{
    analysis::{cfa::SectionAddress, read_address, read_u32},
    obj::{
        ObjArchitecture, ObjInfo, ObjKind, ObjReloc, ObjRelocations, ObjSection, ObjSectionKind,
        ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
        ObjUnit,
    },
    util::{align_up, comment::MWComment},
};

/// Create splits for function pointers in the given section.
///
/// Walks the 4-byte entries in `start..end`, resolves each one to the function
/// symbol it points at, and makes sure the entry and the function end up in the
/// same unit:
/// - if both already have a split, their units must match (error otherwise);
/// - if only one has a split, the other gets an autogenerated split in that unit;
/// - if neither has one, both get autogenerated splits in a unit named
///   `auto_<function>_<section>`.
///
/// Every referenced function symbol is additionally flagged force-active.
///
/// # Errors
/// Fails if an entry cannot be read, if no function symbol exists at the target
/// address, if existing splits disagree on the unit, or if adding a split fails.
fn split_ctors_dtors(obj: &mut ObjInfo, start: SectionAddress, end: SectionAddress) -> Result<()> {
    /// Builds an autogenerated split for `unit` that ends at `end`.
    fn auto_split(unit: String, end: u32) -> ObjSplit {
        ObjSplit {
            unit,
            end,
            align: None,
            common: false,
            autogenerated: true,
            skip: false,
            rename: None,
        }
    }

    let table_section = &obj.sections[start.section];
    // Splits are buffered here and only applied once the scan is done.
    let mut pending = BTreeMap::<SectionAddress, ObjSplit>::new();
    let mut entry_addr = start;
    let mut keep_alive = vec![];

    while entry_addr < end {
        let target = read_address(obj, table_section, entry_addr.address)?;
        log::debug!("Found {} entry: {:#010X}", table_section.name, target);

        let target_section = &obj.sections[target.section];
        let lookup = obj.symbols.kind_at_section_address(
            target.section,
            target.address,
            ObjSymbolKind::Function,
        )?;
        let (target_idx, target_symbol) = match lookup {
            Some(found) => found,
            None => bail!("Failed to find function symbol @ {:#010X}", target),
        };
        keep_alive.push(target_idx);

        let entry_split = table_section.splits.for_address(entry_addr.address);
        let target_split = target_section.splits.for_address(target.address);

        // Reconcile the units of whichever splits already exist.
        let entry_unit = entry_split.map(|(_, split)| &split.unit);
        let target_unit = target_split.map(|(_, split)| &split.unit);
        if let (Some(unit), Some(other)) = (entry_unit, target_unit) {
            ensure!(
                unit == other,
                "Mismatched splits for {} {:#010X} ({}) and function {:#010X} ({})",
                table_section.name,
                entry_addr,
                unit,
                target,
                other
            );
        }
        let known_unit = entry_unit.or(target_unit).cloned();

        if entry_split.is_none() || target_split.is_none() {
            let unit = match known_unit {
                Some(unit) => unit,
                None => {
                    // Neither side is split yet: derive a unit name from the function.
                    let section_name = target_symbol
                        .section
                        .and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
                        .unwrap_or_else(|| "unknown".to_string());
                    format!("auto_{}_{}", target_symbol.name, section_name.trim_start_matches('.'))
                }
            };
            log::debug!("Adding splits to unit {}", unit);

            if entry_split.is_none() {
                log::debug!(
                    "Adding split for {} entry @ {:#010X}",
                    table_section.name,
                    entry_addr
                );
                pending.insert(entry_addr, auto_split(unit.clone(), entry_addr.address + 4));
            }
            if target_split.is_none() {
                log::debug!("Adding split for function @ {:#010X}", target);
                pending
                    .insert(target, auto_split(unit, target.address + target_symbol.size as u32));
            }
        }

        entry_addr += 4;
    }

    for (addr, split) in pending {
        obj.add_split(addr.section, addr.address, split)?;
    }

    // Hack to avoid deadstripping
    for symbol_idx in keep_alive {
        obj.symbols.flags(symbol_idx).set_force_active(true);
    }

    Ok(())
}

/// Create splits for extabindex + extab entries.
fn split_extabindex(obj: &mut ObjInfo, start: SectionAddress) -> Result<()> {
    let section = &obj.sections[start.section];
    let mut new_splits = BTreeMap::<SectionAddress, ObjSplit>::new();
    let (_, eti_init_info) = obj
        .symbols
        .by_name("_eti_init_info")?
        .ok_or_else(|| anyhow!("Failed to find _eti_init_info symbol"))?;
    ensure!(
        eti_init_info.section == Some(start.section),
        "_eti_init_info symbol in the wrong section: {:?} != {}",
        eti_init_info.section,
        start.section
    );

    let (extab_section_index, extab_section) =
        obj.sections.by_name("extab")?.ok_or_else(|| anyhow!("Failed to find extab section"))?;

    let mut current_address = start;
    let section_end = eti_init_info.address as u32;
    while current_address.address < section_end {
        let function_addr = read_address(obj, section, current_address.address)?;
        let function_size = read_u32(section, current_address.address + 4).with_context(|| {
            format!(
                "Failed to read extabindex entry function size @ {:#010X}",
                current_address.address + 4
            )
        })?;
        let extab_addr = read_address(obj, section, current_address.address + 8)?;
        ensure!(
            extab_addr.section == extab_section_index,
            "extabindex entry @ {:#010X} has invalid extab address {:#010X}",
            current_address,
            extab_addr
        );
        log::debug!(
            "Found extabindex entry: {:#010X} size {:#010X} extab {:#010X}",
            function_addr,
            function_size,
            extab_addr
        );

        let Some((_, eti_symbol)) = obj.symbols.kind_at_section_address(
            current_address.section,
            current_address.address,
            ObjSymbolKind::Object,
        )?
        else {
            bail!("Failed to find extabindex symbol @ {:#010X}", current_address);
        };
        ensure!(
            eti_symbol.size_known && eti_symbol.size == 12,
            "extabindex symbol {} has mismatched size ({:#X}, expected {:#X})",
            eti_symbol.name,
            eti_symbol.size,
            12
        );

        let text_section = &obj.sections[function_addr.section];
        let Some((_, function_symbol)) = obj.symbols.kind_at_section_address(
            function_addr.section,
            function_addr.address,
            ObjSymbolKind::Function,
        )?
        else {
            bail!("Failed to find function symbol @ {:#010X}", function_addr);
        };
        ensure!(
            function_symbol.size_known && function_symbol.size == function_size as u64,
            "Function symbol {} has mismatched size ({:#X}, expected {:#X})",
            function_symbol.name,
            function_symbol.size,
            function_size
        );

        let Some((_, extab_symbol)) = obj.symbols.kind_at_section_address(
            extab_addr.section,
            extab_addr.address,
            ObjSymbolKind::Object,
        )?
        else {
            bail!("Failed to find extab symbol @ {:#010X}", extab_addr);
        };
        ensure!(
            extab_symbol.size_known && extab_symbol.size > 0,
            "extab symbol {} has unknown size",
            extab_symbol.name
        );

        let extabindex_split = section.splits.for_address(current_address.address);
        let extab_split = extab_section.splits.for_address(extab_addr.address);
        let function_split = text_section.splits.for_address(function_addr.address);

        let mut expected_unit = None;
        if let Some((_, extabindex_split)) = extabindex_split {
            expected_unit = Some(extabindex_split.unit.clone());
        }
        if let Some((_, extab_split)) = extab_split {
            if let Some(unit) = &expected_unit {
                ensure!(
                    unit == &extab_split.unit,
                    "Mismatched splits for extabindex {:#010X} ({}) and extab {:#010X} ({})",
                    current_address,
                    unit,
                    extab_addr,
                    extab_split.unit
                );
            } else {
                expected_unit = Some(extab_split.unit.clone());
            }
        }
        if let Some((_, function_split)) = function_split {
            if let Some(unit) = &expected_unit {
                ensure!(
                    unit == &function_split.unit,
                    "Mismatched splits for extabindex {:#010X} ({}) and function {:#010X} ({})",
                    current_address,
                    unit,
                    function_addr,
                    function_split.unit
                );
            } else {
                expected_unit = Some(function_split.unit.clone());
            }
        }

        if extabindex_split.is_none() || extab_split.is_none() || function_split.is_none() {
            let unit = expected_unit.unwrap_or_else(|| {
                let section_name = function_symbol
                    .section
                    .and_then(|idx| obj.sections.get(idx).map(|s| s.name.clone()))
                    .unwrap_or_else(|| "unknown".to_string());
                format!("auto_{}_{}", function_symbol.name, section_name.trim_start_matches('.'))
            });
            log::debug!("Adding splits to unit {}", unit);

            if extabindex_split.is_none() {
                let end = current_address + 12;
                log::debug!(
                    "Adding split for extabindex entry @ {:#010X}-{:#010X}",
                    current_address,
                    end
                );
                new_splits.insert(current_address, ObjSplit {
                    unit: unit.clone(),
                    end: end.address,
                    align: None,
                    common: false,
                    autogenerated: true,
                    skip: false,
                    rename: None,
                });
            }
            if extab_split.is_none() {
                let end = extab_addr + extab_symbol.size as u32;
                log::debug!("Adding split for extab @ {:#010X}-{:#010X}", extab_addr, end);
                new_splits.insert(extab_addr, ObjSplit {
                    unit: unit.clone(),
                    end: end.address,
                    align: None,
                    common: false,
                    autogenerated: true,
                    skip: false,
                    rename: None,
                });
            }
            if function_split.is_none() {
                let end = function_addr + function_symbol.size as u32;
                log::debug!("Adding split for function @ {:#010X}-{:#010X}", function_addr, end);
                new_splits.insert(function_addr, ObjSplit {
                    unit,
                    end: end.address,
                    align: None,
                    common: false,
                    autogenerated: true,
                    skip: false,
                    rename: None,
                });
            }
        }

        current_address += 12;
    }

    for (addr, split) in new_splits {
        obj.add_split(addr.section, addr.address, split)?;
    }

    Ok(())
}

/// Create splits for gaps between existing splits.
///
/// For every section, walks from the section start to the address returned by
/// `end_for_section`, visiting the existing splits in address order. Whenever
/// the next split (or the section end, once no splits remain) starts beyond the
/// aligned current address, an autogenerated split named
/// `auto_<section index>_<address>_<section name>` is recorded to cover the gap.
/// A gap split is cut short at the first symbol whose name repeats a name
/// already seen inside the gap.
///
/// New splits are collected first and added to `obj` after all sections have
/// been scanned.
///
/// # Errors
/// Fails if an existing split starts before the current position (overlap), if
/// a gap split would be empty because the repeated symbol sits at its start, or
/// if `end_for_section` / `add_split` fail.
fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> {
    let mut new_splits = BTreeMap::<SectionAddress, ObjSplit>::new();

    for (section_index, section) in obj.sections.iter() {
        let mut current_address = SectionAddress::new(section_index, section.address as u32);
        let section_end = end_for_section(obj, section_index)?;
        // Existing splits that start before the section end, in address order.
        let mut file_iter = section
            .splits
            .for_range(..section_end.address)
            .map(|(addr, split)| (SectionAddress::new(section_index, addr), split))
            .peekable();

        log::debug!(
            "Checking splits for section {} ({:#010X}..{:#010X})",
            section.name,
            current_address,
            section_end
        );
        loop {
            if current_address >= section_end {
                break;
            }

            // Look at (but do not consume) the next existing split. When none is
            // left, the section end acts as the next start, the end address is
            // 0, and the section's default alignment is used.
            let (split_start, split_end, split_align) = match file_iter.peek() {
                Some(&(addr, split)) => {
                    log::debug!("Found split {} ({:#010X}..{:#010X})", split.unit, addr, split.end);
                    (
                        addr,
                        SectionAddress::new(section_index, split.end),
                        split.alignment(obj, section_index, section, addr.address),
                    )
                }
                None => (
                    section_end,
                    SectionAddress::new(section_index, 0),
                    default_section_align(section) as u32,
                ),
            };
            ensure!(
                split_start >= current_address,
                "Split {:#010X}..{:#010X} overlaps with previous split",
                split_start,
                split_end
            );

            // A gap exists only if the next split starts past the current
            // address rounded up to that split's alignment.
            let aligned_addr = current_address.align_up(split_align);
            if split_start > aligned_addr {
                log::debug!(
                    "Creating auto split: {} > {} (orig: {}, align {})",
                    split_start,
                    aligned_addr,
                    current_address,
                    split_align
                );
                // Find any duplicate symbols in this range
                let mut new_split_end = split_start;
                let symbols = obj
                    .symbols
                    .for_section_range(section_index, current_address.address..split_start.address)
                    .collect_vec();
                let mut existing_symbols = HashSet::new();
                for &(_, symbol) in &symbols {
                    // `insert` returns false when the name was already present:
                    // end the gap split just before the repeated symbol.
                    if !existing_symbols.insert(symbol.name.clone()) {
                        log::debug!(
                            "Found duplicate symbol {} at {:#010X}",
                            symbol.name,
                            symbol.address
                        );
                        new_split_end.address = symbol.address as u32;
                        break;
                    }
                }

                // An empty split means the repeated symbol is at the gap start.
                ensure!(
                    new_split_end > current_address,
                    "Duplicate symbols at {:#010X}: {:?}",
                    current_address,
                    symbols
                        .iter()
                        .filter(|(_, s)| s.address == current_address.address as u64)
                        .collect_vec(),
                );
                log::debug!(
                    "Creating split from {:#010X}..{:#010X}",
                    current_address,
                    new_split_end
                );
                let unit = format!(
                    "auto_{:02}_{:08X}_{}",
                    current_address.section,
                    current_address.address,
                    section.name.trim_start_matches('.')
                );
                new_splits.insert(current_address, ObjSplit {
                    unit: unit.clone(),
                    end: new_split_end.address,
                    align: None,
                    common: false,
                    autogenerated: true,
                    skip: false,
                    rename: None,
                });
                // Re-examine the same peeked split from the new position; the
                // remainder of the gap (if any) is handled on the next pass.
                current_address = new_split_end;
                continue;
            }

            // No gap: consume the existing split and move past it.
            file_iter.next();
            if split_end.address > 0 {
                current_address = split_end;
            } else {
                // End address 0: advance to the start of the following split,
                // capped at the section end, or to the section end if there is
                // no following split.
                let mut file_end = section_end;
                if let Some(&(next_addr, _next_split)) = file_iter.peek() {
                    file_end = min(next_addr, section_end);
                }
                current_address = file_end;
            }
        }
    }

    // Add new splits
    for (addr, split) in new_splits {
        obj.add_split(addr.section, addr.address, split)?;
    }

    Ok(())
}

/// Ensures that all .bss splits following a common split are also marked as common.
///
/// The common region starts at `common_start` when given, otherwise at the first
/// `.bss` split already flagged `common`. Does nothing when there is no `.bss`
/// section or no starting point can be determined.
fn update_common_splits(obj: &mut ObjInfo, common_start: Option<u32>) -> Result<()> {
    let Some((bss_index, bss)) = obj.sections.by_name(".bss")? else {
        return Ok(());
    };
    let first_common = match common_start {
        Some(addr) => addr,
        None => match bss.splits.iter().find(|(_, split)| split.common) {
            Some((addr, _)) => addr,
            None => return Ok(()),
        },
    };
    log::debug!("Found common BSS start at {:#010X}", first_common);

    // Flag every split from the starting address onwards that is not common yet.
    for (addr, split) in obj.sections[bss_index]
        .splits
        .for_range_mut(first_common..)
        .filter(|(_, split)| !split.common)
    {
        split.common = true;
        log::debug!("Added common flag to split {} at {:#010X}", split.unit, addr);
    }
    Ok(())
}

/// Final validation of splits.
///
/// Checks, for every split in iteration order, that it:
/// - does not start before the end of the previously visited split,
/// - has a non-zero end that is not before its start,
/// - does not start inside the last sized symbol located before its start,
/// - does not end inside the last sized symbol located before its end.
///
/// # Errors
/// Returns a descriptive error for the first violated check.
fn validate_splits(obj: &ObjInfo) -> Result<()> {
    let mut previous_end = SectionAddress::new(0, 0);
    for (section_index, section, addr, split) in obj.sections.all_splits() {
        ensure!(
            SectionAddress::new(section_index, addr) >= previous_end,
            "Split {} {} {:#010X}..{:#010X} overlaps with previous split",
            split.unit,
            section.name,
            addr,
            split.end
        );
        ensure!(
            split.end > 0 && split.end >= addr,
            "Invalid split end {} {} {:#010X}..{:#010X}",
            split.unit,
            section.name,
            addr,
            split.end
        );
        previous_end = SectionAddress::new(section_index, split.end);

        // Last sized symbol that starts before the split begins.
        let symbol_before_start = obj
            .symbols
            .for_section_range(section_index, ..addr)
            .rfind(|&(_, s)| s.size_known && s.size > 0);
        if let Some((_, symbol)) = symbol_before_start {
            ensure!(
                addr >= symbol.address as u32 + symbol.size as u32,
                "Split {} {} {:#010X}..{:#010X} overlaps symbol '{}' {:#010X}..{:#010X}",
                split.unit,
                section.name,
                addr,
                split.end,
                symbol.name,
                symbol.address,
                symbol.address + symbol.size
            );
        }

        // Last sized symbol that starts before the split ends.
        let symbol_before_end = obj
            .symbols
            .for_section_range(section_index, ..split.end)
            .rfind(|&(_, s)| s.size_known && s.size > 0);
        if let Some((_, symbol)) = symbol_before_end {
            ensure!(
                split.end >= symbol.address as u32 + symbol.size as u32,
                "Split {} {} ({:#010X}..{:#010X}) ends within symbol '{}' ({:#010X}..{:#010X})",
                split.unit,
                section.name,
                addr,
                split.end,
                symbol.name,
                symbol.address,
                symbol.address + symbol.size
            );
        }
    }
    Ok(())
}

/// Add padding symbols to fill in gaps between splits and symbols.
fn add_padding_symbols(obj: &mut ObjInfo) -> Result<()> {
    for (section_index, section, addr, _split) in obj.sections.all_splits() {
        if section.name == ".ctors" || section.name == ".dtors" {
            continue;
        }

        if obj
            .symbols
            .kind_at_section_address(section_index, addr, match section.kind {
                ObjSectionKind::Code => ObjSymbolKind::Function,
                ObjSectionKind::Data => ObjSymbolKind::Object,
                ObjSectionKind::ReadOnlyData => ObjSymbolKind::Object,
                ObjSectionKind::Bss => ObjSymbolKind::Object,
            })?
            .is_none()
        {
            let next_symbol_address = obj
                .symbols
                .for_section_range(section_index, addr + 1..)
                .find(|&(_, s)| s.size_known && s.size > 0)
                .map(|(_, s)| s.address)
                .unwrap_or(section.address + section.size);
            let symbol_name = format!(
                "pad_{:02}_{:08X}_{}",
                section_index,
                addr,
                section.name.trim_start_matches('.')
            );
            log::debug!("Adding padding symbol {} at {:#010X}", symbol_name, addr);
            obj.symbols.add_direct(ObjSymbol {
                name: symbol_name,
                address: addr as u64,
                section: Some(section_index),
                size: next_symbol_address - addr as u64,
                size_known: true,
                flags: ObjSymbolFlagSet(
                    ObjSymbolFlags::Local | ObjSymbolFlags::ForceActive | ObjSymbolFlags::NoWrite,
                ),
                kind: match section.kind {
                    ObjSectionKind::Code => ObjSymbolKind::Function,
                    ObjSectionKind::Data | ObjSectionKind::ReadOnlyData | ObjSectionKind::Bss => {
                        ObjSymbolKind::Object
                    }
                },
                ..Default::default()
            })?;
        }
    }

    // Add padding symbols for gaps between symbols
    for (section_index, section) in obj.sections.iter() {
        if section.name == ".ctors" || section.name == ".dtors" {
            continue;
        }

        let mut to_add = vec![];
        let mut iter = obj
            .symbols
            .for_section(section_index)
            .filter(|(_, s)| s.size_known && s.size > 0)
            .peekable();
        while let (Some((_, symbol)), Some(&(_, next_symbol))) = (iter.next(), iter.peek()) {
            let aligned_end =
                align_up((symbol.address + symbol.size) as u32, next_symbol.align.unwrap_or(1));
            match aligned_end.cmp(&(next_symbol.address as u32)) {
                Ordering::Less => {
                    let symbol_name = format!(
                        "gap_{:02}_{:08X}_{}",
                        section_index,
                        aligned_end,
                        section.name.trim_start_matches('.')
                    );
                    log::debug!("Adding gap symbol {} at {:#010X}", symbol_name, aligned_end);
                    to_add.push(ObjSymbol {
                        name: symbol_name,
                        address: aligned_end as u64,
                        section: Some(section_index),
                        size: next_symbol.address - aligned_end as u64,
                        size_known: true,
                        flags: ObjSymbolFlagSet(
                            ObjSymbolFlags::Global
                                | ObjSymbolFlags::Hidden
                                | ObjSymbolFlags::ForceActive
                                | ObjSymbolFlags::NoWrite,
                        ),
                        kind: match section.kind {
                            ObjSectionKind::Code => ObjSymbolKind::Function,
                            ObjSectionKind::Data
                            | ObjSectionKind::ReadOnlyData
                            | ObjSectionKind::Bss => ObjSymbolKind::Object,
                        },
                        ..Default::default()
                    });
                }
                Ordering::Equal => {}
                Ordering::Greater => {
                    bail!(
                        "Symbol {} ({:#010X}..{:#010X}) overlaps with symbol {} ({:#010X}..{:#010X}, align {})",
                        symbol.name,
                        symbol.address,
                        symbol.address + symbol.size,
                        next_symbol.name,
                        next_symbol.address,
                        next_symbol.address + next_symbol.size,
                        next_symbol.align.unwrap_or(1)
                    );
                }
            }
        }
        drop(iter);

        for symbol in to_add {
            obj.symbols.add_direct(symbol)?;
        }
    }
    Ok(())
}

/// Shrinks every split so that it ends at the end of its last sized symbol,
/// rounded up to the split's own alignment.
///
/// A split is only modified when the computed end is smaller than its current
/// end. If the trimmed end, rounded up to the alignment of the following split
/// in the same section, falls short of that split's start, a warning is logged;
/// the split is trimmed regardless.
///
/// Updates are collected during the scan and written back afterwards.
// Not called at the moment: the invocation in `update_splits` is commented out
// behind a TODO about mwld pooled data alignment.
#[allow(dead_code)]
fn trim_split_alignment(obj: &mut ObjInfo) -> Result<()> {
    // For each split, set the end of split to the end of the last symbol in the split.
    let mut split_updates = vec![];
    let mut iter = obj.sections.all_splits().peekable();
    while let Some((section_index, section, addr, split)) = iter.next() {
        // Only a following split in the *same* section is relevant.
        let next_split = iter
            .peek()
            .filter(|&&(idx, _, _, _)| section_index == idx)
            .map(|&(_, _, addr, split)| (addr, split));
        let mut split_end = split.end;
        if let Some((_, symbol)) = obj
            .symbols
            .for_section_range(section_index, addr..split.end)
            .rfind(|&(_, s)| s.size_known && s.size > 0)
        {
            split_end = symbol.address as u32 + symbol.size as u32;
        }
        split_end = align_up(split_end, split.alignment(obj, section_index, section, addr));
        if split_end < split.end {
            if let Some((next_addr, next_split)) = next_split {
                // The alignment of the next split must be evaluated at the next
                // split's own start address, not at the start of this split.
                let next_split_align =
                    next_split.alignment(obj, section_index, section, next_addr);
                if align_up(split_end, next_split_align) < next_addr {
                    log::warn!(
                        "Tried to trim {} split {} {:#010X}..{:#010X} to {:#010X}, but next split {} starts at {:#010X} with alignment {}",
                        section.name,
                        split.unit,
                        addr,
                        split.end,
                        split_end,
                        next_split.unit,
                        next_addr,
                        next_split_align
                    );
                }
            }
            log::info!(
                "Trimming {} split {} {:#010X}..{:#010X} to {:#010X}",
                section.name,
                split.unit,
                addr,
                split.end,
                split_end
            );
            split_updates.push((section_index, addr, split_end));
        }
    }
    // Release the borrow of `obj.sections` before mutating the splits.
    drop(iter);
    for (section_index, addr, split_end) in split_updates {
        // Each address was just yielded by `all_splits`, so the split exists.
        obj.sections[section_index].splits.at_mut(addr).unwrap().end = split_end;
    }
    Ok(())
}

/// Trim splits if they contain linker generated symbols.
///
/// For each section whose end as reported by `end_for_section` differs from
/// `address + size`, the last split of the section is clamped so it does not
/// extend past the reported end.
fn trim_linker_generated_symbols(obj: &mut ObjInfo) -> Result<()> {
    for section_index in 0..obj.sections.count() {
        let trimmed_end = end_for_section(obj, section_index)?.address;
        let section = &mut obj.sections[section_index];
        // Nothing to trim when the reported end is the raw section end.
        if section.address as u32 + section.size as u32 == trimmed_end {
            continue;
        }
        let Some((addr, split)) = section.splits.iter_mut().next_back() else {
            continue;
        };
        if split.end <= trimmed_end {
            continue;
        }
        log::debug!(
            "Trimming split {} {:#010X}..{:#010X} to {:#010X}",
            split.unit,
            addr,
            split.end,
            trimmed_end
        );
        split.end = trimmed_end;
    }
    Ok(())
}

/// Perform any necessary adjustments to allow relinking.
/// This includes:
/// - Ensuring .ctors & .dtors entries are split with their associated function
/// - Ensuring extab & extabindex entries are split with their associated function
/// - Creating splits for gaps between existing splits
/// - Resolving a new object link order
///
/// `common_start` optionally gives the address at which common `.bss` data
/// begins; it is forwarded to `update_common_splits`. When `fill_gaps` is set,
/// padding symbols are added via `add_padding_symbols`.
///
/// # Errors
/// Propagates the first error reported by any of the individual steps.
#[instrument(level = "debug", skip(obj))]
pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bool) -> Result<()> {
    // Create splits for extab and extabindex entries
    if let Some((section_index, section)) = obj.sections.by_name("extabindex")? {
        let start = SectionAddress::new(section_index, section.address as u32);
        split_extabindex(obj, start)?;
    }

    // Create splits for .ctors entries
    if let Some((section_index, section)) = obj.sections.by_name(".ctors")? {
        let start = SectionAddress::new(section_index, section.address as u32);
        // The last 4 bytes of the section are not scanned. `saturating_sub`
        // keeps a section smaller than 4 bytes from underflowing; the scan
        // range is then empty.
        let end = start + (section.size as u32).saturating_sub(4);
        split_ctors_dtors(obj, start, end)?;
    }

    // Create splits for .dtors entries
    if let Some((section_index, section)) = obj.sections.by_name(".dtors")? {
        let mut start = SectionAddress::new(section_index, section.address as u32);
        // Same as for .ctors: skip the last 4 bytes without underflowing.
        let end = start + (section.size as u32).saturating_sub(4);
        if obj.kind == ObjKind::Executable {
            // Skip __destroy_global_chain_reference
            start += 4;
        }
        split_ctors_dtors(obj, start, end)?;
    }

    // Remove linker generated symbols from splits
    trim_linker_generated_symbols(obj)?;

    // Create gap splits
    create_gap_splits(obj)?;

    // Update common BSS splits
    update_common_splits(obj, common_start)?;

    // Ensure splits don't overlap symbols or each other
    validate_splits(obj)?;

    // Trim alignment from splits
    // TODO figure out mwld pooled data alignment
    // trim_split_alignment(obj)?;

    if fill_gaps {
        // Add symbols to beginning of any split that doesn't start with a symbol
        add_padding_symbols(obj)?;
    }

    // Resolve link order
    obj.link_order = resolve_link_order(obj)?;

    Ok(())
}

/// The ordering of TUs inside of each section represents a directed edge in a DAG.
/// We can use a topological sort to determine a valid global TU order.
/// There can be ambiguities, but any solution that satisfies the link order
/// constraints is considered valid.
///
/// Units already present in `obj.link_order` are reused as-is; units that are
/// not yet listed are created with no comment version.
///
/// # Errors
/// Fails when the per-section orderings contradict each other (a cycle).
#[instrument(level = "debug", skip(obj))]
fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
    #[allow(dead_code)]
    #[derive(Debug, Copy, Clone)]
    struct SplitEdge {
        from: u32,
        to: u32,
    }

    let mut graph = Graph::<String, SplitEdge>::new();
    // Gather the unit names first so that graph nodes are created in sorted
    // name order; the placeholder index is replaced right below.
    let mut node_by_unit: BTreeMap<String, NodeIndex> = obj
        .sections
        .all_splits()
        .map(|(_, _, _, split)| (split.unit.clone(), NodeIndex::new(0)))
        .collect();
    for (unit, node) in &mut node_by_unit {
        *node = graph.add_node(unit.clone());
    }

    for (_section_index, section) in obj.sections.iter() {
        let mut splits = section.splits.iter().peekable();
        if section.name == ".ctors" || section.name == ".dtors" {
            // Skip __init_cpp_exceptions.o
            let skipped = splits.next();
            log::debug!("Skipping split {:?} (next: {:?})", skipped, splits.peek());
        }
        // Each pair of adjacent splits contributes one ordering constraint.
        for ((a_addr, a), (b_addr, b)) in splits.tuple_windows() {
            if !a.common && b.common {
                // This marks the beginning of the common BSS section.
                continue;
            }
            if a.unit == b.unit {
                continue;
            }

            log::debug!(
                "Adding dependency {} ({:#010X}) -> {} ({:#010X})",
                a.unit,
                a_addr,
                b.unit,
                b_addr
            );
            let a_node = node_by_unit.get(&a.unit).copied().unwrap();
            let b_node = node_by_unit.get(&b.unit).copied().unwrap();
            graph.add_edge(a_node, b_node, SplitEdge { from: a_addr, to: b_addr });
        }
    }

    // use petgraph::{
    //     dot::{Config, Dot},
    //     graph::EdgeReference,
    // };
    // let get_edge_attributes = |_, e: EdgeReference<SplitEdge>| {
    //     let &SplitEdge { from, to } = e.weight();
    //     let section_name = &obj.section_at(from).unwrap().name;
    //     format!("label=\"{} {:#010X} -> {:#010X}\"", section_name, from, to)
    // };
    // let dot = Dot::with_attr_getters(
    //     &graph,
    //     &[Config::EdgeNoLabel, Config::NodeNoLabel],
    //     &get_edge_attributes,
    //     &|_, (_, s)| format!("label=\"{}\"", s),
    // );
    // println!("{:?}", dot);

    let sorted = petgraph::algo::toposort(&graph, None).map_err(|cycle| {
        anyhow!(
            "Cyclic dependency (involving {}) encountered while resolving link order",
            graph[cycle.node_id()]
        )
    })?;

    let link_order = sorted
        .into_iter()
        .map(|node| {
            let name = &graph[node];
            obj.link_order.iter().find(|unit| &unit.name == name).cloned().unwrap_or_else(|| {
                ObjUnit {
                    name: name.clone(),
                    autogenerated: obj.is_unit_autogenerated(name),
                    comment_version: None,
                }
            })
        })
        .collect();
    Ok(link_order)
}

/// Split an object into multiple relocatable objects.
///
/// One output object is created per unit in `obj.link_order`, in link order. Every split of
/// every section in `obj` is then copied (data, relocations and symbols) into the output
/// object of the unit the split names. Splits flagged `skip` are ignored, and splits flagged
/// `common` contribute only common symbols, without an output section.
///
/// Once all sections are distributed, relocation targets are rewritten from source symbol
/// indices to per-object symbol indices, adding an extern symbol for every target that is
/// not defined in the same output object. Local symbols that end up referenced across
/// objects are renamed and promoted to global scope, and linker-generated labels that were
/// given a section are replaced by externs.
///
/// # Errors
/// Fails when a split is missing or does not start at the expected address, when non-zero
/// data lies between two splits, when a split names a unit that is not in the link order,
/// when a split start cannot satisfy its (reduced) alignment, when a local relocation target
/// has no section in an object with a non-zero `module_id`, or when any helper called along
/// the way returns an error.
#[instrument(level = "debug", skip(obj))]
pub fn split_obj(obj: &ObjInfo) -> Result<Vec<ObjInfo>> {
    // Output objects, one per unit, in link order.
    let mut objects: Vec<ObjInfo> = vec![];
    // object_symbols[i][j] is the index inside output object `i` of source symbol `j`,
    // or None while the symbol has not been added to that object.
    let mut object_symbols: Vec<Vec<Option<usize>>> = vec![];
    // Unit name -> index into `objects` / `object_symbols`.
    let mut name_to_obj: HashMap<String, usize> = HashMap::new();
    for unit in &obj.link_order {
        name_to_obj.insert(unit.name.clone(), objects.len());
        object_symbols.push(vec![None; obj.symbols.count()]);
        let mut split_obj = ObjInfo::new(
            ObjKind::Relocatable,
            ObjArchitecture::PowerPc,
            unit.name.clone(),
            vec![],
            vec![],
        );
        // An explicit per-unit comment version takes priority (a version of 0 assigns
        // nothing); units without one inherit the source object's comment.
        if let Some(comment_version) = unit.comment_version {
            if comment_version > 0 {
                split_obj.mw_comment = Some(MWComment::new(comment_version)?);
            }
        } else {
            split_obj.mw_comment = obj.mw_comment.clone();
        }
        objects.push(split_obj);
    }

    for (section_index, section) in obj.sections.iter() {
        let mut current_address = SectionAddress::new(section_index, section.address as u32);
        // End of the section with trailing linker-generated data excluded.
        let section_end = end_for_section(obj, section_index)?;
        let mut split_iter = section
            .splits
            .for_range(current_address.address..section_end.address)
            .map(|(addr, split)| (SectionAddress::new(section_index, addr), split))
            .peekable();

        // Walk the section split by split; `current_address` always points at the start of
        // the split that is expected next.
        loop {
            if current_address >= section_end {
                break;
            }

            let (split_addr, split) = match split_iter.next() {
                Some((addr, split)) => (addr, split),
                None => bail!("No split found"),
            };
            ensure!(
                split_addr == current_address,
                "Split @ {} {} not found",
                section.name,
                current_address
            );

            let split_end = SectionAddress::new(section_index, split.end);
            let next_addr = split_iter.peek().map(|&(addr, _)| addr).unwrap_or(section_end);
            // A gap between this split's end and the next split is tolerated only when it
            // consists entirely of zero bytes.
            if next_addr > split_end
                && section.data_range(split_end.address, next_addr.address)?.iter().any(|&b| b != 0)
            {
                bail!(
                    "Unsplit data in {} from {} {} to next split {}",
                    section.name,
                    split.unit,
                    split_end,
                    next_addr
                );
            }

            // Skip over this data
            if split.skip {
                current_address = next_addr;
                continue;
            }

            // Resolve the destination object and its symbol index map. The unit name is
            // looked up twice because the two mutable borrows come from different vectors.
            let split_obj = name_to_obj
                .get(&split.unit)
                .and_then(|&idx| objects.get_mut(idx))
                .ok_or_else(|| anyhow!("Unit '{}' not in link order", split.unit))?;
            let symbol_idxs = name_to_obj
                .get(&split.unit)
                .and_then(|&idx| object_symbols.get_mut(idx))
                .ok_or_else(|| anyhow!("Unit '{}' not in link order", split.unit))?;

            // Calculate & verify section alignment
            // An explicit split alignment wins. Otherwise use the larger of the section's
            // default alignment and the largest alignment declared by any sized symbol
            // within the split.
            let mut align = split.align.unwrap_or_else(|| {
                let default_align = default_section_align(section) as u32;
                max(
                    // Maximum alignment of any symbol in this split
                    obj.symbols
                        .for_section_range(
                            section_index,
                            current_address.address..split_end.address,
                        )
                        .filter(|&(_, s)| s.size_known && s.size > 0)
                        .filter_map(|(_, s)| s.align)
                        .max()
                        .unwrap_or(default_align),
                    default_align,
                )
            }) as u64;

            // If the split start does not satisfy the computed alignment, warn and keep
            // halving it (never below 4) until the start address fits.
            if current_address & (align as u32 - 1) != 0 {
                log::warn!(
                    "Alignment for {} {} expected {}, but starts at {:#010X}",
                    split.unit,
                    section.name,
                    align,
                    current_address
                );
                while align > 4 {
                    align /= 2;
                    if current_address & (align as u32 - 1) == 0 {
                        break;
                    }
                }
            }
            // Still misaligned after the reduction above: give up.
            ensure!(
                current_address & (align as u32 - 1) == 0,
                "Invalid alignment for split: {} {} {:#010X}",
                split.unit,
                section.name,
                current_address
            );

            // Collect relocations; target_symbol will be updated later
            // Relocation addresses are rebased to be relative to the start of the split.
            let out_relocations = section
                .relocations
                .range(current_address.address..split_end.address)
                .map(|(addr, o)| {
                    (addr - current_address.address, ObjReloc {
                        kind: o.kind,
                        target_symbol: o.target_symbol,
                        addend: o.addend,
                        module: o.module,
                    })
                })
                .collect_vec();

            // Add section symbols
            // The range is inclusive of `split_end` so that a symbol sitting exactly at the
            // end of the split is visited; whether it is kept is decided inside the loop.
            let out_section_idx = split_obj.sections.next_section_index();
            for (symbol_idx, symbol) in obj
                .symbols
                .for_section_range(section_index, current_address.address..=split_end.address)
                .filter(|&(_, s)| {
                    s.section == Some(section_index) && !is_linker_generated_label(&s.name)
                })
            {
                if symbol_idxs[symbol_idx].is_some() {
                    continue; // should never happen?
                }

                // TODO hack for gTRKInterruptVectorTableEnd
                // Symbols located at the split's end address are left for the following
                // split, except gTRKInterruptVectorTableEnd, which is kept with the split it
                // terminates and skipped where it would otherwise start one.
                if (symbol.address == split_end.address as u64
                    && symbol.name != "gTRKInterruptVectorTableEnd")
                    || (symbol.address == current_address.address as u64
                        && symbol.name == "gTRKInterruptVectorTableEnd")
                {
                    continue;
                }

                // For common splits the symbol has no section, is flagged Common, and its
                // address field is set to the symbol's alignment (4 when unspecified).
                // Otherwise the address is rebased to the start of the split.
                symbol_idxs[symbol_idx] = Some(split_obj.symbols.add_direct(ObjSymbol {
                    name: symbol.name.clone(),
                    demangled_name: symbol.demangled_name.clone(),
                    address: if split.common {
                        symbol.align.unwrap_or(4) as u64
                    } else {
                        symbol.address - current_address.address as u64
                    },
                    section: if split.common { None } else { Some(out_section_idx) },
                    size: symbol.size,
                    size_known: symbol.size_known,
                    flags: if split.common {
                        ObjSymbolFlagSet(ObjSymbolFlags::Common.into())
                    } else {
                        symbol.flags
                    },
                    kind: symbol.kind,
                    align: symbol.align,
                    data_kind: symbol.data_kind,
                    name_hash: symbol.name_hash,
                    demangled_name_hash: symbol.demangled_name_hash,
                })?);
            }

            // For mwldeppc 2.7 and above, a .comment section is required to link without error
            // when common symbols are present. Automatically add one if needed.
            if split.common && split_obj.mw_comment.is_none() {
                split_obj.mw_comment = Some(MWComment::new(8)?);
            }

            // Common splits produce no output section; everything else gets a new section
            // holding the split's bytes (BSS sections carry no data).
            if !split.common {
                let data = match section.kind {
                    ObjSectionKind::Bss => vec![],
                    _ => section.data[(current_address.address as u64 - section.address) as usize
                        ..(split_end.address as u64 - section.address) as usize]
                        .to_vec(),
                };
                split_obj.sections.push(ObjSection {
                    name: split.rename.as_ref().unwrap_or(&section.name).clone(),
                    kind: section.kind,
                    address: 0,
                    size: split_end.address as u64 - current_address.address as u64,
                    data,
                    align,
                    // NOTE(review): presumably +1 accounts for a reserved ELF section index 0
                    // — confirm against the ELF writer.
                    elf_index: out_section_idx + 1,
                    relocations: ObjRelocations::new(out_relocations)?,
                    original_address: current_address.address as u64,
                    file_offset: section.file_offset
                        + (current_address.address as u64 - section.address),
                    section_known: true,
                    splits: Default::default(),
                });
            }

            current_address = next_addr;
        }
    }

    // Update relocations
    // At this point every `reloc.target_symbol` still holds an index into `obj.symbols`;
    // translate it to the index of the matching symbol inside the output object.
    // `globalize_symbols` collects (source symbol index, new name) pairs for local symbols
    // that are referenced from another object.
    let mut globalize_symbols = vec![];
    for (obj_idx, out_obj) in objects.iter_mut().enumerate() {
        let symbol_idxs = &mut object_symbols[obj_idx];
        for (_section_index, section) in out_obj.sections.iter_mut() {
            for (reloc_address, reloc) in section.relocations.iter_mut() {
                match symbol_idxs[reloc.target_symbol] {
                    // Target is already present in this object (defined here, or an extern
                    // added by an earlier relocation).
                    Some(out_sym_idx) => {
                        reloc.target_symbol = out_sym_idx;
                    }
                    None => {
                        // Extern
                        let out_sym_idx = out_obj.symbols.count();
                        let target_sym = &obj.symbols[reloc.target_symbol];

                        // If the symbol is local, we'll upgrade the scope to global
                        // and rename it to avoid conflicts
                        if target_sym.flags.is_local() {
                            // Suffix is the 8-digit hex address when module_id is 0, and
                            // `<module_id>_<section name without leading dots>_<hex address>`
                            // otherwise.
                            let address_str = if obj.module_id == 0 {
                                format!("{:08X}", target_sym.address)
                            } else if let Some(section_index) = target_sym.section {
                                let target_section = &obj.sections[section_index];
                                format!(
                                    "{}_{}_{:X}",
                                    obj.module_id,
                                    target_section.name.trim_start_matches('.'),
                                    target_sym.address
                                )
                            } else {
                                bail!("Local symbol {} has no section", target_sym.name);
                            };
                            // Don't append the suffix twice if the name already ends with it.
                            let new_name = if target_sym.name.ends_with(&address_str) {
                                target_sym.name.clone()
                            } else {
                                format!("{}_{}", target_sym.name, address_str)
                            };
                            globalize_symbols.push((reloc.target_symbol, new_name));
                        }

                        // Register the extern under its original name; if it was queued for
                        // globalization it is renamed in the pass below.
                        symbol_idxs[reloc.target_symbol] = Some(out_sym_idx);
                        out_obj.symbols.add_direct(ObjSymbol {
                            name: target_sym.name.clone(),
                            demangled_name: target_sym.demangled_name.clone(),
                            ..Default::default()
                        })?;
                        reloc.target_symbol = out_sym_idx;

                        // An extabindex relocation that needed an extern is reported as an
                        // error (see the message below), with source and target details.
                        // NOTE(review): `section` is the *output* section here, and every
                        // output section above is created with `splits: Default::default()`
                        // — confirm this lookup can succeed rather than always hitting the
                        // `bail!`.
                        if section.name.as_str() == "extabindex" {
                            let Some((target_addr, target_split)) =
                                section.splits.for_address(target_sym.address as u32)
                            else {
                                bail!(
                                    "Bad extabindex relocation @ {:#010X}",
                                    reloc_address as u64 + section.original_address
                                );
                            };
                            let target_section = &obj.sections.at_address(target_addr)?.1.name;
                            log::error!(
                                "Bad extabindex relocation @ {:#010X}\n\
                                \tSource object: {}:{:#010X} ({})\n\
                                \tTarget object: {}:{:#010X} ({})\n\
                                \tTarget symbol: {:#010X} ({})\n\
                                This will cause the linker to crash.\n",
                                reloc_address as u64 + section.original_address,
                                section.name,
                                section.original_address,
                                out_obj.name,
                                target_section,
                                target_addr,
                                target_split.unit,
                                target_sym.address,
                                target_sym.demangled_name.as_deref().unwrap_or(&target_sym.name),
                            );
                        }
                    }
                }
            }
        }
    }

    // Upgrade local symbols to global if necessary
    // Note: `obj` is shadowed in this loop and refers to an output object. Every object that
    // contains the symbol (as a definition or as an extern) receives the new name; the scope
    // is changed only where the symbol is still flagged local.
    for (obj, symbol_map) in objects.iter_mut().zip(&object_symbols) {
        for (globalize_idx, new_name) in &globalize_symbols {
            if let Some(symbol_idx) = symbol_map[*globalize_idx] {
                let mut symbol = obj.symbols[symbol_idx].clone();
                symbol.name = new_name.clone();
                if symbol.flags.is_local() {
                    log::debug!("Globalizing {} in {}", symbol.name, obj.name);
                    symbol.flags.set_scope(ObjSymbolScope::Global);
                }
                obj.symbols.replace(symbol_idx, symbol)?;
            }
        }
    }

    // Extern linker generated symbols
    // Replacements are collected first and applied afterwards because the symbol table
    // cannot be modified while it is being iterated. Each matching symbol keeps only its
    // name and demangled name; all other fields are reset to their defaults.
    for obj in &mut objects {
        let mut replace_symbols = vec![];
        for (symbol_idx, symbol) in obj.symbols.iter().enumerate() {
            if is_linker_generated_label(&symbol.name) && symbol.section.is_some() {
                log::debug!("Externing {:?} in {}", symbol, obj.name);
                replace_symbols.push((symbol_idx, ObjSymbol {
                    name: symbol.name.clone(),
                    demangled_name: symbol.demangled_name.clone(),
                    ..Default::default()
                }));
            }
        }
        for (symbol_idx, symbol) in replace_symbols {
            obj.symbols.replace(symbol_idx, symbol)?;
        }
    }

    Ok(objects)
}

/// Alignment to assume for a section when none is recorded, since mwld doesn't preserve
/// the original section alignment values.
///
/// Code sections and the `.ctors`, `.dtors`, `extab` and `extabindex` sections yield 4;
/// every other section yields 8.
pub fn default_section_align(section: &ObjSection) -> u64 {
    if matches!(section.kind, ObjSectionKind::Code) {
        return 4;
    }

    let word_aligned =
        matches!(section.name.as_str(), ".ctors" | ".dtors" | "extab" | "extabindex");
    if word_aligned {
        4
    } else {
        // `.sbss` also lands here; its value of 8 is marked as uncertain upstream.
        8
    }
}

/// Returns `true` when `name` is a linker-generated symbol that should be externed
/// rather than defined by a split object.
///
/// The comparison is an exact, case-sensitive match against a fixed table.
#[inline]
pub fn is_linker_generated_label(name: &str) -> bool {
    // Exact names of the labels to extern.
    const LINKER_LABELS: &[&str] = &[
        // Constructor / destructor tables
        "_ctors",
        "_dtors",
        // Per-section start (`_f*`), ROM start (`_f*_rom`) and end (`_e*`) labels
        "_f_init",
        "_f_init_rom",
        "_e_init",
        "_fextab",
        "_fextab_rom",
        "_eextab",
        "_fextabindex",
        "_fextabindex_rom",
        "_eextabindex",
        "_f_text",
        "_f_text_rom",
        "_e_text",
        "_f_ctors",
        "_f_ctors_rom",
        "_e_ctors",
        "_f_dtors",
        "_f_dtors_rom",
        "_e_dtors",
        "_f_rodata",
        "_f_rodata_rom",
        "_e_rodata",
        "_f_data",
        "_f_data_rom",
        "_e_data",
        "_f_sdata",
        "_f_sdata_rom",
        "_e_sdata",
        "_f_sbss",
        "_f_sbss_rom",
        "_e_sbss",
        "_f_sdata2",
        "_f_sdata2_rom",
        "_e_sdata2",
        "_f_sbss2",
        "_f_sbss2_rom",
        "_e_sbss2",
        "_f_bss",
        "_f_bss_rom",
        "_e_bss",
        "_f_stack",
        "_f_stack_rom",
        "_e_stack",
        // Stack and heap bounds
        "_stack_addr",
        "_stack_end",
        "_db_stack_addr",
        "_db_stack_end",
        "_heap_addr",
        "_heap_end",
        // Miscellaneous
        "_nbfunctions",
        "SIZEOF_HEADERS",
        "_SDA_BASE_",
        "_SDA2_BASE_",
        "_ABS_SDA_BASE_",
        "_ABS_SDA2_BASE_",
    ];

    LINKER_LABELS.iter().any(|&label| label == name)
}

/// Returns `true` when `name` is a linker generated object that should be stripped
/// entirely.
///
/// The comparison is an exact, case-sensitive match.
#[inline]
pub fn is_linker_generated_object(name: &str) -> bool {
    const LINKER_OBJECTS: [&str; 5] =
        ["_eti_init_info", "_rom_copy_info", "_bss_init_info", "_ctors$99", "_dtors$99"];

    LINKER_OBJECTS.iter().any(|&object| object == name)
}

/// Locate the end address of a section when excluding linker generated objects.
///
/// Starts from `section.address + section.size`. For `.ctors` and `.dtors`, a trailing
/// all-zero 4-byte entry in the section data is dropped. After that, as long as the last
/// sized object symbol below the current end is a linker generated object (see
/// [`is_linker_generated_object`]), the end is moved back to that symbol's address.
///
/// # Errors
/// Returns an error if `section_index` does not refer to a section of `obj`.
pub fn end_for_section(obj: &ObjInfo, section_index: usize) -> Result<SectionAddress> {
    let section = obj
        .sections
        .get(section_index)
        .ok_or_else(|| anyhow!("Invalid section index: {}", section_index))?;
    let mut section_end = (section.address + section.size) as u32;
    // .ctors and .dtors end with a linker-generated null pointer,
    // adjust section size appropriately.
    // `ends_with` is simply false for sections holding fewer than 4 bytes of data, where
    // slicing at `len() - 4` would underflow and panic.
    if matches!(section.name.as_str(), ".ctors" | ".dtors") && section.data.ends_with(&[0u8; 4]) {
        section_end -= 4;
    }
    // Peel linker generated objects off the end of the section one at a time. Each pass
    // only considers symbols below the current end, so the end strictly decreases.
    loop {
        let last_symbol = obj
            .symbols
            .for_section_range(section_index, ..section_end)
            .filter(|(_, s)| s.kind == ObjSymbolKind::Object && s.size_known && s.size > 0)
            .next_back();
        match last_symbol {
            Some((_, symbol)) if is_linker_generated_object(&symbol.name) => {
                log::debug!(
                    "Found {}, adjusting section {} end {:#010X} -> {:#010X}",
                    symbol.name,
                    section.name,
                    section_end,
                    symbol.address
                );
                section_end = symbol.address as u32;
            }
            _ => break,
        }
    }
    Ok(SectionAddress::new(section_index, section_end))
}