objdiff/objdiff-core/src/obj/read.rs

use alloc::{
    collections::BTreeMap,
    format,
    string::{String, ToString},
    vec::Vec,
};
use core::cmp::Ordering;

use anyhow::{Context, Result, anyhow, bail, ensure};
use object::{Object as _, ObjectSection as _, ObjectSymbol as _};

use crate::{
    arch::{Arch, new_arch},
    diff::DiffObjConfig,
    obj::{
        Object, Relocation, RelocationFlags, Section, SectionData, SectionFlag, SectionKind,
        Symbol, SymbolFlag, SymbolKind,
        split_meta::{SPLITMETA_SECTION, SplitMeta},
    },
    util::{read_u16, read_u32},
};

fn map_section_kind(section: &object::Section) -> SectionKind {
    match section.kind() {
        object::SectionKind::Text => SectionKind::Code,
        object::SectionKind::Data | object::SectionKind::ReadOnlyData => SectionKind::Data,
        object::SectionKind::UninitializedData => SectionKind::Bss,
        _ => SectionKind::Unknown,
    }
}

fn map_symbol(
    arch: &dyn Arch,
    file: &object::File,
    symbol: &object::Symbol,
    section_indices: &[usize],
    split_meta: Option<&SplitMeta>,
) -> Result<Symbol> {
    let mut name = symbol.name().context("Failed to process symbol name")?.to_string();
    let mut size = symbol.size();
    if let (object::SymbolKind::Section, Some(section)) =
        (symbol.kind(), symbol.section_index().and_then(|i| file.section_by_index(i).ok()))
    {
        let section_name = section.name().context("Failed to process section name")?;
        name = format!("[{}]", section_name);
        // For section symbols, set the size to zero. If the size is non-zero, it will be included
        // in the diff. Most of the time, this is duplicative, given that we'll have function or
        // object symbols that cover the same range. In the case of an empty section, the size
        // inference logic below will set the size back to the section size, thus acting as a
        // placeholder symbol.
        size = 0;
    }

    let mut flags = arch.extra_symbol_flags(symbol);
    if symbol.is_global() {
        flags |= SymbolFlag::Global;
    }
    if symbol.is_local() {
        flags |= SymbolFlag::Local;
    }
    if symbol.is_common() {
        flags |= SymbolFlag::Common;
    }
    if symbol.is_weak() {
        flags |= SymbolFlag::Weak;
    }
    if file.format() == object::BinaryFormat::Elf && symbol.scope() == object::SymbolScope::Linkage
    {
        flags |= SymbolFlag::Hidden;
    }

    let kind = match symbol.kind() {
        object::SymbolKind::Text => SymbolKind::Function,
        object::SymbolKind::Data => SymbolKind::Object,
        object::SymbolKind::Section => SymbolKind::Section,
        _ => SymbolKind::Unknown,
    };
    let address = arch.symbol_address(symbol.address(), kind);
    let demangled_name = arch.demangle(&name);
    // Find the virtual address for the symbol if available
    let virtual_address = split_meta
        .and_then(|m| m.virtual_addresses.as_ref())
        .and_then(|v| v.get(symbol.index().0).cloned());
    let section = symbol.section_index().and_then(|i| section_indices.get(i.0).copied());

    Ok(Symbol {
        name,
        demangled_name,
        address,
        size,
        kind,
        section,
        flags,
        align: None, // TODO parse .comment
        virtual_address,
    })
}

fn map_symbols(
    arch: &dyn Arch,
    obj_file: &object::File,
    sections: &[Section],
    section_indices: &[usize],
    split_meta: Option<&SplitMeta>,
) -> Result<(Vec<Symbol>, Vec<usize>)> {
    let symbol_count = obj_file.symbols().count();
    let mut symbols = Vec::<Symbol>::with_capacity(symbol_count);
    let mut symbol_indices = Vec::<usize>::with_capacity(symbol_count + 1);
    for obj_symbol in obj_file.symbols() {
        if symbol_indices.len() <= obj_symbol.index().0 {
            symbol_indices.resize(obj_symbol.index().0 + 1, usize::MAX);
        }
        let symbol = map_symbol(arch, obj_file, &obj_symbol, section_indices, split_meta)?;
        symbol_indices[obj_symbol.index().0] = symbols.len();
        symbols.push(symbol);
    }

    // Infer symbol sizes for 0-size symbols
    infer_symbol_sizes(&mut symbols, sections);

    Ok((symbols, symbol_indices))
}

fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
    // Create a sorted list of symbol indices by section
    let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
    for (i, symbol) in symbols.iter().enumerate() {
        if symbol.section.is_some() {
            symbols_with_section.push(i);
        }
    }
    symbols_with_section.sort_by(|a, b| {
        let a = &symbols[*a];
        let b = &symbols[*b];
        a.section
            .unwrap_or(usize::MAX)
            .cmp(&b.section.unwrap_or(usize::MAX))
            .then_with(|| {
                // Sort section symbols first
                if a.kind == SymbolKind::Section {
                    Ordering::Less
                } else if b.kind == SymbolKind::Section {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            })
            .then_with(|| a.address.cmp(&b.address))
            .then_with(|| a.size.cmp(&b.size))
    });

    // Set symbol sizes based on the next symbol's address
    let mut iter_idx = 0;
    let mut last_end = (0, 0);
    while iter_idx < symbols_with_section.len() {
        let symbol_idx = symbols_with_section[iter_idx];
        let symbol = &symbols[symbol_idx];
        let section_idx = symbol.section.unwrap();
        iter_idx += 1;
        if symbol.size != 0 {
            if symbol.kind != SymbolKind::Section {
                last_end = (section_idx, symbol.address + symbol.size);
            }
            continue;
        }
        // Skip over symbols that are contained within the previous symbol
        if last_end.0 == section_idx && last_end.1 > symbol.address {
            continue;
        }
        let next_symbol = match symbol.kind {
            // For function/object symbols, find the next function/object symbol (in other words:
            // skip over labels)
            SymbolKind::Function | SymbolKind::Object => loop {
                if iter_idx >= symbols_with_section.len() {
                    break None;
                }
                let next_symbol = &symbols[symbols_with_section[iter_idx]];
                if next_symbol.section != Some(section_idx) {
                    break None;
                }
                if let SymbolKind::Function | SymbolKind::Object = next_symbol.kind {
                    break Some(next_symbol);
                }
                iter_idx += 1;
            },
            // For labels (or anything else), simply use the next symbol's address
            SymbolKind::Unknown | SymbolKind::Section => symbols_with_section
                .get(iter_idx)
                .map(|&i| &symbols[i])
                .take_if(|s| s.section == Some(section_idx)),
        };
        let next_address = next_symbol.map(|s| s.address).unwrap_or_else(|| {
            let section = &sections[section_idx];
            section.address + section.size
        });
        let new_size = next_address.saturating_sub(symbol.address);
        if new_size > 0 {
            let symbol = &mut symbols[symbol_idx];
            symbol.size = new_size;
            if symbol.kind != SymbolKind::Section {
                symbol.flags |= SymbolFlag::SizeInferred;
            }
            // Set symbol kind if unknown and size is non-zero
            if symbol.kind == SymbolKind::Unknown {
                symbol.kind = match sections[section_idx].kind {
                    SectionKind::Code => SymbolKind::Function,
                    SectionKind::Data | SectionKind::Bss => SymbolKind::Object,
                    _ => SymbolKind::Unknown,
                };
            }
        }
    }
}

fn map_sections(
    _arch: &dyn Arch,
    obj_file: &object::File,
    split_meta: Option<&SplitMeta>,
) -> Result<(Vec<Section>, Vec<usize>)> {
    let mut section_names = BTreeMap::<String, usize>::new();
    let section_count = obj_file.sections().count();
    let mut result = Vec::<Section>::with_capacity(section_count);
    let mut section_indices = Vec::<usize>::with_capacity(section_count + 1);
    for section in obj_file.sections() {
        let name = section.name().context("Failed to process section name")?;
        let kind = map_section_kind(&section);
        let data = if kind == SectionKind::Unknown {
            // Don't need to read data for unknown sections
            Vec::new()
        } else {
            section.uncompressed_data().context("Failed to read section data")?.into_owned()
        };

        // Find the virtual address for the section symbol if available
        let section_symbol = obj_file.symbols().find(|s| {
            s.kind() == object::SymbolKind::Section && s.section_index() == Some(section.index())
        });
        let virtual_address = section_symbol.and_then(|s| {
            split_meta
                .and_then(|m| m.virtual_addresses.as_ref())
                .and_then(|v| v.get(s.index().0).cloned())
        });

        let unique_id = section_names.entry(name.to_string()).or_insert(0);
        let id = format!("{}-{}", name, unique_id);
        *unique_id += 1;

        if section_indices.len() <= section.index().0 {
            section_indices.resize(section.index().0 + 1, usize::MAX);
        }
        section_indices[section.index().0] = result.len();
        result.push(Section {
            id,
            name: name.to_string(),
            address: section.address(),
            size: section.size(),
            kind,
            data: SectionData(data),
            flags: Default::default(),
            relocations: Default::default(),
            virtual_address,
            line_info: Default::default(),
        });
    }
    Ok((result, section_indices))
}

const LOW_PRIORITY_SYMBOLS: &[&str] =
    &["__gnu_compiled_c", "__gnu_compiled_cplusplus", "gcc2_compiled."];

fn best_symbol<'r, 'data, 'file>(
    symbols: &'r [object::Symbol<'data, 'file>],
    address: u64,
) -> Option<(object::SymbolIndex, u64)> {
    let mut closest_symbol_index = match symbols.binary_search_by_key(&address, |s| s.address()) {
        Ok(index) => Some(index),
        Err(index) => index.checked_sub(1),
    }?;
    // The binary search may not find the first symbol at the address, so work backwards
    let target_address = symbols[closest_symbol_index].address();
    while let Some(prev_index) = closest_symbol_index.checked_sub(1) {
        if symbols[prev_index].address() != target_address {
            break;
        }
        closest_symbol_index = prev_index;
    }
    let mut best_symbol: Option<&'r object::Symbol<'data, 'file>> = None;
    for symbol in symbols.iter().skip(closest_symbol_index) {
        if symbol.address() > address {
            break;
        }
        if symbol.kind() == object::SymbolKind::Section
            || (symbol.size() > 0 && (symbol.address() + symbol.size()) <= address)
        {
            continue;
        }
        // TODO priority ranking with visibility, etc
        if let Some(best) = best_symbol {
            if LOW_PRIORITY_SYMBOLS.contains(&best.name().unwrap_or_default())
                && !LOW_PRIORITY_SYMBOLS.contains(&symbol.name().unwrap_or_default())
            {
                best_symbol = Some(symbol);
            }
        } else {
            best_symbol = Some(symbol);
        }
    }
    best_symbol.map(|s| (s.index(), s.address()))
}

fn map_section_relocations(
    arch: &dyn Arch,
    obj_file: &object::File,
    obj_section: &object::Section,
    symbol_indices: &[usize],
    ordered_symbols: &[Vec<object::Symbol>],
) -> Result<Vec<Relocation>> {
    let mut relocations = Vec::<Relocation>::with_capacity(obj_section.relocations().count());
    for (address, reloc) in obj_section.relocations() {
        let flags = match reloc.flags() {
            object::RelocationFlags::Elf { r_type } => RelocationFlags::Elf(r_type),
            object::RelocationFlags::Coff { typ } => RelocationFlags::Coff(typ),
            flags => {
                bail!("Unhandled relocation flags: {:?}", flags);
            }
        };
        // TODO validate reloc here?
        let mut addend = if reloc.has_implicit_addend() {
            arch.implcit_addend(obj_file, obj_section, address, &reloc, flags)?
        } else {
            reloc.addend()
        };
        let target_symbol = match reloc.target() {
            object::RelocationTarget::Symbol(idx) => {
                if idx.0 == u32::MAX as usize {
                    // ???
                    continue;
                }
                // If the target is a section symbol, try to resolve a better symbol as the target
                let idx = if let Some(section_symbol) = obj_file
                    .symbol_by_index(idx)
                    .ok()
                    .take_if(|s| s.kind() == object::SymbolKind::Section)
                {
                    let section_index =
                        section_symbol.section_index().context("Section symbol without section")?;
                    let target_address = section_symbol.address().wrapping_add_signed(addend);
                    if let Some((new_idx, addr)) = ordered_symbols
                        .get(section_index.0)
                        .and_then(|symbols| best_symbol(symbols, target_address))
                    {
                        addend = target_address.wrapping_sub(addr) as i64;
                        new_idx
                    } else {
                        idx
                    }
                } else {
                    idx
                };
                match symbol_indices.get(idx.0).copied() {
                    Some(i) => i,
                    None => {
                        log::warn!("Invalid symbol index {}", idx.0);
                        continue;
                    }
                }
            }
            object::RelocationTarget::Absolute => {
                let section_name = obj_section.name()?;
                log::warn!("Ignoring absolute relocation @ {}:{:#x}", section_name, address);
                continue;
            }
            _ => bail!("Unhandled relocation target: {:?}", reloc.target()),
        };
        relocations.push(Relocation { address, flags, target_symbol, addend });
    }
    relocations.sort_by_key(|r| r.address);
    Ok(relocations)
}

fn map_relocations(
    arch: &dyn Arch,
    obj_file: &object::File,
    sections: &mut [Section],
    section_indices: &[usize],
    symbol_indices: &[usize],
) -> Result<()> {
    // Generate a list of symbols for each section
    let mut ordered_symbols =
        Vec::<Vec<object::Symbol>>::with_capacity(obj_file.sections().count() + 1);
    for symbol in obj_file.symbols() {
        let Some(section_index) = symbol.section_index() else {
            continue;
        };
        if symbol.kind() == object::SymbolKind::Section {
            continue;
        }
        if section_index.0 >= ordered_symbols.len() {
            ordered_symbols.resize_with(section_index.0 + 1, Vec::new);
        }
        ordered_symbols[section_index.0].push(symbol);
    }
    // Sort symbols by address and size
    for vec in &mut ordered_symbols {
        vec.sort_by(|a, b| a.address().cmp(&b.address()).then(a.size().cmp(&b.size())));
    }
    // Map relocations for each section. Section-relative relocations use the ordered symbols list
    // to find a better target symbol, if available.
    for obj_section in obj_file.sections() {
        let section = &mut sections[section_indices[obj_section.index().0]];
        if section.kind != SectionKind::Unknown {
            section.relocations = map_section_relocations(
                arch,
                obj_file,
                &obj_section,
                symbol_indices,
                &ordered_symbols,
            )?;
        }
    }
    Ok(())
}

fn calculate_pooled_relocations(
    arch: &dyn Arch,
    sections: &mut [Section],
    symbols: &[Symbol],
) -> Result<()> {
    for (section_index, section) in sections.iter_mut().enumerate() {
        if section.kind != SectionKind::Code {
            continue;
        }
        let mut fake_pool_relocs = Vec::new();
        for symbol in symbols {
            if symbol.section != Some(section_index) {
                continue;
            }
            if symbol.kind != SymbolKind::Function {
                continue;
            }
            let code =
                section.data_range(symbol.address, symbol.size as usize).ok_or_else(|| {
                    anyhow!(
                        "Symbol data out of bounds: {:#x}..{:#x}",
                        symbol.address,
                        symbol.address + symbol.size
                    )
                })?;
            fake_pool_relocs.append(&mut arch.generate_pooled_relocations(
                symbol.address,
                code,
                &section.relocations,
                symbols,
            ));
        }
        section.relocations.append(&mut fake_pool_relocs);
        section.relocations.sort_by_key(|r| r.address);
    }
    Ok(())
}

fn parse_line_info(
    obj_file: &object::File,
    sections: &mut [Section],
    section_indices: &[usize],
    obj_data: &[u8],
) -> Result<()> {
    // DWARF 1.1
    if let Some(section) = obj_file.section_by_name(".line") {
        let data = section.uncompressed_data()?;
        let mut reader: &[u8] = data.as_ref();

        let mut text_sections = sections.iter_mut().filter(|s| s.kind == SectionKind::Code);
        while !reader.is_empty() {
            let mut section_data = reader;
            let size = read_u32(obj_file, &mut section_data)? as usize;
            if size > reader.len() {
                bail!("Line info size {size} exceeds remaining size {}", reader.len());
            }
            (section_data, reader) = reader.split_at(size);

            section_data = &section_data[4..]; // Skip the size field
            let base_address = read_u32(obj_file, &mut section_data)? as u64;
            let out_section = text_sections.next().context("No text section for line info")?;
            while !section_data.is_empty() {
                let line_number = read_u32(obj_file, &mut section_data)?;
                let statement_pos = read_u16(obj_file, &mut section_data)?;
                if statement_pos != 0xFFFF {
                    log::warn!("Unhandled statement pos {}", statement_pos);
                }
                let address_delta = read_u32(obj_file, &mut section_data)? as u64;
                out_section.line_info.insert(base_address + address_delta, line_number);
            }
        }
    }

    // DWARF 2+
    #[cfg(feature = "dwarf")]
    {
        fn gimli_error(e: gimli::Error) -> anyhow::Error { anyhow::anyhow!("DWARF error: {e:?}") }
        let dwarf_cow = gimli::DwarfSections::load(|id| {
            Ok::<_, gimli::Error>(
                obj_file
                    .section_by_name(id.name())
                    .and_then(|section| section.uncompressed_data().ok())
                    .unwrap_or(alloc::borrow::Cow::Borrowed(&[][..])),
            )
        })
        .map_err(gimli_error)?;
        let endian = match obj_file.endianness() {
            object::Endianness::Little => gimli::RunTimeEndian::Little,
            object::Endianness::Big => gimli::RunTimeEndian::Big,
        };
        let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
        let mut iter = dwarf.units();
        if let Some(header) = iter.next().map_err(gimli_error)? {
            let unit = dwarf.unit(header).map_err(gimli_error)?;
            if let Some(program) = unit.line_program.clone() {
                let mut text_sections = sections.iter_mut().filter(|s| s.kind == SectionKind::Code);
                let mut lines = text_sections.next().map(|section| &mut section.line_info);

                let mut rows = program.rows();
                while let Some((_header, row)) = rows.next_row().map_err(gimli_error)? {
                    if let (Some(line), Some(lines)) = (row.line(), &mut lines) {
                        lines.insert(row.address(), line.get() as u32);
                    }
                    if row.end_sequence() {
                        // The next row is the start of a new sequence, which means we must
                        // advance to the next .text section.
                        lines = text_sections.next().map(|section| &mut section.line_info);
                    }
                }
            }
        }
        if iter.next().map_err(gimli_error)?.is_some() {
            log::warn!("Multiple units found in DWARF data, only processing the first");
        }
    }

    // COFF
    if let object::File::Coff(coff) = obj_file {
        parse_line_info_coff(coff, sections, section_indices, obj_data)?;
    }

    Ok(())
}

fn parse_line_info_coff(
    coff: &object::coff::CoffFile,
    sections: &mut [Section],
    section_indices: &[usize],
    obj_data: &[u8],
) -> Result<()> {
    use object::{
        coff::{CoffHeader as _, ImageSymbol as _},
        endian::LittleEndian as LE,
    };
    let symbol_table = coff.coff_header().symbols(obj_data)?;

    // Enumerate over all sections.
    for sect in coff.sections() {
        let ptr_linenums = sect.coff_section().pointer_to_linenumbers.get(LE) as usize;
        let num_linenums = sect.coff_section().number_of_linenumbers.get(LE) as usize;

        // If we have no line number, skip this section.
        if num_linenums == 0 {
            continue;
        }

        // Find this section in our out_section. If it's not in out_section,
        // skip it.
        let Some(out_section) =
            section_indices.get(sect.index().0).and_then(|&i| sections.get_mut(i))
        else {
            continue;
        };

        // Turn the line numbers into an ImageLinenumber slice.
        let Some(linenums) = &obj_data.get(
            ptr_linenums..ptr_linenums + num_linenums * size_of::<object::pe::ImageLinenumber>(),
        ) else {
            continue;
        };
        let Ok(linenums) =
            object::pod::slice_from_all_bytes::<object::pe::ImageLinenumber>(linenums)
        else {
            continue;
        };

        // In COFF, the line numbers are stored relative to the start of the
        // function. Because of this, we need to know the line number where the
        // function starts, so we can sum the two and get the line number
        // relative to the start of the file.
        //
        // This variable stores the line number where the function currently
        // being processed starts. It is set to None when we failed to find the
        // line number of the start of the function.
        let mut cur_fun_start_linenumber = None;
        for linenum in linenums {
            let line_number = linenum.linenumber.get(LE);
            if line_number == 0 {
                // Starting a new function. We need to find the line where that
                // function is located in the file. To do this, we need to find
                // the `.bf` symbol "associated" with this function. The .bf
                // symbol will have a Function Begin/End Auxillary Record, which
                // contains the line number of the start of the function.

                // First, set cur_fun_start_linenumber to None. If we fail to
                // find the start of the function, this will make sure the
                // subsequent line numbers will be ignored until the next start
                // of function.
                cur_fun_start_linenumber = None;

                // Get the symbol associated with this function. We'll need it
                // for logging purposes, but also to acquire its Function
                // Auxillary Record, which tells us where to find our .bf symbol.
                let symtable_entry = linenum.symbol_table_index_or_virtual_address.get(LE);
                let Ok(symbol) = symbol_table.symbol(object::SymbolIndex(symtable_entry as usize))
                else {
                    continue;
                };
                let Ok(aux_fun) =
                    symbol_table.aux_function(object::SymbolIndex(symtable_entry as usize))
                else {
                    continue;
                };

                // Get the .bf symbol associated with this symbol. To do so, we
                // look at the Function Auxillary Record's tag_index, which is
                // an index in the symbol table pointing to our .bf symbol.
                if aux_fun.tag_index.get(LE) == 0 {
                    continue;
                }
                let Ok(bf_symbol) =
                    symbol_table.symbol(object::SymbolIndex(aux_fun.tag_index.get(LE) as usize))
                else {
                    continue;
                };
                // Do some sanity checks that we are, indeed, looking at a .bf
                // symbol.
                if bf_symbol.name(symbol_table.strings()) != Ok(b".bf") {
                    continue;
                }
                // Get the Function Begin/End Auxillary Record associated with
                // our .bf symbol, where we'll fine the linenumber of the start
                // of our function.
                let Ok(bf_aux) = symbol_table.get::<object::pe::ImageAuxSymbolFunctionBeginEnd>(
                    object::SymbolIndex(aux_fun.tag_index.get(LE) as usize),
                    1,
                ) else {
                    continue;
                };
                // Set cur_fun_start_linenumber so the following linenumber
                // records will know at what line the current function start.
                cur_fun_start_linenumber = Some(bf_aux.linenumber.get(LE) as u32);
                // Let's also synthesize a line number record from the start of
                // the function, as the linenumber records don't always cover it.
                out_section.line_info.insert(
                    sect.address() + symbol.value() as u64,
                    bf_aux.linenumber.get(LE) as u32,
                );
            } else if let Some(cur_linenumber) = cur_fun_start_linenumber {
                let vaddr = linenum.symbol_table_index_or_virtual_address.get(LE);
                out_section
                    .line_info
                    .insert(sect.address() + vaddr as u64, cur_linenumber + line_number as u32);
            }
        }
    }
    Ok(())
}

fn combine_sections(
    sections: &mut [Section],
    symbols: &mut [Symbol],
    config: &DiffObjConfig,
) -> Result<()> {
    let mut data_sections = BTreeMap::<String, Vec<usize>>::new();
    let mut text_sections = Vec::<usize>::new();
    for (i, section) in sections.iter().enumerate() {
        match section.kind {
            SectionKind::Data | SectionKind::Bss => {
                let base_name = if let Some(i) = section.name.rfind('$') {
                    &section.name[..i]
                } else {
                    &section.name
                };
                data_sections.entry(base_name.to_string()).or_default().push(i);
            }
            SectionKind::Code => {
                text_sections.push(i);
            }
            _ => {}
        }
    }
    if config.combine_data_sections {
        for (combined_name, mut section_indices) in data_sections {
            do_combine_sections(sections, symbols, &mut section_indices, combined_name)?;
        }
    }
    if config.combine_text_sections {
        do_combine_sections(sections, symbols, &mut text_sections, ".text".to_string())?;
    }
    Ok(())
}

fn do_combine_sections(
    sections: &mut [Section],
    symbols: &mut [Symbol],
    section_indices: &mut [usize],
    combined_name: String,
) -> Result<()> {
    if section_indices.len() < 2 {
        return Ok(());
    }
    // Sort sections lexicographically by name (for COFF section groups)
    section_indices.sort_by(|&a, &b| {
        let a_name = &sections[a].name;
        let b_name = &sections[b].name;
        // .text$di < .text$mn < .text
        if a_name.contains('$') && !b_name.contains('$') {
            return Ordering::Less;
        } else if !a_name.contains('$') && b_name.contains('$') {
            return Ordering::Greater;
        }
        a_name.cmp(b_name)
    });
    let first_section_idx = section_indices[0];

    // Calculate the new offset for each section
    let mut offsets = Vec::<u64>::with_capacity(section_indices.len());
    let mut current_offset = 0;
    let mut data_size = 0;
    let mut num_relocations = 0;
    for i in section_indices.iter().copied() {
        let section = &sections[i];
        if section.address != 0 {
            bail!("Section {} ({}) has non-zero address", i, section.name);
        }
        offsets.push(current_offset);
        current_offset += section.size;
        data_size += section.data.len();
        num_relocations += section.relocations.len();
    }
    if data_size > 0 {
        ensure!(data_size == current_offset as usize, "Data size mismatch");
    }

    // Combine section data
    let mut data = Vec::<u8>::with_capacity(data_size);
    let mut relocations = Vec::<Relocation>::with_capacity(num_relocations);
    let mut line_info = BTreeMap::<u64, u32>::new();
    for (&i, &offset) in section_indices.iter().zip(&offsets) {
        let section = &mut sections[i];
        section.size = 0;
        data.append(&mut section.data.0);
        section.relocations.iter_mut().for_each(|r| r.address += offset);
        relocations.append(&mut section.relocations);
        line_info.append(&mut section.line_info.iter().map(|(&a, &l)| (a + offset, l)).collect());
        section.line_info.clear();
        if offset > 0 {
            section.flags |= SectionFlag::Hidden;
        }
    }
    {
        let first_section = &mut sections[first_section_idx];
        first_section.id = format!("{combined_name}-combined");
        first_section.name = combined_name;
        first_section.size = current_offset;
        first_section.data = SectionData(data);
        first_section.flags |= SectionFlag::Combined;
        first_section.relocations = relocations;
        first_section.line_info = line_info;
    }

    // Find all section symbols for the merged sections
    let mut section_symbols = symbols
        .iter()
        .enumerate()
        .filter(|&(_, s)| {
            s.kind == SymbolKind::Section && s.section.is_some_and(|i| section_indices.contains(&i))
        })
        .map(|(i, _)| i)
        .collect::<Vec<_>>();
    section_symbols.sort_by_key(|&i| symbols[i].section.unwrap());
    let target_section_symbol = section_symbols.first().copied();

    // Adjust symbol addresses and section indices
    for symbol in symbols.iter_mut() {
        let Some(section_index) = symbol.section else {
            continue;
        };
        let Some(merge_index) = section_indices.iter().position(|&i| i == section_index) else {
            continue;
        };
        symbol.address += offsets[merge_index];
        symbol.section = Some(first_section_idx);
    }

    // Adjust relocations to section symbols
    for relocation in sections.iter_mut().flat_map(|s| s.relocations.iter_mut()) {
        let target_symbol = &symbols[relocation.target_symbol];
        if target_symbol.kind != SymbolKind::Section {
            continue;
        }
        if !target_symbol.section.is_some_and(|i| section_indices.contains(&i)) {
            continue;
        }
        // The section symbol's address will have the offset applied
        relocation.target_symbol = target_section_symbol.context("No target section symbol")?;
        relocation.addend = relocation
            .addend
            .checked_add_unsigned(target_symbol.address)
            .context("Relocation addend overflow")?;
    }

    // Reset section symbols
    for (i, &symbol_index) in section_symbols.iter().enumerate() {
        let symbol = &mut symbols[symbol_index];
        symbol.address = 0;
        if i > 0 {
            // Remove the section symbol
            symbol.kind = SymbolKind::Unknown;
            symbol.section = None;
        }
    }

    Ok(())
}

#[cfg(feature = "std")]
pub fn read(obj_path: &std::path::Path, config: &DiffObjConfig) -> Result<Object> {
    let (data, timestamp) = {
        let file = std::fs::File::open(obj_path)?;
        let timestamp = filetime::FileTime::from_last_modification_time(&file.metadata()?);
        (unsafe { memmap2::Mmap::map(&file) }?, timestamp)
    };
    let mut obj = parse(&data, config)?;
    obj.path = Some(obj_path.to_path_buf());
    obj.timestamp = Some(timestamp);
    Ok(obj)
}

pub fn parse(data: &[u8], config: &DiffObjConfig) -> Result<Object> {
    let obj_file = object::File::parse(data)?;
    let arch = new_arch(&obj_file)?;
    let split_meta = parse_split_meta(&obj_file)?;
    let (mut sections, section_indices) =
        map_sections(arch.as_ref(), &obj_file, split_meta.as_ref())?;
    let (mut symbols, symbol_indices) =
        map_symbols(arch.as_ref(), &obj_file, &sections, &section_indices, split_meta.as_ref())?;
    map_relocations(arch.as_ref(), &obj_file, &mut sections, &section_indices, &symbol_indices)?;
    if config.ppc_calculate_pool_relocations {
        calculate_pooled_relocations(arch.as_ref(), &mut sections, &symbols)?;
    }
    parse_line_info(&obj_file, &mut sections, &section_indices, data)?;
    if config.combine_data_sections || config.combine_text_sections {
        combine_sections(&mut sections, &mut symbols, config)?;
    }
    Ok(Object {
        arch,
        endianness: obj_file.endianness(),
        symbols,
        sections,
        split_meta,
        #[cfg(feature = "std")]
        path: None,
        #[cfg(feature = "std")]
        timestamp: None,
    })
}

#[cfg(feature = "std")]
pub fn has_function(obj_path: &std::path::Path, symbol_name: &str) -> Result<bool> {
    let data = {
        let file = std::fs::File::open(obj_path)?;
        unsafe { memmap2::Mmap::map(&file) }?
    };
    Ok(object::File::parse(&*data)?
        .symbol_by_name(symbol_name)
        .filter(|o| o.kind() == object::SymbolKind::Text)
        .is_some())
}

fn parse_split_meta(obj_file: &object::File) -> Result<Option<SplitMeta>> {
    Ok(if let Some(section) = obj_file.section_by_name(SPLITMETA_SECTION) {
        Some(SplitMeta::from_section(section, obj_file.endianness(), obj_file.is_64())?)
    } else {
        None
    })
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_combine_sections() {
        let mut sections = vec![
            Section {
                id: ".text-0".to_string(),
                name: ".text".to_string(),
                size: 8,
                kind: SectionKind::Code,
                data: SectionData(vec![0; 8]),
                relocations: vec![
                    Relocation {
                        address: 0,
                        flags: RelocationFlags::Elf(0),
                        target_symbol: 0,
                        addend: 0,
                    },
                    Relocation {
                        address: 2,
                        flags: RelocationFlags::Elf(0),
                        target_symbol: 1,
                        addend: 0,
                    },
                    Relocation {
                        address: 4,
                        flags: RelocationFlags::Elf(0),
                        target_symbol: 3,
                        addend: 2,
                    },
                ],
                ..Default::default()
            },
            Section {
                id: ".data-0".to_string(),
                name: ".data".to_string(),
                size: 4,
                kind: SectionKind::Data,
                data: SectionData(vec![1, 2, 3, 4]),
                relocations: vec![Relocation {
                    address: 0,
                    flags: RelocationFlags::Elf(0),
                    target_symbol: 2,
                    addend: 0,
                }],
                line_info: [(0, 1)].into_iter().collect(),
                ..Default::default()
            },
            Section {
                id: ".data-1".to_string(),
                name: ".data".to_string(),
                size: 4,
                kind: SectionKind::Data,
                data: SectionData(vec![5, 6, 7, 8]),
                relocations: vec![Relocation {
                    address: 0,
                    flags: RelocationFlags::Elf(0),
                    target_symbol: 2,
                    addend: 0,
                }],
                ..Default::default()
            },
            Section {
                id: ".data-2".to_string(),
                name: ".data".to_string(),
                size: 4,
                kind: SectionKind::Data,
                data: SectionData(vec![9, 10, 11, 12]),
                line_info: [(0, 2)].into_iter().collect(),
                ..Default::default()
            },
        ];
        let mut symbols = vec![
            Symbol {
                name: ".data".to_string(),
                address: 0,
                kind: SymbolKind::Section,
                section: Some(2),
                ..Default::default()
            },
            Symbol {
                name: "symbol".to_string(),
                address: 0,
                kind: SymbolKind::Object,
                size: 4,
                section: Some(2),
                ..Default::default()
            },
            Symbol {
                name: "function".to_string(),
                address: 0,
                size: 8,
                kind: SymbolKind::Function,
                section: Some(0),
                ..Default::default()
            },
            Symbol {
                name: ".data".to_string(),
                address: 0,
                kind: SymbolKind::Section,
                section: Some(3),
                ..Default::default()
            },
        ];
        do_combine_sections(&mut sections, &mut symbols, &mut [1, 2, 3], ".data".to_string())
            .unwrap();
        assert_eq!(sections[1].data.0, (1..=12).collect::<Vec<_>>());
        insta::assert_debug_snapshot!((sections, symbols));
    }
}