Improve split cyclic dependency errors

When a link order is unresolvable, decomp-toolkit now prints out all of the TUs in a cycle, easing debugging. Example: ``` Cyclic dependency encountered while resolving link order: Dolphin/os/__start.c -> Dolphin/os/__ppc_eabi_init.cpp -> Dolphin/pad/PadClamp.c -> Dolphin/pad/pad.c ``` Thanks @simonlindholm for the toposort impl.
2025-12-12 14:46:17 +00:00 · 2025-01-27 17:53:38 -07:00
parent 9a6348ec49
commit 6c3887c7e6
5 changed files with 140 additions and 45 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -383,7 +383,6 @@ dependencies = [
 "once_cell",
 "orthrus-ncompress",
 "owo-colors",
 "petgraph",
 "ppc750cl",
 "rayon",
 "regex",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,7 +60,6 @@ object = { version = "0.36", features = ["read_core", "std", "elf", "write_std"]
 once_cell = "1.20"
 orthrus-ncompress = "0.2"
 owo-colors = { version = "4.1", features = ["supports-colors"] }
 petgraph = { version = "0.6", default-features = false }
 ppc750cl = "0.3"
 rayon = "1.10"
 regex = "1.11"
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -25,6 +25,7 @@ pub mod rso;
 pub mod signatures;
 pub mod split;
 pub mod take_seek;
 pub mod toposort;
 pub mod u8_arc;
 pub mod wad;
--- a/src/util/split.rs
+++ b/src/util/split.rs
@@ -6,7 +6,6 @@ use std::{
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use itertools::Itertools;
 use objdiff_core::obj::split_meta::SplitMeta;
 use petgraph::{graph::NodeIndex, Graph};
 use sanitise_file_name::sanitize_with_options;
 use tracing_attributes::instrument;
@@ -17,7 +16,7 @@ use crate::{
        ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
        ObjUnit, SectionIndex, SymbolIndex,
    },
-    util::{align_up, comment::MWComment},
+    util::{align_up, comment::MWComment, toposort::toposort},
 };
 /// Create splits for function pointers in the given section.
@@ -839,14 +838,16 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
        to: i64,
    }
-    let mut graph = Graph::<String, SplitEdge>::new();
+    let mut unit_to_index_map = BTreeMap::<&str, usize>::new();
-    let mut unit_to_index_map = BTreeMap::<String, NodeIndex>::new();
+    let mut index_to_unit = vec![];
    for (_, _, _, split) in obj.sections.all_splits() {
-        unit_to_index_map.insert(split.unit.clone(), NodeIndex::new(0));
+        unit_to_index_map.entry(split.unit.as_str()).or_insert_with(|| {
-    }
+            let idx = index_to_unit.len();
-    for (unit, index) in unit_to_index_map.iter_mut() {
+            index_to_unit.push(split.unit.as_str());
-        *index = graph.add_node(unit.clone());
+            idx
        });
    }
    let mut graph = vec![vec![]; index_to_unit.len()];
    for (_section_index, section) in obj.sections.iter() {
        let mut iter = section.splits.iter().peekable();
@@ -869,12 +870,9 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
                    b.unit,
                    b_addr
                );
-                let a_index = *unit_to_index_map.get(&a.unit).unwrap();
+                let a_index = *unit_to_index_map.get(a.unit.as_str()).unwrap();
-                let b_index = *unit_to_index_map.get(&b.unit).unwrap();
+                let b_index = *unit_to_index_map.get(b.unit.as_str()).unwrap();
-                graph.add_edge(a_index, b_index, SplitEdge {
+                graph[a_index].push(b_index);
                    from: a_addr as i64,
                    to: b_addr as i64,
                });
            }
        }
    }
@@ -893,41 +891,22 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
            }
        }
    }
-    let mut iter = ordered_units
+    let mut iter =
-        .into_iter()
+        ordered_units.values().filter_map(|unit| unit_to_index_map.get(unit.as_str())).peekable();
-        .filter_map(|(order, unit)| unit_to_index_map.get(&unit).map(|&index| (order, index)))
+    while let (Some(&a_index), Some(&&b_index)) = (iter.next(), iter.peek()) {
-        .peekable();
+        graph[a_index].push(b_index);
    while let (Some((a_order, a_index)), Some((b_order, b_index))) = (iter.next(), iter.peek()) {
        graph.add_edge(a_index, *b_index, SplitEdge { from: a_order as i64, to: *b_order as i64 });
    }
-    // use petgraph::{
+    match toposort(&graph) {
    //     dot::{Config, Dot},
    //     graph::EdgeReference,
    // };
    // let get_edge_attributes = |_, e: EdgeReference<SplitEdge>| {
    //     let &SplitEdge { from, to } = e.weight();
    //     let section_name = &obj.section_at(from).unwrap().name;
    //     format!("label=\"{} {:#010X} -> {:#010X}\"", section_name, from, to)
    // };
    // let dot = Dot::with_attr_getters(
    //     &graph,
    //     &[Config::EdgeNoLabel, Config::NodeNoLabel],
    //     &get_edge_attributes,
    //     &|_, (_, s)| format!("label=\"{}\"", s),
    // );
    // println!("{:?}", dot);
    match petgraph::algo::toposort(&graph, None) {
        Ok(vec) => Ok(vec
            .iter()
            .map(|&idx| {
-                let name = &graph[idx];
+                let name = index_to_unit[idx];
-                if let Some(existing) = obj.link_order.iter().find(|u| &u.name == name) {
+                if let Some(existing) = obj.link_order.iter().find(|u| u.name == name) {
                    existing.clone()
                } else {
                    ObjUnit {
-                        name: name.clone(),
+                        name: name.to_string(),
                        autogenerated: obj.is_unit_autogenerated(name),
                        comment_version: None,
                        order: None,
@@ -936,8 +915,8 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
            })
            .collect_vec()),
        Err(e) => Err(anyhow!(
-            "Cyclic dependency (involving {}) encountered while resolving link order",
+            "Cyclic dependency encountered while resolving link order: {}",
-            graph[e.node_id()]
+            e.iter().map(|&idx| index_to_unit[idx]).join(" -> ")
        )),
    }
 }
--- a/src/util/toposort.rs
+++ b/src/util/toposort.rs
@@ -0,0 +1,117 @@
 /// Topological sort algorithm based on DFS:
 /// https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
 /// Finds either an ordering of the vertices such that all edges go from
 /// lower to higher indexed nodes, or a cycle.
 ///
 /// Implementation by Simon Lindholm
 /// https://gist.github.com/simonlindholm/08664dd783ad4b9f23532fdd5e352b42
 pub fn toposort(graph: &[Vec<usize>]) -> Result<Vec<usize>, Vec<usize>> {
    let n = graph.len();
    #[derive(Copy, Clone)]
    enum State {
        Unvisited,
        Active(usize, usize),
        Finished,
    }
    let mut state = vec![State::Unvisited; n + 1];
    state[n] = State::Active(0, usize::MAX);
    let mut ret = Vec::new();
    let mut cur = n;
    loop {
        let State::Active(eind, par) = state[cur] else { panic!("unexpected state 1") };
        let adj;
        if cur == n {
            if eind == n {
                break;
            }
            adj = eind;
        } else {
            if eind == graph[cur].len() {
                state[cur] = State::Finished;
                ret.push(cur);
                cur = par;
                continue;
            }
            adj = graph[cur][eind];
        };
        state[cur] = State::Active(eind + 1, par);
        match state[adj] {
            State::Unvisited => {
                state[adj] = State::Active(0, cur);
                cur = adj;
            }
            State::Active(..) => {
                let mut cycle = Vec::new();
                while cur != adj {
                    cycle.push(cur);
                    let State::Active(_, par) = state[cur] else { panic!("unexpected state 2") };
                    cur = par;
                }
                cycle.push(cur);
                cycle.reverse();
                return Err(cycle);
            }
            State::Finished => {}
        };
    }
    ret.reverse();
    Ok(ret)
 }
 #[cfg(test)]
 mod tests {
    use std::collections::HashSet;
    use super::*;
    #[test]
    fn test_correct() {
        let mut rng_state: usize = 0;
        let mut rand = || {
            rng_state = rng_state.wrapping_mul(123156351724123181_usize);
            rng_state = rng_state.wrapping_add(670143798154186239_usize);
            rng_state >> 32
        };
        for _ in 0..10000 {
            let n = rand() % 20;
            let mut g = vec![vec![]; n];
            let mut g_set = HashSet::new();
            if n != 0 {
                let m = rand() % 50;
                for _ in 0..m {
                    let a = rand() % n;
                    let b = rand() % n;
                    g[a].push(b);
                    g_set.insert((a, b));
                }
            }
            match toposort(&g) {
                Ok(order) => {
                    assert_eq!(order.len(), n);
                    let mut node_to_order = vec![usize::MAX; n];
                    // Every node should occur exactly once...
                    for (i, &node) in order.iter().enumerate() {
                        assert!(node < n);
                        assert_eq!(node_to_order[node], usize::MAX);
                        node_to_order[node] = i;
                    }
                    // and the edges should go in forward order in the list
                    for i in 0..n {
                        for &j in &g[i] {
                            assert!(node_to_order[i] < node_to_order[j]);
                        }
                    }
                }
                Err(cycle) => {
                    // The found cycle should exist in the graph
                    assert!(!cycle.is_empty());
                    for i in 0..cycle.len() {
                        let a = cycle[i];
                        let b = cycle[(i + 1) % cycle.len()];
                        assert!(g_set.contains(&(a, b)));
                    }
                }
            }
        }
    }
 }