Improve split cyclic dependency errors

When a link order is unresolvable, decomp-toolkit now prints out all of the TUs in a cycle, easing debugging. Example: ``` Cyclic dependency encountered while resolving link order: Dolphin/os/__start.c -> Dolphin/os/__ppc_eabi_init.cpp -> Dolphin/pad/PadClamp.c -> Dolphin/pad/pad.c ``` Thanks @simonlindholm for the toposort impl.
2025-12-12 14:46:17 +00:00 · 2025-01-27 17:53:38 -07:00
parent 9a6348ec49
commit 6c3887c7e6
5 changed files with 140 additions and 45 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -383,7 +383,6 @@ dependencies = [
 "once_cell",
 "orthrus-ncompress",
 "owo-colors",
- "petgraph",
 "ppc750cl",
 "rayon",
 "regex",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,7 +60,6 @@ object = { version = "0.36", features = ["read_core", "std", "elf", "write_std"]
 once_cell = "1.20"
 orthrus-ncompress = "0.2"
 owo-colors = { version = "4.1", features = ["supports-colors"] }
-petgraph = { version = "0.6", default-features = false }
 ppc750cl = "0.3"
 rayon = "1.10"
 regex = "1.11"
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -25,6 +25,7 @@ pub mod rso;
 pub mod signatures;
 pub mod split;
 pub mod take_seek;
+pub mod toposort;
 pub mod u8_arc;
 pub mod wad;

--- a/src/util/split.rs
+++ b/src/util/split.rs
@@ -6,7 +6,6 @@ use std::{
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use itertools::Itertools;
 use objdiff_core::obj::split_meta::SplitMeta;
-use petgraph::{graph::NodeIndex, Graph};
 use sanitise_file_name::sanitize_with_options;
 use tracing_attributes::instrument;

@@ -17,7 +16,7 @@ use crate::{
        ObjSplit, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
        ObjUnit, SectionIndex, SymbolIndex,
    },
-    util::{align_up, comment::MWComment},
+    util::{align_up, comment::MWComment, toposort::toposort},
 };

 /// Create splits for function pointers in the given section.
@@ -839,14 +838,16 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
        to: i64,
    }

-    let mut graph = Graph::<String, SplitEdge>::new();
-    let mut unit_to_index_map = BTreeMap::<String, NodeIndex>::new();
+    let mut unit_to_index_map = BTreeMap::<&str, usize>::new();
+    let mut index_to_unit = vec![];
    for (_, _, _, split) in obj.sections.all_splits() {
-        unit_to_index_map.insert(split.unit.clone(), NodeIndex::new(0));
-    }
-    for (unit, index) in unit_to_index_map.iter_mut() {
-        *index = graph.add_node(unit.clone());
+        unit_to_index_map.entry(split.unit.as_str()).or_insert_with(|| {
+            let idx = index_to_unit.len();
+            index_to_unit.push(split.unit.as_str());
+            idx
+        });
    }
+    let mut graph = vec![vec![]; index_to_unit.len()];

    for (_section_index, section) in obj.sections.iter() {
        let mut iter = section.splits.iter().peekable();
@@ -869,12 +870,9 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
                    b.unit,
                    b_addr
                );
-                let a_index = *unit_to_index_map.get(&a.unit).unwrap();
-                let b_index = *unit_to_index_map.get(&b.unit).unwrap();
-                graph.add_edge(a_index, b_index, SplitEdge {
-                    from: a_addr as i64,
-                    to: b_addr as i64,
-                });
+                let a_index = *unit_to_index_map.get(a.unit.as_str()).unwrap();
+                let b_index = *unit_to_index_map.get(b.unit.as_str()).unwrap();
+                graph[a_index].push(b_index);
            }
        }
    }
@@ -893,41 +891,22 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
            }
        }
    }
-    let mut iter = ordered_units
-        .into_iter()
-        .filter_map(|(order, unit)| unit_to_index_map.get(&unit).map(|&index| (order, index)))
-        .peekable();
-    while let (Some((a_order, a_index)), Some((b_order, b_index))) = (iter.next(), iter.peek()) {
-        graph.add_edge(a_index, *b_index, SplitEdge { from: a_order as i64, to: *b_order as i64 });
+    let mut iter =
+        ordered_units.values().filter_map(|unit| unit_to_index_map.get(unit.as_str())).peekable();
+    while let (Some(&a_index), Some(&&b_index)) = (iter.next(), iter.peek()) {
+        graph[a_index].push(b_index);
    }

-    // use petgraph::{
-    //     dot::{Config, Dot},
-    //     graph::EdgeReference,
-    // };
-    // let get_edge_attributes = |_, e: EdgeReference<SplitEdge>| {
-    //     let &SplitEdge { from, to } = e.weight();
-    //     let section_name = &obj.section_at(from).unwrap().name;
-    //     format!("label=\"{} {:#010X} -> {:#010X}\"", section_name, from, to)
-    // };
-    // let dot = Dot::with_attr_getters(
-    //     &graph,
-    //     &[Config::EdgeNoLabel, Config::NodeNoLabel],
-    //     &get_edge_attributes,
-    //     &|_, (_, s)| format!("label=\"{}\"", s),
-    // );
-    // println!("{:?}", dot);
-
-    match petgraph::algo::toposort(&graph, None) {
+    match toposort(&graph) {
        Ok(vec) => Ok(vec
            .iter()
            .map(|&idx| {
-                let name = &graph[idx];
-                if let Some(existing) = obj.link_order.iter().find(|u| &u.name == name) {
+                let name = index_to_unit[idx];
+                if let Some(existing) = obj.link_order.iter().find(|u| u.name == name) {
                    existing.clone()
                } else {
                    ObjUnit {
-                        name: name.clone(),
+                        name: name.to_string(),
                        autogenerated: obj.is_unit_autogenerated(name),
                        comment_version: None,
                        order: None,
@@ -936,8 +915,8 @@ fn resolve_link_order(obj: &ObjInfo) -> Result<Vec<ObjUnit>> {
            })
            .collect_vec()),
        Err(e) => Err(anyhow!(
-            "Cyclic dependency (involving {}) encountered while resolving link order",
-            graph[e.node_id()]
+            "Cyclic dependency encountered while resolving link order: {}",
+            e.iter().map(|&idx| index_to_unit[idx]).join(" -> ")
        )),
    }
 }
--- a/src/util/toposort.rs
+++ b/src/util/toposort.rs
@@ -0,0 +1,117 @@
+/// Topological sort algorithm based on DFS:
+/// https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
+/// Finds either an ordering of the vertices such that all edges go from
+/// lower to higher indexed nodes, or a cycle.
+///
+/// Implementation by Simon Lindholm
+/// https://gist.github.com/simonlindholm/08664dd783ad4b9f23532fdd5e352b42
+pub fn toposort(graph: &[Vec<usize>]) -> Result<Vec<usize>, Vec<usize>> {
+    let n = graph.len();
+    #[derive(Copy, Clone)]
+    enum State {
+        Unvisited,
+        Active(usize, usize),
+        Finished,
+    }
+    let mut state = vec![State::Unvisited; n + 1];
+    state[n] = State::Active(0, usize::MAX);
+    let mut ret = Vec::new();
+    let mut cur = n;
+    loop {
+        let State::Active(eind, par) = state[cur] else { panic!("unexpected state 1") };
+        let adj;
+        if cur == n {
+            if eind == n {
+                break;
+            }
+            adj = eind;
+        } else {
+            if eind == graph[cur].len() {
+                state[cur] = State::Finished;
+                ret.push(cur);
+                cur = par;
+                continue;
+            }
+            adj = graph[cur][eind];
+        };
+        state[cur] = State::Active(eind + 1, par);
+        match state[adj] {
+            State::Unvisited => {
+                state[adj] = State::Active(0, cur);
+                cur = adj;
+            }
+            State::Active(..) => {
+                let mut cycle = Vec::new();
+                while cur != adj {
+                    cycle.push(cur);
+                    let State::Active(_, par) = state[cur] else { panic!("unexpected state 2") };
+                    cur = par;
+                }
+                cycle.push(cur);
+                cycle.reverse();
+                return Err(cycle);
+            }
+            State::Finished => {}
+        };
+    }
+    ret.reverse();
+    Ok(ret)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::*;
+
+    #[test]
+    fn test_correct() {
+        let mut rng_state: usize = 0;
+        let mut rand = || {
+            rng_state = rng_state.wrapping_mul(123156351724123181_usize);
+            rng_state = rng_state.wrapping_add(670143798154186239_usize);
+            rng_state >> 32
+        };
+        for _ in 0..10000 {
+            let n = rand() % 20;
+            let mut g = vec![vec![]; n];
+            let mut g_set = HashSet::new();
+            if n != 0 {
+                let m = rand() % 50;
+                for _ in 0..m {
+                    let a = rand() % n;
+                    let b = rand() % n;
+                    g[a].push(b);
+                    g_set.insert((a, b));
+                }
+            }
+            match toposort(&g) {
+                Ok(order) => {
+                    assert_eq!(order.len(), n);
+                    let mut node_to_order = vec![usize::MAX; n];
+                    // Every node should occur exactly once...
+                    for (i, &node) in order.iter().enumerate() {
+                        assert!(node < n);
+                        assert_eq!(node_to_order[node], usize::MAX);
+                        node_to_order[node] = i;
+                    }
+                    // and the edges should go in forward order in the list
+                    for i in 0..n {
+                        for &j in &g[i] {
+                            assert!(node_to_order[i] < node_to_order[j]);
+                        }
+                    }
+                }
+                Err(cycle) => {
+                    // The found cycle should exist in the graph
+                    assert!(!cycle.is_empty());
+                    for i in 0..cycle.len() {
+                        let a = cycle[i];
+                        let b = cycle[(i + 1) % cycle.len()];
+                        assert!(g_set.contains(&(a, b)));
+                    }
+                }
+            }
+        }
+    }
+}