From f6f0e66931756e290466800491740557dd0f2e22 Mon Sep 17 00:00:00 2001
From: Luke Street <luke@street.dev>
Date: Mon, 30 Sep 2024 22:20:34 -0600
Subject: [PATCH] Revamp progress output with objdiff report

Progress output now displays "% matched", which measures fully (100%)
matched functions across _all_ files, including files that aren't yet
complete/linked.

Due to this change, all source files need to be built in order to
calculate progress during a normal `ninja` run. In other words,
this makes the `all_source` build the default behavior.

The progress display can be disabled via `configure.py --no-progress`
or `config.progress = False`. With progress disabled, only the source
files needed to link the matching DOL are compiled.

Additionally, progress information is automatically emitted as a job
summary in GitHub Actions, so it can be viewed without opening the
build logs.
---
 configure.py     |   9 +-
 tools/project.py | 209 ++++++++++++++++++++++-------------------------
 2 files changed, 104 insertions(+), 114 deletions(-)

diff --git a/configure.py b/configure.py
index 0b204ef..931ca25 100755
--- a/configure.py
+++ b/configure.py
@@ -113,6 +113,12 @@ parser.add_argument(
     action="store_true",
     help="builds equivalent (but non-matching) or modded objects",
 )
+parser.add_argument(
+    "--no-progress",
+    dest="progress",
+    action="store_false",
+    help="disable progress calculation",
+)
 args = parser.parse_args()
 
 config = ProjectConfig()
@@ -128,6 +134,7 @@ config.compilers_path = args.compilers
 config.generate_map = args.map
 config.non_matching = args.non_matching
 config.sjiswrap_path = args.sjiswrap
+config.progress = args.progress
 if not is_windows():
     config.wrapper = args.wrapper
 # Don't build asm unless we're --non-matching
@@ -138,7 +145,7 @@ if not config.non_matching:
 config.binutils_tag = "2.42-1"
 config.compilers_tag = "20240706"
 config.dtk_tag = "v1.0.0"
-config.objdiff_tag = "v2.2.0"
+config.objdiff_tag = "v2.2.1"
 config.sjiswrap_tag = "v1.1.1"
 config.wibo_tag = "0.6.11"
 
diff --git a/tools/project.py b/tools/project.py
index 55f9105..727d45c 100644
--- a/tools/project.py
+++ b/tools/project.py
@@ -17,7 +17,7 @@ import os
 import platform
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast
+from typing import IO, Any, Dict, List, Optional, Set, Tuple, Union, cast
 
 from . import ninja_syntax
 from .ninja_syntax import serialize_path
@@ -157,6 +157,7 @@ class ProjectConfig:
         )
 
         # Progress output, progress.json and report.json config
+        self.progress = True  # Enable progress output
         self.progress_all: bool = True  # Include combined "all" category
         self.progress_modules: bool = True  # Include combined "modules" category
         self.progress_each_module: bool = (
@@ -1036,7 +1037,12 @@ def generate_build_ninja(
         n.build(
             outputs=progress_path,
             rule="progress",
-            implicit=[ok_path, configure_script, python_lib, config.config_path],
+            implicit=[
+                ok_path,
+                configure_script,
+                python_lib,
+                report_path,
+            ],
         )
 
         ###
@@ -1149,8 +1155,10 @@ def generate_build_ninja(
     if build_config:
         if config.non_matching:
             n.default(link_outputs)
-        else:
+        elif config.progress:
             n.default(progress_path)
+        else:
+            n.default(ok_path)
     else:
         n.default(build_config_path)
 
@@ -1356,124 +1364,77 @@ def generate_objdiff_config(
 # Calculate, print and write progress to progress.json
 def calculate_progress(config: ProjectConfig) -> None:
     config.validate()
-    objects = config.objects()
     out_path = config.out_path()
-    build_config = load_build_config(config, out_path / "config.json")
-    if build_config is None:
-        return
+    report_path = out_path / "report.json"
+    if not report_path.is_file():
+        sys.exit(f"Report file {report_path} does not exist")
 
-    class ProgressUnit:
-        def __init__(self, name: str) -> None:
-            self.name: str = name
-            self.code_total: int = 0
-            self.code_progress: int = 0
-            self.data_total: int = 0
-            self.data_progress: int = 0
-            self.objects: Set[Object] = set()
-            self.objects_progress: int = 0
+    report_data: Dict[str, Any] = {}
+    with open(report_path, "r", encoding="utf-8") as f:
+        report_data = json.load(f)
 
-        def add(self, build_obj: Dict[str, Any]) -> None:
-            self.code_total += build_obj["code_size"]
-            self.data_total += build_obj["data_size"]
+    # Convert string numbers (u64) to int
+    def convert_numbers(data: Dict[str, Any]) -> None:
+        for key, value in data.items():
+            if isinstance(value, str) and value.isdigit():
+                data[key] = int(value)
 
-            # Avoid counting the same object in different modules twice
-            include_object = build_obj["name"] not in self.objects
-            if include_object:
-                self.objects.add(build_obj["name"])
+    convert_numbers(report_data["measures"])
+    for category in report_data["categories"]:
+        convert_numbers(category["measures"])
 
-            if build_obj["autogenerated"]:
-                # Skip autogenerated objects
-                return
+    # Output to GitHub Actions job summary, if available
+    summary_path = os.getenv("GITHUB_STEP_SUMMARY")
+    summary_file: Optional[IO[str]] = None
+    if summary_path:
+        summary_file = open(summary_path, "a", encoding="utf-8")
+        summary_file.write("```\n")
 
-            obj = objects.get(build_obj["name"])
-            if obj is None or not obj.completed:
-                return
-
-            self.code_progress += build_obj["code_size"]
-            self.data_progress += build_obj["data_size"]
-            if include_object:
-                self.objects_progress += 1
-
-        def code_frac(self) -> float:
-            if self.code_total == 0:
-                return 1.0
-            return self.code_progress / self.code_total
-
-        def data_frac(self) -> float:
-            if self.data_total == 0:
-                return 1.0
-            return self.data_progress / self.data_total
-
-    progress_units: Dict[str, ProgressUnit] = {}
-    if config.progress_all:
-        progress_units["all"] = ProgressUnit("All")
-    progress_units["dol"] = ProgressUnit("DOL")
-    if len(build_config["modules"]) > 0:
-        if config.progress_modules:
-            progress_units["modules"] = ProgressUnit("Modules")
-    if len(config.progress_categories) > 0:
-        for category in config.progress_categories:
-            progress_units[category.id] = ProgressUnit(category.name)
-    if config.progress_each_module:
-        for module in build_config["modules"]:
-            progress_units[module["name"]] = ProgressUnit(module["name"])
-
-    def add_unit(id: str, unit: Dict[str, Any]) -> None:
-        progress = progress_units.get(id)
-        if progress is not None:
-            progress.add(unit)
-
-    # Add DOL units
-    for unit in build_config["units"]:
-        add_unit("all", unit)
-        add_unit("dol", unit)
-        obj = objects.get(unit["name"])
-        if obj is not None:
-            category_opt = obj.options["progress_category"]
-            if isinstance(category_opt, list):
-                for id in category_opt:
-                    add_unit(id, unit)
-            elif category_opt is not None:
-                add_unit(category_opt, unit)
-
-    # Add REL units
-    for module in build_config["modules"]:
-        for unit in module["units"]:
-            add_unit("all", unit)
-            add_unit("modules", unit)
-            add_unit(module["name"], unit)
-            obj = objects.get(unit["name"])
-            if obj is not None:
-                category_opt = obj.options["progress_category"]
-                if isinstance(category_opt, list):
-                    for id in category_opt:
-                        add_unit(id, unit)
-                elif category_opt is not None:
-                    add_unit(category_opt, unit)
+    def progress_print(s: str) -> None:
+        print(s)
+        if summary_file:
+            summary_file.write(s + "\n")
 
     # Print human-readable progress
-    print("Progress:")
+    progress_print("Progress:")
 
-    for unit in progress_units.values():
-        if len(unit.objects) == 0:
-            continue
+    def print_category(name: str, measures: Dict[str, Any]) -> None:
+        total_code = measures.get("total_code", 0)
+        matched_code = measures.get("matched_code", 0)
+        matched_code_percent = measures.get("matched_code_percent", 0)
+        total_data = measures.get("total_data", 0)
+        matched_data = measures.get("matched_data", 0)
+        matched_data_percent = measures.get("matched_data_percent", 0)
+        total_functions = measures.get("total_functions", 0)
+        matched_functions = measures.get("matched_functions", 0)
+        complete_code_percent = measures.get("complete_code_percent", 0)
+        total_units = measures.get("total_units", 0)
+        complete_units = measures.get("complete_units", 0)
 
-        code_frac = unit.code_frac()
-        data_frac = unit.data_frac()
-        print(
-            f"  {unit.name}: {code_frac:.2%} code, {data_frac:.2%} data ({unit.objects_progress} / {len(unit.objects)} files)"
+        progress_print(
+            f"  {name}: {matched_code_percent:.2f}% matched, {complete_code_percent:.2f}% linked ({complete_units} / {total_units} files)"
         )
-        print(f"    Code: {unit.code_progress} / {unit.code_total} bytes")
-        print(f"    Data: {unit.data_progress} / {unit.data_total} bytes")
+        progress_print(
+            f"    Code: {matched_code} / {total_code} bytes ({matched_functions} / {total_functions} functions)"
+        )
+        progress_print(
+            f"    Data: {matched_data} / {total_data} bytes ({matched_data_percent:.2f}%)"
+        )
+
+    print_category("All", report_data["measures"])
+    for category in report_data["categories"]:
+        print_category(category["name"], category["measures"])
 
     if config.progress_use_fancy:
-        unit = progress_units.get("all") or progress_units.get("dol")
-        if unit is None or len(unit.objects) == 0:
+        measures = report_data["measures"]
+        total_code = measures.get("total_code", 0)
+        total_data = measures.get("total_data", 0)
+        if total_code == 0 or total_data == 0:
             return
+        code_frac = measures.get("complete_code", 0) / total_code
+        data_frac = measures.get("complete_data", 0) / total_data
 
-        code_frac = unit.code_frac()
-        data_frac = unit.data_frac()
-        print(
+        progress_print(
             "\nYou have {} out of {} {} and {} out of {} {}.".format(
                 math.floor(code_frac * config.progress_code_fancy_frac),
                 config.progress_code_fancy_frac,
@@ -1484,17 +1445,39 @@ def calculate_progress(config: ProjectConfig) -> None:
             )
         )
 
+    # Finalize GitHub Actions job summary
+    if summary_file:
+        summary_file.write("```\n")
+        summary_file.close()
+
     # Generate and write progress.json
     progress_json: Dict[str, Any] = {}
-    for id, unit in progress_units.items():
-        if len(unit.objects) == 0:
-            continue
+
+    def add_category(id: str, measures: Dict[str, Any]) -> None:
         progress_json[id] = {
-            "code": unit.code_progress,
-            "code/total": unit.code_total,
-            "data": unit.data_progress,
-            "data/total": unit.data_total,
+            "code": measures.get("complete_code", 0),
+            "code/total": measures.get("total_code", 0),
+            "data": measures.get("complete_data", 0),
+            "data/total": measures.get("total_data", 0),
+            "matched_code": measures.get("matched_code", 0),
+            "matched_code/total": measures.get("total_code", 0),
+            "matched_data": measures.get("matched_data", 0),
+            "matched_data/total": measures.get("total_data", 0),
+            "matched_functions": measures.get("matched_functions", 0),
+            "matched_functions/total": measures.get("total_functions", 0),
+            "fuzzy_match": int(measures.get("fuzzy_match_percent", 0) * 100),
+            "fuzzy_match/total": 10000,
+            "units": measures.get("complete_units", 0),
+            "units/total": measures.get("total_units", 0),
         }
 
+    if config.progress_all:
+        add_category("all", report_data["measures"])
+    else:
+        # Support for old behavior where "dol" was the main category
+        add_category("dol", report_data["measures"])
+    for category in report_data["categories"]:
+        add_category(category["id"], category["measures"])
+
     with open(out_path / "progress.json", "w", encoding="utf-8") as w:
         json.dump(progress_json, w, indent=4)