Use patched 1.2.5n; delete frank/franklite

This commit is contained in:
Luke Street 2023-07-15 10:03:29 -04:00
parent a24285819d
commit 5bc8675e2c
4 changed files with 32 additions and 320 deletions

View File

@ -67,7 +67,7 @@ Building
``` ```
git clone https://github.com/PrimeDecomp/prime.git git clone https://github.com/PrimeDecomp/prime.git
``` ```
- Download [GC_WII_COMPILERS.zip](https://cdn.discordapp.com/attachments/727918646525165659/917185027656286218/GC_WII_COMPILERS.zip) - Download [GC_WII_COMPILERS.zip](https://cdn.discordapp.com/attachments/727918646525165659/1129759991696457728/GC_WII_COMPILERS.zip)
- Extract the _contents_ of the `GC` directory to `tools/mwcc_compiler`. - Extract the _contents_ of the `GC` directory to `tools/mwcc_compiler`.
- Resulting structure should be (for example) `tools/mwcc_compiler/1.3.2/mwcceppc.exe` - Resulting structure should be (for example) `tools/mwcc_compiler/1.3.2/mwcceppc.exe`
- Configure: - Configure:

View File

@ -699,7 +699,7 @@ LIBS = [
}, },
{ {
"lib": "ai", "lib": "ai",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -708,7 +708,7 @@ LIBS = [
}, },
{ {
"lib": "ar", "lib": "ar",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -736,7 +736,7 @@ LIBS = [
}, },
{ {
"lib": "dsp", "lib": "dsp",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -747,7 +747,7 @@ LIBS = [
}, },
{ {
"lib": "dvd", "lib": "dvd",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -799,7 +799,7 @@ LIBS = [
}, },
{ {
"lib": "os", "lib": "os",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -809,7 +809,7 @@ LIBS = [
["Dolphin/os/OSArena", True], ["Dolphin/os/OSArena", True],
["Dolphin/os/OSAudioSystem", True], ["Dolphin/os/OSAudioSystem", True],
["Dolphin/os/OSCache", True], ["Dolphin/os/OSCache", True],
["Dolphin/os/OSContext", True, {"mw_version": "1.2.5"}], ["Dolphin/os/OSContext", True],
["Dolphin/os/OSError", True], ["Dolphin/os/OSError", True],
"Dolphin/os/OSFatal", "Dolphin/os/OSFatal",
"Dolphin/os/OSFont", "Dolphin/os/OSFont",
@ -830,7 +830,7 @@ LIBS = [
}, },
{ {
"lib": "pad", "lib": "pad",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -988,7 +988,7 @@ LIBS = [
}, },
{ {
"lib": "card", "lib": "card",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -1012,7 +1012,7 @@ LIBS = [
}, },
{ {
"lib": "si", "lib": "si",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
"objects": [ "objects": [
@ -1042,7 +1042,7 @@ LIBS = [
}, },
{ {
"lib": "gba", "lib": "gba",
"mw_version": "1.2.5e", "mw_version": "1.2.5n",
# "cflags" : "-proc gecko -Cpp_exceptions off -fp hard -nodefaults -nosyspath -i include -i libc -g -sym on -D_DEBUG=1 -enum int -use_lmw_stmw on", # "cflags" : "-proc gecko -Cpp_exceptions off -fp hard -nodefaults -nosyspath -i include -i libc -g -sym on -D_DEBUG=1 -enum int -use_lmw_stmw on",
"cflags": "$cflags_base", "cflags": "$cflags_base",
"host": False, "host": False,
@ -1132,12 +1132,6 @@ if __name__ == "__main__":
default=Path("build"), default=Path("build"),
help="base build directory", help="base build directory",
) )
parser.add_argument(
"--franklite",
dest="frank",
action="store_false",
help="use franklite.py instead of frank.py (non-matching)",
)
args = parser.parse_args() args = parser.parse_args()
# On Windows, we need this to use && in commands # On Windows, we need this to use && in commands
@ -1155,7 +1149,7 @@ if __name__ == "__main__":
if os.name != "nt" and "DEVKITPPC" in os.environ and not args.devkitppc: if os.name != "nt" and "DEVKITPPC" in os.environ and not args.devkitppc:
configure_args.extend(["--devkitppc", os.environ["DEVKITPPC"]]) configure_args.extend(["--devkitppc", os.environ["DEVKITPPC"]])
n.variable("configure_args", configure_args) n.variable("configure_args", configure_args)
n.variable("python", f"\"{sys.executable}\"") n.variable("python", f'"{sys.executable}"')
n.newline() n.newline()
### ###
@ -1185,8 +1179,6 @@ if __name__ == "__main__":
cflags_base += " -sym on -D_DEBUG" cflags_base += " -sym on -D_DEBUG"
else: else:
cflags_base += " -DNDEBUG" cflags_base += " -DNDEBUG"
if args.frank:
cflags_base += " -DFULL_FRANK"
n.variable("cflags_base", cflags_base) n.variable("cflags_base", cflags_base)
n.variable( n.variable(
"cflags_retro", "cflags_retro",
@ -1205,12 +1197,12 @@ if __name__ == "__main__":
map_path = build_path / "MetroidCWD.MAP" map_path = build_path / "MetroidCWD.MAP"
else: else:
map_path = build_path / "MetroidCW.MAP" map_path = build_path / "MetroidCW.MAP"
ldflags += f" -map {map_path}" ldflags += f" -map {map_path}"
if args.debug: if args.debug:
ldflags += " -g" ldflags += " -g"
n.variable("ldflags", ldflags) n.variable("ldflags", ldflags)
n.variable("mw_version", "1.3.2") mw_link_version = "1.3.2"
n.variable("mw_version", mw_link_version)
if os.name == "nt": if os.name == "nt":
exe = ".exe" exe = ".exe"
wine = "" wine = ""
@ -1283,20 +1275,9 @@ if __name__ == "__main__":
compiler_path = args.compilers / "$mw_version" compiler_path = args.compilers / "$mw_version"
mwcc = compiler_path / "mwcceppc.exe" mwcc = compiler_path / "mwcceppc.exe"
mwld = compiler_path / "mwldeppc.exe" mwld = compiler_path / "mwldeppc.exe"
frank = tools_path / "frank.py"
franklite = tools_path / "franklite.py"
gnu_as = dkp_path / "bin" / f"powerpc-eabi-as{exe}" gnu_as = dkp_path / "bin" / f"powerpc-eabi-as{exe}"
mwcc_cmd = f"{chain}{wine}{mwcc} $cflags -MMD -c $in -o $basedir" mwcc_cmd = f"{chain}{wine}{mwcc} $cflags -MMD -c $in -o $basedir"
if args.frank:
profile_mwcc = args.compilers / "1.2.5e" / "mwcceppc.exe"
mwcc_frank_cmd = (
f"{chain}{wine}{mwcc} $cflags -MMD -c $in -o $basedir"
+ f" && {wine}{profile_mwcc} $cflags -c $in -o $out.profile"
+ f" && $python {frank} $out $out.profile $out"
)
else:
mwcc_frank_cmd = f"{mwcc_cmd} && $python {franklite} $out $out"
mwld_cmd = f"{wine}{mwld} $ldflags -o $out @$out.rsp" mwld_cmd = f"{wine}{mwld} $ldflags -o $out @$out.rsp"
as_cmd = ( as_cmd = (
f"{chain}{gnu_as} $asflags -o $out $in -MD $out.d" f"{chain}{gnu_as} $asflags -o $out $in -MD $out.d"
@ -1308,7 +1289,6 @@ if __name__ == "__main__":
transform_dep = tools_path / "transform-dep.py" transform_dep = tools_path / "transform-dep.py"
transform_dep_cmd = f" && $python {transform_dep} $basefile.d $basefile.d" transform_dep_cmd = f" && $python {transform_dep} $basefile.d $basefile.d"
mwcc_cmd += transform_dep_cmd mwcc_cmd += transform_dep_cmd
mwcc_frank_cmd += transform_dep_cmd
n.comment("Link ELF file") n.comment("Link ELF file")
n.rule( n.rule(
@ -1330,16 +1310,6 @@ if __name__ == "__main__":
) )
n.newline() n.newline()
n.comment("MWCC build with franklite")
n.rule(
name="mwcc_frank",
command=mwcc_frank_cmd,
description="FRANK $out",
depfile="$basefile.d",
deps="gcc",
)
n.newline()
n.comment("Assemble asm") n.comment("Assemble asm")
n.rule( n.rule(
name="as", name="as",
@ -1392,6 +1362,7 @@ if __name__ == "__main__":
source_inputs = [] source_inputs = []
host_source_inputs = [] host_source_inputs = []
link_inputs = [] link_inputs = []
used_compiler_versions = set()
for lib in LIBS: for lib in LIBS:
inputs = [] inputs = []
if "lib" in lib: if "lib" in lib:
@ -1415,6 +1386,8 @@ if __name__ == "__main__":
object = object[0] object = object[0]
mw_version = options["mw_version"] or lib["mw_version"] mw_version = options["mw_version"] or lib["mw_version"]
used_compiler_versions.add(mw_version)
c_file = None c_file = None
if os.path.exists(src_path / f"{object}.cpp"): if os.path.exists(src_path / f"{object}.cpp"):
c_file = src_path / f"{object}.cpp" c_file = src_path / f"{object}.cpp"
@ -1425,13 +1398,6 @@ if __name__ == "__main__":
print(f"Mark as incomplete: {c_file}") print(f"Mark as incomplete: {c_file}")
rule = "mwcc" rule = "mwcc"
implicit = [] implicit = []
if mw_version == "1.2.5e":
mw_version = "1.2.5"
rule = "mwcc_frank"
if args.frank:
implicit.append(frank)
else:
implicit.append(franklite)
n.build( n.build(
outputs=path(build_src_path / f"{object}.o"), outputs=path(build_src_path / f"{object}.o"),
rule=rule, rule=rule,
@ -1482,6 +1448,19 @@ if __name__ == "__main__":
link_inputs.extend(inputs) link_inputs.extend(inputs)
n.newline() n.newline()
# Check if all compiler versions exist
for mw_version in used_compiler_versions:
mw_path = args.compilers / mw_version / "mwcceppc.exe"
if not os.path.exists(mw_path):
print(f"Compiler {mw_path} does not exist")
exit(1)
# Check if linker exists
mw_path = args.compilers / mw_link_version / "mwldeppc.exe"
if not os.path.exists(mw_path):
print(f"Linker {mw_path} does not exist")
exit(1)
### ###
# Link # Link
### ###

View File

@ -1,215 +0,0 @@
#! /usr/bin/env python3
# Written by Ethan Roseman (ethteck)
# MIT License
# Copyright 2021
# Modified by EpochFlame
#
# Combines two object files into one target object:
#   * "vanilla" supplies everything outside the code region and acts as the
#     reference for the instruction-order fix-ups below;
#   * "profile" supplies the code region, after every PROFILE_EXTRA_BYTES
#     sequence has been removed from it.
# If the vanilla object does not contain CODESIZE_MAGIC it is copied to the
# target unchanged and the profile object is never read.
import argparse
import sys

# Byte sequence that marks code size
# (the four bytes that follow it are read as a big-endian code size).
CODESIZE_MAGIC = b"\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x34"
BLR_BYTE_SEQ = b"\x4E\x80\x00\x20"
MTLR_BYTE_SEQ = b"\x7C\x08\x03\xA6"
# The 8 bytes (referred to as "bl/nop" below) that the profile object
# contains and the vanilla object does not.
PROFILE_EXTRA_BYTES = b"\x48\x00\x00\x01\x60\x00\x00\x00"
# NOTE(review): LWZ_BYTE is not referenced anywhere else in this script.
LWZ_BYTE = b"\x80"

# Byte sequence array for branches to link register
# NOTE(review): several entries are listed twice (for example
# b"\x4C\x81\x00\x20" and b"\x4D\x83\x00\x20"); the duplicates only repeat
# work already done by the first occurrence.
BLR_BYTE_SEQ_ARRAY = [BLR_BYTE_SEQ,
b"\x4D\x80\x00\x20", b"\x4D\x80\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21",
b"\x4D\x82\x00\x20", b"\x4D\x82\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21",
b"\x4D\x81\x00\x20", b"\x4D\x81\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21",
b"\x4C\x82\x00\x20", b"\x4C\x82\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21",
b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21",
b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21"]

# Example invocation: ./frank.py vanilla.o profile.o output.o
# argparse.FileType opens both input files while the arguments are parsed;
# the target is only a path and is opened when it is written.
parser = argparse.ArgumentParser()
parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb'))
parser.add_argument("profile", help="Path to the profile object", type=argparse.FileType('rb'))
parser.add_argument("target", help="Path to the target object (to write)")
args = parser.parse_args()

# Read contents into immutable bytes objects and close files
vanilla_bytes = args.vanilla.read()
args.vanilla.close()

# If the file contains no code, the codesize magic will not be found.
# The vanilla object requires no modification.
code_size_magic_idx = vanilla_bytes.find(CODESIZE_MAGIC)
if code_size_magic_idx == -1:
    with open(args.target, "wb") as f:
        f.write(vanilla_bytes)
    # The profile handle opened by argparse is left for interpreter exit.
    sys.exit(0)

profile_bytes = args.profile.read()
args.profile.close()

# Peephole rescheduling
#
# This is the pattern we will detect:
# (A) lwz <--. .--> (A) li
# (B) li  <---\-'       bl
#              \        nop
#               '---> (B) lwz
#
# If the profiled schedule swaps the
# instructions around the bl/nop, we
# instead use the vanilla schedule.
#
# Every rewrite below replaces a slice of profile_bytes with the same number
# of bytes, so offsets found earlier remain valid.
idx = 8
shift = 0 # difference between vanilla and profile code, due to bl/nops
while idx < len(profile_bytes) - 16:
    # Find next epilogue
    epi_pos = profile_bytes.find(PROFILE_EXTRA_BYTES, idx)
    if epi_pos == -1:
        break # break while loop when no targets remain
    if epi_pos % 4 != 0: # check 4-byte alignment
        # NOTE(review): advances 4 bytes at a time, so the same misaligned
        # match is found again until idx has moved past it.
        idx += 4
        continue

    # Offset in the vanilla object that corresponds to epi_pos; each aligned
    # bl/nop seen so far accounts for 8 bytes the vanilla object lacks.
    v_pos = epi_pos - shift
    shift += 8

    # Vanilla: the word before v_pos and the two words starting at v_pos.
    vanilla_inst_a = vanilla_bytes[v_pos-4:v_pos]
    vanilla_inst_b = vanilla_bytes[v_pos:v_pos+4]
    vanilla_inst_c = vanilla_bytes[v_pos+4:v_pos+8]
    # Profile: the word before the bl/nop and the two words after it.
    profile_inst_a = profile_bytes[epi_pos-4:epi_pos]
    profile_inst_b = profile_bytes[epi_pos+8:epi_pos+12]
    profile_inst_c = profile_bytes[epi_pos+12:epi_pos+16]

    # Top six bits of the first byte of each vanilla word. Only these bits
    # are compared below; the remaining bits of the word are not decoded.
    opcode_a = vanilla_inst_a[0] >> 2
    opcode_b = vanilla_inst_b[0] >> 2
    opcode_c = vanilla_inst_c[0] >> 2

    # These constants are re-assigned on every iteration.
    LWZ = 0x80 >> 2
    LFS = 0xC0 >> 2
    ADDI = 0x38 >> 2
    LI = ADDI # an LI instruction is just an ADDI with RA=0
    LMW = 0xB8 >> 2
    FDIVS = 0xEC >> 2

    # Adjust LWZ and LMW loading from r1.
    # Applies when the profile object holds vanilla's (A) and (B) swapped
    # around the bl/nop while (C) is identical in both.
    if opcode_a in [LWZ, LMW] and vanilla_inst_a[2] == 0x00 and \
       opcode_b in [LI, LFS, FDIVS] and \
       vanilla_inst_a == profile_inst_b and \
       vanilla_inst_b == profile_inst_a and \
       vanilla_inst_c == profile_inst_c and \
       opcode_c != ADDI: # <- don't reorder if at the very end of the epilogue
        # Swap instructions (A) and (B)
        profile_bytes = profile_bytes[:epi_pos-4] \
                        + vanilla_inst_a \
                        + PROFILE_EXTRA_BYTES \
                        + vanilla_inst_b \
                        + profile_bytes[epi_pos+12:]
    # Similar reordering for lwz/lmw, except both insns follow the bl/nop
    elif opcode_b == LWZ and \
         opcode_c == LMW and \
         vanilla_inst_b == profile_inst_c and \
         vanilla_inst_c == profile_inst_b:
        profile_bytes = profile_bytes[:epi_pos+8] \
                        + vanilla_inst_b \
                        + vanilla_inst_c \
                        + profile_bytes[epi_pos+16:]

    # Continue searching just past this bl/nop.
    idx = epi_pos + 8

# Remove byte sequence
# (every occurrence is removed, including any that failed the alignment
# check in the loop above).
stripped_bytes = profile_bytes.replace(PROFILE_EXTRA_BYTES, b"")

# Find end of code sections in vanilla and stripped bytes
code_size_offset = code_size_magic_idx + len(CODESIZE_MAGIC)
code_size_bytes = vanilla_bytes[code_size_offset:code_size_offset+4]
code_size = int.from_bytes(code_size_bytes, byteorder='big')
# 0x34 is used as the offset at which the code starts
# (presumably the size of the object file header -- TODO confirm).
eoc_offset = 0x34 + code_size

# Break if the eoc is not found
# NOTE(review): this check is an assert, so it is skipped under `python -O`.
assert(eoc_offset != len(vanilla_bytes))

# Replace 0x34 - eoc in vanilla with bytes from stripped
final_bytes = vanilla_bytes[:0x34] + stripped_bytes[0x34:eoc_offset] + vanilla_bytes[eoc_offset:]

# Fix branches to link register
# For every 4-byte-aligned occurrence of a listed sequence in the vanilla
# object, the same four bytes are written at that offset of final_bytes.
for seq in BLR_BYTE_SEQ_ARRAY:
    idx = 0
    while idx < len(vanilla_bytes):
        found_pos = vanilla_bytes.find(seq, idx)
        if found_pos == -1:
            break # break while loop when no targets remain
        if found_pos % 4 != 0: # check 4-byte alignment
            idx += 4
            continue
        final_bytes = final_bytes[:found_pos] + vanilla_bytes[found_pos:found_pos+4] + final_bytes[found_pos+4:]
        idx = found_pos + len(seq)

# Reunify mtlr/blr instructions, shifting intermediary instructions up
idx = 0
while idx < len(final_bytes):
    # Find mtlr position
    mtlr_found_pos = final_bytes.find(MTLR_BYTE_SEQ, idx)
    if mtlr_found_pos == -1:
        break # break while loop when no targets remain
    if mtlr_found_pos % 4 != 0: # check 4-byte alignment
        idx += 4
        continue
    # Find paired blr position
    # (only the plain BLR_BYTE_SEQ is searched for, not the other variants).
    blr_found_pos = final_bytes.find(BLR_BYTE_SEQ, mtlr_found_pos)
    if blr_found_pos == -1:
        break # break while loop when no targets remain
    if blr_found_pos % 4 != 0: # check 4-byte alignment
        # NOTE(review): the search does not look for a later, aligned blr;
        # idx crawls forward until this mtlr is skipped altogether.
        idx += 4
        continue
    if mtlr_found_pos + 4 == blr_found_pos:
        idx += 4
        continue # continue if mtlr is followed directly by blr
    # Drop the mtlr from its current slot and re-insert it directly before
    # the blr; the bytes in between move up by four.
    final_bytes = final_bytes[:mtlr_found_pos] + final_bytes[mtlr_found_pos+4:blr_found_pos] + final_bytes[mtlr_found_pos:mtlr_found_pos+4] + final_bytes[blr_found_pos:]
    idx = mtlr_found_pos + len(MTLR_BYTE_SEQ)

# Reorder lmw/lwz/lfd instructions, if needed (@Altafen)
# Specifically, if this sequence shows up in the stripped profiler code: "LMW, LWZ, LFD*"
# And this sequence shows up in the vanilla code: "LWZ, LFD*, LMW"
# (LFD* = any number of LFDs, including zero)
# If all bytes match between the two (except for the reordering), then use the vanilla ordering.
# This could be written to anchor around the "BL, NOP" instructions in unstripped profiler code,
# or to check for the presence of "ADDI, MTLR, BLR" soon after.
# This also could be written to decode the operands of each instruction to make sure the reorder is harmless.
# Neither of these safeguards are necessary at the moment.
# The values below are compared against the top six bits of a word's first
# byte; LWZ and LMW equal the shifted hex constants used in the peephole loop.
LWZ = 32
LMW = 46
LFD = 50
idx = 0
while idx+4 < len(final_bytes):
    if final_bytes[idx] >> 2 == LMW and final_bytes[idx+4] >> 2 == LWZ and vanilla_bytes[idx] >> 2 == LWZ:
        start_idx = idx
        lmw_bytes = final_bytes[idx:idx+4]
        lwz_bytes = final_bytes[idx+4:idx+8]
        if vanilla_bytes[idx:idx+4] != lwz_bytes:
            idx += 4
            continue
        # Collect the run of LFD words that follows the LWZ in vanilla.
        lfd_bytes = b""
        idx += 4
        # NOTE(review): idx is not bounds-checked here; a run of LFD words
        # reaching the end of vanilla_bytes would raise IndexError.
        while vanilla_bytes[idx] >> 2 == LFD:
            lfd_bytes += vanilla_bytes[idx:idx+4]
            idx += 4
        # On either mismatch the scan resumes from the current idx (already
        # past the LWZ and LFD words) without modifying final_bytes.
        if vanilla_bytes[idx:idx+4] != lmw_bytes:
            continue
        if final_bytes[start_idx+8:start_idx+8+len(lfd_bytes)] != lfd_bytes:
            continue
        idx += 4
        # Rewrite the span in vanilla order: LWZ, LFD*, LMW.
        final_bytes = final_bytes[:start_idx] + lwz_bytes + lfd_bytes + lmw_bytes + final_bytes[idx:]
        continue
    idx += 4

with open(args.target, "wb") as f:
    f.write(final_bytes)

View File

@ -1,52 +0,0 @@
#! /usr/bin/env python3
# Written by Ethan Roseman (ethteck)
# MIT License
# Copyright 2021
# Modified by EpochFlame
"""Move each mtlr instruction so that it sits directly before its blr.

Example invocation: ./franklite.py vanilla.o target.o

The whole input file is scanned as raw bytes; no object-file structure is
parsed. The output always has the same length as the input.
"""
import argparse

# Byte sequences
BLR_BYTE_SEQ = b"\x4E\x80\x00\x20"
MTLR_BYTE_SEQ = b"\x7C\x08\x03\xA6"


def reunify_mtlr_blr(data):
    """Return a copy of *data* with mtlr/blr pairs reunified.

    For every 4-byte-aligned MTLR_BYTE_SEQ, the next BLR_BYTE_SEQ at or after
    it is located. If that blr is 4-byte aligned and not already adjacent,
    the mtlr is moved to the slot directly before the blr and the bytes that
    were between them shift up by four.

    Args:
        data: bytes-like contents of the object file.

    Returns:
        bytes of the same length as *data*.
    """
    buf = bytearray(data)
    step = len(MTLR_BYTE_SEQ)
    idx = 0
    while idx < len(buf):
        mtlr_pos = buf.find(MTLR_BYTE_SEQ, idx)
        if mtlr_pos == -1:
            break  # no mtlr remains
        if mtlr_pos % 4 != 0:
            # Misaligned match: resume at the first aligned offset past it.
            # (Same end state as stepping idx by 4 until the match is
            # behind us, without re-scanning on every step.)
            idx = (mtlr_pos // 4 + 1) * 4
            continue
        blr_pos = buf.find(BLR_BYTE_SEQ, mtlr_pos)
        if blr_pos == -1:
            break  # no blr remains, so nothing further can be paired
        # NOTE(review): when the first blr match is misaligned this mtlr is
        # skipped instead of searching for a later aligned blr. Kept as-is
        # because build output depends on the exact bytes produced.
        if blr_pos % 4 == 0 and blr_pos != mtlr_pos + step:
            # In-place, length-preserving rotation: intermediate bytes move
            # up by one instruction and the mtlr lands right before the blr.
            buf[mtlr_pos:blr_pos] = buf[mtlr_pos + step:blr_pos] + MTLR_BYTE_SEQ
        idx = mtlr_pos + step
    return bytes(buf)


def main():
    """Parse the command line, patch the vanilla object, write the target."""
    parser = argparse.ArgumentParser()
    parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb'))
    parser.add_argument("target", help="Path to the target object (to write)")
    args = parser.parse_args()

    # argparse has already opened the input; make sure it is closed even if
    # the read fails.
    with args.vanilla as vanilla_file:
        vanilla_bytes = vanilla_file.read()

    with open(args.target, "wb") as f:
        f.write(reunify_mtlr_blr(vanilla_bytes))


if __name__ == "__main__":
    main()