diff --git a/README.md b/README.md index 41a18e1b..85a7e7d9 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Building ``` git clone https://github.com/PrimeDecomp/prime.git ``` -- Download [GC_WII_COMPILERS.zip](https://cdn.discordapp.com/attachments/727918646525165659/917185027656286218/GC_WII_COMPILERS.zip) +- Download [GC_WII_COMPILERS.zip](https://cdn.discordapp.com/attachments/727918646525165659/1129759991696457728/GC_WII_COMPILERS.zip) - Extract the _contents_ of the `GC` directory to `tools/mwcc_compiler`. - Resulting structure should be (for example) `tools/mwcc_compiler/1.3.2/mwcceppc.exe` - Configure: diff --git a/configure.py b/configure.py index e542e97f..f83f5234 100755 --- a/configure.py +++ b/configure.py @@ -699,7 +699,7 @@ LIBS = [ }, { "lib": "ai", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -708,7 +708,7 @@ LIBS = [ }, { "lib": "ar", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -736,7 +736,7 @@ LIBS = [ }, { "lib": "dsp", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -747,7 +747,7 @@ LIBS = [ }, { "lib": "dvd", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -799,7 +799,7 @@ LIBS = [ }, { "lib": "os", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -809,7 +809,7 @@ LIBS = [ ["Dolphin/os/OSArena", True], ["Dolphin/os/OSAudioSystem", True], ["Dolphin/os/OSCache", True], - ["Dolphin/os/OSContext", True, {"mw_version": "1.2.5"}], + ["Dolphin/os/OSContext", True], ["Dolphin/os/OSError", True], "Dolphin/os/OSFatal", "Dolphin/os/OSFont", @@ -830,7 +830,7 @@ LIBS = [ }, { "lib": "pad", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -969,7 +969,7 @@ LIBS = [ ], }, { - "lib": "txwin", + "lib": "txwin", "mw_version": "1.2.5", "cflags": "-Cpp_exceptions off -proc gecko -fp hard -nodefaults -nosyspath -i include -i libc -g -sym on -D_DEBUG=1 -enum int ", "host": False, @@ -988,7 +988,7 @@ LIBS = [ }, { "lib": "card", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -1012,7 +1012,7 @@ LIBS = [ }, { "lib": "si", - "mw_version": "1.2.5e", + "mw_version": "1.2.5n", "cflags": "$cflags_base", "host": False, "objects": [ @@ -1042,8 +1042,8 @@ LIBS = [ }, { "lib": "gba", - "mw_version": "1.2.5e", - #"cflags" : "-proc gecko -Cpp_exceptions off -fp hard -nodefaults -nosyspath -i include -i libc -g -sym on -D_DEBUG=1 -enum int -use_lmw_stmw on", + "mw_version": "1.2.5n", + # "cflags" : "-proc gecko -Cpp_exceptions off -fp hard -nodefaults -nosyspath -i include -i libc -g -sym on -D_DEBUG=1 -enum int -use_lmw_stmw on", "cflags": "$cflags_base", "host": False, "objects": [ @@ -1132,12 +1132,6 @@ if __name__ == "__main__": default=Path("build"), help="base build directory", ) - parser.add_argument( - "--franklite", - dest="frank", - action="store_false", - help="use franklite.py instead of frank.py (non-matching)", - ) args = parser.parse_args() # On Windows, we need this to use && in commands @@ -1155,7 +1149,7 @@ if __name__ == "__main__": if os.name != "nt" and "DEVKITPPC" in os.environ and not args.devkitppc: configure_args.extend(["--devkitppc", os.environ["DEVKITPPC"]]) n.variable("configure_args", configure_args) - n.variable("python", f"\"{sys.executable}\"") + n.variable("python", f'"{sys.executable}"') n.newline() ### @@ -1185,8 +1179,6 @@ if __name__ == "__main__": cflags_base += " -sym on -D_DEBUG" else: cflags_base += " -DNDEBUG" - if args.frank: - cflags_base += " -DFULL_FRANK" n.variable("cflags_base", cflags_base) n.variable( "cflags_retro", @@ -1205,12 +1197,12 @@ if __name__ == "__main__": map_path = build_path / "MetroidCWD.MAP" else: map_path = build_path / "MetroidCW.MAP" - ldflags += f" -map {map_path}" if args.debug: ldflags += " -g" n.variable("ldflags", ldflags) - n.variable("mw_version", "1.3.2") + mw_link_version = "1.3.2" + n.variable("mw_version", mw_link_version) if os.name == "nt": exe = ".exe" wine = "" @@ -1283,20 +1275,9 @@ if __name__ == "__main__": compiler_path = args.compilers / "$mw_version" mwcc = compiler_path / "mwcceppc.exe" mwld = compiler_path / "mwldeppc.exe" - frank = tools_path / "frank.py" - franklite = tools_path / "franklite.py" gnu_as = dkp_path / "bin" / f"powerpc-eabi-as{exe}" mwcc_cmd = f"{chain}{wine}{mwcc} $cflags -MMD -c $in -o $basedir" - if args.frank: - profile_mwcc = args.compilers / "1.2.5e" / "mwcceppc.exe" - mwcc_frank_cmd = ( - f"{chain}{wine}{mwcc} $cflags -MMD -c $in -o $basedir" - + f" && {wine}{profile_mwcc} $cflags -c $in -o $out.profile" - + f" && $python {frank} $out $out.profile $out" - ) - else: - mwcc_frank_cmd = f"{mwcc_cmd} && $python {franklite} $out $out" mwld_cmd = f"{wine}{mwld} $ldflags -o $out @$out.rsp" as_cmd = ( f"{chain}{gnu_as} $asflags -o $out $in -MD $out.d" @@ -1308,7 +1289,6 @@ if __name__ == "__main__": transform_dep = tools_path / "transform-dep.py" transform_dep_cmd = f" && $python {transform_dep} $basefile.d $basefile.d" mwcc_cmd += transform_dep_cmd - mwcc_frank_cmd += transform_dep_cmd n.comment("Link ELF file") n.rule( @@ -1330,16 +1310,6 @@ if __name__ == "__main__": ) n.newline() - n.comment("MWCC build with franklite") - n.rule( - name="mwcc_frank", - command=mwcc_frank_cmd, - description="FRANK $out", - depfile="$basefile.d", - deps="gcc", - ) - n.newline() - n.comment("Assemble asm") n.rule( name="as", @@ -1392,6 +1362,7 @@ if __name__ == "__main__": source_inputs = [] host_source_inputs = [] link_inputs = [] + used_compiler_versions = set() for lib in LIBS: inputs = [] if "lib" in lib: @@ -1415,6 +1386,8 @@ if __name__ == "__main__": object = object[0] mw_version = options["mw_version"] or lib["mw_version"] + used_compiler_versions.add(mw_version) + c_file = None if os.path.exists(src_path / f"{object}.cpp"): c_file = src_path / f"{object}.cpp" @@ -1425,13 +1398,6 @@ if __name__ == "__main__": print(f"Mark as incomplete: {c_file}") rule = "mwcc" implicit = [] - if mw_version == "1.2.5e": - mw_version = "1.2.5" - rule = "mwcc_frank" - if args.frank: - implicit.append(frank) - else: - implicit.append(franklite) n.build( outputs=path(build_src_path / f"{object}.o"), rule=rule, @@ -1482,6 +1448,19 @@ if __name__ == "__main__": link_inputs.extend(inputs) n.newline() + # Check if all compiler versions exist + for mw_version in used_compiler_versions: + mw_path = args.compilers / mw_version / "mwcceppc.exe" + if not os.path.exists(mw_path): + print(f"Compiler {mw_path} does not exist") + exit(1) + + # Check if linker exists + mw_path = args.compilers / mw_link_version / "mwldeppc.exe" + if not os.path.exists(mw_path): + print(f"Linker {mw_path} does not exist") + exit(1) + ### # Link ### diff --git a/tools/frank.py b/tools/frank.py deleted file mode 100644 index 489c8ec0..00000000 --- a/tools/frank.py +++ /dev/null @@ -1,215 +0,0 @@ -#! /usr/bin/env python3 - -# Written by Ethan Roseman (ethteck) -# MIT License -# Copyright 2021 - -# Modified by EpochFlame - -import argparse -import sys - -# Byte sequence that marks code size -CODESIZE_MAGIC = b"\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x34" -BLR_BYTE_SEQ = b"\x4E\x80\x00\x20" -MTLR_BYTE_SEQ = b"\x7C\x08\x03\xA6" -PROFILE_EXTRA_BYTES = b"\x48\x00\x00\x01\x60\x00\x00\x00" - -LWZ_BYTE = b"\x80" - -# Byte sequence array for branches to link register -BLR_BYTE_SEQ_ARRAY = [BLR_BYTE_SEQ, -b"\x4D\x80\x00\x20", b"\x4D\x80\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21", -b"\x4D\x82\x00\x20", b"\x4D\x82\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21", -b"\x4D\x81\x00\x20", b"\x4D\x81\x00\x21", b"\x4C\x80\x00\x20", b"\x4C\x80\x00\x21", -b"\x4C\x82\x00\x20", b"\x4C\x82\x00\x21", b"\x4C\x81\x00\x20", b"\x4C\x81\x00\x21", -b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21", -b"\x4D\x83\x00\x20", b"\x4D\x83\x00\x21", b"\x4C\x83\x00\x20", b"\x4C\x83\x00\x21"] - -# Example invocation: ./frank.py vanilla.o profile.o output.o -parser = argparse.ArgumentParser() -parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb')) -parser.add_argument("profile", help="Path to the profile object", type=argparse.FileType('rb')) -parser.add_argument("target", help="Path to the target object (to write)") - -args = parser.parse_args() - -# Read contents into bytearrays and close files -vanilla_bytes = args.vanilla.read() -args.vanilla.close() - -# If the file contains no code, the codesize magic will not be found. -# The vanilla object requires no modification. -code_size_magic_idx = vanilla_bytes.find(CODESIZE_MAGIC) -if code_size_magic_idx == -1: - with open(args.target, "wb") as f: - f.write(vanilla_bytes) - sys.exit(0) - -profile_bytes = args.profile.read() -args.profile.close() - -# Peephole rescheduling -# -# This is the pattern we will detect: -# (A) lwz <--. .--> (A) li -# (B) li <---\-' bl -# \ nop -# '---> (B) lwz -# -# If the profiled schedule swaps the -# instructions around the bl/nop, we -# instead use the vanilla schedule. -# -idx = 8 -shift = 0 # difference between vanilla and profile code, due to bl/nops -while idx < len(profile_bytes) - 16: - # Find next epilogue - epi_pos = profile_bytes.find(PROFILE_EXTRA_BYTES, idx) - if epi_pos == -1: - break # break while loop when no targets remain - if epi_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - - v_pos = epi_pos - shift - shift += 8 - - vanilla_inst_a = vanilla_bytes[v_pos-4:v_pos] - vanilla_inst_b = vanilla_bytes[v_pos:v_pos+4] - vanilla_inst_c = vanilla_bytes[v_pos+4:v_pos+8] - profile_inst_a = profile_bytes[epi_pos-4:epi_pos] - profile_inst_b = profile_bytes[epi_pos+8:epi_pos+12] - profile_inst_c = profile_bytes[epi_pos+12:epi_pos+16] - - opcode_a = vanilla_inst_a[0] >> 2 - opcode_b = vanilla_inst_b[0] >> 2 - opcode_c = vanilla_inst_c[0] >> 2 - - LWZ = 0x80 >> 2 - LFS = 0xC0 >> 2 - ADDI = 0x38 >> 2 - LI = ADDI # an LI instruction is just an ADDI with RA=0 - LMW = 0xB8 >> 2 - FDIVS = 0xEC >> 2 - - # Adjust LWZ and LMW loading from r1. - if opcode_a in [LWZ, LMW] and vanilla_inst_a[2] == 0x00 and \ - opcode_b in [LI, LFS, FDIVS] and \ - vanilla_inst_a == profile_inst_b and \ - vanilla_inst_b == profile_inst_a and \ - vanilla_inst_c == profile_inst_c and \ - opcode_c != ADDI: # <- don't reorder if at the very end of the epilogue - - # Swap instructions (A) and (B) - profile_bytes = profile_bytes[:epi_pos-4] \ - + vanilla_inst_a \ - + PROFILE_EXTRA_BYTES \ - + vanilla_inst_b \ - + profile_bytes[epi_pos+12:] - - # Similar reordering for lwz/lmw, except both insns follow the bl/nop - elif opcode_b == LWZ and \ - opcode_c == LMW and \ - vanilla_inst_b == profile_inst_c and \ - vanilla_inst_c == profile_inst_b: - - profile_bytes = profile_bytes[:epi_pos+8] \ - + vanilla_inst_b \ - + vanilla_inst_c \ - + profile_bytes[epi_pos+16:] - - idx = epi_pos + 8 - -# Remove byte sequence -stripped_bytes = profile_bytes.replace(PROFILE_EXTRA_BYTES, b"") - -# Find end of code sections in vanilla and stripped bytes -code_size_offset = code_size_magic_idx + len(CODESIZE_MAGIC) -code_size_bytes = vanilla_bytes[code_size_offset:code_size_offset+4] -code_size = int.from_bytes(code_size_bytes, byteorder='big') - -eoc_offset = 0x34 + code_size - -# Break if the eoc is not found -assert(eoc_offset != len(vanilla_bytes)) - -# Replace 0x34 - eoc in vanilla with bytes from stripped -final_bytes = vanilla_bytes[:0x34] + stripped_bytes[0x34:eoc_offset] + vanilla_bytes[eoc_offset:] - -# Fix branches to link register -for seq in BLR_BYTE_SEQ_ARRAY: - idx = 0 - - while idx < len(vanilla_bytes): - found_pos = vanilla_bytes.find(seq, idx) - if found_pos == -1: - break # break while loop when no targets remain - if found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - final_bytes = final_bytes[:found_pos] + vanilla_bytes[found_pos:found_pos+4] + final_bytes[found_pos+4:] - idx = found_pos + len(seq) - -# Reunify mtlr/blr instructions, shifting intermediary instructions up -idx = 0 - -while idx < len(final_bytes): - # Find mtlr position - mtlr_found_pos = final_bytes.find(MTLR_BYTE_SEQ, idx) - if mtlr_found_pos == -1: - break # break while loop when no targets remain - if mtlr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - # Find paired blr position - blr_found_pos = final_bytes.find(BLR_BYTE_SEQ, mtlr_found_pos) - if blr_found_pos == -1: - break # break while loop when no targets remain - if blr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - if mtlr_found_pos + 4 == blr_found_pos: - idx += 4 - continue # continue if mtlr is followed directly by blr - - final_bytes = final_bytes[:mtlr_found_pos] + final_bytes[mtlr_found_pos+4:blr_found_pos] + final_bytes[mtlr_found_pos:mtlr_found_pos+4] + final_bytes[blr_found_pos:] - idx = mtlr_found_pos + len(MTLR_BYTE_SEQ) - -# Reorder lmw/lwz/lfd instructions, if needed (@Altafen) -# Specifically, if this sequence shows up in the stripped profiler code: "LMW, LWZ, LFD*" -# And this sequence shows up in the vanilla code: "LWZ, LFD*, LMW" -# (LFD* = any number of LFDs, including zero) -# If all bytes match between the two (except for the reordering), then use the vanilla ordering. -# This could be written to anchor around the "BL, NOP" instructions in unstripped profiler code, -# or to check for the presence of "ADDI, MTLR, BLR" soon after. -# This also could be written to decode the operands of each instruction to make sure the reorder is harmless. -# Neither of these safeguards are necessary at the moment. -LWZ = 32 -LMW = 46 -LFD = 50 -idx = 0 -while idx+4 < len(final_bytes): - if final_bytes[idx] >> 2 == LMW and final_bytes[idx+4] >> 2 == LWZ and vanilla_bytes[idx] >> 2 == LWZ: - start_idx = idx - lmw_bytes = final_bytes[idx:idx+4] - lwz_bytes = final_bytes[idx+4:idx+8] - if vanilla_bytes[idx:idx+4] != lwz_bytes: - idx += 4 - continue - lfd_bytes = b"" - idx += 4 - while vanilla_bytes[idx] >> 2 == LFD: - lfd_bytes += vanilla_bytes[idx:idx+4] - idx += 4 - if vanilla_bytes[idx:idx+4] != lmw_bytes: - continue - if final_bytes[start_idx+8:start_idx+8+len(lfd_bytes)] != lfd_bytes: - continue - idx += 4 - final_bytes = final_bytes[:start_idx] + lwz_bytes + lfd_bytes + lmw_bytes + final_bytes[idx:] - continue - idx += 4 - -with open(args.target, "wb") as f: - f.write(final_bytes) diff --git a/tools/franklite.py b/tools/franklite.py deleted file mode 100755 index b8c9d009..00000000 --- a/tools/franklite.py +++ /dev/null @@ -1,52 +0,0 @@ -#! /usr/bin/env python3 - -# Written by Ethan Roseman (ethteck) -# MIT License -# Copyright 2021 - -# Modified by EpochFlame - -import argparse - -# Byte sequences -BLR_BYTE_SEQ = b"\x4E\x80\x00\x20" -MTLR_BYTE_SEQ = b"\x7C\x08\x03\xA6" - -# Example invocation: ./frank.py vanilla.o target.o -parser = argparse.ArgumentParser() -parser.add_argument("vanilla", help="Path to the vanilla object", type=argparse.FileType('rb')) -parser.add_argument("target", help="Path to the target object (to write)") - -args = parser.parse_args() - -# Read contents into bytearrays and close files -vanilla_bytes = args.vanilla.read() -args.vanilla.close() - -# Reunify mtlr/blr instructions, shifting intermediary instructions up -idx = 0 - -while idx < len(vanilla_bytes): - # Find mtlr position - mtlr_found_pos = vanilla_bytes.find(MTLR_BYTE_SEQ, idx) - if mtlr_found_pos == -1: - break # break while loop when no targets remain - if mtlr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - # Find paired blr position - blr_found_pos = vanilla_bytes.find(BLR_BYTE_SEQ, mtlr_found_pos) - if blr_found_pos == -1: - break # break while loop when no targets remain - if blr_found_pos % 4 != 0: # check 4-byte alignment - idx += 4 - continue - if mtlr_found_pos + 4 == blr_found_pos: - idx += 4 - continue # continue if mtlr is followed directly by blr - - vanilla_bytes = vanilla_bytes[:mtlr_found_pos] + vanilla_bytes[mtlr_found_pos+4:blr_found_pos] + vanilla_bytes[mtlr_found_pos:mtlr_found_pos+4] + vanilla_bytes[blr_found_pos:] - idx = mtlr_found_pos + len(MTLR_BYTE_SEQ) - -with open(args.target, "wb") as f: - f.write(vanilla_bytes)