From ffa313e7f43b32502b52976bca33cabee82054b0 Mon Sep 17 00:00:00 2001 From: Phillip Stephens Date: Thu, 3 Nov 2022 01:19:58 -0700 Subject: [PATCH] Match and link OSCache Former-commit-id: 2c8976221f0c082c66a4cd4200bb411d35281cf0 --- .vscode/settings.json | 4 +- asm/Dolphin/os/OSCache.s | 15 +- asm/Kyoto/Animation/DolphinCSkinRules.s | 3 +- include/dolphin/PPCArch.h | 29 ++ include/dolphin/db.h | 1 + include/dolphin/os/OSCache.h | 18 + include/dolphin/os/OSError.h | 20 ++ obj_files.mk | 2 +- src/Dolphin/PPCArch.c | 4 +- src/Dolphin/os/OSCache.c | 427 ++++++++++++++++++++++++ 10 files changed, 508 insertions(+), 15 deletions(-) create mode 100644 src/Dolphin/os/OSCache.c diff --git a/.vscode/settings.json b/.vscode/settings.json index 728a0e18..8c52412f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,7 +23,9 @@ "dvd.h": "c", "dvdpriv.h": "c", "__ppc_eabi_linker.h": "c", - "hack.h": "c" + "hack.h": "c", + "db.h": "c", + "ppcarch.h": "c" }, "files.autoSave": "onFocusChange", "files.insertFinalNewline": true, diff --git a/asm/Dolphin/os/OSCache.s b/asm/Dolphin/os/OSCache.s index 54a1ca63..639c6861 100644 --- a/asm/Dolphin/os/OSCache.s +++ b/asm/Dolphin/os/OSCache.s @@ -131,9 +131,6 @@ lbl_8037EB84: /* 8037EB84 0037BAE4 7C 00 1F EC */ dcbz r0, r3 /* 8037EB88 0037BAE8 38 63 00 20 */ addi r3, r3, 0x20 /* 8037EB8C 0037BAEC 42 00 FF F8 */ bdnz lbl_8037EB84 - -.global sub_8037eb90 -sub_8037eb90: /* 8037EB90 0037BAF0 4E 80 00 20 */ blr .global ICInvalidateRange @@ -255,8 +252,8 @@ lbl_8037ECFC: /* 8037ED10 0037BC70 7C 98 E3 A6 */ mtspr 0x398, r4 /* 8037ED14 0037BC74 4E 80 00 20 */ blr -.global sub_8037ed18 -sub_8037ed18: +.global LCLoadBlocks +LCLoadBlocks: /* 8037ED18 0037BC78 54 A6 F6 FE */ rlwinm r6, r5, 0x1e, 0x1b, 0x1f /* 8037ED1C 0037BC7C 54 84 01 3E */ clrlwi r4, r4, 4 /* 8037ED20 0037BC80 7C C6 23 78 */ or r6, r6, r4 @@ -279,8 +276,8 @@ LCStoreBlocks: /* 8037ED58 0037BCB8 7C DB E3 A6 */ mtspr 0x39b, r6 /* 8037ED5C 0037BCBC 4E 80 00 20 */ blr -.global sub_8037ed60 -sub_8037ed60: +.global LCLoadData +LCLoadData: /* 8037ED60 0037BCC0 7C 08 02 A6 */ mflr r0 /* 8037ED64 0037BCC4 90 01 00 04 */ stw r0, 4(r1) /* 8037ED68 0037BCC8 94 21 FF D8 */ stwu r1, -0x28(r1) @@ -306,14 +303,14 @@ lbl_8037EDA4: /* 8037EDAC 0037BD0C 7F 83 E3 78 */ mr r3, r28 /* 8037EDB0 0037BD10 7F A4 EB 78 */ mr r4, r29 /* 8037EDB4 0037BD14 7F E5 FB 78 */ mr r5, r31 -/* 8037EDB8 0037BD18 4B FF FF 61 */ bl sub_8037ed18 +/* 8037EDB8 0037BD18 4B FF FF 61 */ bl LCLoadBlocks /* 8037EDBC 0037BD1C 3B E0 00 00 */ li r31, 0 /* 8037EDC0 0037BD20 48 00 00 20 */ b lbl_8037EDE0 lbl_8037EDC4: /* 8037EDC4 0037BD24 7F 83 E3 78 */ mr r3, r28 /* 8037EDC8 0037BD28 7F A4 EB 78 */ mr r4, r29 /* 8037EDCC 0037BD2C 38 A0 00 00 */ li r5, 0 -/* 8037EDD0 0037BD30 4B FF FF 49 */ bl sub_8037ed18 +/* 8037EDD0 0037BD30 4B FF FF 49 */ bl LCLoadBlocks /* 8037EDD4 0037BD34 3B FF FF 80 */ addi r31, r31, -128 /* 8037EDD8 0037BD38 3B 9C 10 00 */ addi r28, r28, 0x1000 /* 8037EDDC 0037BD3C 3B BD 10 00 */ addi r29, r29, 0x1000 diff --git a/asm/Kyoto/Animation/DolphinCSkinRules.s b/asm/Kyoto/Animation/DolphinCSkinRules.s index 3684b249..8c7d7874 100644 --- a/asm/Kyoto/Animation/DolphinCSkinRules.s +++ b/asm/Kyoto/Animation/DolphinCSkinRules.s @@ -289,7 +289,7 @@ lbl_80353BD0: /* 80353BD0 00350B30 1C BF 00 0C */ mulli r5, r31, 0xc /* 80353BD4 00350B34 38 05 00 1F */ addi r0, r5, 0x1f /* 80353BD8 00350B38 54 05 00 34 */ rlwinm r5, r0, 0, 0, 0x1a -/* 80353BDC 00350B3C 48 02 B1 85 */ bl sub_8037ed60 +/* 80353BDC 00350B3C 48 02 B1 85 */ bl LCLoadData /* 80353BE0 00350B40 88 0D 9E 04 */ lbz r0, lbl_805A89C4@sda21(r13) /* 80353BE4 00350B44 80 6D AA 74 */ lwz r3, lbl_805A9634@sda21(r13) /* 80353BE8 00350B48 7C 00 00 34 */ cntlzw r0, r0 @@ -1126,4 +1126,3 @@ lbl_803D8300: # ROM: 0x3D5300 .asciz "??(??)" .balign 4 - diff --git a/include/dolphin/PPCArch.h b/include/dolphin/PPCArch.h index 4b123954..06992725 100644 --- a/include/dolphin/PPCArch.h +++ b/include/dolphin/PPCArch.h @@ -6,6 +6,35 @@ #ifdef __cplusplus extern "C" { #endif +#define MSR_IR 0x00000020 +#define MSR_DR 0x00000010 +#define L2CR_L2E 0x80000000 +#define L2CR_L2PE 0x40000000 +#define L2CR_L2SIZ_256K 0x10000000 +#define L2CR_L2SIZ_512K 0x20000000 +#define L2CR_L2SIZ_1M 0x30000000 +#define L2CR_L2CLK_1_0 0x02000000 +#define L2CR_L2CLK_1_5 0x04000000 +#define L2CR_L2CLK_2_0 0x08000000 +#define L2CR_L2CLK_2_5 0x0A000000 +#define L2CR_L2CLK_3_0 0x0C000000 +#define L2CR_RAM_FLOW_THRU_BURST 0x00000000 +#define L2CR_RAM_PIPELINE_BURST 0x01000000 +#define L2CR_RAM_PIPELINE_LATE 0x01800000 +#define L2CR_L2I 0x00200000 + +#define SRR1_DMA_BIT 0x00200000 +#define SRR1_L2DP_BIT 0x00100000 +#define HID0_ICE 0x00008000 +#define HID0_DCE 0x00004000 +#define HID2_DCHERR 0x00800000 +#define HID2_DNCERR 0x00400000 +#define HID2_DCMERR 0x00200000 +#define HID2_DQOERR 0x00100000 +#define HID2_DCHEE 0x00080000 +#define HID2_DNCEE 0x00040000 +#define HID2_DCMEE 0x00020000 +#define HID2_DQOEE 0x00010000 u32 PPCMfmsr(); void PPCMtmsr(u32 newMSR); diff --git a/include/dolphin/db.h b/include/dolphin/db.h index 5372190f..08851a4a 100644 --- a/include/dolphin/db.h +++ b/include/dolphin/db.h @@ -22,6 +22,7 @@ extern DBInterface* __DBInterface; void DBInit(void); void DBInitComm(int* inputFlagPtr, int* mtrCallback); static void __DBExceptionDestination(void); +void DBPrintf(char* format, ...); #ifdef __cplusplus } diff --git a/include/dolphin/os/OSCache.h b/include/dolphin/os/OSCache.h index 97035925..20709d2f 100644 --- a/include/dolphin/os/OSCache.h +++ b/include/dolphin/os/OSCache.h @@ -7,9 +7,27 @@ extern "C" { #endif +void DCInvalidateRange(void* addr, u32 nBytes); void DCFlushRange(void* addr, u32 nBytes); +void DCStoreRange(void* addr, u32 nBytes); +void DCFlushRangeNoSync(void* addr, u32 nBytes); +void DCStoreRangeNoSync(void* addr, u32 nBytes); +void DCZeroRange(void* addr, u32 nBytes); +void DCTouchRange(void* addr, u32 nBytes); +void ICInvalidateRange(void* addr, u32 nBytes); + +#define LC_BASE_PREFIX 0xE000 +#define LC_BASE (LC_BASE_PREFIX << 16) void LCEnable(); +void LCDisable(void); +void LCLoadBlocks(void* destTag, void* srcAddr, u32 numBlocks); +void LCStoreBlocks(void* destAddr, void* srcTag, u32 numBlocks); +u32 LCLoadData(void* destAddr, void* srcAddr, u32 nBytes); +u32 LCStoreData(void* destAddr, void* srcAddr, u32 nBytes); +u32 LCQueueLength(void); +void LCQueueWait(u32 len); +void LCFlushQueue(void); #ifdef __cplusplus } diff --git a/include/dolphin/os/OSError.h b/include/dolphin/os/OSError.h index 2f795a50..526e3ac9 100644 --- a/include/dolphin/os/OSError.h +++ b/include/dolphin/os/OSError.h @@ -7,6 +7,26 @@ extern "C" { #endif +#define OS_ERROR_SYSTEM_RESET 0 +#define OS_ERROR_MACHINE_CHECK 1 +#define OS_ERROR_DSI 2 +#define OS_ERROR_ISI 3 +#define OS_ERROR_EXTERNAL_INTERRUPT 4 +#define OS_ERROR_ALIGNMENT 5 +#define OS_ERROR_PROGRAM 6 +#define OS_ERROR_FLOATING_POINT 7 +#define OS_ERROR_DECREMENTER 8 +#define OS_ERROR_SYSTEM_CALL 9 +#define OS_ERROR_TRACE 10 +#define OS_ERROR_PERFORMACE_MONITOR 11 +#define OS_ERROR_BREAKPOINT 12 +#define OS_ERROR_SYSTEM_INTERRUPT 13 +#define OS_ERROR_THERMAL_INTERRUPT 14 +#define OS_ERROR_PROTECTION 15 +#define OS_ERROR_FPE 16 + +#define OS_ERROR_MAX (OS_ERROR_FPE + 1) + typedef u16 OSError; typedef void OSErrorHandler(OSError, OSContext* context, ...); diff --git a/obj_files.mk b/obj_files.mk index a2927ebf..3fd2bda5 100644 --- a/obj_files.mk +++ b/obj_files.mk @@ -692,7 +692,7 @@ OS_FILES :=\ $(BUILD_DIR)/asm/Dolphin/os/OSAlarm.o\ $(BUILD_DIR)/asm/Dolphin/os/OSArena.o\ $(BUILD_DIR)/src/Dolphin/os/OSAudioSystem.ep.o\ - $(BUILD_DIR)/asm/Dolphin/os/OSCache.o\ + $(BUILD_DIR)/src/Dolphin/os/OSCache.ep.o\ $(BUILD_DIR)/asm/Dolphin/os/OSContext.o\ $(BUILD_DIR)/asm/Dolphin/os/OSError.o\ $(BUILD_DIR)/asm/Dolphin/os/OSFatal.o\ diff --git a/src/Dolphin/PPCArch.c b/src/Dolphin/PPCArch.c index 56a7cb4e..173c6853 100644 --- a/src/Dolphin/PPCArch.c +++ b/src/Dolphin/PPCArch.c @@ -546,7 +546,7 @@ void PPCDisableSpeculation (void) asm void PPCSetFpIEEEMode(void) { nofralloc - mtfsb0 4*cr7+1 + mtfsb0 4*7+1 blr } /* @@ -557,7 +557,7 @@ asm void PPCSetFpIEEEMode(void) asm void PPCSetFpNonIEEEMode (void) { nofralloc - mtfsb1 4*cr7+1 + mtfsb1 4*7+1 blr } // clang-format on diff --git a/src/Dolphin/os/OSCache.c b/src/Dolphin/os/OSCache.c new file mode 100644 index 00000000..ff7590d0 --- /dev/null +++ b/src/Dolphin/os/OSCache.c @@ -0,0 +1,427 @@ +#include "dolphin/PPCArch.h" +#include "dolphin/os.h" + +// Can't use this due to weird condition register issues +//#include "asm_types.h" +#define HID2 920 + +#include "dolphin/db.h" + +/* clang-format off */ +asm void DCEnable() { + nofralloc + sync + mfspr r3, HID0 + ori r3, r3, 0x4000 + mtspr HID0, r3 + blr +} + +asm void DCInvalidateRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbi r0, addr + addi addr, addr, 32 + bdnz @1 + blr +} + + +asm void DCFlushRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbf r0, addr + addi addr, addr, 32 + bdnz @1 + sc + blr +} + +asm void DCStoreRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbst r0, addr + addi addr, addr, 32 + bdnz @1 + sc + + blr +} + +asm void DCFlushRangeNoSync(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbf r0, addr + addi addr, addr, 32 + bdnz @1 + blr +} + + +asm void DCStoreRangeNoSync(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbst r0, addr + addi addr, addr, 32 + bdnz @1 + + blr +} + +asm void DCZeroRange(register void* addr, register u32 nBytes) { + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + dcbz r0, addr + addi addr, addr, 32 + bdnz @1 + + blr +} + + +asm void ICInvalidateRange(register void* addr, register u32 nBytes) { + nofralloc + nofralloc + cmplwi nBytes, 0 + blelr + clrlwi r5, addr, 27 + add nBytes, nBytes, r5 + addi nBytes, nBytes, 31 + srwi nBytes, nBytes, 5 + mtctr nBytes + +@1 + icbi r0, addr + addi addr, addr, 32 + bdnz @1 + sync + isync + + blr +} + + +asm void ICFlashInvalidate() { + nofralloc + mfspr r3, HID0 + ori r3, r3, 0x800 + mtspr HID0, r3 + blr +} + +asm void ICEnable() { + nofralloc + isync + mfspr r3, HID0 + ori r3, r3, 0x8000 + mtspr HID0, r3 + blr +} + +#define LC_LINES 512 +#define CACHE_LINES 1024 + +asm void __LCEnable() { + nofralloc + mfmsr r5 + ori r5, r5, 0x1000 + mtmsr r5 + + lis r3, OS_CACHED_REGION_PREFIX + li r4, CACHE_LINES + mtctr r4 +_touchloop: + dcbt 0,r3 + dcbst 0,r3 + addi r3,r3,32 + bdnz _touchloop + mfspr r4, HID2 + oris r4, r4, 0x100F + mtspr HID2, r4 + + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + lis r3, LC_BASE_PREFIX + ori r3, r3, 0x0002 + mtspr DBAT3L, r3 + ori r3, r3, 0x01fe + mtspr DBAT3U, r3 + isync + lis r3, LC_BASE_PREFIX + li r6, LC_LINES + mtctr r6 + li r6, 0 + +_lockloop: + dcbz_l r6, r3 + addi r3, r3, 32 + bdnz+ _lockloop + + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + + blr +} + +void LCEnable() { + BOOL enabled; + + enabled = OSDisableInterrupts(); + __LCEnable(); + OSRestoreInterrupts(enabled); +} + + +asm void LCDisable() { + nofralloc + lis r3, LC_BASE_PREFIX + li r4, LC_LINES + mtctr r4 +@1 + dcbi r0, r3 + addi r3, r3, 32 + bdnz @1 + mfspr r4, HID2 + rlwinm r4, r4, 0, 4, 2 + mtspr HID2, r4 + blr +} + + +asm void LCLoadBlocks(register void* destTag, register void* srcAddr, register u32 numBlocks) { + nofralloc + rlwinm r6, numBlocks, 30, 27, 31 + rlwinm srcAddr, srcAddr, 0, 4, 31 + or r6, r6, srcAddr + mtspr DMA_U, r6 + rlwinm r6, numBlocks, 2, 28, 29 + or r6, r6, destTag + ori r6, r6, 0x12 + mtspr DMA_L, r6 + blr +} + +asm void LCStoreBlocks(register void* destAddr, register void* srcTag, register u32 numBlocks) { + nofralloc + rlwinm r6, numBlocks, 30, 27, 31 + rlwinm destAddr, destAddr, 0, 4, 31 + or r6, r6, destAddr + mtspr DMA_U, r6 + rlwinm r6, numBlocks, 2, 28, 29 + or r6, r6, srcTag + ori r6, r6, 0x2 + mtspr DMA_L, r6 + blr +} + +/* clang-format on */ + +u32 LCLoadData(register void* destAddr, register void* srcAddr, register u32 nBytes) { + u32 numBlocks = (nBytes + 31) / 32; + u32 numTransactions = (numBlocks + 128 - 1) / 128; + + while (numBlocks > 0) { + if (numBlocks < 128) { + LCLoadBlocks(destAddr, srcAddr, numBlocks); + numBlocks = 0; + } else { + LCLoadBlocks(destAddr, srcAddr, 0); + numBlocks -= 128; + destAddr = (void*)((u32)destAddr + 4096); + srcAddr = (void*)((u32)srcAddr + 4096); + } + } + + return numTransactions; +} +u32 LCStoreData(void* destAddr, void* srcAddr, u32 nBytes) { + u32 numBlocks = (nBytes + 31) / 32; + u32 numTransactions = (numBlocks + 128 - 1) / 128; + + while (numBlocks > 0) { + if (numBlocks < 128) { + LCStoreBlocks(destAddr, srcAddr, numBlocks); + numBlocks = 0; + } else { + LCStoreBlocks(destAddr, srcAddr, 0); + numBlocks -= 128; + destAddr = (void*)((u32)destAddr + 4096); + srcAddr = (void*)((u32)srcAddr + 4096); + } + } + + return numTransactions; +} + +/* clang-format off */ +asm u32 LCQueueLength() { + nofralloc + mfspr r4, HID2 + rlwinm r3, r4, 8, 28, 31 + blr +} + +asm void LCQueueWait(register u32 len) { + nofralloc + addi len, len, 1 +@1 + mfspr r4, HID2 + rlwinm r4, r4, 8, 28, 31 + cmpw cr2, r4, r3 + bge cr2, @1 + blr +} + +/* clang-format on */ +static void L2Disable(void) { + __sync(); + PPCMtl2cr(PPCMfl2cr() & ~0x80000000); + __sync(); +} + +void L2GlobalInvalidate(void) { + L2Disable(); + PPCMtl2cr(PPCMfl2cr() | 0x00200000); + while (PPCMfl2cr() & 0x00000001u) + ; + PPCMtl2cr(PPCMfl2cr() & ~0x00200000); + while (PPCMfl2cr() & 0x00000001u) { + DBPrintf(">>> L2 INVALIDATE : SHOULD NEVER HAPPEN\n"); + } +} + +static void L2Init(void) { + u32 oldMSR; + oldMSR = PPCMfmsr(); + __sync(); + PPCMtmsr(MSR_IR | MSR_DR); + __sync(); + L2Disable(); + L2GlobalInvalidate(); + PPCMtmsr(oldMSR); +} + +void L2Enable(void) { PPCMtl2cr((PPCMfl2cr() | L2CR_L2E) & ~L2CR_L2I); } + +void DMAErrorHandler(OSError error, OSContext* context, ...) { + u32 hid2 = PPCMfhid2(); + + OSReport("Machine check received\n"); + OSReport("HID2 = 0x%x SRR1 = 0x%x\n", hid2, context->srr1); + if (!(hid2 & (HID2_DCHERR | HID2_DNCERR | HID2_DCMERR | HID2_DQOERR)) || + !(context->srr1 & SRR1_DMA_BIT)) { + OSReport("Machine check was not DMA/locked cache related\n"); + OSDumpContext(context); + PPCHalt(); + } + + OSReport("DMAErrorHandler(): An error occurred while processing DMA.\n"); + OSReport("The following errors have been detected and cleared :\n"); + + if (hid2 & HID2_DCHERR) { + OSReport("\t- Requested a locked cache tag that was already in the cache\n"); + } + + if (hid2 & HID2_DNCERR) { + OSReport("\t- DMA attempted to access normal cache\n"); + } + + if (hid2 & HID2_DCMERR) { + OSReport("\t- DMA missed in data cache\n"); + } + + if (hid2 & HID2_DQOERR) { + OSReport("\t- DMA queue overflowed\n"); + } + + // write hid2 back to clear the error bits + PPCMthid2(hid2); +} + +void __OSCacheInit() { + if (!(PPCMfhid0() & HID0_ICE)) { + ICEnable(); + DBPrintf("L1 i-caches initialized\n"); + } + if (!(PPCMfhid0() & HID0_DCE)) { + DCEnable(); + DBPrintf("L1 d-caches initialized\n"); + } + + if (!(PPCMfl2cr() & L2CR_L2E)) { + L2Init(); + L2Enable(); + DBPrintf("L2 cache initialized\n"); + } + + OSSetErrorHandler(OS_ERROR_MACHINE_CHECK, DMAErrorHandler); + DBPrintf("Locked cache machine check handler installed\n"); +}