diff --git a/dll/kernel32/processthreadsapi.cpp b/dll/kernel32/processthreadsapi.cpp index 9121e47..7fd25ee 100644 --- a/dll/kernel32/processthreadsapi.cpp +++ b/dll/kernel32/processthreadsapi.cpp @@ -110,7 +110,7 @@ void threadCleanup(void *param) { } g_currentThreadObject = nullptr; wibo::notifyDllThreadDetach(); - currentThreadTeb = nullptr; + wibo::uninstallTebForCurrentThread(); // TODO: mark mutexes owned by this thread as abandoned obj->cv.notify_all(); obj->notifyWaiters(false); diff --git a/src/macros.S b/src/macros.S index 4edc759..d8d8613 100644 --- a/src/macros.S +++ b/src/macros.S @@ -34,31 +34,32 @@ #ifdef __x86_64__ .macro LJMP32 teb_reg -#ifdef __APPLE__ -#define m1632 m1632_\@ +#define m64to32 m64to32_\@ .data -m1632: +m64to32: .long 1f # 32-bit code offset .long 0 # 32-bit code segment (filled in at runtime) .text mov r10w, word ptr [\teb_reg+TEB_CS_SEL] - mov word ptr [rip+m1632+4], r10w - jmp fword ptr [rip+m1632] -#else - jmp fword ptr [rip] # far jump into 32-bit code - .long 1f # 32-bit code offset - .word CS_32 # 32-bit code segment -#endif + mov word ptr [rip+m64to32+4], r10w + jmp fword ptr [rip+m64to32] .code32 1: endbr32 .endm .macro LJMP64 teb_reg -// Annoyingly, we can't assemble this in Intel syntax -.att_syntax prefix - ljmp $CS_64, $1f -.intel_syntax noprefix +#define m32to64 m32to64_\@ +.data +m32to64: + .long 1f # 64-bit code offset + .long 0 # 64-bit code segment (filled in at runtime) +.text + push eax + mov ax, word ptr [\teb_reg+TEB_HOST_CS_SEL] + mov word ptr [m32to64+4], ax + pop eax + jmp fword ptr [m32to64] .code64 1: endbr64 diff --git a/src/macros.h b/src/macros.h index 1d4062f..f3ff1a0 100644 --- a/src/macros.h +++ b/src/macros.h @@ -12,22 +12,13 @@ #ifdef __x86_64__ -#define TEB_CS_SEL 0xf9c // CodeSelector -#define TEB_DS_SEL 0xf9e // DataSelector -#define TEB_SP 0xfa0 // CurrentStackPointer -#define TEB_FSBASE 0xfa8 // HostFsBase -#define TEB_GSBASE 0xfb0 // HostGsBase +#define TEB_CS_SEL 0xf9c // CodeSelector 
+#define TEB_DS_SEL 0xf9e // DataSelector +#define TEB_SP 0xfa0 // CurrentStackPointer +#define TEB_FSBASE 0xfa8 // HostFsBase +#define TEB_GSBASE 0xfb0 // HostGsBase #define TEB_HAS_FSGSBASE 0xfb8 // HasFsGsBase - -#ifdef __linux__ -#define CS_32 0x23 // 32-bit code segment (Linux) -#define CS_64 0x33 // 64-bit code segment (Linux) -#define DS_32 0x2b // 32-bit data segment (Linux) -#elif defined(__APPLE__) -#define CS_64 0x2b // 64-bit code segment (macOS) -#else -#error "Unsupported platform" -#endif +#define TEB_HOST_CS_SEL 0xfba // HostCodeSelector #endif diff --git a/src/main.cpp b/src/main.cpp index fa95bf7..e8a3259 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -333,7 +333,7 @@ int main(int argc, char **argv) { wibo::processPeb = peb; wibo::initializeTibStackInfo(tib); if (!wibo::installTibForCurrentThread(tib)) { - perror("Failed to install TIB for main thread"); + perror("Failed to setup x86 segments and TEB"); return 1; } diff --git a/src/setup.S b/src/setup.S index 7e34f0d..a3d7e37 100644 --- a/src/setup.S +++ b/src/setup.S @@ -8,16 +8,30 @@ #if defined(__x86_64__) && defined(__linux__) -# int tebThreadSetup64(int entryNumber, TEB *teb) -ASM_GLOBAL(tebThreadSetup64, @function) +# void installSelectors(TEB *teb) +# Installs ds/es selectors in 32-bit mode +ASM_GLOBAL(installSelectors, @function) + mov r8, rsp # save host stack + mov rdx, qword ptr [rdi+TEB_SP] # fetch guest stack + mov ax, cs # fetch host code segment selector + mov word ptr [rdi+TEB_HOST_CS_SEL], ax # store host code segment selector + mov ax, word ptr [rdi+TEB_DS_SEL] # user data segment selector + LJMP32 rdi # far jump into 32-bit code + mov ds, ax # setup data segment + mov es, ax # setup extra segment + mov esp, edx # switch to guest stack + LJMP64 edi # far jump into 64-bit code + mov rsp, r8 # switch to host stack + ret +ASM_END(installSelectors) + +# int setThreadArea64(int entryNumber, TEB *teb) +# Runs syscall SYS_set_thread_area in 32-bit mode +ASM_GLOBAL(setThreadArea64, 
@function) push rbx # save rbx mov r8, rsp # save host stack - rdfsbase r9 # read host FS base mov rdx, qword ptr [rsi+TEB_SP] # fetch guest stack LJMP32 rsi # far jump into 32-bit code - mov ax, 0x2b # user data segment (Linux) - mov ds, ax # setup data segment - mov es, ax # setup extra segment mov esp, edx # switch to guest stack sub esp, 0x10 # sizeof(user_desc) mov dword ptr [esp], edi # entry_number (arg 0) @@ -35,10 +49,9 @@ ASM_GLOBAL(tebThreadSetup64, @function) LJMP64 esi # far jump into 64-bit code cdqe # sign-extend eax to rax mov rsp, r8 # switch to host stack - wrfsbase r9 # restore host FS base pop rbx # restore rbx ret -ASM_END(tebThreadSetup64) +ASM_END(setThreadArea64) #endif // defined(__x86_64__) && defined(__linux__) diff --git a/src/setup_darwin.cpp b/src/setup_darwin.cpp index c0e1d3e..0ad4b73 100644 --- a/src/setup_darwin.cpp +++ b/src/setup_darwin.cpp @@ -98,8 +98,7 @@ bool initializeLdtBitmapLocked() { if (count > kMaxLdtEntries) { DEBUG_LOG("setup_darwin: i386_get_ldt returned too many entries (%d), truncating to %d\n", count, kMaxLdtEntries); - errno = ENOSPC; - return false; + count = kMaxLdtEntries; } for (int i = 0; i < count; ++i) { markLdtEntryUsed(i); diff --git a/src/setup_linux.cpp b/src/setup_linux.cpp index 4f0f514..ed932ac 100644 --- a/src/setup_linux.cpp +++ b/src/setup_linux.cpp @@ -1,20 +1,205 @@ #include "setup.h" #include "common.h" +#include "types.h" -#include +#include #include #include +#include +#include + namespace { std::mutex g_tebSetupMutex; -int g_entryNumber = -1; +int g_threadAreaEntry = -1; +#ifdef __x86_64__ +uint16_t g_codeSelector = 0; +uint16_t g_dataSelector = 0; +#endif +constexpr int kMaxLdtEntries = 8192; +constexpr int kBitsPerWord = 32; +std::array g_ldtBitmap{}; +bool g_ldtBitmapInitialized = false; +int g_ldtHint = 0; -constexpr uint16_t createSelector(int entryNumber) { +inline user_desc createLdtEntry(uint32_t entryNumber, uint32_t base, uint32_t size, bool code) { + user_desc desc; // 
NOLINT(cppcoreguidelines-pro-type-member-init) + // Must memset to zero to avoid uninitialized padding bytes + std::memset(&desc, 0, sizeof(desc)); + + desc.entry_number = entryNumber; + desc.base_addr = base; + + uint32_t limit; + if (size > 0xFFFFF) { + // Page granularity: limit is counted in 4 KiB pages + limit = (size - 1) >> 12; + desc.limit_in_pages = 1; + } else { + // Byte granularity + limit = size - 1; + desc.limit_in_pages = 0; + } + desc.limit = limit; + desc.seg_32bit = 1; + desc.contents = code ? MODIFY_LDT_CONTENTS_CODE : MODIFY_LDT_CONTENTS_DATA; + desc.read_exec_only = 0; + desc.seg_not_present = 0; + desc.useable = 1; + return desc; +} + +inline int modifyLdtRead(struct user_desc *entries, int bytes) { + return static_cast<int>(syscall(SYS_modify_ldt, 2, entries, bytes)); +} + +inline int modifyLdtWrite(const struct user_desc *desc) { + return static_cast<int>(syscall(SYS_modify_ldt, 1, desc, sizeof(*desc))); +} + +constexpr uint16_t createGdtSelector(int entryNumber) { return static_cast<uint16_t>((entryNumber << 3) | USER_PRIVILEGE); } +constexpr uint16_t createLdtSelector(int entryNumber) { + return static_cast<uint16_t>((entryNumber << 3) | 0x4 /* TI=1 */ | USER_PRIVILEGE); +} + +inline bool isLdtEntryValid(int entry) { return entry >= 0 && entry < kMaxLdtEntries; } + +inline void markLdtEntryUsed(int entry) { + if (!isLdtEntryValid(entry)) { + return; + } + g_ldtBitmap[entry / kBitsPerWord] |= (1u << (entry % kBitsPerWord)); +} + +inline void markLdtEntryFree(int entry) { + if (!isLdtEntryValid(entry)) { + return; + } + g_ldtBitmap[entry / kBitsPerWord] &= ~(1u << (entry % kBitsPerWord)); +} + +inline bool isLdtEntryUsed(int entry) { + if (!isLdtEntryValid(entry)) { + return true; + } + return (g_ldtBitmap[entry / kBitsPerWord] & (1u << (entry % kBitsPerWord))) != 0; +} + +void initializeLdtBitmapLocked() { + if (g_ldtBitmapInitialized) { + return; + } + g_ldtBitmapInitialized = true; + struct user_desc table[kMaxLdtEntries]; + std::memset(table, 0, sizeof(table)); + int 
bytes = modifyLdtRead(table, sizeof(table)); + if (bytes < 0) { + DEBUG_LOG("setup_linux: modify_ldt(read) failed during bitmap init (%s), assuming empty table\n", + strerror(errno)); + return; + } + int count = bytes / static_cast<int>(sizeof(user_desc)); + if (count > kMaxLdtEntries) { + DEBUG_LOG("setup_linux: modify_ldt(read) returned too many entries (%d), truncating to %d\n", count, + kMaxLdtEntries); + count = kMaxLdtEntries; + } + for (int i = 0; i < count; ++i) { + const user_desc &d = table[i]; + user_desc unused; // NOLINT(cppcoreguidelines-pro-type-member-init) + std::memset(&unused, 0, sizeof(user_desc)); + bool allZero = std::memcmp(&d, &unused, sizeof(user_desc)) == 0; + if (!allZero && !d.seg_not_present) { + markLdtEntryUsed(i); + } + } +} + +int allocateLdtEntryLocked() { + initializeLdtBitmapLocked(); + auto tryAllocate = [&](int start) -> int { + for (int entry = start; entry < kMaxLdtEntries; ++entry) { + if (!isLdtEntryUsed(entry)) { + markLdtEntryUsed(entry); + g_ldtHint = entry + 1; + if (g_ldtHint >= kMaxLdtEntries) { + g_ldtHint = 0; + } + DEBUG_LOG("setup_linux: Allocating LDT entry %d\n", entry); + return entry; + } + } + return -1; + }; + int entry = tryAllocate(g_ldtHint); + if (entry >= 0) { + return entry; + } + entry = tryAllocate(0); + if (entry >= 0) { + return entry; + } + errno = ENOSPC; + return -1; +} + +void freeLdtEntryLocked(int entryNumber) { + if (!g_ldtBitmapInitialized || !isLdtEntryValid(entryNumber)) { + return; + } + markLdtEntryFree(entryNumber); + if (entryNumber < g_ldtHint) { + g_ldtHint = entryNumber; + } +} + +#ifdef __x86_64__ +bool segmentSetupLocked(TEB *teb) { + // Create code LDT entry + if (g_codeSelector == 0) { + int entryNumber = allocateLdtEntryLocked(); + if (entryNumber < 0) { + return false; + } + user_desc codeLdt = createLdtEntry(entryNumber, 0, 0xFFFFFFFF, true); + int ret = modifyLdtWrite(&codeLdt); + if (ret == 0) { + g_codeSelector = createLdtSelector(entryNumber); + DEBUG_LOG("setup_linux: Code 
LDT selector 0x%x\n", g_codeSelector); + } else { + freeLdtEntryLocked(entryNumber); + DEBUG_LOG("setup_linux: Failed to create code LDT entry, trying default (0x23)\n"); + g_codeSelector = 0x23; + } + } + // Create data LDT entry + if (g_dataSelector == 0) { + int entryNumber = allocateLdtEntryLocked(); + if (entryNumber < 0) { + return false; + } + user_desc dataLdt = createLdtEntry(entryNumber, 0, 0xFFFFFFFF, false); + int ret = modifyLdtWrite(&dataLdt); + if (ret == 0) { + g_dataSelector = createLdtSelector(entryNumber); + DEBUG_LOG("setup_linux: Data LDT selector 0x%x\n", g_dataSelector); + } else { + freeLdtEntryLocked(entryNumber); + DEBUG_LOG("setup_linux: Failed to create data LDT entry, trying default (0x2b)\n"); + g_dataSelector = 0x2b; + } + } + teb->CodeSelector = g_codeSelector; + teb->DataSelector = g_dataSelector; + return true; +} +#endif + } // namespace #if defined(__x86_64__) @@ -22,72 +207,116 @@ constexpr uint16_t createSelector(int entryNumber) { #include // Implemented in setup.S -extern "C" int tebThreadSetup64(int entryNumber, TEB *teb); +extern "C" void installSelectors(TEB *teb); +extern "C" int setThreadArea64(int entryNumber, TEB *teb); bool tebThreadSetup(TEB *teb) { std::lock_guard guard(g_tebSetupMutex); - int ret = tebThreadSetup64(g_entryNumber, teb); - if (ret < 0) { - return false; - } - if (g_entryNumber != ret) { - g_entryNumber = ret; - DEBUG_LOG("set_thread_area: allocated entry=%d base=%p\n", g_entryNumber, teb); - } else { - DEBUG_LOG("set_thread_area: reused entry=%d base=%p\n", g_entryNumber, teb); - } - - teb->CurrentFsSelector = createSelector(ret); - teb->CurrentGsSelector = 0; // Check for FSBASE/GSBASE instruction support unsigned int regs[4]; - int cpuidMax = __get_cpuid_max(0, nullptr); + unsigned int cpuidMax = __get_cpuid_max(0, nullptr); if (cpuidMax >= 0x7 && __get_cpuid_count(0x7, 0, &regs[0], &regs[1], &regs[2], &regs[3])) { teb->HasFsGsBase = !!(regs[1] & 1); } DEBUG_LOG("setup_linux: FSBASE/GSBASE instruction support: 
%s\n", teb->HasFsGsBase ? "yes" : "no"); + // Create code and data LDT entries + if (!segmentSetupLocked(teb)) { + return false; + } + + // Install ds/es selectors + installSelectors(teb); + + if (g_threadAreaEntry != -2) { + int ret = setThreadArea64(g_threadAreaEntry, teb); + if (ret >= 0) { + if (g_threadAreaEntry != ret) { + g_threadAreaEntry = ret; + DEBUG_LOG("setup_linux: allocated thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb); + } else { + DEBUG_LOG("setup_linux: reused thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb); + } + teb->CurrentFsSelector = createGdtSelector(ret); + } else { + DEBUG_LOG("setup_linux: set_thread_area failed (%s), falling back to LDT\n", strerror(errno)); + g_threadAreaEntry = -2; // Don't bother trying again + } + } + if (teb->CurrentFsSelector == 0) { + int entryNumber = allocateLdtEntryLocked(); + if (entryNumber < 0) { + return false; + } + user_desc fsLdt = createLdtEntry(entryNumber, toGuestPtr(teb), sizeof(TEB), false); + int ret = modifyLdtWrite(&fsLdt); + if (ret != 0) { + freeLdtEntryLocked(entryNumber); + return false; + } + teb->CurrentFsSelector = createLdtSelector(entryNumber); + } + + DEBUG_LOG("setup_linux: Using FS selector 0x%x\n", teb->CurrentFsSelector); return true; } #elif defined(__i386__) -#include - bool tebThreadSetup(TEB *teb) { std::lock_guard guard(g_tebSetupMutex); - struct user_desc desc; // NOLINT(cppcoreguidelines-pro-type-member-init) - std::memset(&desc, 0, sizeof(desc)); - desc.entry_number = g_entryNumber; - desc.base_addr = reinterpret_cast(teb); - desc.limit = static_cast(sizeof(TEB) - 1); - desc.seg_32bit = 1; - desc.contents = 0; - desc.read_exec_only = 0; - desc.limit_in_pages = 0; - desc.seg_not_present = 0; - desc.useable = 1; - if (syscall(SYS_set_thread_area, &desc) != 0) { - return false; + if (g_threadAreaEntry != -2) { + struct user_desc desc; // NOLINT(cppcoreguidelines-pro-type-member-init) + std::memset(&desc, 0, sizeof(desc)); + desc.entry_number = 
g_threadAreaEntry; + desc.base_addr = reinterpret_cast(teb); + desc.limit = static_cast(sizeof(TEB) - 1); + desc.seg_32bit = 1; + desc.contents = 0; + desc.read_exec_only = 0; + desc.limit_in_pages = 0; + desc.seg_not_present = 0; + desc.useable = 1; + if (syscall(SYS_set_thread_area, &desc) == 0) { + if (g_threadAreaEntry != static_cast(desc.entry_number)) { + g_threadAreaEntry = static_cast(desc.entry_number); + DEBUG_LOG("setup_linux: allocated thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb); + } else { + DEBUG_LOG("setup_linux: reused thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb); + } + teb->CurrentFsSelector = createGdtSelector(desc.entry_number); + } else { + DEBUG_LOG("setup_linux: set_thread_area failed (%s), falling back to LDT\n", strerror(errno)); + g_threadAreaEntry = -2; // Don't bother trying again + } } - if (g_entryNumber != static_cast(desc.entry_number)) { - g_entryNumber = static_cast(desc.entry_number); - DEBUG_LOG("setup_linux: allocated GDT entry=%d base=%p\n", g_entryNumber, teb); - } else { - DEBUG_LOG("setup_linux: reused GDT entry=%d base=%p\n", g_entryNumber, teb); + if (teb->CurrentFsSelector == 0) { + int entryNumber = allocateLdtEntryLocked(); + if (entryNumber < 0) { + return false; + } + user_desc fsLdt = createLdtEntry(entryNumber, toGuestPtr(teb), sizeof(TEB), false); + int ret = modifyLdtWrite(&fsLdt); + if (ret != 0) { + freeLdtEntryLocked(entryNumber); + return false; + } + teb->CurrentFsSelector = createLdtSelector(entryNumber); } - teb->CurrentFsSelector = createSelector(desc.entry_number); - teb->CurrentGsSelector = 0; + DEBUG_LOG("setup_linux: Using FS selector 0x%x\n", teb->CurrentFsSelector); return true; } #endif bool tebThreadTeardown(TEB *teb) { - (void)teb; - // no-op on Linux + if (teb->CurrentFsSelector & 0x4 /* TI=1 */) { + std::lock_guard guard(g_tebSetupMutex); + freeLdtEntryLocked(teb->CurrentFsSelector >> 3); + } + teb->CurrentFsSelector = 0; return true; } diff --git a/src/types.h 
b/src/types.h index 73612bc..e73e409 100644 --- a/src/types.h +++ b/src/types.h @@ -552,6 +552,7 @@ typedef struct _TEB { void *HostFsBase; void *HostGsBase; bool HasFsGsBase; + WORD HostCodeSelector; #endif } TEB; typedef GUEST_PTR PTEB; @@ -581,6 +582,9 @@ static_assert(offsetof(TEB, HostGsBase) == TEB_GSBASE); #ifdef TEB_HAS_FSGSBASE static_assert(offsetof(TEB, HasFsGsBase) == TEB_HAS_FSGSBASE); #endif +#ifdef TEB_HOST_CS_SEL +static_assert(offsetof(TEB, HostCodeSelector) == TEB_HOST_CS_SEL); +#endif typedef struct _MEMORY_BASIC_INFORMATION { GUEST_PTR BaseAddress; diff --git a/test/test_srw_lock.c b/test/test_srw_lock.c index 20e1ef7..4d96812 100644 --- a/test/test_srw_lock.c +++ b/test/test_srw_lock.c @@ -191,8 +191,8 @@ static void test_try_acquire(void) { int main(void) { test_shared_readers(); - // test_exclusive_blocks_shared(); - // test_shared_then_exclusive(); - // test_try_acquire(); + test_exclusive_blocks_shared(); + test_shared_then_exclusive(); + test_try_acquire(); return 0; }