setup_linux: Improve segments init & add modify_ldt fallback

This commit is contained in:
2025-11-11 00:24:42 -07:00
parent 6c4f332705
commit f18f2a929d
9 changed files with 323 additions and 86 deletions

View File

@@ -110,7 +110,7 @@ void threadCleanup(void *param) {
}
g_currentThreadObject = nullptr;
wibo::notifyDllThreadDetach();
currentThreadTeb = nullptr;
wibo::uninstallTebForCurrentThread();
// TODO: mark mutexes owned by this thread as abandoned
obj->cv.notify_all();
obj->notifyWaiters(false);

View File

@@ -34,31 +34,32 @@
#ifdef __x86_64__
.macro LJMP32 teb_reg
#ifdef __APPLE__
#define m1632 m1632_\@
#define m64to32 m64to32_\@
.data
m1632:
m64to32:
.long 1f # 32-bit code offset
.long 0 # 32-bit code segment (filled in at runtime)
.text
mov r10w, word ptr [\teb_reg+TEB_CS_SEL]
mov word ptr [rip+m1632+4], r10w
jmp fword ptr [rip+m1632]
#else
jmp fword ptr [rip] # far jump into 32-bit code
.long 1f # 32-bit code offset
.word CS_32 # 32-bit code segment
#endif
mov word ptr [rip+m64to32+4], r10w
jmp fword ptr [rip+m64to32]
.code32
1:
endbr32
.endm
.macro LJMP64 teb_reg
// Annoyingly, we can't assemble this in Intel syntax
.att_syntax prefix
ljmp $CS_64, $1f
.intel_syntax noprefix
#define m32to64 m32to64_\@
.data
m32to64:
.long 1f # 64-bit code offset
.long 0 # 64-bit code segment (filled in at runtime)
.text
push eax
mov ax, word ptr [\teb_reg+TEB_HOST_CS_SEL]
mov word ptr [m32to64+4], ax
pop eax
jmp fword ptr [m32to64]
.code64
1:
endbr64

View File

@@ -12,22 +12,13 @@
#ifdef __x86_64__
#define TEB_CS_SEL 0xf9c // CodeSelector
#define TEB_DS_SEL 0xf9e // DataSelector
#define TEB_SP 0xfa0 // CurrentStackPointer
#define TEB_FSBASE 0xfa8 // HostFsBase
#define TEB_GSBASE 0xfb0 // HostGsBase
#define TEB_CS_SEL 0xf9c // CodeSelector
#define TEB_DS_SEL 0xf9e // DataSelector
#define TEB_SP 0xfa0 // CurrentStackPointer
#define TEB_FSBASE 0xfa8 // HostFsBase
#define TEB_GSBASE 0xfb0 // HostGsBase
#define TEB_HAS_FSGSBASE 0xfb8 // HasFsGsBase
#ifdef __linux__
#define CS_32 0x23 // 32-bit code segment (Linux)
#define CS_64 0x33 // 64-bit code segment (Linux)
#define DS_32 0x2b // 32-bit data segment (Linux)
#elif defined(__APPLE__)
#define CS_64 0x2b // 64-bit code segment (macOS)
#else
#error "Unsupported platform"
#endif
#define TEB_HOST_CS_SEL 0xfba // HostCodeSelector
#endif

View File

@@ -333,7 +333,7 @@ int main(int argc, char **argv) {
wibo::processPeb = peb;
wibo::initializeTibStackInfo(tib);
if (!wibo::installTibForCurrentThread(tib)) {
perror("Failed to install TIB for main thread");
perror("Failed to setup x86 segments and TEB");
return 1;
}

View File

@@ -8,16 +8,30 @@
#if defined(__x86_64__) && defined(__linux__)
# int tebThreadSetup64(int entryNumber, TEB *teb)
ASM_GLOBAL(tebThreadSetup64, @function)
# void installSelectors(TEB *teb)
# Installs ds/es selectors in 32-bit mode
ASM_GLOBAL(installSelectors, @function)
mov r8, rsp # save host stack
mov rdx, qword ptr [rdi+TEB_SP] # fetch guest stack
mov ax, cs # fetch host code segment selector
mov word ptr [rdi+TEB_HOST_CS_SEL], ax # store host code segment selector
mov ax, word ptr [rdi+TEB_DS_SEL] # user data segment selector
LJMP32 rdi # far jump into 32-bit code
mov ds, ax # setup data segment
mov es, ax # setup extra segment
mov esp, edx # switch to guest stack
LJMP64 edi # far jump into 64-bit code
mov rsp, r8 # switch to host stack
ret
ASM_END(installSelectors)
# int setThreadArea64(int entryNumber, TEB *teb)
# Runs syscall SYS_set_thread_area in 32-bit mode
ASM_GLOBAL(setThreadArea64, @function)
push rbx # save rbx
mov r8, rsp # save host stack
rdfsbase r9 # read host FS base
mov rdx, qword ptr [rsi+TEB_SP] # fetch guest stack
LJMP32 rsi # far jump into 32-bit code
mov ax, 0x2b # user data segment (Linux)
mov ds, ax # setup data segment
mov es, ax # setup extra segment
mov esp, edx # switch to guest stack
sub esp, 0x10 # sizeof(user_desc)
mov dword ptr [esp], edi # entry_number (arg 0)
@@ -35,10 +49,9 @@ ASM_GLOBAL(tebThreadSetup64, @function)
LJMP64 esi # far jump into 64-bit code
cdqe # sign-extend eax to rax
mov rsp, r8 # switch to host stack
wrfsbase r9 # restore host FS base
pop rbx # restore rbx
ret
ASM_END(tebThreadSetup64)
ASM_END(setThreadArea64)
#endif // defined(__x86_64__) && defined(__linux__)

View File

@@ -98,8 +98,7 @@ bool initializeLdtBitmapLocked() {
if (count > kMaxLdtEntries) {
DEBUG_LOG("setup_darwin: i386_get_ldt returned too many entries (%d), truncating to %d\n", count,
kMaxLdtEntries);
errno = ENOSPC;
return false;
count = kMaxLdtEntries;
}
for (int i = 0; i < count; ++i) {
markLdtEntryUsed(i);

View File

@@ -1,20 +1,205 @@
#include "setup.h"
#include "common.h"
#include "types.h"
#include <asm/ldt.h>
#include <array>
#include <cstring>
#include <mutex>
#include <asm/ldt.h>
#include <sys/syscall.h>
namespace {
std::mutex g_tebSetupMutex;
int g_entryNumber = -1;
int g_threadAreaEntry = -1;
#ifdef __x86_64__
uint16_t g_codeSelector = 0;
uint16_t g_dataSelector = 0;
#endif
constexpr int kMaxLdtEntries = 8192;
constexpr int kBitsPerWord = 32;
std::array<uint32_t, kMaxLdtEntries / kBitsPerWord> g_ldtBitmap{};
bool g_ldtBitmapInitialized = false;
int g_ldtHint = 0;
constexpr uint16_t createSelector(int entryNumber) {
/**
 * Builds a zero-initialised user_desc for a 32-bit LDT segment.
 *
 * @param entryNumber LDT slot the descriptor refers to.
 * @param base        Base address stored in the descriptor.
 * @param size        Segment size in bytes. The descriptor stores an inclusive limit
 *                    (size - 1). Sizes of 1 MiB or more do not fit the byte-granular
 *                    limit and are expressed in 4 KiB pages instead. A size of 0 is
 *                    clamped to limit 0 rather than being allowed to wrap around to
 *                    0xFFFFFFFF.
 * @param code        true selects MODIFY_LDT_CONTENTS_CODE, false MODIFY_LDT_CONTENTS_DATA.
 * @return The populated descriptor (not yet written to the LDT).
 */
inline user_desc createLdtEntry(uint32_t entryNumber, uint32_t base, uint32_t size, bool code) {
	user_desc desc; // NOLINT(cppcoreguidelines-pro-type-member-init)
	// Must memset to zero to avoid uninitialized padding bytes
	std::memset(&desc, 0, sizeof(desc));
	desc.entry_number = entryNumber;
	desc.base_addr = base;

	// Inclusive last offset; guard the subtraction so size == 0 cannot underflow.
	const uint32_t lastOffset = size != 0 ? size - 1 : 0;
	if (size > 0xFFFFF) {
		// Page granularity: the limit counts 4 KiB pages
		desc.limit = lastOffset >> 12;
		desc.limit_in_pages = 1;
	} else {
		// Byte granularity
		desc.limit = lastOffset;
		desc.limit_in_pages = 0;
	}

	desc.seg_32bit = 1;
	desc.contents = code ? MODIFY_LDT_CONTENTS_CODE : MODIFY_LDT_CONTENTS_DATA;
	desc.read_exec_only = 0;
	desc.seg_not_present = 0;
	desc.useable = 1;
	return desc;
}
// Issues the raw modify_ldt syscall with func = 2, handing it `entries` as the output
// buffer and `bytes` as the buffer size. The syscall result is narrowed to int.
// NOTE(review): modify_ldt(2) describes func 2 as "reads zeros" and func 0 as the call
// that reads the live LDT (as raw 8-byte descriptors, not user_desc) — confirm that
// func 2 is really what is wanted here.
inline int modifyLdtRead(struct user_desc *entries, int bytes) {
	const long result = syscall(SYS_modify_ldt, 2, entries, bytes);
	return static_cast<int>(result);
}
// Issues the raw modify_ldt syscall with func = 1 to install the descriptor `desc`
// points at. The syscall result is narrowed to int; callers in this file treat 0 as
// success.
inline int modifyLdtWrite(const struct user_desc *desc) {
	const long result = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));
	return static_cast<int>(result);
}
// Builds a segment selector for the given descriptor index: the index is shifted into
// bits 3 and up and OR-ed with USER_PRIVILEGE. The table-indicator bit (0x4) is left
// clear.
constexpr uint16_t createGdtSelector(int entryNumber) {
	const auto selectorBits = (entryNumber << 3) | USER_PRIVILEGE;
	return static_cast<uint16_t>(selectorBits);
}
// Builds a segment selector for the given descriptor index with the table-indicator
// bit set (TI=1): index in bits 3 and up, OR-ed with 0x4 and USER_PRIVILEGE.
constexpr uint16_t createLdtSelector(int entryNumber) {
	const auto indexBits = entryNumber << 3;
	const auto selectorBits = indexBits | 0x4 /* TI=1 */ | USER_PRIVILEGE;
	return static_cast<uint16_t>(selectorBits);
}
// True when `entry` is a usable index into the bitmap, i.e. lies in [0, kMaxLdtEntries).
inline bool isLdtEntryValid(int entry) {
	const bool outOfRange = entry < 0 || entry >= kMaxLdtEntries;
	return !outOfRange;
}
// Sets the bitmap bit for `entry`. Out-of-range indices are ignored.
inline void markLdtEntryUsed(int entry) {
	if (isLdtEntryValid(entry)) {
		const uint32_t mask = 1u << (entry % kBitsPerWord);
		g_ldtBitmap[entry / kBitsPerWord] |= mask;
	}
}
// Clears the bitmap bit for `entry`. Out-of-range indices are ignored.
inline void markLdtEntryFree(int entry) {
	if (isLdtEntryValid(entry)) {
		const uint32_t mask = 1u << (entry % kBitsPerWord);
		g_ldtBitmap[entry / kBitsPerWord] &= ~mask;
	}
}
// Reports whether the bitmap bit for `entry` is set. Out-of-range indices are reported
// as used, so they can never be picked by an allocator scanning for a free slot.
inline bool isLdtEntryUsed(int entry) {
	if (!isLdtEntryValid(entry)) {
		return true;
	}
	const uint32_t word = g_ldtBitmap[entry / kBitsPerWord];
	const uint32_t mask = 1u << (entry % kBitsPerWord);
	return (word & mask) != 0;
}
void initializeLdtBitmapLocked() {
if (g_ldtBitmapInitialized) {
return;
}
g_ldtBitmapInitialized = true;
struct user_desc table[kMaxLdtEntries];
std::memset(table, 0, sizeof(table));
int bytes = modifyLdtRead(table, sizeof(table));
if (bytes < 0) {
DEBUG_LOG("setup_linux: modify_ldt(read) failed during bitmap init (%s), assuming empty table\n",
strerror(errno));
return;
}
int count = bytes / static_cast<int>(sizeof(user_desc));
if (count > kMaxLdtEntries) {
DEBUG_LOG("setup_linux: modify_ldt(read) returned too many entries (%d), truncating to %d\n", count,
kMaxLdtEntries);
count = kMaxLdtEntries;
}
for (int i = 0; i < count; ++i) {
const user_desc &d = table[i];
user_desc unused; // NOLINT(cppcoreguidelines-pro-type-member-init)
std::memset(&unused, 0, sizeof(user_desc));
bool allZero = std::memcmp(&d, &unused, sizeof(user_desc)) == 0;
if (!allZero && !d.seg_not_present) {
markLdtEntryUsed(i);
}
}
}
/**
 * Reserves the first free LDT slot in the bitmap, searching from g_ldtHint and wrapping
 * around to the start of the table.
 *
 * @return The reserved slot index, or -1 with errno set to ENOSPC when every slot is
 *         marked used.
 */
int allocateLdtEntryLocked() {
	initializeLdtBitmapLocked();

	const int start = g_ldtHint;
	for (int offset = 0; offset < kMaxLdtEntries; ++offset) {
		// Visits start..kMaxLdtEntries-1 first, then 0..start-1.
		const int candidate = (start + offset) % kMaxLdtEntries;
		if (isLdtEntryUsed(candidate)) {
			continue;
		}
		markLdtEntryUsed(candidate);
		// Resume the next search right after this slot, wrapping at the table end.
		const int next = candidate + 1;
		g_ldtHint = next >= kMaxLdtEntries ? 0 : next;
		DEBUG_LOG("setup_linux: Allocating LDT entry %d\n", candidate);
		return candidate;
	}

	errno = ENOSPC;
	return -1;
}
// Returns `entryNumber` to the free pool in the bitmap and pulls the search hint back
// to it when it lies below the current hint. Does nothing if the bitmap was never
// initialised or the index is out of range. Only the bitmap is updated here; no
// syscall is made.
void freeLdtEntryLocked(int entryNumber) {
	const bool releasable = g_ldtBitmapInitialized && isLdtEntryValid(entryNumber);
	if (releasable) {
		markLdtEntryFree(entryNumber);
		if (g_ldtHint > entryNumber) {
			g_ldtHint = entryNumber;
		}
	}
}
#ifdef __x86_64__
/**
 * Ensures the process-wide code and data selectors exist and copies them into `teb`.
 *
 * Each selector is created at most once (while its global is still 0): an LDT slot is
 * reserved and a descriptor with base 0 and size 0xFFFFFFFF is written to it. If the
 * write fails, the slot is released and the hard-coded default selector (0x23 for
 * code, 0x2b for data) is used instead.
 *
 * @return false only when no LDT slot could be reserved; true otherwise.
 */
bool segmentSetupLocked(TEB *teb) {
	// Code segment
	if (g_codeSelector == 0) {
		const int slot = allocateLdtEntryLocked();
		if (slot < 0) {
			return false;
		}
		const user_desc descriptor = createLdtEntry(slot, 0, 0xFFFFFFFF, true);
		if (modifyLdtWrite(&descriptor) != 0) {
			freeLdtEntryLocked(slot);
			DEBUG_LOG("setup_linux: Failed to create code LDT entry, trying default (0x23)\n");
			g_codeSelector = 0x23;
		} else {
			g_codeSelector = createLdtSelector(slot);
			DEBUG_LOG("setup_linux: Code LDT selector 0x%x\n", g_codeSelector);
		}
	}

	// Data segment
	if (g_dataSelector == 0) {
		const int slot = allocateLdtEntryLocked();
		if (slot < 0) {
			return false;
		}
		const user_desc descriptor = createLdtEntry(slot, 0, 0xFFFFFFFF, false);
		if (modifyLdtWrite(&descriptor) != 0) {
			freeLdtEntryLocked(slot);
			DEBUG_LOG("setup_linux: Failed to create data LDT entry, trying default (0x2b)\n");
			g_dataSelector = 0x2b;
		} else {
			g_dataSelector = createLdtSelector(slot);
			DEBUG_LOG("setup_linux: Data LDT selector 0x%x\n", g_dataSelector);
		}
	}

	teb->CodeSelector = g_codeSelector;
	teb->DataSelector = g_dataSelector;
	return true;
}
#endif
} // namespace
#if defined(__x86_64__)
@@ -22,72 +207,116 @@ constexpr uint16_t createSelector(int entryNumber) {
#include <cpuid.h>
// Implemented in setup.S
extern "C" int tebThreadSetup64(int entryNumber, TEB *teb);
extern "C" void installSelectors(TEB *teb);
extern "C" int setThreadArea64(int entryNumber, TEB *teb);
bool tebThreadSetup(TEB *teb) {
std::lock_guard guard(g_tebSetupMutex);
int ret = tebThreadSetup64(g_entryNumber, teb);
if (ret < 0) {
return false;
}
if (g_entryNumber != ret) {
g_entryNumber = ret;
DEBUG_LOG("set_thread_area: allocated entry=%d base=%p\n", g_entryNumber, teb);
} else {
DEBUG_LOG("set_thread_area: reused entry=%d base=%p\n", g_entryNumber, teb);
}
teb->CurrentFsSelector = createSelector(ret);
teb->CurrentGsSelector = 0;
// Check for FSBASE/GSBASE instruction support
unsigned int regs[4];
int cpuidMax = __get_cpuid_max(0, nullptr);
unsigned int cpuidMax = __get_cpuid_max(0, nullptr);
if (cpuidMax >= 0x7 && __get_cpuid_count(0x7, 0, &regs[0], &regs[1], &regs[2], &regs[3])) {
teb->HasFsGsBase = !!(regs[1] & 1);
}
DEBUG_LOG("setup_linux: FSBASE/GSBASE instruction support: %s\n", teb->HasFsGsBase ? "yes" : "no");
// Create code and data LDT entries
if (!segmentSetupLocked(teb)) {
return false;
}
// Install ds/es selectors
installSelectors(teb);
if (g_threadAreaEntry != -2) {
int ret = setThreadArea64(g_threadAreaEntry, teb);
if (ret >= 0) {
if (g_threadAreaEntry != ret) {
g_threadAreaEntry = ret;
DEBUG_LOG("setup_linux: allocated thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb);
} else {
DEBUG_LOG("setup_linux: reused thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb);
}
teb->CurrentFsSelector = createGdtSelector(ret);
} else {
DEBUG_LOG("setup_linux: set_thread_area failed (%s), falling back to LDT\n", strerror(errno));
g_threadAreaEntry = -2; // Don't bother trying again
}
}
if (teb->CurrentFsSelector == 0) {
int entryNumber = allocateLdtEntryLocked();
if (entryNumber < 0) {
return false;
}
user_desc fsLdt = createLdtEntry(entryNumber, toGuestPtr(teb), sizeof(TEB), false);
int ret = modifyLdtWrite(&fsLdt);
if (ret != 0) {
freeLdtEntryLocked(entryNumber);
return false;
}
teb->CurrentFsSelector = createLdtSelector(entryNumber);
}
DEBUG_LOG("setup_linux: Using FS selector 0x%x\n", teb->CurrentFsSelector);
return true;
}
#elif defined(__i386__)
#include <sys/syscall.h>
bool tebThreadSetup(TEB *teb) {
std::lock_guard guard(g_tebSetupMutex);
struct user_desc desc; // NOLINT(cppcoreguidelines-pro-type-member-init)
std::memset(&desc, 0, sizeof(desc));
desc.entry_number = g_entryNumber;
desc.base_addr = reinterpret_cast<uintptr_t>(teb);
desc.limit = static_cast<unsigned int>(sizeof(TEB) - 1);
desc.seg_32bit = 1;
desc.contents = 0;
desc.read_exec_only = 0;
desc.limit_in_pages = 0;
desc.seg_not_present = 0;
desc.useable = 1;
if (syscall(SYS_set_thread_area, &desc) != 0) {
return false;
if (g_threadAreaEntry != -2) {
struct user_desc desc; // NOLINT(cppcoreguidelines-pro-type-member-init)
std::memset(&desc, 0, sizeof(desc));
desc.entry_number = g_threadAreaEntry;
desc.base_addr = reinterpret_cast<uintptr_t>(teb);
desc.limit = static_cast<unsigned int>(sizeof(TEB) - 1);
desc.seg_32bit = 1;
desc.contents = 0;
desc.read_exec_only = 0;
desc.limit_in_pages = 0;
desc.seg_not_present = 0;
desc.useable = 1;
if (syscall(SYS_set_thread_area, &desc) == 0) {
if (g_threadAreaEntry != static_cast<int>(desc.entry_number)) {
g_threadAreaEntry = static_cast<int>(desc.entry_number);
DEBUG_LOG("setup_linux: allocated thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb);
} else {
DEBUG_LOG("setup_linux: reused thread-local GDT entry=%d base=%p\n", g_threadAreaEntry, teb);
}
teb->CurrentFsSelector = createGdtSelector(desc.entry_number);
} else {
DEBUG_LOG("setup_linux: set_thread_area failed (%s), falling back to LDT\n", strerror(errno));
g_threadAreaEntry = -2; // Don't bother trying again
}
}
if (g_entryNumber != static_cast<int>(desc.entry_number)) {
g_entryNumber = static_cast<int>(desc.entry_number);
DEBUG_LOG("setup_linux: allocated GDT entry=%d base=%p\n", g_entryNumber, teb);
} else {
DEBUG_LOG("setup_linux: reused GDT entry=%d base=%p\n", g_entryNumber, teb);
if (teb->CurrentFsSelector == 0) {
int entryNumber = allocateLdtEntryLocked();
if (entryNumber < 0) {
return false;
}
user_desc fsLdt = createLdtEntry(entryNumber, toGuestPtr(teb), sizeof(TEB), false);
int ret = modifyLdtWrite(&fsLdt);
if (ret != 0) {
freeLdtEntryLocked(entryNumber);
return false;
}
teb->CurrentFsSelector = createLdtSelector(entryNumber);
}
teb->CurrentFsSelector = createSelector(desc.entry_number);
teb->CurrentGsSelector = 0;
DEBUG_LOG("setup_linux: Using FS selector 0x%x\n", teb->CurrentFsSelector);
return true;
}
#endif
bool tebThreadTeardown(TEB *teb) {
(void)teb;
// no-op on Linux
if (teb->CurrentFsSelector & 0x4 /* TI=1 */) {
std::lock_guard guard(g_tebSetupMutex);
freeLdtEntryLocked(teb->CurrentFsSelector >> 3);
}
teb->CurrentFsSelector = 0;
return true;
}

View File

@@ -552,6 +552,7 @@ typedef struct _TEB {
void *HostFsBase;
void *HostGsBase;
bool HasFsGsBase;
WORD HostCodeSelector;
#endif
} TEB;
typedef GUEST_PTR PTEB;
@@ -581,6 +582,9 @@ static_assert(offsetof(TEB, HostGsBase) == TEB_GSBASE);
#ifdef TEB_HAS_FSGSBASE
static_assert(offsetof(TEB, HasFsGsBase) == TEB_HAS_FSGSBASE);
#endif
#ifdef TEB_HOST_CS_SEL
static_assert(offsetof(TEB, HostCodeSelector) == TEB_HOST_CS_SEL);
#endif
typedef struct _MEMORY_BASIC_INFORMATION {
GUEST_PTR BaseAddress;

View File

@@ -191,8 +191,8 @@ static void test_try_acquire(void) {
int main(void) {
test_shared_readers();
// test_exclusive_blocks_shared();
// test_shared_then_exclusive();
// test_try_acquire();
test_exclusive_blocks_shared();
test_shared_then_exclusive();
test_try_acquire();
return 0;
}