diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b0d029..a4a410a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,8 +89,8 @@ set(MI_BUILD_TESTS OFF CACHE BOOL "Build test executables" FORCE) include(FetchContent) FetchContent_Declare( mimalloc - GIT_REPOSITORY https://github.com/encounter/mimalloc.git - GIT_TAG 18d8537659d36ddeb320dfacc3b88fcd22a2607f # v3.1.5 + patch + GIT_REPOSITORY https://github.com/microsoft/mimalloc.git + GIT_TAG 69c5c5c402bf0414ff4a366697ecbd5c7578dc02 # dev3 (2025-11-05) ) FetchContent_MakeAvailable(mimalloc) diff --git a/dll/kernel32/heapapi.cpp b/dll/kernel32/heapapi.cpp index 59006e1..a08a595 100644 --- a/dll/kernel32/heapapi.cpp +++ b/dll/kernel32/heapapi.cpp @@ -9,8 +9,8 @@ #include #include -#include #include +#include #include using kernel32::HeapObject; @@ -23,7 +23,7 @@ HeapObject *g_processHeapRecord = nullptr; void ensureProcessHeapInitialized() { std::call_once(g_processHeapInitFlag, []() { - auto record = make_pin(nullptr); + auto record = make_pin(std::nullopt); if (!record) { return; } @@ -41,10 +41,6 @@ LPVOID heapAllocFromRecord(HeapObject *record, DWORD dwFlags, SIZE_T dwBytes) { if (!record) { return nullptr; } - auto *heap = record->heap; - if (!heap && record->isProcessHeap) { - heap = wibo::heap::getGuestHeap(); - } if ((record->createFlags | dwFlags) & HEAP_GENERATE_EXCEPTIONS) { DEBUG_LOG("HeapAlloc: HEAP_GENERATE_EXCEPTIONS not supported\n"); kernel32::setLastError(ERROR_INVALID_PARAMETER); @@ -52,7 +48,8 @@ LPVOID heapAllocFromRecord(HeapObject *record, DWORD dwFlags, SIZE_T dwBytes) { } const bool zeroMemory = (dwFlags & HEAP_ZERO_MEMORY) != 0; const SIZE_T requestSize = std::max(1, dwBytes); - void *mem = zeroMemory ? mi_heap_zalloc(heap, requestSize) : mi_heap_malloc(heap, requestSize); + void *mem = + record->heap ? 
record->heap->malloc(requestSize, zeroMemory) : wibo::heap::guestMalloc(requestSize, zeroMemory); if (!mem) { kernel32::setLastError(ERROR_NOT_ENOUGH_MEMORY); return nullptr; @@ -66,10 +63,6 @@ LPVOID heapAllocFromRecord(HeapObject *record, DWORD dwFlags, SIZE_T dwBytes) { } // namespace HeapObject::~HeapObject() { - if (heap) { - mi_heap_destroy(heap); - heap = nullptr; - } if (isProcessHeap) { g_processHeapHandle = NO_HANDLE; g_processHeapRecord = nullptr; @@ -86,13 +79,7 @@ HANDLE WINAPI HeapCreate(DWORD flOptions, SIZE_T dwInitialSize, SIZE_T dwMaximum return NO_HANDLE; } - mi_heap_t *heap = wibo::heap::createGuestHeap(); - if (!heap) { - setLastError(ERROR_NOT_ENOUGH_MEMORY); - return NO_HANDLE; - } - - auto record = make_pin(heap); + auto record = make_pin(wibo::Heap()); record->createFlags = flOptions; record->initialSize = dwInitialSize; record->maximumSize = dwMaximumSize; @@ -107,8 +94,7 @@ BOOL WINAPI HeapDestroy(HANDLE hHeap) { setLastError(ERROR_INVALID_HANDLE); return FALSE; } - mi_heap_destroy(record->heap); - record->heap = nullptr; + record->heap.reset(); wibo::handles().release(hHeap); return TRUE; } @@ -178,11 +164,6 @@ LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBy VERBOSE_LOG("-> %p (alloc)\n", alloc); return alloc; } - if (!mi_is_in_heap_region(lpMem)) { - VERBOSE_LOG("-> NULL (not owned)\n"); - setLastError(ERROR_INVALID_PARAMETER); - return nullptr; - } if ((record->createFlags | dwFlags) & HEAP_GENERATE_EXCEPTIONS) { VERBOSE_LOG("-> NULL (exceptions unsupported)\n"); setLastError(ERROR_NOT_SUPPORTED); @@ -192,7 +173,7 @@ LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBy const bool zeroMemory = (dwFlags & HEAP_ZERO_MEMORY) != 0; if (dwBytes == 0) { if (!inplaceOnly) { - mi_free(lpMem); + wibo::heap::guestFree(lpMem); VERBOSE_LOG("-> NULL (freed)\n"); return nullptr; } @@ -202,7 +183,7 @@ LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBy } const 
SIZE_T requestSize = std::max(1, dwBytes); - const SIZE_T oldSize = mi_usable_size(lpMem); + const SIZE_T oldSize = wibo::heap::guestSize(lpMem); if (inplaceOnly || requestSize <= oldSize) { if (requestSize > oldSize) { VERBOSE_LOG("-> NULL (cannot grow in place)\n"); @@ -213,22 +194,8 @@ LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBy return lpMem; } - auto *heap = record->heap; - if (!heap && record->isProcessHeap) { - heap = wibo::heap::getGuestHeap(); - } - void *ret = mi_heap_realloc(heap, lpMem, requestSize); - if (!ret) { - setLastError(ERROR_NOT_ENOUGH_MEMORY); - return nullptr; - } - if (zeroMemory && requestSize > oldSize) { - size_t newUsable = mi_usable_size(ret); - if (newUsable > oldSize) { - size_t zeroLen = std::min(newUsable, requestSize) - oldSize; - std::memset(static_cast(ret) + oldSize, 0, zeroLen); - } - } + void *ret = record->heap ? record->heap->realloc(lpMem, requestSize, zeroMemory) + : wibo::heap::guestRealloc(lpMem, requestSize, zeroMemory); if (isExecutableHeap(record.get())) { tryMarkExecutable(ret); } @@ -251,13 +218,7 @@ SIZE_T WINAPI HeapSize(HANDLE hHeap, DWORD dwFlags, LPCVOID lpMem) { setLastError(ERROR_INVALID_PARAMETER); return static_cast(-1); } - if (!mi_is_in_heap_region(lpMem)) { - VERBOSE_LOG("-> ERROR_INVALID_PARAMETER (not owned)\n"); - setLastError(ERROR_INVALID_PARAMETER); - return static_cast(-1); - } - size_t size = mi_usable_size(lpMem); - return static_cast(size); + return static_cast(wibo::heap::guestSize(lpMem)); } BOOL WINAPI HeapFree(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem) { @@ -273,12 +234,12 @@ BOOL WINAPI HeapFree(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem) { setLastError(ERROR_INVALID_HANDLE); return FALSE; } - if (!mi_is_in_heap_region(lpMem)) { + bool ret = record->heap ? 
record->heap->free(lpMem) : wibo::heap::guestFree(lpMem); + if (!ret) { VERBOSE_LOG("-> ERROR_INVALID_PARAMETER (not owned)\n"); setLastError(ERROR_INVALID_PARAMETER); return FALSE; } - mi_free(lpMem); VERBOSE_LOG("-> SUCCESS\n"); return TRUE; } diff --git a/dll/kernel32/heapapi.h b/dll/kernel32/heapapi.h index 3bb8ec2..09b111e 100644 --- a/dll/kernel32/heapapi.h +++ b/dll/kernel32/heapapi.h @@ -20,7 +20,7 @@ HANDLE WINAPI HeapCreate(DWORD flOptions, SIZE_T dwInitialSize, SIZE_T dwMaximum BOOL WINAPI HeapDestroy(HANDLE hHeap); HANDLE WINAPI GetProcessHeap(); BOOL WINAPI HeapSetInformation(HANDLE HeapHandle, HEAP_INFORMATION_CLASS HeapInformationClass, PVOID HeapInformation, - SIZE_T HeapInformationLength); + SIZE_T HeapInformationLength); LPVOID WINAPI HeapAlloc(HANDLE hHeap, DWORD dwFlags, SIZE_T dwBytes); LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBytes); SIZE_T WINAPI HeapSize(HANDLE hHeap, DWORD dwFlags, LPCVOID lpMem); diff --git a/dll/kernel32/internal.h b/dll/kernel32/internal.h index eb6b412..947ec9d 100644 --- a/dll/kernel32/internal.h +++ b/dll/kernel32/internal.h @@ -2,10 +2,11 @@ #include "common.h" #include "handles.h" -#include "mimalloc.h" +#include "heap.h" #include "types.h" #include +#include #include namespace kernel32 { @@ -153,7 +154,7 @@ struct SemaphoreObject final : WaitableObject { struct HeapObject : public ObjectBase { static constexpr ObjectType kType = ObjectType::Heap; - mi_heap_t *heap; + std::optional heap; const pthread_t owner; DWORD createFlags = 0; SIZE_T initialSize = 0; @@ -161,11 +162,12 @@ struct HeapObject : public ObjectBase { DWORD compatibility = 0; bool isProcessHeap = false; - explicit HeapObject(mi_heap_t *heap) : ObjectBase(kType), heap(heap), owner(pthread_self()) {} + explicit HeapObject(std::optional heap) + : ObjectBase(kType), heap(std::move(heap)), owner(pthread_self()) {} ~HeapObject() override; [[nodiscard]] inline bool isOwner() const { return pthread_equal(owner, 
pthread_self()); } - [[nodiscard]] inline bool canAccess() const { return isProcessHeap || (isOwner() && heap != nullptr); } + [[nodiscard]] inline bool canAccess() const { return isProcessHeap || (isOwner() && heap.has_value()); } }; inline constexpr HANDLE kPseudoCurrentProcessHandleValue = static_cast(-1); diff --git a/dll/kernel32/processenv.cpp b/dll/kernel32/processenv.cpp index 8083a0e..7be6266 100644 --- a/dll/kernel32/processenv.cpp +++ b/dll/kernel32/processenv.cpp @@ -54,7 +54,7 @@ GUEST_PTR WINAPI GetCommandLineA() { HOST_CONTEXT_GUARD(); DEBUG_LOG("GetCommandLineA() -> %s\n", wibo::commandLine.c_str()); if (g_commandLineA == GUEST_NULL) { - void *tmp = wibo::heap::guestCalloc(1, wibo::commandLine.size() + 1); + void *tmp = wibo::heap::guestMalloc(wibo::commandLine.size() + 1, true); memcpy(tmp, wibo::commandLine.c_str(), wibo::commandLine.size()); g_commandLineA = toGuestPtr(tmp); } @@ -65,7 +65,7 @@ GUEST_PTR WINAPI GetCommandLineW() { HOST_CONTEXT_GUARD(); DEBUG_LOG("GetCommandLineW() -> %s\n", wideStringToString(wibo::commandLineW.data()).c_str()); if (g_commandLineW == GUEST_NULL) { - void *tmp = wibo::heap::guestCalloc(1, wibo::commandLineW.size() * sizeof(WCHAR) + sizeof(WCHAR)); + void *tmp = wibo::heap::guestMalloc(wibo::commandLineW.size() * sizeof(WCHAR) + sizeof(WCHAR), true); memcpy(tmp, wibo::commandLineW.data(), wibo::commandLineW.size() * sizeof(WCHAR)); g_commandLineW = toGuestPtr(tmp); } @@ -85,9 +85,7 @@ BOOL WINAPI SetStdHandle(DWORD nStdHandle, HANDLE hHandle) { return files::setStdHandle(nStdHandle, hHandle); } -GUEST_PTR WINAPI GetEnvironmentStrings() { - return GetEnvironmentStringsA(); -} +GUEST_PTR WINAPI GetEnvironmentStrings() { return GetEnvironmentStringsA(); } GUEST_PTR WINAPI GetEnvironmentStringsA() { HOST_CONTEXT_GUARD(); diff --git a/dll/kernel32/synchapi.cpp b/dll/kernel32/synchapi.cpp index 3769a53..c349faa 100644 --- a/dll/kernel32/synchapi.cpp +++ b/dll/kernel32/synchapi.cpp @@ -994,7 +994,7 @@ BOOL WINAPI 
InitializeCriticalSectionEx(LPCRITICAL_SECTION lpCriticalSection, DW lpCriticalSection->DebugInfo = static_cast(-1); } else { auto *debugInfo = reinterpret_cast( - wibo::heap::guestCalloc(1, sizeof(RTL_CRITICAL_SECTION_DEBUG))); + wibo::heap::guestMalloc(sizeof(RTL_CRITICAL_SECTION_DEBUG), true)); debugInfo->CriticalSection = toGuestPtr(lpCriticalSection); debugInfo->ProcessLocksList.Blink = toGuestPtr(&debugInfo->ProcessLocksList); debugInfo->ProcessLocksList.Flink = toGuestPtr(&debugInfo->ProcessLocksList); diff --git a/dll/kernel32/winbase.cpp b/dll/kernel32/winbase.cpp index d26793f..947392a 100644 --- a/dll/kernel32/winbase.cpp +++ b/dll/kernel32/winbase.cpp @@ -167,82 +167,6 @@ ATOM addAtomByString(const std::string &value) { return newAtom; } -void *doAlloc(UINT dwBytes, bool zero) { - if (dwBytes == 0) { - dwBytes = 1; - } - void *ret; - size_t size = static_cast(dwBytes); - if (dwBytes > MI_ARENA_MAX_OBJ_SIZE) { - // If the size is too large, allocate memory using virtualAlloc - DEBUG_LOG("doAlloc(%u, %d) -> virtualAlloc\n", dwBytes, zero); - void *addr = nullptr; - const auto result = wibo::heap::virtualAlloc(&addr, &size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - if (result != wibo::heap::VmStatus::Success) { - return nullptr; - } - ret = addr; - } else { - ret = mi_heap_malloc_aligned(wibo::heap::getGuestHeap(), dwBytes, 8); - size = mi_usable_size(ret); - } - if (ret && zero) { - std::memset(ret, 0, size); - } - return ret; -} - -void *doRealloc(void *mem, UINT dwBytes, bool zero) { - if (dwBytes == 0) { - dwBytes = 1; - } - size_t oldSize; - void *ret; - size_t newSize = static_cast(dwBytes); - mi_heap_t *heap = wibo::heap::getGuestHeap(); - if ((mem == nullptr && dwBytes <= MI_ARENA_MAX_OBJ_SIZE) || mi_is_in_heap_region(mem)) { - oldSize = mi_usable_size(mem); - ret = mi_heap_realloc_aligned(heap, mem, dwBytes, 8); - newSize = mi_usable_size(ret); - } else { - DEBUG_LOG("doRealloc(%u, %d, %d) -> virtualAlloc\n", dwBytes, zero, mem); - 
MEMORY_BASIC_INFORMATION info; - auto result = wibo::heap::virtualQuery(mem, &info); - if (result != wibo::heap::VmStatus::Success) { - return nullptr; - } - oldSize = info.RegionSize; - result = wibo::heap::virtualAlloc(&ret, &newSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - if (result != wibo::heap::VmStatus::Success) { - return nullptr; - } - std::memcpy(ret, mem, oldSize); - wibo::heap::virtualFree(mem, oldSize, MEM_RELEASE); - } - if (ret && zero && newSize > oldSize) { - std::memset(static_cast(ret) + oldSize, 0, newSize - oldSize); - } - return ret; -} - -bool doFree(void *mem) { - if (mem == nullptr) { - return false; - } - if (mi_is_in_heap_region(mem)) { - mi_free(mem); - } else { - DEBUG_LOG("doFree(%p) -> virtualFree\n", mem); - MEMORY_BASIC_INFORMATION info; - auto result = wibo::heap::virtualQuery(mem, &info); - if (result != wibo::heap::VmStatus::Success || fromGuestPtr(info.BaseAddress) != mem) { - return false; - } - wibo::heap::virtualFree(mem, info.RegionSize, MEM_RELEASE); - } - return true; -} - bool tryGetCurrentDirectoryPath(std::string &outPath) { std::error_code ec; std::filesystem::path cwd = std::filesystem::current_path(ec); @@ -811,17 +735,21 @@ HGLOBAL WINAPI GlobalAlloc(UINT uFlags, SIZE_T dwBytes) { return NO_HANDLE; } bool zero = (uFlags & GMEM_ZEROINIT) != 0; - void *ret = doAlloc(static_cast(dwBytes), zero); + void *ret = wibo::heap::guestMalloc(static_cast(dwBytes), zero); VERBOSE_LOG("-> %p\n", ret); + if (!ret) { + setLastError(ERROR_NOT_ENOUGH_MEMORY); + return GUEST_NULL; + } return toGuestPtr(ret); } HGLOBAL WINAPI GlobalFree(HGLOBAL hMem) { HOST_CONTEXT_GUARD(); VERBOSE_LOG("GlobalFree(%p)\n", hMem); - if (doFree(reinterpret_cast(hMem))) { + if (wibo::heap::guestFree(reinterpret_cast(hMem))) { VERBOSE_LOG("-> success\n"); - return NO_HANDLE; + return GUEST_NULL; } else { VERBOSE_LOG("-> failure\n"); return hMem; @@ -833,11 +761,15 @@ HGLOBAL WINAPI GlobalReAlloc(HGLOBAL hMem, SIZE_T dwBytes, UINT uFlags) { 
VERBOSE_LOG("GlobalReAlloc(%p, %zu, %x)\n", hMem, static_cast(dwBytes), uFlags); if (uFlags & GMEM_MODIFY) { assert(0); - return NO_HANDLE; + return GUEST_NULL; } bool zero = (uFlags & GMEM_ZEROINIT) != 0; - void *ret = doRealloc(reinterpret_cast(hMem), static_cast(dwBytes), zero); + void *ret = wibo::heap::guestRealloc(reinterpret_cast(hMem), static_cast(dwBytes), zero); VERBOSE_LOG("-> %p\n", ret); + if (!ret) { + setLastError(ERROR_NOT_ENOUGH_MEMORY); + return GUEST_NULL; + } return toGuestPtr(ret); } @@ -855,10 +787,10 @@ HLOCAL WINAPI LocalAlloc(UINT uFlags, SIZE_T uBytes) { if ((uFlags & LMEM_MOVEABLE) != 0) { VERBOSE_LOG(" ignoring LMEM_MOVEABLE\n"); } - void *result = doAlloc(static_cast(uBytes), zero); + void *result = wibo::heap::guestMalloc(static_cast(uBytes), zero); if (!result) { - setLastError(ERROR_NOT_SUPPORTED); - return NO_HANDLE; + setLastError(ERROR_NOT_ENOUGH_MEMORY); + return GUEST_NULL; } // Legacy Windows applications (pre-NX and DEP) may expect executable memory from LocalAlloc. 
tryMarkExecutable(result); @@ -869,11 +801,12 @@ HLOCAL WINAPI LocalAlloc(UINT uFlags, SIZE_T uBytes) { HLOCAL WINAPI LocalFree(HLOCAL hMem) { HOST_CONTEXT_GUARD(); VERBOSE_LOG("LocalFree(%p)\n", hMem); - if (doFree(reinterpret_cast(hMem))) { + if (wibo::heap::guestFree(reinterpret_cast(hMem))) { VERBOSE_LOG("-> success\n"); - return NO_HANDLE; + return GUEST_NULL; } else { VERBOSE_LOG("-> failure\n"); + setLastError(ERROR_INVALID_HANDLE); return hMem; } } @@ -885,10 +818,10 @@ HLOCAL WINAPI LocalReAlloc(HLOCAL hMem, SIZE_T uBytes, UINT uFlags) { if ((uFlags & LMEM_MOVEABLE) != 0) { VERBOSE_LOG(" ignoring LMEM_MOVEABLE\n"); } - void *result = doRealloc(reinterpret_cast(hMem), static_cast(uBytes), zero); + void *result = wibo::heap::guestRealloc(reinterpret_cast(hMem), static_cast(uBytes), zero); if (!result && uBytes != 0) { - setLastError(ERROR_NOT_SUPPORTED); - return NO_HANDLE; + setLastError(ERROR_NOT_ENOUGH_MEMORY); + return GUEST_NULL; } // Legacy Windows applications (pre-NX and DEP) may expect executable memory from LocalReAlloc. 
tryMarkExecutable(result); diff --git a/src/heap.cpp b/src/heap.cpp index f88db6d..76687c6 100644 --- a/src/heap.cpp +++ b/src/heap.cpp @@ -1,9 +1,11 @@ #include "heap.h" #include "common.h" #include "errors.h" +#include "processes.h" #include "types.h" #include +#include #include #include #include @@ -26,7 +28,6 @@ #endif #include -#include #include #include @@ -37,7 +38,7 @@ namespace { constexpr uintptr_t kLowMemoryStart = 0x00110000UL; // 1 MiB + 64 KiB -constexpr uintptr_t kHeapMax = 0x60000000UL; // 1 GiB +constexpr uintptr_t kHeapMax = 0x70000000UL; #ifdef __APPLE__ // On macOS, our program is mapped at 0x7E001000 constexpr uintptr_t kTopDownStart = 0x7D000000UL; @@ -46,21 +47,22 @@ constexpr uintptr_t kTwoGB = 0x7E000000UL; constexpr uintptr_t kTopDownStart = 0x7F000000UL; // Just below 2GB constexpr uintptr_t kTwoGB = 0x80000000UL; #endif -constexpr std::size_t kGuestArenaSize = 512ULL * 1024ULL * 1024ULL; // 512 MiB +constexpr std::size_t kGuestArenaSize = 64ULL * 1024ULL * 1024ULL; // 64 MiB +constexpr std::size_t kArenaMaxObjSize = 8ULL * 1024ULL * 1024ULL; // 8 MiB constexpr std::size_t kVirtualAllocationGranularity = 64ULL * 1024ULL; -struct ArenaRange { +struct Arena { + mi_arena_id_t arenaId = nullptr; void *start = nullptr; - std::size_t size = 0; + size_t size = 0; }; -// Guest arena (<2GB) -ArenaRange g_guest; -mi_arena_id_t g_guestArenaId = nullptr; -thread_local mi_heap_t *g_guestHeap = nullptr; +std::recursive_mutex g_arenasMutex; +std::vector g_arenas; +std::atomic_uint32_t g_heapTag(1); -bool g_initialized = false; -std::once_flag g_initOnce; +// Each thread gets its own set of mi_heap objects corresponding to each allocated arena +thread_local wibo::detail::HeapInternal g_guestHeap(0); std::mutex g_mappingsMutex; std::map *g_mappings = nullptr; @@ -386,6 +388,24 @@ bool findFreeMappingLocked(std::size_t size, uintptr_t minAddr, uintptr_t maxAdd return true; }; + bool foundTopCandidate = false; + uintptr_t bestCandidate = 0; + auto 
considerGap = [&](uintptr_t gapStart, uintptr_t gapEnd) -> bool { + uintptr_t candidate = 0; + if (!tryGap(gapStart, gapEnd, candidate)) { + return false; + } + if (!preferTop) { + *outAddr = candidate; + return true; + } + if (!foundTopCandidate || candidate > bestCandidate) { + bestCandidate = candidate; + foundTopCandidate = true; + } + return false; + }; + uintptr_t cursor = alignUp(searchMin, granularity); for (auto &g_mapping : *g_mappings) { uintptr_t mapStart = g_mapping.first; @@ -395,7 +415,7 @@ bool findFreeMappingLocked(std::size_t size, uintptr_t minAddr, uintptr_t maxAdd continue; } if (mapStart >= searchMax) { - if (tryGap(cursor, searchMax, *outAddr)) { + if (considerGap(cursor, searchMax)) { return true; } break; @@ -403,7 +423,7 @@ bool findFreeMappingLocked(std::size_t size, uintptr_t minAddr, uintptr_t maxAdd if (mapStart > cursor) { uintptr_t gapEnd = std::min(mapStart, searchMax); - if (tryGap(cursor, gapEnd, *outAddr)) { + if (considerGap(cursor, gapEnd)) { return true; } } @@ -418,17 +438,20 @@ bool findFreeMappingLocked(std::size_t size, uintptr_t minAddr, uintptr_t maxAdd } if (cursor < searchMax) { - if (tryGap(cursor, searchMax, *outAddr)) { + if (considerGap(cursor, searchMax)) { return true; } } + if (foundTopCandidate) { + *outAddr = bestCandidate; + return true; + } return false; } -bool mapArena(std::size_t size, uintptr_t minAddr, uintptr_t maxAddr, bool preferTop, const char *name, - ArenaRange &out) { - std::lock_guard guard(g_mappingsMutex); +bool mapArena(std::size_t size, uintptr_t minAddr, uintptr_t maxAddr, bool preferTop, const char *name, Arena &out) { + std::lock_guard lk(g_mappingsMutex); const std::size_t ps = wibo::heap::systemPageSize(); size = (size + ps - 1) & ~(ps - 1); uintptr_t cand = 0; @@ -444,46 +467,217 @@ bool mapArena(std::size_t size, uintptr_t minAddr, uintptr_t maxAddr, bool prefe return false; } -void initializeImpl() { - if (g_initialized) { - return; +bool createArenaLocked(size_t size) { + Arena 
arena; + if (!mapArena(size, kLowMemoryStart, kHeapMax, true, "wibo heap arena", arena)) { + DEBUG_LOG("heap: failed to find free mapping for arena\n"); + return false; } + if (!mi_manage_os_memory_ex(arena.start, arena.size, + /*is_committed*/ false, + /*is_pinned*/ false, + /*is_zero*/ true, + /*numa_node*/ -1, + /*exclusive*/ true, &arena.arenaId)) { + DEBUG_LOG("heap: failed to create mi_arena\n"); + return false; + } + DEBUG_LOG("heap: created arena %d at %p..%p (%zu MiB)\n", arena.arenaId, arena.start, + reinterpret_cast(arena.start) + arena.size, arena.size >> 20); + g_arenas.push_back(arena); + return true; +} - // Map and register guest arena (below 2GB, exclusive) - ArenaRange guest; - if (mapArena(kGuestArenaSize, kLowMemoryStart, kHeapMax, true, "wibo guest arena", guest)) { - bool ok = mi_manage_os_memory_ex(guest.start, guest.size, - /*is_committed*/ false, - /*is_pinned*/ false, - /*is_zero*/ true, - /*numa_node*/ -1, - /*exclusive*/ true, &g_guestArenaId); - if (ok) { - g_guest = guest; - } else { - LOG_ERR("heap: failed to register guest arena with mimalloc\n"); +mi_heap_t *heapForArena(std::vector &heaps, uint32_t arenaIdx, uint32_t heapTag) { + if (heaps.size() <= arenaIdx) { + heaps.resize(arenaIdx + 1, nullptr); + } + if (heaps[arenaIdx] == nullptr) { + mi_arena_id_t arenaId; + { + std::lock_guard lk(g_arenasMutex); + if (arenaIdx >= g_arenas.size()) { + return nullptr; + } + arenaId = g_arenas[arenaIdx].arenaId; + } + mi_heap_t *h = mi_heap_new_ex(static_cast(heapTag), heapTag != 0, arenaId); + if (h == nullptr) { + return nullptr; + } + heaps[arenaIdx] = h; + } + return heaps[arenaIdx]; +} + +template +inline auto tryWithArena(wibo::detail::HeapInternal &internal, uint32_t arenaIdx, CallbackFn &&cb) + -> std::invoke_result_t { + mi_heap_t *heap = heapForArena(internal.heaps, arenaIdx, internal.heapTag); + if (!heap) { + return {}; + } + return std::forward(cb)(heap, arenaIdx); +} + +template +inline auto 
tryWithAnyArena(wibo::detail::HeapInternal &internal, CallbackFn &&cb) + -> std::invoke_result_t { + using R = std::invoke_result_t; + R ret = tryWithArena(internal, internal.arenaHint, cb); + if (ret) { + return ret; + } + // Loop without locking (arenas won't be removed) + uint32_t numArenas = static_cast(g_arenas.size()); + for (uint32_t i = 0; i < numArenas; ++i) { + if (i == internal.arenaHint) { + continue; + } + ret = tryWithArena(internal, i, cb); + if (ret) { + internal.arenaHint = i; + return ret; } } - if (g_guest.size) { - DEBUG_LOG("heap: initialized guest arena %p..%p (%zu MiB) id=%p\n", g_guest.start, - static_cast(static_cast(g_guest.start) + g_guest.size), g_guest.size >> 20, - g_guestArenaId); - } else { - DEBUG_LOG("heap: guest arena initialization incomplete\n"); + std::lock_guard lk(g_arenasMutex); + // Was a new arena created while we were looping? + for (uint32_t i = numArenas; i < g_arenas.size(); ++i) { + ret = tryWithArena(internal, i, cb); + if (ret) { + internal.arenaHint = i; + return ret; + } } + DEBUG_LOG("heap: no arena available, creating new arena\n"); + if (createArenaLocked(kGuestArenaSize)) { + uint32_t newArenaIdx = static_cast(g_arenas.size() - 1); + ret = tryWithArena(internal, newArenaIdx, cb); + if (ret) { + internal.arenaHint = newArenaIdx; + return ret; + } + } + return {}; +} - g_initialized = true; +void *doAlloc(wibo::detail::HeapInternal &internal, size_t size, bool zero) { + if (size >= kArenaMaxObjSize) { + DEBUG_LOG("heap: large malloc %zu bytes, using virtualAlloc\n", size); + void *addr = nullptr; + const auto result = wibo::heap::virtualAlloc(&addr, &size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (result != wibo::heap::VmStatus::Success) { + return nullptr; + } + return addr; + } + return tryWithAnyArena(internal, [size, zero](mi_heap_t *heap, uint32_t) { + return (zero ? 
mi_heap_zalloc_aligned : mi_heap_malloc_aligned)(heap, size, 8); + }); +} + +void *doRealloc(wibo::detail::HeapInternal &internal, void *ptr, size_t newSize, bool zero) { + bool isInHeap = mi_is_in_heap_region(ptr); + if (newSize >= kArenaMaxObjSize || !isInHeap) { + DEBUG_LOG("heap: large realloc %zu bytes, using virtualAlloc\n", newSize); + size_t oldSize; + if (isInHeap) { + oldSize = mi_usable_size(ptr); + } else { + // Get size from virtualQuery + MEMORY_BASIC_INFORMATION info; + auto result = wibo::heap::virtualQuery(ptr, &info); + if (result != wibo::heap::VmStatus::Success) { + return nullptr; + } + oldSize = info.RegionSize; + } + void *ret = nullptr; + if (newSize >= kArenaMaxObjSize) { + auto result = wibo::heap::virtualAlloc(&ret, &newSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (result != wibo::heap::VmStatus::Success) { + return nullptr; + } + } else { + ret = doAlloc(internal, newSize, zero); + } + if (!ret) { + return nullptr; + } + std::memcpy(ret, ptr, std::min(oldSize, newSize)); + if (isInHeap) { + mi_free(ptr); + } else { + auto result = wibo::heap::virtualFree(ptr, 0, MEM_RELEASE); + if (result != wibo::heap::VmStatus::Success) { + return nullptr; + } + } + return ret; + } + return tryWithAnyArena(internal, [ptr, newSize, zero](mi_heap_t *heap, uint32_t) { + return (zero ? 
mi_heap_rezalloc_aligned : mi_heap_realloc_aligned)(heap, ptr, newSize, 8); + }); +} + +bool doFree(void *ptr) { + if (ptr == nullptr) { + return false; + } + if (mi_is_in_heap_region(ptr)) { + mi_free(ptr); + } else { + DEBUG_LOG("heap: free(%p) -> virtualFree\n", ptr); + auto result = wibo::heap::virtualFree(ptr, 0, MEM_RELEASE); + if (result != wibo::heap::VmStatus::Success) { + return false; + } + } + return true; } } // anonymous namespace -namespace wibo::heap { +namespace wibo { -bool initialize() { - std::call_once(g_initOnce, initializeImpl); - return g_initialized; +Heap::Heap() : threadId(getThreadId()), internal(g_heapTag++) {} + +Heap::~Heap() { + if (getThreadId() != threadId) { + DEBUG_LOG("heap: ~Heap() failed; heap owned by another thread\n"); + return; + } + for (mi_heap_t *h : internal.heaps) { + if (h) { + mi_heap_destroy(h); + } + } + internal.heaps.clear(); } +void *Heap::malloc(size_t size, bool zero) { + if (getThreadId() != threadId) { + DEBUG_LOG("heap: malloc(%zu) failed; heap owned by another thread\n", size); + return nullptr; + } + return doAlloc(internal, size, zero); +} + +void *Heap::realloc(void *ptr, size_t newSize, bool zero) { + if (getThreadId() != threadId) { + DEBUG_LOG("heap: realloc(%p, %zu) failed; heap owned by another thread\n", ptr, newSize); + return nullptr; + } + return doRealloc(internal, ptr, newSize, zero); +} + +// NOLINTNEXTLINE(readability-convert-member-functions-to-static) +bool Heap::free(void *ptr) { return doFree(ptr); } + +}; // namespace wibo + +namespace wibo::heap { + uintptr_t systemPageSize() { static uintptr_t cached = []() { long detected = sysconf(_SC_PAGESIZE); @@ -495,34 +689,25 @@ uintptr_t systemPageSize() { return cached; } -mi_heap_t *getGuestHeap() { - initialize(); - if (g_guestHeap == nullptr) { - g_guestHeap = createGuestHeap(); - } - return g_guestHeap; -} +void *guestMalloc(std::size_t size, bool zero) { return doAlloc(g_guestHeap, size, zero); } -mi_heap_t *createGuestHeap() { - 
initialize(); - if (g_guestArenaId != nullptr) { - if (mi_heap_t *h = mi_heap_new_ex(0, true, g_guestArenaId)) { - DEBUG_LOG("heap: created guest heap in arena %p\n", g_guestArenaId); - return h; +void *guestRealloc(void *ptr, std::size_t newSize, bool zero) { return doRealloc(g_guestHeap, ptr, newSize, zero); } + +bool guestFree(void *ptr) { return doFree(ptr); } + +size_t guestSize(const void *ptr) { + if (mi_is_in_heap_region(ptr)) { + return mi_usable_size(ptr); + } else { + MEMORY_BASIC_INFORMATION info; + auto result = wibo::heap::virtualQuery(ptr, &info); + if (result != wibo::heap::VmStatus::Success) { + return SIZE_MAX; } + return info.RegionSize; } - DEBUG_LOG("heap: created guest heap without arena\n"); - return mi_heap_new(); } -void *guestMalloc(std::size_t size) { return mi_heap_malloc(getGuestHeap(), size); } - -void *guestCalloc(std::size_t count, std::size_t size) { return mi_heap_calloc(getGuestHeap(), count, size); } - -void *guestRealloc(void *ptr, std::size_t newSize) { return mi_heap_realloc(getGuestHeap(), ptr, newSize); } - -void guestFree(void *ptr) { mi_free(ptr); } - uintptr_t allocationGranularity() { return kVirtualAllocationGranularity; } DWORD win32ErrorFromVmStatus(VmStatus status) { @@ -1041,8 +1226,8 @@ bool reserveGuestStack(std::size_t stackSizeBytes, void **outStackLimit, void ** const std::size_t ps = systemPageSize(); std::size_t total = ((stackSizeBytes + (ps * 2) - 1) & ~(ps - 1)); - ArenaRange r; - if (!mapArena(total, kTopDownStart, kTwoGB, true, "wibo guest stack", r)) { + Arena r; + if (!mapArena(total, kLowMemoryStart, kTwoGB, true, "wibo guest stack", r)) { DEBUG_LOG("heap: reserveGuestStack: failed to map region\n"); return false; } @@ -1174,8 +1359,8 @@ static size_t blockLower2GB(MEMORY_BASIC_INFORMATION mappings[MAX_NUM_MAPPINGS]) if (mapStart >= kTwoGB) { break; } - if (mapStart + mapEnd > kTwoGB) { - mapEnd = kTwoGB - mapStart; + if (mapEnd > kTwoGB) { + mapEnd = kTwoGB; } if (mapStart == mapEnd || mapStart > 
mapEnd) { continue; @@ -1190,7 +1375,6 @@ static size_t blockLower2GB(MEMORY_BASIC_INFORMATION mappings[MAX_NUM_MAPPINGS]) // Extend the previous mapping prevMapping.RegionSize = mapEnd - prevMapStart; lastMapEnd = mapEnd; - procLine = procLine.substr(newline + 1); continue; } } diff --git a/src/heap.h b/src/heap.h index 90eeb5b..d1fb8a2 100644 --- a/src/heap.h +++ b/src/heap.h @@ -7,17 +7,53 @@ #include #include #include +#include struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; +namespace wibo { + +namespace detail { + +struct HeapInternal { + uint32_t heapTag; + // Previously used arena + uint32_t arenaHint = 0; + // Thread-local mi_heap objects per arena + std::vector heaps; + + explicit HeapInternal(uint32_t heapTag) : heapTag(heapTag) {} + ~HeapInternal() = default; +}; + +}; // namespace detail + +class Heap { + public: + Heap(); + ~Heap(); + + Heap(const Heap &) = delete; + Heap &operator=(const Heap &) = delete; + Heap(Heap &&) noexcept = default; + Heap &operator=(Heap &&) noexcept = default; + + void *malloc(size_t size, bool zero = false); + void *realloc(void *ptr, size_t newSize, bool zero = false); + bool free(void *ptr); + + private: + uint32_t threadId; + detail::HeapInternal internal; +}; + +}; // namespace wibo + namespace wibo::heap { -bool initialize(); uintptr_t systemPageSize(); uintptr_t allocationGranularity(); -mi_heap_t *getGuestHeap(); -mi_heap_t *createGuestHeap(); enum class VmStatus : uint32_t { Success = 0, @@ -31,10 +67,10 @@ enum class VmStatus : uint32_t { }; // Guest heap memory allocation helpers -void *guestMalloc(std::size_t size); -void *guestCalloc(std::size_t count, std::size_t size); -void *guestRealloc(void *ptr, std::size_t newSize); -void guestFree(void *ptr); +void *guestMalloc(std::size_t size, bool zero = false); +void *guestRealloc(void *ptr, std::size_t newSize, bool zero = false); +bool guestFree(void *ptr); +size_t guestSize(const void *ptr); VmStatus virtualAlloc(void **baseAddress, std::size_t *regionSize, 
DWORD allocationType, DWORD protect, DWORD type = MEM_PRIVATE); diff --git a/src/main.cpp b/src/main.cpp index 82e3955..bccce60 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,7 +45,7 @@ void wibo::debug_log(const char *fmt, ...) { } TEB *wibo::allocateTib() { - auto *newTib = static_cast(wibo::heap::guestCalloc(1, sizeof(TEB))); + auto *newTib = static_cast(wibo::heap::guestMalloc(sizeof(TEB), true)); if (!newTib) { return nullptr; } @@ -317,11 +317,11 @@ int main(int argc, char **argv) { files::init(); // Create PEB - PEB *peb = reinterpret_cast(wibo::heap::guestCalloc(1, sizeof(PEB))); - peb->ProcessParameters = toGuestPtr(wibo::heap::guestCalloc(1, sizeof(RTL_USER_PROCESS_PARAMETERS))); + PEB *peb = reinterpret_cast(wibo::heap::guestMalloc(sizeof(PEB), true)); + peb->ProcessParameters = toGuestPtr(wibo::heap::guestMalloc(sizeof(RTL_USER_PROCESS_PARAMETERS), true)); // Create TIB - TEB *tib = reinterpret_cast(wibo::heap::guestCalloc(1, sizeof(TEB))); + TEB *tib = reinterpret_cast(wibo::heap::guestMalloc(sizeof(TEB), true)); wibo::tls::initializeTib(tib); tib->Tib.Self = toGuestPtr(tib); tib->Peb = toGuestPtr(peb); diff --git a/src/tls.cpp b/src/tls.cpp index 67fd2b8..7322de3 100644 --- a/src/tls.cpp +++ b/src/tls.cpp @@ -34,7 +34,7 @@ TlsArray *allocateTlsArray(size_t capacity) { return nullptr; } const size_t bytes = sizeof(TlsArray) + capacity * sizeof(GUEST_PTR); - auto *arr = static_cast(wibo::heap::guestCalloc(1, bytes)); + auto *arr = static_cast(wibo::heap::guestMalloc(bytes, true)); if (!arr) { return nullptr; } diff --git a/test/test_heap.c b/test/test_heap.c index c19c5a0..ac78557 100644 --- a/test/test_heap.c +++ b/test/test_heap.c @@ -1,60 +1,109 @@ -#include #include #include #include +#include #include "test_assert.h" -int main(void) { - HANDLE processHeap = GetProcessHeap(); - TEST_CHECK(processHeap != NULL); +const SIZE_T LARGE_BLOCK_SIZE = 64ULL * 1024ULL * 1024ULL; // 64 MiB +const SIZE_T SMALL_BLOCK_SIZE = 64ULL * 1024ULL; // 64 KiB - 
uint8_t *block = (uint8_t *)HeapAlloc(processHeap, HEAP_ZERO_MEMORY, 32); - TEST_CHECK(block != NULL); - for (size_t i = 0; i < 32; i++) { - TEST_CHECK(block[i] == 0); - } +static void test_basics() { + HANDLE processHeap = GetProcessHeap(); + TEST_CHECK(processHeap != NULL); - SIZE_T blockSize = HeapSize(processHeap, 0, block); - TEST_CHECK(blockSize >= 32); + uint8_t *block = (uint8_t *)HeapAlloc(processHeap, HEAP_ZERO_MEMORY, 32); + TEST_CHECK(block != NULL); + for (size_t i = 0; i < 32; i++) { + TEST_CHECK(block[i] == 0); + } - memset(block, 0xAA, 16); - uint8_t *grown = (uint8_t *)HeapReAlloc(processHeap, HEAP_ZERO_MEMORY, block, 64); - TEST_CHECK(grown != NULL); - for (size_t i = 0; i < 16; i++) { - TEST_CHECK(grown[i] == 0xAA); - } - for (size_t i = 16; i < 64; i++) { - TEST_CHECK(grown[i] == 0); - } + SIZE_T blockSize = HeapSize(processHeap, 0, block); + TEST_CHECK(blockSize >= 32); - SetLastError(0); - void *inPlace = HeapReAlloc(processHeap, HEAP_REALLOC_IN_PLACE_ONLY, grown, 2048); - TEST_CHECK(inPlace == NULL); - TEST_CHECK_EQ(ERROR_NOT_ENOUGH_MEMORY, GetLastError()); + memset(block, 0xAA, 16); + uint8_t *grown = (uint8_t *)HeapReAlloc(processHeap, HEAP_ZERO_MEMORY, block, 64); + TEST_CHECK(grown != NULL); + for (size_t i = 0; i < 16; i++) { + TEST_CHECK(grown[i] == 0xAA); + } + for (size_t i = 16; i < 64; i++) { + TEST_CHECK(grown[i] == 0); + } - TEST_CHECK(HeapFree(processHeap, 0, grown)); + SetLastError(0); + void *inPlace = HeapReAlloc(processHeap, HEAP_REALLOC_IN_PLACE_ONLY, grown, 2048); + TEST_CHECK(inPlace == NULL); + TEST_CHECK_EQ(ERROR_NOT_ENOUGH_MEMORY, GetLastError()); - HANDLE privateHeap = HeapCreate(HEAP_CREATE_ENABLE_EXECUTE, 0, 0); - TEST_CHECK(privateHeap != NULL); + TEST_CHECK(HeapFree(processHeap, 0, grown)); - void *privateBlock = HeapAlloc(privateHeap, 0, 8); - TEST_CHECK(privateBlock != NULL); + HANDLE privateHeap = HeapCreate(HEAP_CREATE_ENABLE_EXECUTE, 0, 0); + TEST_CHECK(privateHeap != NULL); - SetLastError(0); - // Disabled 
temporarily; no good way to detect individual heap allocations - // in mimalloc currently. See https://github.com/microsoft/mimalloc/issues/298 + void *privateBlock = HeapAlloc(privateHeap, 0, 8); + TEST_CHECK(privateBlock != NULL); + + SetLastError(0); + // Disabled temporarily; no good way to detect individual heap allocations + // in mimalloc currently. See https://github.com/microsoft/mimalloc/issues/298 #if 0 TEST_CHECK(!HeapFree(processHeap, 0, privateBlock)); TEST_CHECK_EQ(ERROR_INVALID_PARAMETER, GetLastError()); #endif - TEST_CHECK(HeapFree(privateHeap, 0, privateBlock)); - TEST_CHECK(HeapDestroy(privateHeap)); + TEST_CHECK(HeapFree(privateHeap, 0, privateBlock)); + TEST_CHECK(HeapDestroy(privateHeap)); - SetLastError(0); - TEST_CHECK(!HeapDestroy(processHeap)); - TEST_CHECK_EQ(ERROR_INVALID_HANDLE, GetLastError()); - - return EXIT_SUCCESS; + SetLastError(0); + TEST_CHECK(!HeapDestroy(processHeap)); + TEST_CHECK_EQ(ERROR_INVALID_HANDLE, GetLastError()); +} + +static void test_large_alloc() { + HANDLE heap = HeapCreate(0, 0, 0); + TEST_CHECK(heap != NULL); + + // Test allocating a large block; verify the pointer is non-NULL before writing a marker through it + void *largeBlock = HeapAlloc(heap, 0, LARGE_BLOCK_SIZE); + TEST_CHECK(largeBlock != NULL); + *(uint32_t *)largeBlock = 0x12345678; + SIZE_T blockSize = HeapSize(heap, 0, largeBlock); + TEST_CHECK(blockSize >= LARGE_BLOCK_SIZE); + + // Test reallocating a large block to a smaller size + void *smallBlock = HeapReAlloc(heap, 0, largeBlock, SMALL_BLOCK_SIZE); + TEST_CHECK(smallBlock != NULL); + TEST_CHECK(*(uint32_t *)smallBlock == 0x12345678); + blockSize = HeapSize(heap, 0, smallBlock); + TEST_CHECK(blockSize >= SMALL_BLOCK_SIZE); + + // Test reallocating a small block to a larger size + largeBlock = HeapReAlloc(heap, 0, smallBlock, LARGE_BLOCK_SIZE); + TEST_CHECK(largeBlock != NULL); + TEST_CHECK(*(uint32_t *)largeBlock == 0x12345678); + TEST_CHECK(HeapFree(heap, 0, largeBlock)); + + TEST_CHECK(HeapDestroy(heap)); +} + +static void test_heap_expansion() { + HANDLE 
heap = HeapCreate(0, 0, 0); + TEST_CHECK(heap != NULL); + + // Test allocating a total of 768 MiB + const SIZE_T TOTAL_SIZE = 768ULL * 1024ULL * 1024ULL; + for (int i = 0; i < (int)(TOTAL_SIZE / SMALL_BLOCK_SIZE); i++) { + void *block = HeapAlloc(heap, 0, SMALL_BLOCK_SIZE); + TEST_CHECK(block != NULL); + } + + TEST_CHECK(HeapDestroy(heap)); +} + +int main(void) { + test_basics(); + test_large_alloc(); + test_heap_expansion(); + return EXIT_SUCCESS; }