#include "common.hpp"

#include "../internal.hpp"
#include "../webgpu/gpu.hpp"
#include "model/shader.hpp"
#include "stream/shader.hpp"
#include "texture.hpp"

// System/library headers inferred from usage in this file
#include <condition_variable>
#include <deque>
#include <fstream>
#include <mutex>
#include <thread>

#include <absl/container/flat_hash_map.h>
#include <magic_enum.hpp>

namespace aurora::gfx {
static Module Log("aurora::gfx");

using webgpu::g_device;
using webgpu::g_queue;

#ifdef AURORA_GFX_DEBUG_GROUPS
std::vector<std::string> g_debugGroupStack;
#endif

constexpr uint64_t UniformBufferSize = 3145728;  // 3mb
constexpr uint64_t VertexBufferSize = 3145728;   // 3mb
constexpr uint64_t IndexBufferSize = 1048576;    // 1mb
constexpr uint64_t StorageBufferSize = 8388608;  // 8mb
constexpr uint64_t TextureUploadSize = 25165824; // 24mb
constexpr uint64_t StagingBufferSize =
    UniformBufferSize + VertexBufferSize + IndexBufferSize + StorageBufferSize + TextureUploadSize;

struct ShaderState {
  stream::State stream;
  model::State model;
};
struct ShaderDrawCommand {
  ShaderType type;
  union {
    stream::DrawData stream;
    model::DrawData model;
  };
};
enum class CommandType {
  SetViewport,
  SetScissor,
  Draw,
};
struct Command {
  CommandType type;
#ifdef AURORA_GFX_DEBUG_GROUPS
  std::vector<std::string> debugGroupStack;
#endif
  union Data {
    struct SetViewportCommand {
      float left;
      float top;
      float width;
      float height;
      float znear;
      float zfar;

      bool operator==(const SetViewportCommand& rhs) const {
        return left == rhs.left && top == rhs.top && width == rhs.width && height == rhs.height &&
               znear == rhs.znear && zfar == rhs.zfar;
      }
      bool operator!=(const SetViewportCommand& rhs) const { return !(*this == rhs); }
    } setViewport;
    struct SetScissorCommand {
      uint32_t x;
      uint32_t y;
      uint32_t w;
      uint32_t h;

      bool operator==(const SetScissorCommand& rhs) const {
        return x == rhs.x && y == rhs.y && w == rhs.w && h == rhs.h;
      }
      bool operator!=(const SetScissorCommand& rhs) const { return !(*this == rhs); }
    } setScissor;
    ShaderDrawCommand draw;
  } data;
};
} // namespace aurora::gfx

namespace aurora {
// For types that we can't ensure are safe to hash with has_unique_object_representations,
// we create specialized methods to handle them. Note that these are highly dependent on
// the structure definition, which could easily change with Dawn updates.
template <>
inline HashType xxh3_hash(const WGPUBindGroupDescriptor& input, HashType seed) {
  constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label
  const auto hash = xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
                                sizeof(WGPUBindGroupDescriptor) - offset - sizeof(void*) /* skip entries */, seed);
  return xxh3_hash_s(input.entries, sizeof(WGPUBindGroupEntry) * input.entryCount, hash);
}
template <>
inline HashType xxh3_hash(const WGPUSamplerDescriptor& input, HashType seed) {
  constexpr auto offset = sizeof(void*) * 2; // skip nextInChain, label
  return xxh3_hash_s(reinterpret_cast<const u8*>(&input) + offset,
                     sizeof(WGPUSamplerDescriptor) - offset - 2 /* skip padding */, seed);
}
} // namespace aurora

namespace aurora::gfx {
using NewPipelineCallback = std::function<WGPURenderPipeline()>;
std::mutex g_pipelineMutex;
static bool g_hasPipelineThread = false;
static std::thread g_pipelineThread;
static std::atomic_bool g_pipelineThreadEnd;
static std::condition_variable g_pipelineCv;
static absl::flat_hash_map<PipelineRef, WGPURenderPipeline> g_pipelines;
static std::deque<std::pair<PipelineRef, NewPipelineCallback>> g_queuedPipelines;
static absl::flat_hash_map<BindGroupRef, WGPUBindGroup> g_cachedBindGroups;
static absl::flat_hash_map<HashType, WGPUSampler> g_cachedSamplers;
std::atomic_uint32_t queuedPipelines;
std::atomic_uint32_t createdPipelines;

static ByteBuffer g_verts;
static ByteBuffer g_uniforms;
static ByteBuffer g_indices;
static ByteBuffer g_storage;
static ByteBuffer g_textureUpload;
WGPUBuffer g_vertexBuffer;
WGPUBuffer g_uniformBuffer;
WGPUBuffer g_indexBuffer;
WGPUBuffer g_storageBuffer;
static std::array<WGPUBuffer, 2> g_stagingBuffers; // two buffers assumed; rotated round-robin in end_frame()
static WGPUSupportedLimits g_cachedLimits;

static ShaderState g_state;
static PipelineRef g_currentPipeline;

// for imgui debug
size_t g_drawCallCount;
size_t g_mergedDrawCallCount;
size_t g_lastVertSize;
size_t g_lastUniformSize;
size_t g_lastIndexSize;
size_t g_lastStorageSize;

using CommandList = std::vector<Command>;
struct ClipRect {
  int32_t x;
  int32_t y;
  int32_t width;
  int32_t height;
};
struct RenderPass {
  u32 resolveTarget = UINT32_MAX;
  ClipRect resolveRect;
  Vec4<float> clearColor{0.f, 0.f, 0.f, 0.f};
  CommandList commands;
  bool clear = true;
};
static std::vector<RenderPass> g_renderPasses;
static u32 g_currentRenderPass = UINT32_MAX;

std::vector<TextureHandle> g_resolvedTextures;
std::vector<TextureUpload> g_textureUploads;

static ByteBuffer g_serializedPipelines{};
static u32 g_serializedPipelineCount = 0;

template <typename PipelineConfig>
static void serialize_pipeline_config(ShaderType type, const PipelineConfig& config) {
  static_assert(std::has_unique_object_representations_v<PipelineConfig>);
  g_serializedPipelines.append(&type, sizeof(type));
  const u32 configSize = sizeof(config);
  g_serializedPipelines.append(&configSize, sizeof(configSize));
  g_serializedPipelines.append(&config, configSize);
  ++g_serializedPipelineCount;
}

template <typename PipelineConfig>
static PipelineRef find_pipeline(ShaderType type, const PipelineConfig& config, NewPipelineCallback&& cb,
                                 bool serialize = true) {
  PipelineRef hash = xxh3_hash(config, static_cast<HashType>(type));
  bool found = false;
  {
    std::scoped_lock guard{g_pipelineMutex};
    found = g_pipelines.contains(hash);
    if (!found) {
      if (g_hasPipelineThread) {
        const auto ref =
            std::find_if(g_queuedPipelines.begin(), g_queuedPipelines.end(), [=](auto v) { return v.first == hash; });
        if (ref != g_queuedPipelines.end()) {
          found = true;
        }
      } else {
        g_pipelines.try_emplace(hash, cb());
        if (serialize) {
          serialize_pipeline_config(type, config);
        }
        found = true;
      }
    }
    if (!found) {
      g_queuedPipelines.emplace_back(std::pair{hash, std::move(cb)});
      if (serialize) {
        serialize_pipeline_config(type, config);
      }
    }
  }
  if (!found) {
    g_pipelineCv.notify_one();
    queuedPipelines++;
  }
  return hash;
}
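
// Commands are not executed immediately: push_command() records them into the
// current RenderPass and render_pass() replays them against a
// WGPURenderPassEncoder at the end of the frame. Commands issued while no
// render pass is active (before begin_frame()) are dropped with a warning.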
static inline void push_command(CommandType type, const Command::Data& data) {
  if (g_currentRenderPass == UINT32_MAX) {
    Log.report(LOG_WARNING, FMT_STRING("Dropping command {}"), magic_enum::enum_name(type));
    return;
  }
  g_renderPasses[g_currentRenderPass].commands.push_back({
      .type = type,
#ifdef AURORA_GFX_DEBUG_GROUPS
      .debugGroupStack = g_debugGroupStack,
#endif
      .data = data,
  });
}

static inline Command& get_last_draw_command(ShaderType type) {
  if (g_currentRenderPass == UINT32_MAX) {
    Log.report(LOG_FATAL, FMT_STRING("No last command"));
    unreachable();
  }
  auto& last = g_renderPasses[g_currentRenderPass].commands.back();
  if (last.type != CommandType::Draw || last.data.draw.type != type) {
    Log.report(LOG_FATAL, FMT_STRING("Last command invalid: {} {}, expected {} {}"), magic_enum::enum_name(last.type),
               magic_enum::enum_name(last.data.draw.type), magic_enum::enum_name(CommandType::Draw),
               magic_enum::enum_name(type));
    unreachable();
  }
  return last;
}

static void push_draw_command(ShaderDrawCommand data) {
  push_command(CommandType::Draw, Command::Data{.draw = data});
  ++g_drawCallCount;
}

static Command::Data::SetViewportCommand g_cachedViewport;
void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept {
  Command::Data::SetViewportCommand cmd{left, top, width, height, znear, zfar};
  if (cmd != g_cachedViewport) {
    push_command(CommandType::SetViewport, Command::Data{.setViewport = cmd});
    g_cachedViewport = cmd;
  }
}

static Command::Data::SetScissorCommand g_cachedScissor;
void set_scissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h) noexcept {
  Command::Data::SetScissorCommand cmd{x, y, w, h};
  if (cmd != g_cachedScissor) {
    push_command(CommandType::SetScissor, Command::Data{.setScissor = cmd});
    g_cachedScissor = cmd;
  }
}

static inline bool operator==(const WGPUExtent3D& lhs, const WGPUExtent3D& rhs) {
  return lhs.width == rhs.width && lhs.height == rhs.height && lhs.depthOrArrayLayers == rhs.depthOrArrayLayers;
}
static inline bool operator!=(const WGPUExtent3D& lhs, const WGPUExtent3D& rhs) { return !(lhs == rhs); }

void resolve_color(const ClipRect& rect, uint32_t bind, GXTexFmt fmt, bool clear_depth) noexcept {
  if (g_resolvedTextures.size() < bind + 1) {
    g_resolvedTextures.resize(bind + 1);
  }
  const WGPUExtent3D size{
      .width = static_cast<uint32_t>(rect.width),
      .height = static_cast<uint32_t>(rect.height),
      .depthOrArrayLayers = 1,
  };
  if (!g_resolvedTextures[bind] || g_resolvedTextures[bind]->size != size) {
    g_resolvedTextures[bind] = new_render_texture(rect.width, rect.height, fmt, "Resolved Texture");
  }
  auto& currentPass = g_renderPasses[g_currentRenderPass];
  currentPass.resolveTarget = bind;
  currentPass.resolveRect = rect;
  auto& newPass = g_renderPasses.emplace_back();
  newPass.clearColor = gx::g_gxState.clearColor;
  newPass.clear = false; // TODO
  ++g_currentRenderPass;
}
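
// Per-shader-type entry points. merge_draw_command() extends the previous draw
// instead of recording a new one; it requires the new vertex/index ranges to be
// contiguous with the previous draw's ranges, which holds when the caller pushed
// them back-to-back into the shared buffers.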
template <> const stream::State& get_state() { return g_state.stream; }

template <> void push_draw_command(stream::DrawData data) {
  push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
}

template <> void merge_draw_command(stream::DrawData data) {
  auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
  if (last.vertRange.offset + last.vertRange.size != data.vertRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), last.vertRange.offset + last.vertRange.size,
               data.vertRange.offset);
  }
  if (last.indexRange.offset + last.indexRange.size != data.indexRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), last.indexRange.offset + last.indexRange.size,
               data.indexRange.offset);
  }
  last.vertRange.size += data.vertRange.size;
  last.indexRange.size += data.indexRange.size;
  last.indexCount += data.indexCount;
  ++g_mergedDrawCallCount;
}

template <> PipelineRef pipeline_ref(stream::PipelineConfig config) {
  return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
}

template <> void push_draw_command(model::DrawData data) {
  push_draw_command(ShaderDrawCommand{.type = ShaderType::Model, .model = data});
}

template <> PipelineRef pipeline_ref(model::PipelineConfig config) {
  return find_pipeline(ShaderType::Model, config, [=]() { return create_pipeline(g_state.model, config); });
}

static void pipeline_worker() {
  bool hasMore = false;
  while (true) {
    std::pair<PipelineRef, NewPipelineCallback> cb;
    {
      std::unique_lock lock{g_pipelineMutex};
      if (!hasMore) {
        g_pipelineCv.wait(lock, [] { return !g_queuedPipelines.empty() || g_pipelineThreadEnd; });
      }
      if (g_pipelineThreadEnd) {
        break;
      }
      cb = std::move(g_queuedPipelines.front());
    }
    auto result = cb.second();
    // std::this_thread::sleep_for(std::chrono::milliseconds{1500});
    {
      std::scoped_lock lock{g_pipelineMutex};
      if (!g_pipelines.try_emplace(cb.first, std::move(result)).second) {
        Log.report(LOG_FATAL, FMT_STRING("Duplicate pipeline {}"), cb.first);
        unreachable();
      }
      g_queuedPipelines.pop_front();
      hasMore = !g_queuedPipelines.empty();
    }
    createdPipelines++;
    queuedPipelines--;
  }
}

void initialize() {
  // No async pipelines for OpenGL (ES)
  if (webgpu::g_backendType == WGPUBackendType_OpenGL || webgpu::g_backendType == WGPUBackendType_OpenGLES) {
    g_hasPipelineThread = false;
  } else {
    g_pipelineThreadEnd = false;
    g_pipelineThread = std::thread(pipeline_worker);
    g_hasPipelineThread = true;
  }

  // For uniform & storage buffer offset alignments
  wgpuDeviceGetLimits(g_device, &g_cachedLimits);

  const auto createBuffer = [](WGPUBuffer& out, WGPUBufferUsageFlags usage, uint64_t size, const char* label) {
    if (size <= 0) {
      return;
    }
    const WGPUBufferDescriptor descriptor{
        .label = label,
        .usage = usage,
        .size = size,
    };
    out = wgpuDeviceCreateBuffer(g_device, &descriptor);
  };
  createBuffer(g_uniformBuffer, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst, UniformBufferSize,
               "Shared Uniform Buffer");
  createBuffer(g_vertexBuffer, WGPUBufferUsage_Vertex | WGPUBufferUsage_CopyDst, VertexBufferSize,
               "Shared Vertex Buffer");
  createBuffer(g_indexBuffer, WGPUBufferUsage_Index | WGPUBufferUsage_CopyDst, IndexBufferSize,
               "Shared Index Buffer");
  createBuffer(g_storageBuffer, WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst, StorageBufferSize,
               "Shared Storage Buffer");
  for (int i = 0; i < g_stagingBuffers.size(); ++i) {
    const auto label = fmt::format(FMT_STRING("Staging Buffer {}"), i);
    createBuffer(g_stagingBuffers[i], WGPUBufferUsage_MapWrite | WGPUBufferUsage_CopySrc, StagingBufferSize,
                 label.c_str());
  }
  map_staging_buffer();

  g_state.stream = stream::construct_state();
  g_state.model = model::construct_state();
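
  // Pipeline cache file layout (written by shutdown(), read back here):
  //   u32 count
  //   repeated count times: ShaderType type, u32 configSize, configSize bytes of PipelineConfig
  // Entries whose size or GXPipelineConfigVersion does not match are skipped below.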
  {
    // Load serialized pipeline cache
    std::string path = std::string{g_config.configPath} + "/pipeline_cache.bin";
    std::ifstream file(path, std::ios::in | std::ios::binary | std::ios::ate);
    if (file) {
      const auto size = file.tellg();
      file.seekg(0, std::ios::beg);
      constexpr size_t headerSize = sizeof(g_serializedPipelineCount);
      if (size != -1 && size > headerSize) {
        g_serializedPipelines.append_zeroes(size_t(size) - headerSize);
        file.read(reinterpret_cast<char*>(&g_serializedPipelineCount), headerSize);
        file.read(reinterpret_cast<char*>(g_serializedPipelines.data()), size_t(size) - headerSize);
      }
    }
  }
  if (g_serializedPipelineCount > 0) {
    size_t offset = 0;
    while (offset < g_serializedPipelines.size()) {
      ShaderType type = *reinterpret_cast<const ShaderType*>(g_serializedPipelines.data() + offset);
      offset += sizeof(ShaderType);
      u32 size = *reinterpret_cast<const u32*>(g_serializedPipelines.data() + offset);
      offset += sizeof(u32);
      switch (type) {
      case ShaderType::Stream: {
        if (size != sizeof(stream::PipelineConfig)) {
          break;
        }
        const auto config = *reinterpret_cast<const stream::PipelineConfig*>(g_serializedPipelines.data() + offset);
        if (config.version != gx::GXPipelineConfigVersion) {
          break;
        }
        find_pipeline(
            type, config, [=]() { return stream::create_pipeline(g_state.stream, config); }, false);
      } break;
      case ShaderType::Model: {
        if (size != sizeof(model::PipelineConfig)) {
          break;
        }
        const auto config = *reinterpret_cast<const model::PipelineConfig*>(g_serializedPipelines.data() + offset);
        if (config.version != gx::GXPipelineConfigVersion) {
          break;
        }
        find_pipeline(
            type, config, [=]() { return model::create_pipeline(g_state.model, config); }, false);
      } break;
      default:
        Log.report(LOG_WARNING, FMT_STRING("Unknown pipeline type {}"), static_cast<int>(type));
        break;
      }
      offset += size;
    }
  }
}

void shutdown() {
  if (g_hasPipelineThread) {
    g_pipelineThreadEnd = true;
    g_pipelineCv.notify_all();
    g_pipelineThread.join();
  }

  {
    // Write serialized pipelines to file
    const auto path = std::string{g_config.configPath} + "/pipeline_cache.bin";
    std::ofstream file(path, std::ios::out | std::ios::trunc | std::ios::binary);
    if (file) {
      file.write(reinterpret_cast<const char*>(&g_serializedPipelineCount), sizeof(g_serializedPipelineCount));
      file.write(reinterpret_cast<const char*>(g_serializedPipelines.data()), g_serializedPipelines.size());
    }
    g_serializedPipelines.clear();
    g_serializedPipelineCount = 0;
  }

  gx::shutdown();

  g_resolvedTextures.clear();
  g_textureUploads.clear();
  for (const auto& item : g_cachedBindGroups) {
    wgpuBindGroupRelease(item.second);
  }
  g_cachedBindGroups.clear();
  for (const auto& item : g_cachedSamplers) {
    wgpuSamplerRelease(item.second);
  }
  g_cachedSamplers.clear();
  for (const auto& item : g_pipelines) {
    wgpuRenderPipelineRelease(item.second);
  }
  g_pipelines.clear();
  g_queuedPipelines.clear();
  if (g_vertexBuffer != nullptr) {
    wgpuBufferDestroy(g_vertexBuffer);
    g_vertexBuffer = nullptr;
  }
  if (g_uniformBuffer != nullptr) {
    wgpuBufferDestroy(g_uniformBuffer);
    g_uniformBuffer = nullptr;
  }
  if (g_indexBuffer != nullptr) {
    wgpuBufferDestroy(g_indexBuffer);
    g_indexBuffer = nullptr;
  }
  if (g_storageBuffer != nullptr) {
    wgpuBufferDestroy(g_storageBuffer);
    g_storageBuffer = nullptr;
  }
  for (auto& item : g_stagingBuffers) {
    if (item != nullptr) {
      wgpuBufferDestroy(item);
    }
    item = nullptr;
  }
  g_renderPasses.clear();
  g_currentRenderPass = UINT32_MAX;
  g_state = {};
  queuedPipelines = 0;
  createdPipelines = 0;
}

static size_t currentStagingBuffer = 0;
static bool bufferMapped = false;
void map_staging_buffer() {
  bufferMapped = false;
  wgpuBufferMapAsync(
      g_stagingBuffers[currentStagingBuffer], WGPUMapMode_Write, 0, StagingBufferSize,
      [](WGPUBufferMapAsyncStatus status, void* userdata) {
        if (status == WGPUBufferMapAsyncStatus_DestroyedBeforeCallback) {
          return;
        } else if (status != WGPUBufferMapAsyncStatus_Success) {
          Log.report(LOG_FATAL, FMT_STRING("Buffer mapping failed: {}"), status);
          unreachable();
        }
        *static_cast<bool*>(userdata) = true;
      },
      &bufferMapped);
}
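
// Staging buffers are used round-robin: begin_frame() blocks until the current
// staging buffer is mapped and hands out sub-ranges of the mapping as
// ByteBuffers; end_frame() unmaps it, encodes the copies into the shared GPU
// buffers and textures, then starts mapping the next staging buffer for the
// following frame.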
void begin_frame() {
  while (!bufferMapped) {
    wgpuDeviceTick(g_device);
  }
  size_t bufferOffset = 0;
  auto& stagingBuf = g_stagingBuffers[currentStagingBuffer];
  const auto mapBuffer = [&](ByteBuffer& buf, uint64_t size) {
    if (size <= 0) {
      return;
    }
    buf = ByteBuffer{static_cast<uint8_t*>(wgpuBufferGetMappedRange(stagingBuf, bufferOffset, size)),
                     static_cast<size_t>(size)};
    bufferOffset += size;
  };
  mapBuffer(g_verts, VertexBufferSize);
  mapBuffer(g_uniforms, UniformBufferSize);
  mapBuffer(g_indices, IndexBufferSize);
  mapBuffer(g_storage, StorageBufferSize);
  mapBuffer(g_textureUpload, TextureUploadSize);

  g_drawCallCount = 0;
  g_mergedDrawCallCount = 0;

  g_renderPasses.emplace_back();
  g_renderPasses[0].clearColor = gx::g_gxState.clearColor;
  g_currentRenderPass = 0;
  // push_command(CommandType::SetViewport, Command::Data{.setViewport = g_cachedViewport});
  // push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor});
}

void end_frame(WGPUCommandEncoder cmd) {
  uint64_t bufferOffset = 0;
  const auto writeBuffer = [&](ByteBuffer& buf, WGPUBuffer& out, uint64_t size, std::string_view label) {
    const auto writeSize = buf.size(); // Only need to copy this many bytes
    if (writeSize > 0) {
      wgpuCommandEncoderCopyBufferToBuffer(cmd, g_stagingBuffers[currentStagingBuffer], bufferOffset, out, 0,
                                           ALIGN(writeSize, 4));
      buf.clear();
    }
    bufferOffset += size;
    return writeSize;
  };
  wgpuBufferUnmap(g_stagingBuffers[currentStagingBuffer]);
  g_lastVertSize = writeBuffer(g_verts, g_vertexBuffer, VertexBufferSize, "Vertex");
  g_lastUniformSize = writeBuffer(g_uniforms, g_uniformBuffer, UniformBufferSize, "Uniform");
  g_lastIndexSize = writeBuffer(g_indices, g_indexBuffer, IndexBufferSize, "Index");
  g_lastStorageSize = writeBuffer(g_storage, g_storageBuffer, StorageBufferSize, "Storage");
  {
    // Perform texture copies
    for (const auto& item : g_textureUploads) {
      const WGPUImageCopyBuffer buf{
          .layout =
              WGPUTextureDataLayout{
                  .offset = item.layout.offset + bufferOffset,
                  .bytesPerRow = ALIGN(item.layout.bytesPerRow, 256),
                  .rowsPerImage = item.layout.rowsPerImage,
              },
          .buffer = g_stagingBuffers[currentStagingBuffer],
      };
      wgpuCommandEncoderCopyBufferToTexture(cmd, &buf, &item.tex, &item.size);
    }
    g_textureUploads.clear();
    g_textureUpload.clear();
  }
  currentStagingBuffer = (currentStagingBuffer + 1) % g_stagingBuffers.size();
  map_staging_buffer();
  g_currentRenderPass = UINT32_MAX;
}
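
// Each recorded RenderPass becomes one WGPURenderPassEncoder. Only the first
// pass clears; passes created by resolve_color() load the existing contents
// and, after ending, copy the requested framebuffer region into the bound
// resolve texture. The final pass renders directly to the frame buffer and must
// not have a resolve target.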
void render(WGPUCommandEncoder cmd) {
  for (u32 i = 0; i < g_renderPasses.size(); ++i) {
    const auto& passInfo = g_renderPasses[i];
    bool finalPass = i == g_renderPasses.size() - 1;
    if (finalPass && passInfo.resolveTarget != UINT32_MAX) {
      Log.report(LOG_FATAL, FMT_STRING("Final render pass must not have resolve target"));
      unreachable();
    }
    const std::array attachments{
        WGPURenderPassColorAttachment{
            .view = webgpu::g_frameBuffer.view,
            .resolveTarget =
                webgpu::g_graphicsConfig.msaaSamples > 1 ? webgpu::g_frameBufferResolved.view : nullptr,
            .loadOp = passInfo.clear ? WGPULoadOp_Clear : WGPULoadOp_Load,
            .storeOp = WGPUStoreOp_Store,
            .clearColor = {NAN, NAN, NAN, NAN},
            .clearValue =
                {
                    .r = passInfo.clearColor.x(),
                    .g = passInfo.clearColor.y(),
                    .b = passInfo.clearColor.z(),
                    .a = passInfo.clearColor.w(),
                },
        },
    };
    const WGPURenderPassDepthStencilAttachment depthStencilAttachment{
        .view = webgpu::g_depthBuffer.view,
        .depthLoadOp = passInfo.clear ? WGPULoadOp_Clear : WGPULoadOp_Load,
        .depthStoreOp = WGPUStoreOp_Store,
        .clearDepth = NAN,
        .depthClearValue = 1.f,
    };
    const auto label = fmt::format(FMT_STRING("Render pass {}"), i);
    const WGPURenderPassDescriptor renderPassDescriptor{
        .label = label.c_str(),
        .colorAttachmentCount = attachments.size(),
        .colorAttachments = attachments.data(),
        .depthStencilAttachment = &depthStencilAttachment,
    };
    auto pass = wgpuCommandEncoderBeginRenderPass(cmd, &renderPassDescriptor);
    render_pass(pass, i);
    wgpuRenderPassEncoderEnd(pass);
    wgpuRenderPassEncoderRelease(pass);

    if (passInfo.resolveTarget != UINT32_MAX) {
      WGPUImageCopyTexture src{
          .origin =
              WGPUOrigin3D{
                  .x = static_cast<uint32_t>(passInfo.resolveRect.x),
                  .y = static_cast<uint32_t>(passInfo.resolveRect.y),
              },
      };
      if (webgpu::g_graphicsConfig.msaaSamples > 1) {
        src.texture = webgpu::g_frameBufferResolved.texture;
      } else {
        src.texture = webgpu::g_frameBuffer.texture;
      }
      auto& target = g_resolvedTextures[passInfo.resolveTarget];
      const WGPUImageCopyTexture dst{
          .texture = target->texture,
      };
      const WGPUExtent3D size{
          .width = static_cast<uint32_t>(passInfo.resolveRect.width),
          .height = static_cast<uint32_t>(passInfo.resolveRect.height),
          .depthOrArrayLayers = 1,
      };
      wgpuCommandEncoderCopyTextureToTexture(cmd, &src, &dst, &size);
    }
  }
  g_renderPasses.clear();
}

void render_pass(WGPURenderPassEncoder pass, u32 idx) {
  g_currentPipeline = UINTPTR_MAX;
#ifdef AURORA_GFX_DEBUG_GROUPS
  std::vector<std::string> lastDebugGroupStack;
#endif

  for (const auto& cmd : g_renderPasses[idx].commands) {
#ifdef AURORA_GFX_DEBUG_GROUPS
    {
      size_t firstDiff = lastDebugGroupStack.size();
      for (size_t i = 0; i < lastDebugGroupStack.size(); ++i) {
        if (i >= cmd.debugGroupStack.size() || cmd.debugGroupStack[i] != lastDebugGroupStack[i]) {
          firstDiff = i;
          break;
        }
      }
      for (size_t i = firstDiff; i < lastDebugGroupStack.size(); ++i) {
        wgpuRenderPassEncoderPopDebugGroup(pass);
      }
      for (size_t i = firstDiff; i < cmd.debugGroupStack.size(); ++i) {
        wgpuRenderPassEncoderPushDebugGroup(pass, cmd.debugGroupStack[i].c_str());
      }
      lastDebugGroupStack = cmd.debugGroupStack;
    }
#endif
    switch (cmd.type) {
    case CommandType::SetViewport: {
      const auto& vp = cmd.data.setViewport;
      wgpuRenderPassEncoderSetViewport(pass, vp.left, vp.top, vp.width, vp.height, vp.znear, vp.zfar);
    } break;
    case CommandType::SetScissor: {
      const auto& sc = cmd.data.setScissor;
      wgpuRenderPassEncoderSetScissorRect(pass, sc.x, sc.y, sc.w, sc.h);
    } break;
    case CommandType::Draw: {
      const auto& draw = cmd.data.draw;
      switch (draw.type) {
      case ShaderType::Stream:
        stream::render(g_state.stream, draw.stream, pass);
        break;
      case ShaderType::Model:
        model::render(g_state.model, draw.model, pass);
        break;
      }
    } break;
    }
  }

#ifdef AURORA_GFX_DEBUG_GROUPS
  for (size_t i = 0; i < lastDebugGroupStack.size(); ++i) {
    wgpuRenderPassEncoderPopDebugGroup(pass);
  }
#endif
}

bool bind_pipeline(PipelineRef ref, WGPURenderPassEncoder pass) {
  if (ref == g_currentPipeline) {
    return true;
  }
  std::lock_guard guard{g_pipelineMutex};
  const auto it = g_pipelines.find(ref);
  if (it == g_pipelines.end()) {
    return false;
  }
  wgpuRenderPassEncoderSetPipeline(pass, it->second);
  g_currentPipeline = ref;
  return true;
}
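
// Shared-buffer suballocation helpers. push() copies data into a CPU-side
// ByteBuffer while map() reserves a zeroed range for the caller to fill; both
// pad the returned range up to the requested alignment (uniform and storage
// pushes use the device's min*BufferOffsetAlignment limits), and a zero-length
// request still reserves one alignment unit.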
static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length, size_t alignment) {
  size_t padding = 0;
  if (alignment != 0) {
    padding = alignment - length % alignment;
  }
  auto begin = target.size();
  if (length == 0) {
    length = alignment;
    target.append_zeroes(alignment);
  } else {
    target.append(data, length);
    if (padding > 0) {
      target.append_zeroes(padding);
    }
  }
  return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
}
static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
  size_t padding = 0;
  if (alignment != 0) {
    padding = alignment - length % alignment;
  }
  if (length == 0) {
    length = alignment;
  }
  auto begin = target.size();
  target.append_zeroes(length + padding);
  return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
}

Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0); }
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0); }
Range push_uniform(const uint8_t* data, size_t length) {
  return push(g_uniforms, data, length, g_cachedLimits.limits.minUniformBufferOffsetAlignment);
}
Range push_storage(const uint8_t* data, size_t length) {
  return push(g_storage, data, length, g_cachedLimits.limits.minStorageBufferOffsetAlignment);
}
Range push_texture_data(const uint8_t* data, size_t length, u32 bytesPerRow, u32 rowsPerImage) {
  // For CopyBufferToTexture, we need an alignment of 256 per row (see Dawn kTextureBytesPerRowAlignment)
  const auto copyBytesPerRow = ALIGN(bytesPerRow, 256);
  const auto range = map(g_textureUpload, copyBytesPerRow * rowsPerImage, 0);
  u8* dst = g_textureUpload.data() + range.offset;
  for (u32 i = 0; i < rowsPerImage; ++i) {
    memcpy(dst, data, bytesPerRow);
    data += bytesPerRow;
    dst += copyBytesPerRow;
  }
  return range;
}

std::pair<ByteBuffer, Range> map_verts(size_t length) {
  const auto range = map(g_verts, length, 4);
  return {ByteBuffer{g_verts.data() + range.offset, range.size}, range};
}
std::pair<ByteBuffer, Range> map_indices(size_t length) {
  const auto range = map(g_indices, length, 4);
  return {ByteBuffer{g_indices.data() + range.offset, range.size}, range};
}
std::pair<ByteBuffer, Range> map_uniform(size_t length) {
  const auto range = map(g_uniforms, length, g_cachedLimits.limits.minUniformBufferOffsetAlignment);
  return {ByteBuffer{g_uniforms.data() + range.offset, range.size}, range};
}
std::pair<ByteBuffer, Range> map_storage(size_t length) {
  const auto range = map(g_storage, length, g_cachedLimits.limits.minStorageBufferOffsetAlignment);
  return {ByteBuffer{g_storage.data() + range.offset, range.size}, range};
}

BindGroupRef bind_group_ref(const WGPUBindGroupDescriptor& descriptor) {
  const auto id = xxh3_hash(descriptor);
  if (!g_cachedBindGroups.contains(id)) {
    g_cachedBindGroups.try_emplace(id, wgpuDeviceCreateBindGroup(g_device, &descriptor));
  }
  return id;
}
WGPUBindGroup find_bind_group(BindGroupRef id) {
  const auto it = g_cachedBindGroups.find(id);
  if (it == g_cachedBindGroups.end()) {
    Log.report(LOG_FATAL, FMT_STRING("get_bind_group: failed to locate {}"), id);
    unreachable();
  }
  return it->second;
}

WGPUSampler sampler_ref(const WGPUSamplerDescriptor& descriptor) {
  const auto id = xxh3_hash(descriptor);
  auto it = g_cachedSamplers.find(id);
  if (it == g_cachedSamplers.end()) {
    it = g_cachedSamplers.try_emplace(id, wgpuDeviceCreateSampler(g_device, &descriptor)).first;
  }
  return it->second;
}

uint32_t align_uniform(uint32_t value) { return ALIGN(value, g_cachedLimits.limits.minUniformBufferOffsetAlignment); }
} // namespace aurora::gfx

void push_debug_group(const char* label) {
#ifdef AURORA_GFX_DEBUG_GROUPS
  aurora::gfx::g_debugGroupStack.emplace_back(label);
#endif
}
void pop_debug_group() {
#ifdef AURORA_GFX_DEBUG_GROUPS
  aurora::gfx::g_debugGroupStack.pop_back();
#endif
}