diff --git a/src/dawn_native/d3d12/CommandBufferD3D12.cpp b/src/dawn_native/d3d12/CommandBufferD3D12.cpp index 8ee31ade37..151ab6edfc 100644 --- a/src/dawn_native/d3d12/CommandBufferD3D12.cpp +++ b/src/dawn_native/d3d12/CommandBufferD3D12.cpp @@ -567,10 +567,15 @@ namespace dawn_native { namespace d3d12 { wgpu::BufferUsage bufferUsages = wgpu::BufferUsage::None; for (size_t i = 0; i < usages.buffers.size(); ++i) { + Buffer* buffer = ToBackend(usages.buffers[i]); + + // TODO(jiawei.shao@intel.com): clear storage buffers with + // ClearUnorderedAccessView*(). + buffer->GetDevice()->ConsumedError(buffer->EnsureDataInitialized(commandContext)); + D3D12_RESOURCE_BARRIER barrier; - if (ToBackend(usages.buffers[i]) - ->TrackUsageAndGetResourceBarrier(commandContext, &barrier, - usages.bufferUsages[i])) { + if (buffer->TrackUsageAndGetResourceBarrier(commandContext, &barrier, + usages.bufferUsages[i])) { barriers.push_back(barrier); } bufferUsages |= usages.bufferUsages[i]; diff --git a/src/dawn_native/metal/CommandBufferMTL.mm b/src/dawn_native/metal/CommandBufferMTL.mm index 74b64612ec..9300f3806b 100644 --- a/src/dawn_native/metal/CommandBufferMTL.mm +++ b/src/dawn_native/metal/CommandBufferMTL.mm @@ -535,7 +535,8 @@ namespace dawn_native { namespace metal { const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass; size_t nextPassNumber = 0; - auto LazyClearForPass = [](const PassResourceUsage& usages) { + auto LazyClearForPass = [](const PassResourceUsage& usages, + CommandRecordingContext* commandContext) { for (size_t i = 0; i < usages.textures.size(); ++i) { Texture* texture = ToBackend(usages.textures[i]); // Clear textures that are not output attachments. 
Output attachments will be @@ -545,6 +546,9 @@ namespace dawn_native { namespace metal { texture->EnsureSubresourceContentInitialized(texture->GetAllSubresources()); } } + for (BufferBase* bufferBase : usages.buffers) { + ToBackend(bufferBase)->EnsureDataInitialized(commandContext); + } }; Command type; @@ -553,7 +557,7 @@ namespace dawn_native { namespace metal { case Command::BeginComputePass: { mCommands.NextCommand<BeginComputePassCmd>(); - LazyClearForPass(passResourceUsages[nextPassNumber]); + LazyClearForPass(passResourceUsages[nextPassNumber], commandContext); commandContext->EndBlit(); DAWN_TRY(EncodeComputePass(commandContext)); @@ -565,7 +569,7 @@ namespace dawn_native { namespace metal { case Command::BeginRenderPass: { BeginRenderPassCmd* cmd = mCommands.NextCommand<BeginRenderPassCmd>(); - LazyClearForPass(passResourceUsages[nextPassNumber]); + LazyClearForPass(passResourceUsages[nextPassNumber], commandContext); commandContext->EndBlit(); LazyClearRenderPassAttachments(cmd); diff --git a/src/dawn_native/opengl/CommandBufferGL.cpp b/src/dawn_native/opengl/CommandBufferGL.cpp index 51b9e7c1ec..951999fb6f 100644 --- a/src/dawn_native/opengl/CommandBufferGL.cpp +++ b/src/dawn_native/opengl/CommandBufferGL.cpp @@ -462,6 +462,10 @@ namespace dawn_native { namespace opengl { texture->EnsureSubresourceContentInitialized(texture->GetAllSubresources()); } } + + for (BufferBase* bufferBase : usages.buffers) { + ToBackend(bufferBase)->EnsureDataInitialized(); + } }; const std::vector<PassResourceUsage>& passResourceUsages = GetResourceUsages().perPass; diff --git a/src/dawn_native/vulkan/CommandBufferVk.cpp b/src/dawn_native/vulkan/CommandBufferVk.cpp index ba48a35c21..9e281dce9e 100644 --- a/src/dawn_native/vulkan/CommandBufferVk.cpp +++ b/src/dawn_native/vulkan/CommandBufferVk.cpp @@ -387,6 +387,7 @@ namespace dawn_native { namespace vulkan { for (size_t i = 0; i < usages.buffers.size(); ++i) { Buffer* buffer = ToBackend(usages.buffers[i]); + buffer->EnsureDataInitialized(recordingContext); 
buffer->TransitionUsageNow(recordingContext, usages.bufferUsages[i], &bufferBarriers, &srcStages, &dstStages); } diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp index 0f685e59ed..6234496d35 100644 --- a/src/tests/end2end/BufferZeroInitTests.cpp +++ b/src/tests/end2end/BufferZeroInitTests.cpp @@ -80,7 +80,7 @@ class BufferZeroInitTest : public DawnTest { descriptor.size = size; descriptor.format = format; descriptor.usage = wgpu::TextureUsage::CopyDst | wgpu::TextureUsage::CopySrc | - wgpu::TextureUsage::OutputAttachment; + wgpu::TextureUsage::OutputAttachment | wgpu::TextureUsage::Storage; wgpu::Texture texture = device.CreateTexture(&descriptor); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); @@ -121,10 +121,8 @@ class BufferZeroInitTest : public DawnTest { const uint64_t bufferSize = spec.bufferOffset + spec.extraBytes + utils::RequiredBytesInCopy(spec.bytesPerRow, spec.rowsPerImage, spec.textureSize, kTextureFormat); - wgpu::BufferDescriptor bufferDescriptor; - bufferDescriptor.size = bufferSize; - bufferDescriptor.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; - wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor); + wgpu::Buffer buffer = + CreateBuffer(bufferSize, wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst); const wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView( buffer, spec.bufferOffset, spec.bytesPerRow, spec.rowsPerImage); @@ -151,6 +149,44 @@ class BufferZeroInitTest : public DawnTest { EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedValues.data(), buffer, 0, expectedValues.size()); } + + void TestBufferZeroInitInBindGroup(const char* computeShader, + uint64_t bufferOffset, + uint64_t boundBufferSize, + const std::vector<uint32_t>& expectedBufferData) { + wgpu::ComputePipelineDescriptor pipelineDescriptor; + pipelineDescriptor.layout = nullptr; + pipelineDescriptor.computeStage.module = + utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, 
computeShader); + pipelineDescriptor.computeStage.entryPoint = "main"; + wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&pipelineDescriptor); + + const uint64_t bufferSize = expectedBufferData.size() * sizeof(uint32_t); + wgpu::Buffer buffer = + CreateBuffer(bufferSize, wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc | + wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform); + wgpu::Texture outputTexture = + CreateAndInitializeTexture({1u, 1u, 1u}, wgpu::TextureFormat::RGBA8Unorm); + + wgpu::BindGroup bindGroup = utils::MakeBindGroup( + device, pipeline.GetBindGroupLayout(0), + {{0, buffer, bufferOffset, boundBufferSize}, {1u, outputTexture.CreateView()}}); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + wgpu::ComputePassEncoder computePass = encoder.BeginComputePass(); + computePass.SetBindGroup(0, bindGroup); + computePass.SetPipeline(pipeline); + computePass.Dispatch(1u); + computePass.EndPass(); + wgpu::CommandBuffer commandBuffer = encoder.Finish(); + + EXPECT_LAZY_CLEAR(1u, queue.Submit(1, &commandBuffer)); + + EXPECT_BUFFER_U32_RANGE_EQ(expectedBufferData.data(), buffer, 0, expectedBufferData.size()); + + constexpr RGBA8 kExpectedColor = {0, 255, 0, 255}; + EXPECT_PIXEL_RGBA8_EQ(kExpectedColor, outputTexture, 0u, 0u); + } }; // Test that calling writeBuffer to overwrite the entire buffer doesn't need to lazily initialize @@ -493,11 +529,7 @@ TEST_P(BufferZeroInitTest, CopyBufferToTexture) { { constexpr uint64_t kOffset = 0; const uint32_t totalBufferSize = requiredBufferSizeForCopy + kOffset; - wgpu::BufferDescriptor bufferDescriptor; - bufferDescriptor.size = totalBufferSize; - bufferDescriptor.usage = kBufferUsage; - - wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor); + wgpu::Buffer buffer = CreateBuffer(totalBufferSize, kBufferUsage); const wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView( buffer, kOffset, kTextureBytesPerRowAlignment, kTextureSize.height); @@ -515,11 +547,7 @@ 
TEST_P(BufferZeroInitTest, CopyBufferToTexture) { { constexpr uint64_t kOffset = 8u; const uint32_t totalBufferSize = requiredBufferSizeForCopy + kOffset; - wgpu::BufferDescriptor bufferDescriptor; - bufferDescriptor.size = totalBufferSize; - bufferDescriptor.usage = kBufferUsage; - - wgpu::Buffer buffer = device.CreateBuffer(&bufferDescriptor); + wgpu::Buffer buffer = CreateBuffer(totalBufferSize, kBufferUsage); const wgpu::BufferCopyView bufferCopyView = utils::CreateBufferCopyView( buffer, kOffset, kTextureBytesPerRowAlignment, kTextureSize.height); @@ -598,6 +626,136 @@ TEST_P(BufferZeroInitTest, Copy2DArrayTextureToBuffer) { } } +// Test that the buffer will be lazy initialized correctly when its first use is to be bound as a +// uniform buffer. +TEST_P(BufferZeroInitTest, BoundAsUniformBuffer) { + // See https://github.com/google/shaderc/issues/1123 for more details. + // TODO(jiawei.shao@intel.com): enable this test when the related SPVC issue is fixed. + DAWN_SKIP_TEST_IF(IsSpvcParserBeingUsed()); + + const char* computeShader = R"( + #version 450 + layout(set = 0, binding = 0, std140) uniform UBO { + uvec4 value; + } ubo; + layout(set = 0, binding = 1, rgba8) uniform writeonly image2D outImage; + void main() { + if (ubo.value == uvec4(0, 0, 0, 0)) { + imageStore(outImage, ivec2(0, 0), vec4(0.f, 1.f, 0.f, 1.f)); + } else { + imageStore(outImage, ivec2(0, 0), vec4(1.f, 0.f, 0.f, 1.f)); + } + } + )"; + + constexpr uint32_t kBoundBufferSize = 16u; + + // Bind the whole buffer + { + const std::vector expected(kBoundBufferSize / sizeof(uint32_t), 0u); + TestBufferZeroInitInBindGroup(computeShader, 0, kBoundBufferSize, expected); + } + + // Bind a range of a buffer + { + constexpr uint32_t kOffset = 256u; + constexpr uint32_t kExtraBytes = 16u; + const std::vector expected( + (kBoundBufferSize + kOffset + kExtraBytes) / sizeof(uint32_t), 0u); + TestBufferZeroInitInBindGroup(computeShader, kOffset, kBoundBufferSize, expected); + } +} + +// Test that the buffer 
will be lazy initialized correctly when its first use is to be bound as a +// read-only storage buffer. +TEST_P(BufferZeroInitTest, BoundAsReadonlyStorageBuffer) { + // See https://github.com/google/shaderc/issues/1123 for more details. + // TODO(jiawei.shao@intel.com): enable this test when the related SPVC issue is fixed. + DAWN_SKIP_TEST_IF(IsSpvcParserBeingUsed()); + + const char* computeShader = R"( + #version 450 + layout(set = 0, binding = 0, std140) readonly buffer SSBO { + uvec4 value; + } ssbo; + layout(set = 0, binding = 1, rgba8) uniform writeonly image2D outImage; + void main() { + if (ssbo.value == uvec4(0, 0, 0, 0)) { + imageStore(outImage, ivec2(0, 0), vec4(0.f, 1.f, 0.f, 1.f)); + } else { + imageStore(outImage, ivec2(0, 0), vec4(1.f, 0.f, 0.f, 1.f)); + } + } + )"; + + constexpr uint32_t kBoundBufferSize = 16u; + + // Bind the whole buffer + { + const std::vector expected(kBoundBufferSize / sizeof(uint32_t), 0u); + TestBufferZeroInitInBindGroup(computeShader, 0, kBoundBufferSize, expected); + } + + // Bind a range of a buffer + { + constexpr uint32_t kOffset = 256u; + constexpr uint32_t kExtraBytes = 16u; + const std::vector expected( + (kBoundBufferSize + kOffset + kExtraBytes) / sizeof(uint32_t), 0u); + TestBufferZeroInitInBindGroup(computeShader, kOffset, kBoundBufferSize, expected); + } +} + +// Test that the buffer will be lazy initialized correctly when its first use is to be bound as a +// storage buffer. +TEST_P(BufferZeroInitTest, BoundAsStorageBuffer) { + // See https://github.com/google/shaderc/issues/1123 for more details. + // TODO(jiawei.shao@intel.com): enable this test when the related SPVC issue is fixed. 
+ DAWN_SKIP_TEST_IF(IsSpvcParserBeingUsed()); + + const char* computeShader = R"( + #version 450 + layout(set = 0, binding = 0, std140) buffer SSBO { + uvec4 value[2]; + } ssbo; + layout(set = 0, binding = 1, rgba8) uniform writeonly image2D outImage; + void main() { + if (ssbo.value[0] == uvec4(0, 0, 0, 0) && ssbo.value[1] == uvec4(0, 0, 0, 0)) { + imageStore(outImage, ivec2(0, 0), vec4(0.f, 1.f, 0.f, 1.f)); + } else { + imageStore(outImage, ivec2(0, 0), vec4(1.f, 0.f, 0.f, 1.f)); + } + + memoryBarrier(); + barrier(); + + ssbo.value[0].x = 10u; + ssbo.value[1].y = 20u; + } + )"; + + constexpr uint32_t kBoundBufferSize = 32u; + + // Bind the whole buffer + { + std::vector expected(kBoundBufferSize / sizeof(uint32_t), 0u); + expected[0] = 10u; + expected[5] = 20u; + TestBufferZeroInitInBindGroup(computeShader, 0, kBoundBufferSize, expected); + } + + // Bind a range of a buffer + { + constexpr uint32_t kOffset = 256u; + constexpr uint32_t kExtraBytes = 16u; + std::vector expected( + (kBoundBufferSize + kOffset + kExtraBytes) / sizeof(uint32_t), 0u); + expected[kOffset / sizeof(uint32_t)] = 10u; + expected[kOffset / sizeof(uint32_t) + 5u] = 20u; + TestBufferZeroInitInBindGroup(computeShader, kOffset, kBoundBufferSize, expected); + } +} + DAWN_INSTANTIATE_TEST(BufferZeroInitTest, D3D12Backend({"nonzero_clear_resources_on_creation_for_testing", "lazy_clear_buffer_on_first_use"}),