dawn-cmake/src/tests/end2end/ComputeStorageBufferBarrierTests.cpp
Brandon Jones 0d50a2c770 ComputePipelineDescriptor.computeStage->compute
Deprecates the computeStage member of the descriptor in favor of compute
as described by the spec. In order to support both variants without
breaking backwards compatibility some code had to be manually added to
the wire client to copy from the deprecated member to the new one and
vice versa.

Change-Id: I9d5c2fc9c446c927c5792c9af9ed56c90060b65b
Bug: dawn:800
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/53884
Commit-Queue: Brandon Jones <bajones@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
2021-06-09 18:07:32 +00:00

430 lines
18 KiB
C++

// Copyright 2019 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "tests/DawnTest.h"
#include "utils/WGPUHelpers.h"
// Fixture shared by the storage-buffer barrier tests in this file. It only provides the sizing
// constants the tests use for their buffers, dispatch counts and expected values.
class ComputeStorageBufferBarrierTests : public DawnTest {
  protected:
    // Number of u32 elements stored in each test buffer.
    static constexpr uint32_t kNumValues = 100;
    // Total number of dispatches recorded by each increment / ping-pong test.
    static constexpr uint32_t kIterations = 100;

    // The WGSL sources in this file spell the array lengths out literally
    // (array<u32, 100> and array<vec4<u32>, 25>), so the host-side element count has to be
    // kept in sync with them by hand.
    static_assert(kNumValues == 100,
                  "kNumValues must match the array sizes hard-coded in the WGSL shaders");
    // The ping-pong tests expect buffer A to hold kIterations increments and buffer B to hold
    // kIterations - 1, which is only true when the final dispatch writes into buffer A, i.e.
    // when the number of dispatches is even.
    static_assert(kIterations % 2 == 0,
                  "kIterations must be even for the ping-pong expectations to hold");
};
// Checks that a series of dispatches which all read-modify-write the same storage buffer are
// synchronized with each other: every dispatch adds 0x1234 to each element, so after kIterations
// dispatches each element is expected to hold 0x1234 * kIterations.
TEST_P(ComputeStorageBufferBarrierTests, AddIncrement) {
    // Zero-filled initial contents, and the contents expected once every dispatch has run.
    std::vector<uint32_t> zeros(kNumValues, 0);
    std::vector<uint32_t> finalValues(kNumValues, 0x1234 * kIterations);

    const uint64_t byteSize = static_cast<uint64_t>(zeros.size() * sizeof(uint32_t));
    wgpu::Buffer counters = utils::CreateBufferFromData(
        device, zeros.data(), byteSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);

    // One invocation per element; each adds 0x1234 to the element it owns.
    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.compute.module = utils::CreateShaderModule(device, R"(
[[block]] struct Buf {
data : array<u32, 100>;
};
[[group(0), binding(0)]] var<storage, read_write> buf : Buf;
[[stage(compute)]]
fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
buf.data[GlobalInvocationID.x] = buf.data[GlobalInvocationID.x] + 0x1234u;
}
)");
    descriptor.compute.entryPoint = "main";
    wgpu::ComputePipeline incrementPipeline = device.CreateComputePipeline(&descriptor);

    wgpu::BindGroup incrementGroup = utils::MakeBindGroup(
        device, incrementPipeline.GetBindGroupLayout(0), {{0, counters, 0, byteSize}});

    // Record every dispatch into a single compute pass, reusing the same pipeline and bind group.
    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();
    computePass.SetPipeline(incrementPipeline);
    computePass.SetBindGroup(0, incrementGroup);
    for (uint32_t remaining = kIterations; remaining > 0; --remaining) {
        computePass.Dispatch(kNumValues);
    }
    computePass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    EXPECT_BUFFER_U32_RANGE_EQ(finalValues.data(), counters, 0, kNumValues);
}
// Checks that dispatches which bounce data back and forth between two read-write storage buffers
// are synchronized. Each dispatch writes "source + 0x1234" into the destination, and the roles of
// the two buffers are swapped after every dispatch, all inside one compute pass.
TEST_P(ComputeStorageBufferBarrierTests, AddPingPong) {
    std::vector<uint32_t> zeros(kNumValues, 0);
    // Buffer A receives the last write, so it ends one increment ahead of buffer B.
    std::vector<uint32_t> finalA(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> finalB(kNumValues, 0x1234 * (kIterations - 1));

    const uint64_t byteSize = static_cast<uint64_t>(zeros.size() * sizeof(uint32_t));

    wgpu::Buffer bufA = utils::CreateBufferFromData(
        device, zeros.data(), byteSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
    wgpu::Buffer bufB = utils::CreateBufferFromData(
        device, zeros.data(), byteSize, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.compute.module = utils::CreateShaderModule(device, R"(
// TODO(crbug.com/tint/386): Use the same struct.
[[block]] struct Src {
data : array<u32, 100>;
};
[[block]] struct Dst {
data : array<u32, 100>;
};
[[group(0), binding(0)]] var<storage, read_write> src : Src;
[[group(0), binding(1)]] var<storage, read_write> dst : Dst;
[[stage(compute)]]
fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + 0x1234u;
}
)");
    descriptor.compute.entryPoint = "main";
    wgpu::ComputePipeline addPipeline = device.CreateComputePipeline(&descriptor);

    // Binding 0 is the source and binding 1 the destination, so the two groups describe the two
    // directions of the ping-pong.
    wgpu::BindGroup aToB = utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                                                {
                                                    {0, bufA, 0, byteSize},
                                                    {1, bufB, 0, byteSize},
                                                });
    wgpu::BindGroup bToA = utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                                                {
                                                    {0, bufB, 0, byteSize},
                                                    {1, bufA, 0, byteSize},
                                                });

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();
    computePass.SetPipeline(addPipeline);
    // Every round performs one A->B dispatch followed by one B->A dispatch.
    for (uint32_t round = 0; round < kIterations / 2; ++round) {
        computePass.SetBindGroup(0, aToB);
        computePass.Dispatch(kNumValues);

        computePass.SetBindGroup(0, bToA);
        computePass.Dispatch(kNumValues);
    }
    computePass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    EXPECT_BUFFER_U32_RANGE_EQ(finalA.data(), bufA, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(finalB.data(), bufB, 0, kNumValues);
}
// Checks that, within a single compute pass, dispatches stay synchronized when each buffer
// alternates between being bound as a read-only storage buffer (the source) and as a read-write
// storage buffer (the destination).
TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) {
    std::vector<uint32_t> initial(kNumValues, 0);
    // The final dispatch writes into the first buffer, leaving the second one step behind.
    std::vector<uint32_t> wantFirst(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> wantSecond(kNumValues, 0x1234 * (kIterations - 1));

    const uint64_t sizeInBytes = static_cast<uint64_t>(initial.size() * sizeof(uint32_t));

    wgpu::Buffer first = utils::CreateBufferFromData(
        device, initial.data(), sizeInBytes,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
    wgpu::Buffer second = utils::CreateBufferFromData(
        device, initial.data(), sizeInBytes,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);

    // The source binding is declared read-only in the shader; only the destination is writable.
    wgpu::ShaderModule shader = utils::CreateShaderModule(device, R"(
// TODO(crbug.com/tint/386): Use the same struct.
[[block]] struct Src {
data : array<u32, 100>;
};
[[block]] struct Dst {
data : array<u32, 100>;
};
[[group(0), binding(0)]] var<storage, read> src : Src;
[[group(0), binding(1)]] var<storage, read_write> dst : Dst;
[[stage(compute)]]
fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] + 0x1234u;
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.compute.entryPoint = "main";
    descriptor.compute.module = shader;
    wgpu::ComputePipeline addPipeline = device.CreateComputePipeline(&descriptor);

    // Two bind groups covering both directions: first -> second and second -> first.
    wgpu::BindGroup firstToSecond =
        utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                             {
                                 {0, first, 0, sizeInBytes},
                                 {1, second, 0, sizeInBytes},
                             });
    wgpu::BindGroup secondToFirst =
        utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                             {
                                 {0, second, 0, sizeInBytes},
                                 {1, first, 0, sizeInBytes},
                             });

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();
    computePass.SetPipeline(addPipeline);
    // Each round issues a pair of dispatches, one per direction.
    for (uint32_t round = 0; round < kIterations / 2; ++round) {
        computePass.SetBindGroup(0, firstToSecond);
        computePass.Dispatch(kNumValues);

        computePass.SetBindGroup(0, secondToFirst);
        computePass.Dispatch(kNumValues);
    }
    computePass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    EXPECT_BUFFER_U32_RANGE_EQ(wantFirst.data(), first, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(wantSecond.data(), second, 0, kNumValues);
}
// Checks that buffers switching between Uniform and Storage usage are transitioned and
// synchronized correctly when every dispatch lives in its own compute pass. The two buffers swap
// the uniform (source) and storage (destination) roles from one pass to the next.
TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
    std::vector<uint32_t> zeros(kNumValues, 0);
    // Buffer A is the destination of the last pass, so it ends one increment ahead of buffer B.
    std::vector<uint32_t> finalA(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> finalB(kNumValues, 0x1234 * (kIterations - 1));

    const uint64_t byteSize = static_cast<uint64_t>(zeros.size() * sizeof(uint32_t));

    wgpu::Buffer bufA = utils::CreateBufferFromData(
        device, zeros.data(), byteSize,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
    wgpu::Buffer bufB = utils::CreateBufferFromData(
        device, zeros.data(), byteSize,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);

    // The shader works on vec4<u32> elements, so one invocation covers four u32 values.
    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.compute.module = utils::CreateShaderModule(device, R"(
[[block]] struct Buf {
data : array<vec4<u32>, 25>;
};
[[group(0), binding(0)]] var<uniform> src : Buf;
[[group(0), binding(1)]] var<storage, read_write> dst : Buf;
[[stage(compute)]]
fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] +
vec4<u32>(0x1234u, 0x1234u, 0x1234u, 0x1234u);
}
)");
    descriptor.compute.entryPoint = "main";
    wgpu::ComputePipeline addPipeline = device.CreateComputePipeline(&descriptor);

    // Binding 0 is the uniform source, binding 1 the storage destination.
    wgpu::BindGroup aToB = utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                                                {
                                                    {0, bufA, 0, byteSize},
                                                    {1, bufB, 0, byteSize},
                                                });
    wgpu::BindGroup bToA = utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                                                {
                                                    {0, bufB, 0, byteSize},
                                                    {1, bufA, 0, byteSize},
                                                });
    wgpu::BindGroup directions[2] = {aToB, bToA};

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    // One compute pass per dispatch; even iterations go A -> B, odd iterations go B -> A.
    for (uint32_t step = 0; step < kIterations; ++step) {
        wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();
        computePass.SetPipeline(addPipeline);
        computePass.SetBindGroup(0, directions[step % 2]);
        computePass.Dispatch(kNumValues / 4);
        computePass.EndPass();
    }

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    EXPECT_BUFFER_U32_RANGE_EQ(finalA.data(), bufA, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(finalB.data(), bufB, 0, kNumValues);
}
// Checks that buffers switching between Uniform and Storage usage are transitioned and
// synchronized correctly when all dispatches are recorded into one compute pass. The two buffers
// swap the uniform (source) and storage (destination) roles from one dispatch to the next.
TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) {
    std::vector<uint32_t> initial(kNumValues, 0);
    // The first buffer is written by the last dispatch, so the second one lags one step behind.
    std::vector<uint32_t> wantFirst(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> wantSecond(kNumValues, 0x1234 * (kIterations - 1));

    const uint64_t sizeInBytes = static_cast<uint64_t>(initial.size() * sizeof(uint32_t));

    wgpu::Buffer first = utils::CreateBufferFromData(
        device, initial.data(), sizeInBytes,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);
    wgpu::Buffer second = utils::CreateBufferFromData(
        device, initial.data(), sizeInBytes,
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc);

    // Each invocation processes one vec4<u32>, i.e. four of the u32 values.
    wgpu::ShaderModule shader = utils::CreateShaderModule(device, R"(
[[block]] struct Buf {
data : array<vec4<u32>, 25>;
};
[[group(0), binding(0)]] var<uniform> src : Buf;
[[group(0), binding(1)]] var<storage, read_write> dst : Buf;
[[stage(compute)]]
fn main([[builtin(global_invocation_id)]] GlobalInvocationID : vec3<u32>) {
dst.data[GlobalInvocationID.x] = src.data[GlobalInvocationID.x] +
vec4<u32>(0x1234u, 0x1234u, 0x1234u, 0x1234u);
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.compute.entryPoint = "main";
    descriptor.compute.module = shader;
    wgpu::ComputePipeline addPipeline = device.CreateComputePipeline(&descriptor);

    // Binding 0 is the uniform source, binding 1 the storage destination.
    wgpu::BindGroup firstToSecond =
        utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                             {
                                 {0, first, 0, sizeInBytes},
                                 {1, second, 0, sizeInBytes},
                             });
    wgpu::BindGroup secondToFirst =
        utils::MakeBindGroup(device, addPipeline.GetBindGroupLayout(0),
                             {
                                 {0, second, 0, sizeInBytes},
                                 {1, first, 0, sizeInBytes},
                             });
    wgpu::BindGroup directions[2] = {firstToSecond, secondToFirst};

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();
    // The pipeline is re-set before every dispatch, and the direction flips on each step
    // (even steps: first -> second, odd steps: second -> first).
    for (uint32_t step = 0; step < kIterations; ++step) {
        computePass.SetPipeline(addPipeline);
        computePass.SetBindGroup(0, directions[step % 2]);
        computePass.Dispatch(kNumValues / 4);
    }
    computePass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    EXPECT_BUFFER_U32_RANGE_EQ(wantFirst.data(), first, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(wantSecond.data(), second, 0, kNumValues);
}
// Checks that, on backends with explicit barriers, the barrier emitted for a dispatch correctly
// combines the Indirect and Storage usages of one buffer. The scenario is:
//   1 - Create an indirect buffer filled with zeros.
//   2 - Overwrite it with ones from a compute shader.
//   3 - Use that buffer as the indirect arguments of a dispatch whose shader also reads it.
TEST_P(ComputeStorageBufferBarrierTests, IndirectBufferCorrectBarrier) {
    // For some reason SPIRV-Cross crashes when translating the step3 shader to HLSL. Suppress the
    // failure since we'll remove SPIRV-Cross at some point.
    DAWN_SUPPRESS_TEST_IF(IsD3D12() && !HasToggleEnabled("use_tint_generator"));

    // Pipeline for step 2: stores (1, 1, 1) into the buffer.
    wgpu::ComputePipelineDescriptor writeOnesDesc;
    writeOnesDesc.compute.entryPoint = "main";
    writeOnesDesc.compute.module = utils::CreateShaderModule(device, R"(
[[block]] struct Buf {
data : array<u32, 3>;
};
[[group(0), binding(0)]] var<storage, read_write> buf : Buf;
[[stage(compute)]] fn main() {
buf.data = array<u32, 3>(1u, 1u, 1u);
}
)");
    wgpu::ComputePipeline writeOnesPipeline = device.CreateComputePipeline(&writeOnesDesc);

    // Pipeline for step 3: writes 1 to the result if the buffer holds (1, 1, 1), otherwise 2.
    wgpu::ComputePipelineDescriptor checkOnesDesc;
    checkOnesDesc.compute.entryPoint = "main";
    checkOnesDesc.compute.module = utils::CreateShaderModule(device, R"(
[[block]] struct Buf {
data : array<u32, 3>;
};
[[group(0), binding(0)]] var<storage, read> buf : Buf;
[[block]] struct Result {
data : u32;
};
[[group(0), binding(1)]] var<storage, read_write> result : Result;
[[stage(compute)]] fn main() {
result.data = 2u;
if (buf.data[0] == 1u && buf.data[1] == 1u && buf.data[2] == 1u) {
result.data = 1u;
}
}
)");
    wgpu::ComputePipeline checkOnesPipeline = device.CreateComputePipeline(&checkOnesDesc);

    // Step 1: the indirect buffer starts out as all zeros.
    wgpu::Buffer indirectArgs = utils::CreateBufferFromData<uint32_t>(
        device, wgpu::BufferUsage::Storage | wgpu::BufferUsage::Indirect, {0u, 0u, 0u});

    // Step 2: fill it with ones using a single dispatch.
    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder computePass = encoder.BeginComputePass();

    wgpu::BindGroup writeOnesGroup = utils::MakeBindGroup(
        device, writeOnesPipeline.GetBindGroupLayout(0), {{0, indirectArgs}});
    computePass.SetPipeline(writeOnesPipeline);
    computePass.SetBindGroup(0, writeOnesGroup);
    computePass.Dispatch(1);

    // Step 3: in the same pass, dispatch indirectly from the buffer while the shader reads it.
    wgpu::Buffer outcome = utils::CreateBufferFromData<uint32_t>(
        device, wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc, {0u});
    wgpu::BindGroup checkOnesGroup =
        utils::MakeBindGroup(device, checkOnesPipeline.GetBindGroupLayout(0),
                             {{0, indirectArgs}, {1, outcome}});
    computePass.SetPipeline(checkOnesPipeline);
    computePass.SetBindGroup(0, checkOnesGroup);
    computePass.DispatchIndirect(indirectArgs, 0);
    computePass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    // A value of 1 means the step-3 shader ran and observed the ones written in step 2.
    EXPECT_BUFFER_U32_EQ(1u, outcome, 0);
}
// Instantiates the parameterized ComputeStorageBufferBarrierTests suite with the backend
// parameters listed below (D3D12, Metal, OpenGL, OpenGL ES and Vulkan).
DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
D3D12Backend(),
MetalBackend(),
OpenGLBackend(),
OpenGLESBackend(),
VulkanBackend());