Transition bind group resource states before dispatch in compute pass

This patch fixes a crash issue in both D3D12 and Vulkan backends.
Previously on D3D12 and Vulkan before a compute pass we transitioned
the states of all the resources used in the pass, and before each
dispatch call we only checked if the states of the storage buffers,
read-only storage textures and write-only storage textures need to
be transitioned. This behavior causes two issues:

1. In a compute pass a buffer or texture can be used as read-only in
one dispatch call and as writable in another (e.g. as a storage buffer
in the first dispatch and as a uniform buffer in the next dispatch),
but combining such states is invalid on D3D12 and is rejected by the
D3D12 validation layer.
2. In the above case, the state of the buffer is not transitioned into
UNIFORM, which does not match the required state in the next dispatch.

This patch fixes these issues by transitioning the states of all the
resources in the current bind group before each dispatch() instead of
at the beginning of the compute pass.

BUG=dawn:522
TEST=dawn_end2end_tests
Change-Id: Ibeb6c41dc493ee1068b43bd89ed5a15f2331ef75
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/27942
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Jiawei Shao <jiawei.shao@intel.com>
This commit is contained in:
Jiawei Shao
2020-09-09 01:14:38 +00:00
committed by Commit Bot service account
parent 7b7e098b11
commit 12e97ed6a7
7 changed files with 414 additions and 154 deletions

View File

@@ -127,6 +127,70 @@ TEST_P(ComputeStorageBufferBarrierTests, AddPingPong) {
EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
}
// Verifies that dispatches recorded in a single compute pass are synchronized when the same two
// buffers keep swapping between the read-only storage binding and the writable storage binding.
TEST_P(ComputeStorageBufferBarrierTests, StorageAndReadonlyStoragePingPongInOnePass) {
    // Both buffers start out zero-filled.
    std::vector<uint32_t> zeros(kNumValues, 0);
    const uint64_t byteSize = static_cast<uint64_t>(zeros.size() * sizeof(uint32_t));
    const wgpu::BufferUsage usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc;

    wgpu::Buffer bufferA = utils::CreateBufferFromData(device, zeros.data(), byteSize, usage);
    wgpu::Buffer bufferB = utils::CreateBufferFromData(device, zeros.data(), byteSize, usage);

    // Each invocation computes dst[i] = src[i] + 0x1234, where src is declared readonly.
    // The GLSL text is kept verbatim.
    wgpu::ShaderModule shader =
        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
#version 450
#define kNumValues 100
layout(std430, set = 0, binding = 0) readonly buffer Src { uint src[kNumValues]; };
layout(std430, set = 0, binding = 1) buffer Dst { uint dst[kNumValues]; };
void main() {
uint index = gl_GlobalInvocationID.x;
dst[index] = src[index] + 0x1234;
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.computeStage.module = shader;
    descriptor.computeStage.entryPoint = "main";
    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&descriptor);

    // pingPong[0] reads bufferA and writes bufferB; pingPong[1] swaps the two roles.
    const wgpu::BindGroup pingPong[2] = {
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, bufferA, 0, byteSize},
                                 {1, bufferB, 0, byteSize},
                             }),
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, bufferB, 0, byteSize},
                                 {1, bufferA, 0, byteSize},
                             }),
    };

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
    pass.SetPipeline(pipeline);
    // Every round records two dispatches: A -> B followed by B -> A.
    for (uint32_t roundsLeft = kIterations / 2; roundsLeft > 0; --roundsLeft) {
        for (const wgpu::BindGroup& group : pingPong) {
            pass.SetBindGroup(0, group);
            pass.Dispatch(kNumValues);
        }
    }
    pass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    // The last dispatch of each round writes bufferA, so bufferA ends one increment ahead of
    // bufferB. NOTE(review): the totals assume kIterations is even; its definition is not visible
    // here.
    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
}
// Test that Storage to Uniform buffer transitions work and synchronize correctly
// by ping-ponging between Storage/Uniform usage in sequential compute passes.
TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
@@ -192,6 +256,70 @@ TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPong) {
EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
}
// Verifies that Storage <-> Uniform buffer transitions are synchronized when the two usages
// alternate from one dispatch to the next inside a single compute pass.
TEST_P(ComputeStorageBufferBarrierTests, UniformToStorageAddPingPongInOnePass) {
    // Both buffers start out zero-filled and may be bound either as uniform or as storage.
    std::vector<uint32_t> zeros(kNumValues, 0);
    const uint64_t byteSize = static_cast<uint64_t>(zeros.size() * sizeof(uint32_t));
    const wgpu::BufferUsage usage =
        wgpu::BufferUsage::Storage | wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopySrc;

    wgpu::Buffer bufferA = utils::CreateBufferFromData(device, zeros.data(), byteSize, usage);
    wgpu::Buffer bufferB = utils::CreateBufferFromData(device, zeros.data(), byteSize, usage);

    // Each invocation adds 0x1234 to one uvec4 read from the uniform block and stores it in the
    // storage block. The GLSL text is kept verbatim.
    wgpu::ShaderModule shader =
        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
#version 450
#define kNumValues 100
layout(std140, set = 0, binding = 0) uniform Src { uvec4 src[kNumValues / 4]; };
layout(std430, set = 0, binding = 1) buffer Dst { uvec4 dst[kNumValues / 4]; };
void main() {
uint index = gl_GlobalInvocationID.x;
dst[index] = src[index] + 0x1234;
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.computeStage.module = shader;
    descriptor.computeStage.entryPoint = "main";
    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&descriptor);

    // uniformAStorageB: bufferA is the uniform source, bufferB the storage destination.
    wgpu::BindGroup uniformAStorageB =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, bufferA, 0, byteSize},
                                 {1, bufferB, 0, byteSize},
                             });
    // uniformBStorageA: the same buffers with the roles swapped.
    wgpu::BindGroup uniformBStorageA =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, bufferB, 0, byteSize},
                                 {1, bufferA, 0, byteSize},
                             });

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
    // Even-numbered dispatches go A -> B, odd-numbered ones go B -> A. The pipeline is set again
    // before every dispatch.
    for (uint32_t dispatch = 0; dispatch < kIterations; ++dispatch) {
        const bool sourceIsA = (dispatch % 2 == 0);
        pass.SetPipeline(pipeline);
        pass.SetBindGroup(0, sourceIsA ? uniformAStorageB : uniformBStorageA);
        pass.Dispatch(kNumValues / 4);
    }
    pass.EndPass();

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    // NOTE(review): the totals below assume kIterations is even, so that the final dispatch
    // writes bufferA; its definition is not visible here.
    std::vector<uint32_t> expectedA(kNumValues, 0x1234 * kIterations);
    std::vector<uint32_t> expectedB(kNumValues, 0x1234 * (kIterations - 1));
    EXPECT_BUFFER_U32_RANGE_EQ(expectedA.data(), bufferA, 0, kNumValues);
    EXPECT_BUFFER_U32_RANGE_EQ(expectedB.data(), bufferB, 0, kNumValues);
}
DAWN_INSTANTIATE_TEST(ComputeStorageBufferBarrierTests,
D3D12Backend(),
MetalBackend(),

View File

@@ -944,6 +944,163 @@ TEST_P(StorageTextureTests, Writeonly2DArrayStorageTexture) {
CheckOutputStorageTexture(writeonlyStorageTexture, kTextureFormat, kArrayLayerCount);
}
// Verifies that two dispatches in one compute pass are synchronized when two textures swap
// between the read-only storage texture binding and the write-only storage texture binding.
TEST_P(StorageTextureTests, ReadonlyAndWriteonlyStorageTexturePingPong) {
    constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::R32Uint;
    const wgpu::TextureUsage textureUsage =
        wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc;

    wgpu::Texture firstTexture = CreateTexture(kFormat, textureUsage, 1u, 1u);
    wgpu::Texture secondTexture = CreateTexture(kFormat, textureUsage, 1u, 1u);

    // Loads texel (0, 0) from Src, increments its x component and stores it to texel (0, 0) of
    // Dst. The GLSL text is kept verbatim.
    wgpu::ShaderModule shader =
        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
#version 450
layout(set = 0, binding = 0, r32ui) uniform readonly uimage2D Src;
layout(set = 0, binding = 1, r32ui) uniform writeonly uimage2D Dst;
void main() {
uvec4 srcValue = imageLoad(Src, ivec2(0, 0));
++srcValue.x;
imageStore(Dst, ivec2(0, 0), srcValue);
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.computeStage.module = shader;
    descriptor.computeStage.entryPoint = "main";
    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&descriptor);

    // readFirstWriteSecond: firstTexture is the read-only source, secondTexture the write-only
    // destination.
    wgpu::BindGroup readFirstWriteSecond =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, firstTexture.CreateView()},
                                 {1, secondTexture.CreateView()},
                             });

    // readSecondWriteFirst: secondTexture is the read-only source, firstTexture the write-only
    // destination.
    wgpu::BindGroup readSecondWriteFirst =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, secondTexture.CreateView()},
                                 {1, firstTexture.CreateView()},
                             });

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
    pass.SetPipeline(pipeline);

    // Dispatch 1: secondTexture = firstTexture + 1.
    pass.SetBindGroup(0, readFirstWriteSecond);
    pass.Dispatch(1);

    // Dispatch 2: firstTexture = secondTexture + 1, which must observe dispatch 1's write.
    pass.SetBindGroup(0, readSecondWriteFirst);
    pass.Dispatch(1);

    pass.EndPass();

    // Read the single texel of firstTexture back into a buffer.
    wgpu::BufferDescriptor readbackDesc;
    readbackDesc.size = sizeof(uint32_t);
    readbackDesc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
    wgpu::Buffer readback = device.CreateBuffer(&readbackDesc);

    wgpu::TextureCopyView copySource;
    copySource.texture = firstTexture;

    wgpu::BufferCopyView copyDestination = utils::CreateBufferCopyView(readback, 0, 256, 1);
    wgpu::Extent3D copySize = {1, 1, 1};
    encoder.CopyTextureToBuffer(&copySource, &copyDestination, &copySize);

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    // Two chained increments are expected. NOTE(review): this presumes the texel starts at 0;
    // texture initialization is not shown here.
    constexpr uint32_t kExpectedTexelInFirstTexture = 2u;
    EXPECT_BUFFER_U32_EQ(kExpectedTexelInFirstTexture, readback, 0);
}
// Verifies that two dispatches in one compute pass are synchronized when two textures swap
// between the sampled texture binding and the write-only storage texture binding.
TEST_P(StorageTextureTests, SampledAndWriteonlyStorageTexturePingPong) {
    constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::R32Uint;

    // Only firstTexture is copied out at the end, so only it carries CopySrc.
    wgpu::Texture firstTexture = CreateTexture(
        kFormat,
        wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage | wgpu::TextureUsage::CopySrc,
        1u, 1u);
    wgpu::Texture secondTexture = CreateTexture(
        kFormat, wgpu::TextureUsage::Sampled | wgpu::TextureUsage::Storage, 1u, 1u);

    wgpu::SamplerDescriptor samplerDescriptor;
    wgpu::Sampler sampler = device.CreateSampler(&samplerDescriptor);

    // Fetches texel (0, 0) of Src through the sampler, increments its x component and stores it
    // to texel (0, 0) of Dst. The GLSL text is kept verbatim.
    wgpu::ShaderModule shader =
        utils::CreateShaderModule(device, utils::SingleShaderStage::Compute, R"(
#version 450
layout(set = 0, binding = 0) uniform sampler mySampler;
layout(set = 0, binding = 1) uniform utexture2D Src;
layout(set = 0, binding = 2, r32ui) uniform writeonly uimage2D Dst;
void main() {
uvec4 srcValue = texelFetch(usampler2D(Src, mySampler), ivec2(0, 0), 0);
++srcValue.x;
imageStore(Dst, ivec2(0, 0), srcValue);
}
)");

    wgpu::ComputePipelineDescriptor descriptor = {};
    descriptor.computeStage.module = shader;
    descriptor.computeStage.entryPoint = "main";
    wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&descriptor);

    // sampleFirstWriteSecond: firstTexture is the sampled source, secondTexture the write-only
    // storage destination.
    wgpu::BindGroup sampleFirstWriteSecond =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, sampler},
                                 {1, firstTexture.CreateView()},
                                 {2, secondTexture.CreateView()},
                             });

    // sampleSecondWriteFirst: secondTexture is the sampled source, firstTexture the write-only
    // storage destination.
    wgpu::BindGroup sampleSecondWriteFirst =
        utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0),
                             {
                                 {0, sampler},
                                 {1, secondTexture.CreateView()},
                                 {2, firstTexture.CreateView()},
                             });

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
    pass.SetPipeline(pipeline);

    // Dispatch 1: secondTexture = firstTexture + 1.
    pass.SetBindGroup(0, sampleFirstWriteSecond);
    pass.Dispatch(1);

    // Dispatch 2: firstTexture = secondTexture + 1, which must observe dispatch 1's write.
    pass.SetBindGroup(0, sampleSecondWriteFirst);
    pass.Dispatch(1);

    pass.EndPass();

    // Read the single texel of firstTexture back into a buffer.
    wgpu::BufferDescriptor readbackDesc;
    readbackDesc.size = sizeof(uint32_t);
    readbackDesc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
    wgpu::Buffer readback = device.CreateBuffer(&readbackDesc);

    wgpu::TextureCopyView copySource;
    copySource.texture = firstTexture;

    wgpu::BufferCopyView copyDestination = utils::CreateBufferCopyView(readback, 0, 256, 1);
    wgpu::Extent3D copySize = {1, 1, 1};
    encoder.CopyTextureToBuffer(&copySource, &copyDestination, &copySize);

    wgpu::CommandBuffer commandBuffer = encoder.Finish();
    queue.Submit(1, &commandBuffer);

    // Two chained increments are expected. NOTE(review): this presumes the texel starts at 0;
    // texture initialization is not shown here.
    constexpr uint32_t kExpectedTexelInFirstTexture = 2u;
    EXPECT_BUFFER_U32_EQ(kExpectedTexelInFirstTexture, readback, 0);
}
DAWN_INSTANTIATE_TEST(StorageTextureTests,
D3D12Backend(),
MetalBackend(),