// Copyright 2019 The Dawn Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "tests/DawnTest.h" #include "utils/WGPUHelpers.h" #include class ComputeSharedMemoryTests : public DawnTest { public: static constexpr uint32_t kInstances = 11; void BasicTest(const char* shader); }; void ComputeSharedMemoryTests::BasicTest(const char* shader) { // Set up shader and pipeline auto module = utils::CreateShaderModule(device, shader); wgpu::ComputePipelineDescriptor csDesc; csDesc.compute.module = module; csDesc.compute.entryPoint = "main"; wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc); // Set up dst storage buffer wgpu::BufferDescriptor dstDesc; dstDesc.size = sizeof(uint32_t); dstDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; wgpu::Buffer dst = device.CreateBuffer(&dstDesc); const uint32_t zero = 0; queue.WriteBuffer(dst, 0, &zero, sizeof(zero)); // Set up bind group and issue dispatch wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), { {0, dst, 0, sizeof(uint32_t)}, }); wgpu::CommandBuffer commands; { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); pass.SetPipeline(pipeline); pass.SetBindGroup(0, bindGroup); pass.Dispatch(1); pass.EndPass(); commands = encoder.Finish(); } queue.Submit(1, &commands); const uint32_t expected = kInstances; EXPECT_BUFFER_U32_RANGE_EQ(&expected, dst, 0, 1); } // Basic shared memory test TEST_P(ComputeSharedMemoryTests, Basic) { BasicTest(R"( let kTileSize : u32 = 4u; let kInstances : u32 = 11u; struct Dst { x : u32; }; @group(0) @binding(0) var dst : Dst; var tmp : u32; @stage(compute) @workgroup_size(4,4,1) fn main(@builtin(local_invocation_id) LocalInvocationID : vec3) { let index : u32 = LocalInvocationID.y * kTileSize + LocalInvocationID.x; if (index == 0u) { tmp = 0u; } workgroupBarrier(); for (var i : u32 = 0u; i < kInstances; i = i + 1u) { if (i == index) { tmp = tmp + 1u; } workgroupBarrier(); } if (index == 0u) { dst.x = tmp; } })"); } // Test using assorted types in workgroup memory. MSL lacks constructors // for matrices in threadgroup memory. Basic test that reading and // writing a matrix in workgroup memory works. TEST_P(ComputeSharedMemoryTests, AssortedTypes) { wgpu::ComputePipelineDescriptor csDesc; csDesc.compute.module = utils::CreateShaderModule(device, R"( struct StructValues { m: mat2x2; }; struct Dst { d_struct : StructValues; d_matrix : mat2x2; d_array : array; d_vector : vec4; }; @group(0) @binding(0) var dst : Dst; var wg_struct : StructValues; var wg_matrix : mat2x2; var wg_array : array; var wg_vector : vec4; @stage(compute) @workgroup_size(4,1,1) fn main(@builtin(local_invocation_id) LocalInvocationID : vec3) { let i = 4u * LocalInvocationID.x; if (LocalInvocationID.x == 0u) { wg_struct.m = mat2x2( vec2(f32(i), f32(i + 1u)), vec2(f32(i + 2u), f32(i + 3u))); } else if (LocalInvocationID.x == 1u) { wg_matrix = mat2x2( vec2(f32(i), f32(i + 1u)), vec2(f32(i + 2u), f32(i + 3u))); } else if (LocalInvocationID.x == 2u) { wg_array[0u] = i; wg_array[1u] = i + 1u; wg_array[2u] = i + 2u; wg_array[3u] = i + 3u; } else if (LocalInvocationID.x == 3u) { wg_vector = vec4( f32(i), f32(i + 1u), f32(i + 2u), f32(i + 3u)); } workgroupBarrier(); if (LocalInvocationID.x == 0u) { dst.d_struct = wg_struct; dst.d_matrix = wg_matrix; dst.d_array = wg_array; dst.d_vector = wg_vector; } } )"); csDesc.compute.entryPoint = "main"; wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc); // Set up dst storage buffer wgpu::BufferDescriptor dstDesc; dstDesc.size = 64; dstDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; wgpu::Buffer dst = device.CreateBuffer(&dstDesc); // Set up bind group and issue dispatch wgpu::BindGroup bindGroup = utils::MakeBindGroup(device, pipeline.GetBindGroupLayout(0), { {0, dst}, }); wgpu::CommandBuffer commands; { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); pass.SetPipeline(pipeline); pass.SetBindGroup(0, bindGroup); pass.Dispatch(1); pass.EndPass(); commands = encoder.Finish(); } queue.Submit(1, &commands); std::array expectedStruct = {0., 1., 2., 3.}; std::array expectedMatrix = {4., 5., 6., 7.}; std::array expectedArray = {8, 9, 10, 11}; std::array expectedVector = {12., 13., 14., 15.}; EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedStruct.data(), dst, 0, 4); EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedMatrix.data(), dst, 16, 4); EXPECT_BUFFER_U32_RANGE_EQ(expectedArray.data(), dst, 32, 4); EXPECT_BUFFER_FLOAT_RANGE_EQ(expectedVector.data(), dst, 48, 4); } DAWN_INSTANTIATE_TEST(ComputeSharedMemoryTests, D3D12Backend(), MetalBackend(), OpenGLBackend(), OpenGLESBackend(), VulkanBackend());