Vulkan: Enforce fixed subgroup size for compute shaders.
This CL ensures that, on architectures with a varying subgroup size, compute shaders are always compiled with a fixed subgroup size to avoid consistency issues when one shader writes data in a subgroup-size dependent layout to GPU memory, to be read by another shader in a future dispatch. At the moment, only Intel ICDs are known to implement this [1], and the code uses a heuristic to choose a size of 16, which seems to be the sweet spot according to Intel engineers. + Update the PNextChainBuilder class to deal with the fact that VkComputePipelineCreateInfo::pNext is defined as a const void*, which caused compiler errors in the previous implementation. [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875 Change-Id: I332faa53b9f854a8abe43a7271f30d8c5deb2142 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/16021 Commit-Queue: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
parent
4ae315b0d1
commit
56f1678437
|
@ -18,6 +18,7 @@
|
|||
#include "dawn_native/vulkan/FencedDeleter.h"
|
||||
#include "dawn_native/vulkan/PipelineLayoutVk.h"
|
||||
#include "dawn_native/vulkan/ShaderModuleVk.h"
|
||||
#include "dawn_native/vulkan/UtilsVulkan.h"
|
||||
#include "dawn_native/vulkan/VulkanError.h"
|
||||
|
||||
namespace dawn_native { namespace vulkan {
|
||||
|
@ -50,6 +51,18 @@ namespace dawn_native { namespace vulkan {
|
|||
createInfo.stage.pSpecializationInfo = nullptr;
|
||||
|
||||
Device* device = ToBackend(GetDevice());
|
||||
|
||||
PNextChainBuilder extChain(&createInfo);
|
||||
|
||||
VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeInfo = {};
|
||||
uint32_t computeSubgroupSize = device->GetComputeSubgroupSize();
|
||||
if (computeSubgroupSize != 0u) {
|
||||
extChain.Add(
|
||||
&subgroupSizeInfo,
|
||||
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
|
||||
subgroupSizeInfo.requiredSubgroupSize = computeSubgroupSize;
|
||||
}
|
||||
|
||||
return CheckVkSuccess(
|
||||
device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
|
||||
&createInfo, nullptr, &*mHandle),
|
||||
|
|
|
@ -388,6 +388,8 @@ namespace dawn_native { namespace vulkan {
|
|||
|
||||
*dst = mDeviceInfo.featuresExtensions.subgroupSizeControl;
|
||||
featuresChain.Add(dst);
|
||||
|
||||
mComputeSubgroupSize = FindComputeSubgroupSize();
|
||||
}
|
||||
|
||||
// Always require independentBlend because it is a core Dawn feature
|
||||
|
@ -464,6 +466,33 @@ namespace dawn_native { namespace vulkan {
|
|||
return usedKnobs;
|
||||
}
|
||||
|
||||
uint32_t Device::FindComputeSubgroupSize() const {
|
||||
if (!mDeviceInfo.subgroupSizeControl) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
|
||||
mDeviceInfo.propertiesExtensions.subgroupSizeControl;
|
||||
|
||||
if (ext.minSubgroupSize == ext.maxSubgroupSize) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// At the moment, only Intel devices support varying subgroup sizes
|
||||
// and 16, which is the next value after the minimum of 8, is the sweet
|
||||
// spot according to [1]. Hence the following heuristics, which may
|
||||
// need to be adjusted in the future for other architectures, or if
|
||||
// a specific API is added to let client code select the size..
|
||||
//
|
||||
// [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
|
||||
uint32_t subgroupSize = ext.minSubgroupSize * 2;
|
||||
if (subgroupSize <= ext.maxSubgroupSize) {
|
||||
return subgroupSize;
|
||||
} else {
|
||||
return ext.minSubgroupSize;
|
||||
}
|
||||
}
|
||||
|
||||
// Looks up the queue at index 0 of family mQueueFamily on mVkDevice through
// fn.GetDeviceQueue and stores the resulting handle in mQueue.
void Device::GatherQueueFromDevice() {
    // Only the first queue of the family is retrieved.
    constexpr uint32_t kQueueIndex = 0;
    fn.GetDeviceQueue(mVkDevice, mQueueFamily, kQueueIndex, &mQueue);
}
|
||||
|
|
|
@ -100,6 +100,12 @@ namespace dawn_native { namespace vulkan {
|
|||
|
||||
ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
|
||||
|
||||
// Return the fixed subgroup size to use for compute shaders on this device,
|
||||
// or 0 if none needs to be set.
|
||||
uint32_t GetComputeSubgroupSize() const {
|
||||
return mComputeSubgroupSize;
|
||||
}
|
||||
|
||||
private:
|
||||
ResultOrError<BindGroupBase*> CreateBindGroupImpl(
|
||||
const BindGroupDescriptor* descriptor) override;
|
||||
|
@ -130,6 +136,7 @@ namespace dawn_native { namespace vulkan {
|
|||
ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
|
||||
void GatherQueueFromDevice();
|
||||
|
||||
uint32_t FindComputeSubgroupSize() const;
|
||||
void InitTogglesFromDriver();
|
||||
void ApplyDepth24PlusS8Toggle();
|
||||
|
||||
|
@ -144,6 +151,7 @@ namespace dawn_native { namespace vulkan {
|
|||
VkDevice mVkDevice = VK_NULL_HANDLE;
|
||||
uint32_t mQueueFamily = 0;
|
||||
VkQueue mQueue = VK_NULL_HANDLE;
|
||||
uint32_t mComputeSubgroupSize = 0;
|
||||
|
||||
std::unique_ptr<DescriptorSetService> mDescriptorSetService;
|
||||
std::unique_ptr<FencedDeleter> mDeleter;
|
||||
|
|
Loading…
Reference in New Issue