Reland: Vulkan: Enforce fixed subgroup size for compute shaders.
Reland after a refactor of how extension handling works in the Vulkan backend. The original author is David Turner <david.turner.dev@gmail.com>. This CL ensures that, on architectures with a varying subgroup size, compute shaders are always compiled with a fixed subgroup size to avoid consistency issues when one shader writes data in a subgroup-size dependent layout to GPU memory, to be read by another shader in a future dispatch. At the moment, only Intel ICDs are known to implement this [1], and the code uses a heuristic to choose the size of 16, which seems to be the sweet spot according to Intel engineers. + Update the PNextChainBuilder class to deal with the fact that VkComputePipelineCreateInfo::pNext is defined as a const void*, which created compiler errors in the previous implementation. [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875 Bug: dawn:464 Change-Id: I035ee06084fcc964742f0bff4c54cff257c742ae Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23202 Commit-Queue: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Austin Eng <enga@chromium.org> Reviewed-by: Stephen White <senorblanco@chromium.org>
This commit is contained in:
parent
5cef1162b9
commit
fefb452f29
|
@ -18,6 +18,7 @@
|
||||||
#include "dawn_native/vulkan/FencedDeleter.h"
|
#include "dawn_native/vulkan/FencedDeleter.h"
|
||||||
#include "dawn_native/vulkan/PipelineLayoutVk.h"
|
#include "dawn_native/vulkan/PipelineLayoutVk.h"
|
||||||
#include "dawn_native/vulkan/ShaderModuleVk.h"
|
#include "dawn_native/vulkan/ShaderModuleVk.h"
|
||||||
|
#include "dawn_native/vulkan/UtilsVulkan.h"
|
||||||
#include "dawn_native/vulkan/VulkanError.h"
|
#include "dawn_native/vulkan/VulkanError.h"
|
||||||
|
|
||||||
namespace dawn_native { namespace vulkan {
|
namespace dawn_native { namespace vulkan {
|
||||||
|
@ -49,6 +50,19 @@ namespace dawn_native { namespace vulkan {
|
||||||
createInfo.stage.pSpecializationInfo = nullptr;
|
createInfo.stage.pSpecializationInfo = nullptr;
|
||||||
|
|
||||||
Device* device = ToBackend(GetDevice());
|
Device* device = ToBackend(GetDevice());
|
||||||
|
|
||||||
|
PNextChainBuilder extChain(&createInfo);
|
||||||
|
|
||||||
|
VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeInfo = {};
|
||||||
|
uint32_t computeSubgroupSize = device->GetComputeSubgroupSize();
|
||||||
|
if (computeSubgroupSize != 0u) {
|
||||||
|
ASSERT(device->GetDeviceInfo().HasExt(DeviceExt::SubgroupSizeControl));
|
||||||
|
subgroupSizeInfo.requiredSubgroupSize = computeSubgroupSize;
|
||||||
|
extChain.Add(
|
||||||
|
&subgroupSizeInfo,
|
||||||
|
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
|
||||||
|
}
|
||||||
|
|
||||||
return CheckVkSuccess(
|
return CheckVkSuccess(
|
||||||
device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
|
device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
|
||||||
&createInfo, nullptr, &*mHandle),
|
&createInfo, nullptr, &*mHandle),
|
||||||
|
|
|
@ -305,6 +305,8 @@ namespace dawn_native { namespace vulkan {
|
||||||
// Always request all the features from VK_EXT_subgroup_size_control when available.
|
// Always request all the features from VK_EXT_subgroup_size_control when available.
|
||||||
usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures;
|
usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures;
|
||||||
featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures);
|
featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures);
|
||||||
|
|
||||||
|
mComputeSubgroupSize = FindComputeSubgroupSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsExtensionEnabled(Extension::TextureCompressionBC)) {
|
if (IsExtensionEnabled(Extension::TextureCompressionBC)) {
|
||||||
|
@ -386,6 +388,32 @@ namespace dawn_native { namespace vulkan {
|
||||||
return usedKnobs;
|
return usedKnobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t Device::FindComputeSubgroupSize() const {
|
||||||
|
if (!mDeviceInfo.HasExt(DeviceExt::SubgroupSizeControl)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
|
||||||
|
mDeviceInfo.subgroupSizeControlProperties;
|
||||||
|
|
||||||
|
if (ext.minSubgroupSize == ext.maxSubgroupSize) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At the moment, only Intel devices support varying subgroup sizes and 16, which is the
|
||||||
|
// next value after the minimum of 8, is the sweet spot according to [1]. Hence the
|
||||||
|
// following heuristics, which may need to be adjusted in the future for other
|
||||||
|
// architectures, or if a specific API is added to let client code select the size..
|
||||||
|
//
|
||||||
|
// [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
|
||||||
|
uint32_t subgroupSize = ext.minSubgroupSize * 2;
|
||||||
|
if (subgroupSize <= ext.maxSubgroupSize) {
|
||||||
|
return subgroupSize;
|
||||||
|
} else {
|
||||||
|
return ext.minSubgroupSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Device::GatherQueueFromDevice() {
|
void Device::GatherQueueFromDevice() {
|
||||||
fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
|
fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
|
||||||
}
|
}
|
||||||
|
@ -712,6 +740,10 @@ namespace dawn_native { namespace vulkan {
|
||||||
return mResourceMemoryAllocator.get();
|
return mResourceMemoryAllocator.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t Device::GetComputeSubgroupSize() const {
|
||||||
|
return mComputeSubgroupSize;
|
||||||
|
}
|
||||||
|
|
||||||
MaybeError Device::WaitForIdleForDestruction() {
|
MaybeError Device::WaitForIdleForDestruction() {
|
||||||
// Immediately tag the recording context as unused so we don't try to submit it in Tick.
|
// Immediately tag the recording context as unused so we don't try to submit it in Tick.
|
||||||
// Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in
|
// Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in
|
||||||
|
|
|
@ -96,6 +96,10 @@ namespace dawn_native { namespace vulkan {
|
||||||
|
|
||||||
ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
|
ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
|
||||||
|
|
||||||
|
// Return the fixed subgroup size to use for compute shaders on this device or 0 if none
|
||||||
|
// needs to be set.
|
||||||
|
uint32_t GetComputeSubgroupSize() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Device(Adapter* adapter, const DeviceDescriptor* descriptor);
|
Device(Adapter* adapter, const DeviceDescriptor* descriptor);
|
||||||
|
|
||||||
|
@ -130,6 +134,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
|
ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
|
||||||
void GatherQueueFromDevice();
|
void GatherQueueFromDevice();
|
||||||
|
|
||||||
|
uint32_t FindComputeSubgroupSize() const;
|
||||||
void InitTogglesFromDriver();
|
void InitTogglesFromDriver();
|
||||||
void ApplyDepth24PlusS8Toggle();
|
void ApplyDepth24PlusS8Toggle();
|
||||||
|
|
||||||
|
@ -144,6 +149,7 @@ namespace dawn_native { namespace vulkan {
|
||||||
VkDevice mVkDevice = VK_NULL_HANDLE;
|
VkDevice mVkDevice = VK_NULL_HANDLE;
|
||||||
uint32_t mQueueFamily = 0;
|
uint32_t mQueueFamily = 0;
|
||||||
VkQueue mQueue = VK_NULL_HANDLE;
|
VkQueue mQueue = VK_NULL_HANDLE;
|
||||||
|
uint32_t mComputeSubgroupSize = 0;
|
||||||
|
|
||||||
SerialQueue<Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation;
|
SerialQueue<Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation;
|
||||||
std::unique_ptr<FencedDeleter> mDeleter;
|
std::unique_ptr<FencedDeleter> mDeleter;
|
||||||
|
|
Loading…
Reference in New Issue