Reland: Vulkan: Enforce fixed subgroup size for compute shaders.

Reland after a refactor of how the extension handling works in the Vulkan
backend.

The original author is David Turner <david.turner.dev@gmail.com>.

This CL ensures that, on architectures with a varying subgroup size,
compute shaders are always compiled with a fixed subgroup size to
avoid consistency issues when one shader writes data in a subgroup-size
dependent layout to GPU memory, to be read by another shader in a
future dispatch.

At the moment, only Intel ICDs are known to implement this [1],
and the code uses a heuristic to choose the size of 16, which seems to
be the sweet spot according to Intel engineers.

+ Update the PNextChainBuilder class to deal with the fact that
  VkComputePipelineCreateInfo::pNext is defined as a const void*,
  which created compiler errors in the previous implementation.

[1] https://bugs.freedesktop.org/show_bug.cgi?id=108875

Bug: dawn:464
Change-Id: I035ee06084fcc964742f0bff4c54cff257c742ae
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/23202
Commit-Queue: Corentin Wallez <cwallez@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Reviewed-by: Stephen White <senorblanco@chromium.org>
This commit is contained in:
Corentin Wallez 2020-06-16 09:17:48 +00:00 committed by Commit Bot service account
parent 5cef1162b9
commit fefb452f29
3 changed files with 52 additions and 0 deletions

View File

@ -18,6 +18,7 @@
#include "dawn_native/vulkan/FencedDeleter.h" #include "dawn_native/vulkan/FencedDeleter.h"
#include "dawn_native/vulkan/PipelineLayoutVk.h" #include "dawn_native/vulkan/PipelineLayoutVk.h"
#include "dawn_native/vulkan/ShaderModuleVk.h" #include "dawn_native/vulkan/ShaderModuleVk.h"
#include "dawn_native/vulkan/UtilsVulkan.h"
#include "dawn_native/vulkan/VulkanError.h" #include "dawn_native/vulkan/VulkanError.h"
namespace dawn_native { namespace vulkan { namespace dawn_native { namespace vulkan {
@ -49,6 +50,19 @@ namespace dawn_native { namespace vulkan {
createInfo.stage.pSpecializationInfo = nullptr; createInfo.stage.pSpecializationInfo = nullptr;
Device* device = ToBackend(GetDevice()); Device* device = ToBackend(GetDevice());
PNextChainBuilder extChain(&createInfo);
VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeInfo = {};
uint32_t computeSubgroupSize = device->GetComputeSubgroupSize();
if (computeSubgroupSize != 0u) {
ASSERT(device->GetDeviceInfo().HasExt(DeviceExt::SubgroupSizeControl));
subgroupSizeInfo.requiredSubgroupSize = computeSubgroupSize;
extChain.Add(
&subgroupSizeInfo,
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
}
return CheckVkSuccess( return CheckVkSuccess(
device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1, device->fn.CreateComputePipelines(device->GetVkDevice(), ::VK_NULL_HANDLE, 1,
&createInfo, nullptr, &*mHandle), &createInfo, nullptr, &*mHandle),

View File

@ -305,6 +305,8 @@ namespace dawn_native { namespace vulkan {
// Always request all the features from VK_EXT_subgroup_size_control when available. // Always request all the features from VK_EXT_subgroup_size_control when available.
usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures; usedKnobs.subgroupSizeControlFeatures = mDeviceInfo.subgroupSizeControlFeatures;
featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures); featuresChain.Add(&usedKnobs.subgroupSizeControlFeatures);
mComputeSubgroupSize = FindComputeSubgroupSize();
} }
if (IsExtensionEnabled(Extension::TextureCompressionBC)) { if (IsExtensionEnabled(Extension::TextureCompressionBC)) {
@ -386,6 +388,32 @@ namespace dawn_native { namespace vulkan {
return usedKnobs; return usedKnobs;
} }
uint32_t Device::FindComputeSubgroupSize() const {
if (!mDeviceInfo.HasExt(DeviceExt::SubgroupSizeControl)) {
return 0;
}
const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& ext =
mDeviceInfo.subgroupSizeControlProperties;
if (ext.minSubgroupSize == ext.maxSubgroupSize) {
return 0;
}
// At the moment, only Intel devices support varying subgroup sizes and 16, which is the
// next value after the minimum of 8, is the sweet spot according to [1]. Hence the
// following heuristics, which may need to be adjusted in the future for other
// architectures, or if a specific API is added to let client code select the size..
//
// [1] https://bugs.freedesktop.org/show_bug.cgi?id=108875
uint32_t subgroupSize = ext.minSubgroupSize * 2;
if (subgroupSize <= ext.maxSubgroupSize) {
return subgroupSize;
} else {
return ext.minSubgroupSize;
}
}
void Device::GatherQueueFromDevice() { void Device::GatherQueueFromDevice() {
fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue); fn.GetDeviceQueue(mVkDevice, mQueueFamily, 0, &mQueue);
} }
@ -712,6 +740,10 @@ namespace dawn_native { namespace vulkan {
return mResourceMemoryAllocator.get(); return mResourceMemoryAllocator.get();
} }
uint32_t Device::GetComputeSubgroupSize() const {
return mComputeSubgroupSize;
}
MaybeError Device::WaitForIdleForDestruction() { MaybeError Device::WaitForIdleForDestruction() {
// Immediately tag the recording context as unused so we don't try to submit it in Tick. // Immediately tag the recording context as unused so we don't try to submit it in Tick.
// Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in // Move the mRecordingContext.used to mUnusedCommands so it can be cleaned up in

View File

@ -96,6 +96,10 @@ namespace dawn_native { namespace vulkan {
ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const; ResourceMemoryAllocator* GetResourceMemoryAllocatorForTesting() const;
// Return the fixed subgroup size to use for compute shaders on this device or 0 if none
// needs to be set.
uint32_t GetComputeSubgroupSize() const;
private: private:
Device(Adapter* adapter, const DeviceDescriptor* descriptor); Device(Adapter* adapter, const DeviceDescriptor* descriptor);
@ -130,6 +134,7 @@ namespace dawn_native { namespace vulkan {
ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice); ResultOrError<VulkanDeviceKnobs> CreateDevice(VkPhysicalDevice physicalDevice);
void GatherQueueFromDevice(); void GatherQueueFromDevice();
uint32_t FindComputeSubgroupSize() const;
void InitTogglesFromDriver(); void InitTogglesFromDriver();
void ApplyDepth24PlusS8Toggle(); void ApplyDepth24PlusS8Toggle();
@ -144,6 +149,7 @@ namespace dawn_native { namespace vulkan {
VkDevice mVkDevice = VK_NULL_HANDLE; VkDevice mVkDevice = VK_NULL_HANDLE;
uint32_t mQueueFamily = 0; uint32_t mQueueFamily = 0;
VkQueue mQueue = VK_NULL_HANDLE; VkQueue mQueue = VK_NULL_HANDLE;
uint32_t mComputeSubgroupSize = 0;
SerialQueue<Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation; SerialQueue<Ref<BindGroupLayout>> mBindGroupLayoutsPendingDeallocation;
std::unique_ptr<FencedDeleter> mDeleter; std::unique_ptr<FencedDeleter> mDeleter;