Add extra layers to work around array texture corruption issue

This change works around the array texture corruption issue seen on
some Windows Intel devices running old drivers. The number of extra
layers for a given texture is calculated precisely from the texture
memory layout on these devices.

It also adds one more test: clearTexture.

Bug: dawn:949, dawn:1507

Change-Id: I0b2a6497c77f3edf45c49220517e13be76c6b608
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/103120
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Yunchao He <yunchao.he@intel.com>
Authored by Yunchao He on 2022-09-30 17:57:38 +00:00, committed by Dawn LUCI CQ
Parent: d673ce3ca8
Commit: 7a4072e3e3

8 changed files with 148 additions and 13 deletions


@@ -155,7 +155,7 @@ MaybeError Buffer::Initialize(bool mappedAtCreation) {
     DAWN_TRY_ASSIGN(
         mResourceAllocation,
-        ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage));
+        ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage, 0));
 
     SetLabelImpl();


@@ -541,8 +541,11 @@ void Device::DeallocateMemory(ResourceHeapAllocation& allocation) {
 ResultOrError<ResourceHeapAllocation> Device::AllocateMemory(
     D3D12_HEAP_TYPE heapType,
     const D3D12_RESOURCE_DESC& resourceDescriptor,
-    D3D12_RESOURCE_STATES initialUsage) {
-    return mResourceAllocatorManager->AllocateMemory(heapType, resourceDescriptor, initialUsage);
+    D3D12_RESOURCE_STATES initialUsage,
+    uint32_t formatBytesPerBlock) {
+    // formatBytesPerBlock is only needed for non-compressed color formats, for a workaround.
+    return mResourceAllocatorManager->AllocateMemory(heapType, resourceDescriptor, initialUsage,
+                                                     formatBytesPerBlock);
 }
 
 std::unique_ptr<ExternalImageDXGIImpl> Device::CreateExternalImageDXGIImpl(


@@ -115,7 +115,8 @@ class Device final : public DeviceBase {
     ResultOrError<ResourceHeapAllocation> AllocateMemory(
         D3D12_HEAP_TYPE heapType,
         const D3D12_RESOURCE_DESC& resourceDescriptor,
-        D3D12_RESOURCE_STATES initialUsage);
+        D3D12_RESOURCE_STATES initialUsage,
+        uint32_t formatBytesPerBlock);
 
     void DeallocateMemory(ResourceHeapAllocation& allocation);


@@ -14,6 +14,7 @@
 #include "dawn/native/d3d12/ResourceAllocatorManagerD3D12.h"
 
+#include <algorithm>
 #include <limits>
 #include <utility>
@@ -177,6 +178,113 @@ bool IsClearValueOptimizable(DeviceBase* device, const D3D12_RESOURCE_DESC& reso
                  D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0;
 }
 
+uint32_t GetColumnPitch(uint32_t baseHeight, uint32_t mipLevelCount) {
+    // This function returns the number of rows of blocks for a single layer with all mipmaps.
+    //
+    // Below is a simple diagram of the texture memory layout for a single layer of a mipmapped
+    // texture. For details about texture memory layout on Intel Gen12 GPUs, read page 78 at
+    // https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-tgl-vol05-memory_data_formats.pdf.
+    //
+    //     ----------------------------------------------    ---
+    //     |                                            |     |
+    //     |                                            |     |
+    //     |                                            |     |
+    //     |                   LOD 0                    |     |
+    //     |                                            |     |
+    //     |                                            |     |
+    //     |                                            |     |  column pitch (aka QPitch)
+    //     |                                            |     |
+    //     ----------------------------------------------     |
+    //     |                      |  LOD 2  |                  |
+    //     |        LOD 1         |----------                  |
+    //     |                      |  LOD 3  |                  |
+    //     |                      |----------                  |
+    //     |                      |    .                       |
+    //     ------------------------    .                       |
+    //                                 .                      ---
+    uint32_t level1Height = 0;
+    uint32_t level2ToTailHeight = 0;
+    if (mipLevelCount >= 2) {
+        level1Height = std::max(baseHeight >> 1, 1u);
+        for (uint32_t level = 2; level < mipLevelCount; ++level) {
+            level2ToTailHeight += std::max(baseHeight >> level, 1u);
+        }
+    }
+    // The height of level 2 to tail (or max) can be greater than the height of level 1. For
+    // example, if the single layer's dimension is 16x4 and it has full mipmaps, then there are
+    // 5 levels: 16x4, 8x2, 4x1, 2x1, 1x1. So level1Height is 2, while level2ToTailHeight is
+    // 1 + 1 + 1 = 3.
+    uint32_t columnPitch = baseHeight + std::max(level1Height, level2ToTailHeight);
+
+    // The number of rows of blocks for a texture must be a multiple of 4.
+    return Align(columnPitch, 4);
+}
+
+uint32_t ComputeExtraArraySizeForIntelGen12(uint32_t width,
+                                            uint32_t height,
+                                            uint32_t arrayLayerCount,
+                                            uint32_t mipLevelCount,
+                                            uint32_t sampleCount,
+                                            uint32_t formatBytesPerBlock) {
+    // For details about texture memory layout on Intel Gen12 GPUs, read
+    // https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-tgl-vol05-memory_data_formats.pdf.
+    //   - Texture memory layout: from <Surface Memory Organizations> to
+    //     <Surface Padding Requirement>.
+    //   - Tile-based memory: the entire section of <Address Tiling Function Introduction>.
+    constexpr uint32_t kPageSize = 4 * 1024;
+    constexpr uint32_t kTileSize = 16 * kPageSize;
+    constexpr uint32_t kTileHeight = 128;
+    constexpr uint32_t kTileWidth = kTileSize / kTileHeight;
+    constexpr uint32_t kLinearAlignment = 4 * kPageSize;
+
+    uint64_t layerxSamples = arrayLayerCount * sampleCount;
+    if (layerxSamples <= 1) {
+        return 0;
+    }
+
+    uint32_t columnPitch = GetColumnPitch(height, mipLevelCount);
+
+    uint64_t totalWidth = width * formatBytesPerBlock;
+    uint64_t totalHeight = columnPitch * layerxSamples;
+
+    // The texture must be aligned to both the tile width (512 bytes) and the tile height
+    // (128 rows) on Intel Gen12 GPUs.
+    uint32_t mainTileCols = Align(totalWidth, kTileWidth) / kTileWidth;
+    uint32_t mainTileRows = Align(totalHeight, kTileHeight) / kTileHeight;
+    uint64_t mainTileCount = mainTileCols * mainTileRows;
+
+    // Old Intel drivers have a bug when computing the auxiliary memory size (auxSize) of the
+    // texture, which is derived from the main memory size (mainSize) of the texture. Note that
+    // the memory allocation for mainSize itself is correct. But during memory allocation for
+    // auxSize, the driver re-calculates mainSize in a wrong way: the incorrect algorithm doesn't
+    // respect the alignment requirements of the tile-based texture memory layout and simply
+    // aligns to a constant value (16K) for each sample and layer.
+    uint64_t expectedMainSize = mainTileCount * kTileSize;
+    uint64_t actualMainSize = Align(columnPitch * totalWidth, kLinearAlignment) * layerxSamples;
+
+    // If the incorrect mainSize calculation leads to a less-than-expected auxSize, texture
+    // corruption is very likely to happen on any texture access like texture copy, rendering,
+    // sampling, etc. So we have to allocate a few extra layers to offset the less-than-expected
+    // auxSize. However, it is fine if the incorrect mainSize calculation doesn't result in a
+    // smaller auxSize. For example, if the correct mainSize is 3.8M, it requires 4 pages (16K)
+    // of auxSize. Any incorrect mainSize between 3.0+ M and 4.0M also requires 16K of auxSize
+    // according to the calculation: auxSize = Align(mainSize >> 8, kPageSize). A greater auxSize
+    // is also fine. But if mainSize is less than 3.0M, its auxSize will be less than 16K and
+    // texture corruption is caused.
+    uint64_t expectedAuxSize = Align(expectedMainSize >> 8, kPageSize);
+    uint64_t actualAuxSize = Align(actualMainSize >> 8, kPageSize);
+    if (actualAuxSize < expectedAuxSize) {
+        uint64_t actualMainSizePerLayer = actualMainSize / arrayLayerCount;
+        return (expectedMainSize - actualMainSize + actualMainSizePerLayer - 1) /
+               actualMainSizePerLayer;
+    }
+    return 0;
+}
+
 }  // namespace
 
 ResourceAllocatorManager::ResourceAllocatorManager(Device* device) : mDevice(device) {
@@ -199,7 +307,8 @@ ResourceAllocatorManager::ResourceAllocatorManager(Device* device) : mDevice(dev
 ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
     D3D12_HEAP_TYPE heapType,
     const D3D12_RESOURCE_DESC& resourceDescriptor,
-    D3D12_RESOURCE_STATES initialUsage) {
+    D3D12_RESOURCE_STATES initialUsage,
+    uint32_t formatBytesPerBlock) {
     // In order to suppress a warning in the D3D12 debug layer, we need to specify an
     // optimized clear value. As there are no negative consequences when picking a mismatched
     // clear value, we use zero as the optimized clear value. This also enables fast clears on
@@ -211,6 +320,18 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
         optimizedClearValue = &zero;
     }
 
+    // If we are allocating memory for a 2D array texture on the D3D12 backend, we need to
+    // allocate extra layers on some Intel Gen12 devices, see crbug.com/dawn/949 for details.
+    D3D12_RESOURCE_DESC revisedDescriptor = resourceDescriptor;
+    if (mDevice->IsToggleEnabled(Toggle::D3D12AllocateExtraMemoryFor2DArrayTexture) &&
+        resourceDescriptor.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D &&
+        resourceDescriptor.DepthOrArraySize > 1) {
+        revisedDescriptor.DepthOrArraySize += ComputeExtraArraySizeForIntelGen12(
+            resourceDescriptor.Width, resourceDescriptor.Height,
+            resourceDescriptor.DepthOrArraySize, resourceDescriptor.MipLevels,
+            resourceDescriptor.SampleDesc.Count, formatBytesPerBlock);
+    }
+
     // TODO(crbug.com/dawn/849): Conditionally disable sub-allocation.
     // For very large resources, there is no benefit to suballocate.
     // For very small resources, it is inefficent to suballocate given the min. heap
@@ -218,7 +339,7 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
     // Attempt to satisfy the request using sub-allocation (placed resource in a heap).
     if (!mDevice->IsToggleEnabled(Toggle::DisableResourceSuballocation)) {
         ResourceHeapAllocation subAllocation;
-        DAWN_TRY_ASSIGN(subAllocation, CreatePlacedResource(heapType, resourceDescriptor,
+        DAWN_TRY_ASSIGN(subAllocation, CreatePlacedResource(heapType, revisedDescriptor,
                                                             optimizedClearValue, initialUsage));
         if (subAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
             return std::move(subAllocation);
@@ -227,7 +348,7 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
     // If sub-allocation fails, fall-back to direct allocation (committed resource).
     ResourceHeapAllocation directAllocation;
-    DAWN_TRY_ASSIGN(directAllocation, CreateCommittedResource(heapType, resourceDescriptor,
+    DAWN_TRY_ASSIGN(directAllocation, CreateCommittedResource(heapType, revisedDescriptor,
                                                               optimizedClearValue, initialUsage));
     if (directAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
         return std::move(directAllocation);

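To make the driver bug concrete, the short standalone program below (not part of the change) walks the same arithmetic as GetColumnPitch and ComputeExtraArraySizeForIntelGen12 above for one hypothetical input: a 1200x200 RGBA8 texture with 2 array layers, 1 mip level and 1 sample. The tile-aligned main size is 2,621,440 bytes, but the driver's 16 KB-per-layer estimate is only 1,933,312 bytes, so the derived auxiliary size drops from 12,288 to 8,192 bytes and one extra layer has to be allocated. The Align() helper here is a local stand-in for Dawn's.

#include <cstdint>
#include <cstdio>

// Local stand-in for Dawn's Align() helper: round v up to a multiple of alignment.
uint64_t Align(uint64_t v, uint64_t alignment) {
    return (v + alignment - 1) / alignment * alignment;
}

int main() {
    // Hypothetical texture: 1200x200, RGBA8 (4 bytes per block), 2 layers, 1 mip, 1 sample.
    const uint32_t width = 1200, height = 200, layers = 2, samples = 1, bpb = 4;

    const uint32_t kPageSize = 4 * 1024;
    const uint32_t kTileSize = 16 * kPageSize;             // 64 KB tiles
    const uint32_t kTileHeight = 128;                      // rows per tile
    const uint32_t kTileWidth = kTileSize / kTileHeight;   // 512 bytes per tile row
    const uint32_t kLinearAlignment = 4 * kPageSize;       // 16 KB, used by the buggy path

    // With a single mip level the column pitch is just the height aligned to 4 rows: 200.
    const uint32_t columnPitch = Align(height, 4);

    const uint64_t layerxSamples = layers * samples;
    const uint64_t totalWidth = width * bpb;                     // 4800 bytes per row
    const uint64_t totalHeight = columnPitch * layerxSamples;    // 400 rows

    // Tile-aligned ("expected") main size: 10 x 4 tiles = 2,621,440 bytes.
    const uint64_t expectedMainSize =
        (Align(totalWidth, kTileWidth) / kTileWidth) *
        (Align(totalHeight, kTileHeight) / kTileHeight) * uint64_t(kTileSize);
    // 16 KB-aligned ("actual", buggy) main size: 966,656 x 2 = 1,933,312 bytes.
    const uint64_t actualMainSize =
        Align(uint64_t(columnPitch) * totalWidth, kLinearAlignment) * layerxSamples;

    // auxSize = Align(mainSize >> 8, 4 KB): expected 12,288 vs actual 8,192 -> under-allocated.
    const uint64_t expectedAuxSize = Align(expectedMainSize >> 8, kPageSize);
    const uint64_t actualAuxSize = Align(actualMainSize >> 8, kPageSize);

    uint64_t extraLayers = 0;
    if (actualAuxSize < expectedAuxSize) {
        const uint64_t perLayer = actualMainSize / layers;
        extraLayers = (expectedMainSize - actualMainSize + perLayer - 1) / perLayer;  // -> 1
    }
    std::printf("expectedAux=%llu actualAux=%llu extraLayers=%llu\n",
                (unsigned long long)expectedAuxSize, (unsigned long long)actualAuxSize,
                (unsigned long long)extraLayers);
    return 0;
}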

@@ -63,7 +63,8 @@ class ResourceAllocatorManager {
     ResultOrError<ResourceHeapAllocation> AllocateMemory(
         D3D12_HEAP_TYPE heapType,
         const D3D12_RESOURCE_DESC& resourceDescriptor,
-        D3D12_RESOURCE_STATES initialUsage);
+        D3D12_RESOURCE_STATES initialUsage,
+        uint32_t formatBytesPerBlock);
 
     void DeallocateMemory(ResourceHeapAllocation& allocation);


@@ -39,7 +39,7 @@ MaybeError StagingBuffer::Initialize() {
     resourceDescriptor.Flags = D3D12_RESOURCE_FLAG_NONE;
 
     DAWN_TRY_ASSIGN(mUploadHeap, mDevice->AllocateMemory(D3D12_HEAP_TYPE_UPLOAD, resourceDescriptor,
-                                                         D3D12_RESOURCE_STATE_GENERIC_READ));
+                                                         D3D12_RESOURCE_STATE_GENERIC_READ, 0));
 
     // The mapped buffer can be accessed at any time, so it must be locked to ensure it is never
     // evicted. This buffer should already have been made resident when it was created.


@@ -607,9 +607,13 @@ MaybeError Texture::InitializeAsInternalTexture() {
     resourceDescriptor.Flags = D3D12ResourceFlags(GetInternalUsage(), GetFormat());
     mD3D12ResourceFlags = resourceDescriptor.Flags;
 
+    uint32_t bytesPerBlock = 0;
+    if (GetFormat().IsColor()) {
+        bytesPerBlock = GetFormat().GetAspectInfo(wgpu::TextureAspect::All).block.byteSize;
+    }
     DAWN_TRY_ASSIGN(mResourceAllocation,
                     device->AllocateMemory(D3D12_HEAP_TYPE_DEFAULT, resourceDescriptor,
-                                           D3D12_RESOURCE_STATE_COMMON));
+                                           D3D12_RESOURCE_STATE_COMMON, bytesPerBlock));
 
     SetLabelImpl();


@@ -28,6 +28,7 @@ constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::RGBA8Unorm;
 namespace {
 
 enum class WriteType {
+    ClearTexture,
     WriteTexture,    // Write the tested texture via writeTexture API
     B2TCopy,         // Write the tested texture via B2T copy
     RenderConstant,  // Write the tested texture via rendering the whole rectangle with solid color
@@ -40,6 +41,9 @@ enum class WriteType {
 
 std::ostream& operator<<(std::ostream& o, WriteType writeType) {
     switch (writeType) {
+        case WriteType::ClearTexture:
+            o << "ClearTexture";
+            break;
         case WriteType::WriteTexture:
             o << "WriteTexture";
             break;
@@ -109,7 +113,7 @@ class TextureCorruptionTests : public DawnTestWithParams<TextureCorruptionTestsP
                 // lead to precision loss or rendering a solid color is easier to implement and
                 // compare.
                 data[i * elementNumPerRow + j] = 0xFFFFFFFF;
-            } else {
+            } else if (type != WriteType::ClearTexture) {
                 data[i * elementNumPerRow + j] = srcValue;
                 srcValue++;
             }
@@ -275,5 +279,6 @@ DAWN_INSTANTIATE_TEST_P(TextureCorruptionTests,
                         {D3D12Backend()},
                         {100u, 200u, 300u, 400u, 500u, 600u, 700u, 800u, 900u, 1000u, 1200u},
                         {100u, 200u},
-                        {WriteType::WriteTexture, WriteType::B2TCopy, WriteType::RenderConstant,
-                         WriteType::RenderFromTextureSample, WriteType::RenderFromTextureLoad});
+                        {WriteType::ClearTexture, WriteType::WriteTexture, WriteType::B2TCopy,
+                         WriteType::RenderConstant, WriteType::RenderFromTextureSample,
+                         WriteType::RenderFromTextureLoad});
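
The new ClearTexture case covers writes that carry no per-texel data, which is why the expected-data fill above skips it. As a rough illustration (not taken from the test itself), one way to issue such a clear through the WebGPU C++ API is an empty render pass whose color attachment is loaded with LoadOp::Clear; the test's actual implementation may differ.

#include "dawn/webgpu_cpp.h"

// Sketch: clear one 2D array layer of `texture` to a solid color by beginning and ending a
// render pass whose color attachment uses LoadOp::Clear. No draw calls are needed; the clear
// itself is the write.
void ClearTextureLayer(const wgpu::Device& device,
                       const wgpu::Texture& texture,
                       uint32_t baseArrayLayer) {
    wgpu::TextureViewDescriptor viewDesc = {};
    viewDesc.dimension = wgpu::TextureViewDimension::e2D;
    viewDesc.baseMipLevel = 0;
    viewDesc.mipLevelCount = 1;
    viewDesc.baseArrayLayer = baseArrayLayer;
    viewDesc.arrayLayerCount = 1;

    wgpu::RenderPassColorAttachment attachment = {};
    attachment.view = texture.CreateView(&viewDesc);
    attachment.loadOp = wgpu::LoadOp::Clear;    // clear the layer on load
    attachment.storeOp = wgpu::StoreOp::Store;  // keep the cleared contents
    attachment.clearValue = {1.0, 1.0, 1.0, 1.0};

    wgpu::RenderPassDescriptor passDesc = {};
    passDesc.colorAttachmentCount = 1;
    passDesc.colorAttachments = &attachment;

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
    wgpu::RenderPassEncoder pass = encoder.BeginRenderPass(&passDesc);
    pass.End();  // no draws: the attachment is simply cleared and stored
    wgpu::CommandBuffer commands = encoder.Finish();
    device.GetQueue().Submit(1, &commands);
}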