mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-07-03 19:55:56 +00:00
Add extra layers to work around array texture corruption issue
This change works around the array texture corruption issue for some Windows Intel devices on some old drivers. The number of extra layers for a given texture is calculated precisely from the texture memory layout on these devices. It also adds one more test: clearTexture. Bug: dawn:949, dawn:1507 Change-Id: I0b2a6497c77f3edf45c49220517e13be76c6b608 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/103120 Reviewed-by: Austin Eng <enga@chromium.org> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Yunchao He <yunchao.he@intel.com>
This commit is contained in:
parent
d673ce3ca8
commit
7a4072e3e3
@ -155,7 +155,7 @@ MaybeError Buffer::Initialize(bool mappedAtCreation) {
|
|||||||
|
|
||||||
DAWN_TRY_ASSIGN(
|
DAWN_TRY_ASSIGN(
|
||||||
mResourceAllocation,
|
mResourceAllocation,
|
||||||
ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage));
|
ToBackend(GetDevice())->AllocateMemory(heapType, resourceDescriptor, bufferUsage, 0));
|
||||||
|
|
||||||
SetLabelImpl();
|
SetLabelImpl();
|
||||||
|
|
||||||
|
@ -541,8 +541,11 @@ void Device::DeallocateMemory(ResourceHeapAllocation& allocation) {
|
|||||||
ResultOrError<ResourceHeapAllocation> Device::AllocateMemory(
|
ResultOrError<ResourceHeapAllocation> Device::AllocateMemory(
|
||||||
D3D12_HEAP_TYPE heapType,
|
D3D12_HEAP_TYPE heapType,
|
||||||
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATES initialUsage) {
|
D3D12_RESOURCE_STATES initialUsage,
|
||||||
return mResourceAllocatorManager->AllocateMemory(heapType, resourceDescriptor, initialUsage);
|
uint32_t formatBytesPerBlock) {
|
||||||
|
// formatBytesPerBlock is needed only for color non-compressed formats for a workaround.
|
||||||
|
return mResourceAllocatorManager->AllocateMemory(heapType, resourceDescriptor, initialUsage,
|
||||||
|
formatBytesPerBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<ExternalImageDXGIImpl> Device::CreateExternalImageDXGIImpl(
|
std::unique_ptr<ExternalImageDXGIImpl> Device::CreateExternalImageDXGIImpl(
|
||||||
|
@ -115,7 +115,8 @@ class Device final : public DeviceBase {
|
|||||||
ResultOrError<ResourceHeapAllocation> AllocateMemory(
|
ResultOrError<ResourceHeapAllocation> AllocateMemory(
|
||||||
D3D12_HEAP_TYPE heapType,
|
D3D12_HEAP_TYPE heapType,
|
||||||
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATES initialUsage);
|
D3D12_RESOURCE_STATES initialUsage,
|
||||||
|
uint32_t formatBytesPerBlock);
|
||||||
|
|
||||||
void DeallocateMemory(ResourceHeapAllocation& allocation);
|
void DeallocateMemory(ResourceHeapAllocation& allocation);
|
||||||
|
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
|
|
||||||
#include "dawn/native/d3d12/ResourceAllocatorManagerD3D12.h"
|
#include "dawn/native/d3d12/ResourceAllocatorManagerD3D12.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
@ -177,6 +178,113 @@ bool IsClearValueOptimizable(DeviceBase* device, const D3D12_RESOURCE_DESC& reso
|
|||||||
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0;
|
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t GetColumnPitch(uint32_t baseHeight, uint32_t mipLevelCount) {
|
||||||
|
// This function returns the number of rows of block for a single layer with all mipmaps.
|
||||||
|
//
|
||||||
|
// Below is a simple diagram about texture memory layout for one single layer of a mipmap
|
||||||
|
// texture. For details about texture memory layout on Intel Gen12 GPU, read page 78 at
|
||||||
|
// https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-tgl-vol05-memory_data_formats.pdf.
|
||||||
|
// ---------------------------------------------- ---
|
||||||
|
// | | |
|
||||||
|
// | |
|
||||||
|
// | |
|
||||||
|
// | |
|
||||||
|
// | LOD 0 |
|
||||||
|
// | |
|
||||||
|
// | |
|
||||||
|
// | | column pitch (aka QPitch)
|
||||||
|
// | |
|
||||||
|
// | |
|
||||||
|
// ----------------------------------------------
|
||||||
|
// | | |
|
||||||
|
// | | LOD2 |
|
||||||
|
// | LOD 1 |---------
|
||||||
|
// | | LOD3 |
|
||||||
|
// | |-------
|
||||||
|
// | | .
|
||||||
|
// ---------------------- . |
|
||||||
|
// . ---
|
||||||
|
|
||||||
|
uint32_t level1Height = 0;
|
||||||
|
uint32_t level2ToTailHeight = 0;
|
||||||
|
if (mipLevelCount >= 2) {
|
||||||
|
level1Height = std::max(baseHeight >> 1, 1u);
|
||||||
|
|
||||||
|
for (uint32_t level = 2; level < mipLevelCount; ++level) {
|
||||||
|
level2ToTailHeight += std::max(baseHeight >> level, 1u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The height of level 2 to tail (or max) can be greater than the height of level 1. For
|
||||||
|
// example, if the single layer's dimension is 16x4 and it has full mipmaps, then there are 5
|
||||||
|
// levels: 16x4, 8x2, 4x1, 2x1, 1x1. So level1Height is 2, while level2ToTailHeight is 1+1+1
|
||||||
|
// = 3.
|
||||||
|
uint32_t columnPitch = baseHeight + std::max(level1Height, level2ToTailHeight);
|
||||||
|
|
||||||
|
// The number of rows of block for a texture must be a multiple of 4.
|
||||||
|
return Align(columnPitch, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t ComputeExtraArraySizeForIntelGen12(uint32_t width,
|
||||||
|
uint32_t height,
|
||||||
|
uint32_t arrayLayerCount,
|
||||||
|
uint32_t mipLevelCount,
|
||||||
|
uint32_t sampleCount,
|
||||||
|
uint32_t formatBytesPerBlock) {
|
||||||
|
// For details about texture memory layout on Intel Gen12 GPU, read
|
||||||
|
// https://01.org/sites/default/files/documentation/intel-gfx-prm-osrc-tgl-vol05-memory_data_formats.pdf.
|
||||||
|
// - Texture memory layout: from <Surface Memory Organizations> to
|
||||||
|
// <Surface Padding Requirement>.
|
||||||
|
// - Tile-based memory: the entire section of <Address Tiling Function Introduction>.
|
||||||
|
constexpr uint32_t kPageSize = 4 * 1024;
|
||||||
|
constexpr uint32_t kTileSize = 16 * kPageSize;
|
||||||
|
constexpr uint32_t kTileHeight = 128;
|
||||||
|
constexpr uint32_t kTileWidth = kTileSize / kTileHeight;
|
||||||
|
constexpr uint32_t kLinearAlignment = 4 * kPageSize;
|
||||||
|
|
||||||
|
uint64_t layerxSamples = arrayLayerCount * sampleCount;
|
||||||
|
|
||||||
|
if (layerxSamples <= 1) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t columnPitch = GetColumnPitch(height, mipLevelCount);
|
||||||
|
|
||||||
|
uint64_t totalWidth = width * formatBytesPerBlock;
|
||||||
|
uint64_t totalHeight = columnPitch * layerxSamples;
|
||||||
|
|
||||||
|
// Texture should be aligned on both tile width (512 bytes) and tile height (128 rows) on Intel
|
||||||
|
// Gen12 GPU
|
||||||
|
uint32_t mainTileCols = Align(totalWidth, kTileWidth) / kTileWidth;
|
||||||
|
uint32_t mainTileRows = Align(totalHeight, kTileHeight) / kTileHeight;
|
||||||
|
uint64_t mainTileCount = mainTileCols * mainTileRows;
|
||||||
|
|
||||||
|
// There is a bug in Intel old drivers to compute the auxiliary memory size (auxSize) of the
|
||||||
|
// texture, which is calculated from the main memory size (mainSize) of the texture. Note that
|
||||||
|
// memory allocation for mainSize itself is correct. But during memory allocation for auxSize,
|
||||||
|
// it re-caculated mainSize and did it in a wrong way. The incorrect algorithm doesn't respect
|
||||||
|
// alignment requirements from tile-based texture memory layout. It just simple aligned to a
|
||||||
|
// constant value (16K) for each sample and layer.
|
||||||
|
uint64_t expectedMainSize = mainTileCount * kTileSize;
|
||||||
|
uint64_t actualMainSize = Align(columnPitch * totalWidth, kLinearAlignment) * layerxSamples;
|
||||||
|
|
||||||
|
// If the incorrect mainSize calculation lead to less-than-expected auxSize, texture corruption
|
||||||
|
// is very likely to happen for any texture access like texture copy, rendering, sampling, etc.
|
||||||
|
// So we have to allocate a few more extra layers to offset the less-than-expected auxSize.
|
||||||
|
// However, it is fine if the incorrect mainSize calculation doesn't introduce less auxSize. For
|
||||||
|
// example, if correct mainSize is 3.8M, it requires 4 pages of auxSize (16K). Any incorrect
|
||||||
|
// mainSize between 3.0+ M and 4.0M also requires 16K auxSize according to the calculation:
|
||||||
|
// auxSize = Align(mainSize >> 8, kPageSize). And greater auxSize is also fine. But if mainSize
|
||||||
|
// is less than 3.0M, its auxSize will be less than 16K and hence texture corruption is caused.
|
||||||
|
uint64_t expectedAuxSize = Align(expectedMainSize >> 8, kPageSize);
|
||||||
|
uint64_t actualAuxSize = Align(actualMainSize >> 8, kPageSize);
|
||||||
|
if (actualAuxSize < expectedAuxSize) {
|
||||||
|
uint64_t actualMainSizePerLayer = actualMainSize / arrayLayerCount;
|
||||||
|
return (expectedMainSize - actualMainSize + actualMainSizePerLayer - 1) /
|
||||||
|
actualMainSizePerLayer;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
ResourceAllocatorManager::ResourceAllocatorManager(Device* device) : mDevice(device) {
|
ResourceAllocatorManager::ResourceAllocatorManager(Device* device) : mDevice(device) {
|
||||||
@ -199,7 +307,8 @@ ResourceAllocatorManager::ResourceAllocatorManager(Device* device) : mDevice(dev
|
|||||||
ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
|
ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
|
||||||
D3D12_HEAP_TYPE heapType,
|
D3D12_HEAP_TYPE heapType,
|
||||||
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATES initialUsage) {
|
D3D12_RESOURCE_STATES initialUsage,
|
||||||
|
uint32_t formatBytesPerBlock) {
|
||||||
// In order to suppress a warning in the D3D12 debug layer, we need to specify an
|
// In order to suppress a warning in the D3D12 debug layer, we need to specify an
|
||||||
// optimized clear value. As there are no negative consequences when picking a mismatched
|
// optimized clear value. As there are no negative consequences when picking a mismatched
|
||||||
// clear value, we use zero as the optimized clear value. This also enables fast clears on
|
// clear value, we use zero as the optimized clear value. This also enables fast clears on
|
||||||
@ -211,6 +320,18 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
|
|||||||
optimizedClearValue = &zero;
|
optimizedClearValue = &zero;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are allocating memory for a 2D array texture on D3D12 backend, we need to allocate
|
||||||
|
// extra layers on some Intel Gen12 devices, see crbug.com/dawn/949 for details.
|
||||||
|
D3D12_RESOURCE_DESC revisedDescriptor = resourceDescriptor;
|
||||||
|
if (mDevice->IsToggleEnabled(Toggle::D3D12AllocateExtraMemoryFor2DArrayTexture) &&
|
||||||
|
resourceDescriptor.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D &&
|
||||||
|
resourceDescriptor.DepthOrArraySize > 1) {
|
||||||
|
revisedDescriptor.DepthOrArraySize += ComputeExtraArraySizeForIntelGen12(
|
||||||
|
resourceDescriptor.Width, resourceDescriptor.Height,
|
||||||
|
resourceDescriptor.DepthOrArraySize, resourceDescriptor.MipLevels,
|
||||||
|
resourceDescriptor.SampleDesc.Count, formatBytesPerBlock);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(crbug.com/dawn/849): Conditionally disable sub-allocation.
|
// TODO(crbug.com/dawn/849): Conditionally disable sub-allocation.
|
||||||
// For very large resources, there is no benefit to suballocate.
|
// For very large resources, there is no benefit to suballocate.
|
||||||
// For very small resources, it is inefficient to suballocate given the min. heap
|
// For very small resources, it is inefficient to suballocate given the min. heap
|
||||||
@ -218,7 +339,7 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
|
|||||||
// Attempt to satisfy the request using sub-allocation (placed resource in a heap).
|
// Attempt to satisfy the request using sub-allocation (placed resource in a heap).
|
||||||
if (!mDevice->IsToggleEnabled(Toggle::DisableResourceSuballocation)) {
|
if (!mDevice->IsToggleEnabled(Toggle::DisableResourceSuballocation)) {
|
||||||
ResourceHeapAllocation subAllocation;
|
ResourceHeapAllocation subAllocation;
|
||||||
DAWN_TRY_ASSIGN(subAllocation, CreatePlacedResource(heapType, resourceDescriptor,
|
DAWN_TRY_ASSIGN(subAllocation, CreatePlacedResource(heapType, revisedDescriptor,
|
||||||
optimizedClearValue, initialUsage));
|
optimizedClearValue, initialUsage));
|
||||||
if (subAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
|
if (subAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
|
||||||
return std::move(subAllocation);
|
return std::move(subAllocation);
|
||||||
@ -227,7 +348,7 @@ ResultOrError<ResourceHeapAllocation> ResourceAllocatorManager::AllocateMemory(
|
|||||||
|
|
||||||
// If sub-allocation fails, fall-back to direct allocation (committed resource).
|
// If sub-allocation fails, fall-back to direct allocation (committed resource).
|
||||||
ResourceHeapAllocation directAllocation;
|
ResourceHeapAllocation directAllocation;
|
||||||
DAWN_TRY_ASSIGN(directAllocation, CreateCommittedResource(heapType, resourceDescriptor,
|
DAWN_TRY_ASSIGN(directAllocation, CreateCommittedResource(heapType, revisedDescriptor,
|
||||||
optimizedClearValue, initialUsage));
|
optimizedClearValue, initialUsage));
|
||||||
if (directAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
|
if (directAllocation.GetInfo().mMethod != AllocationMethod::kInvalid) {
|
||||||
return std::move(directAllocation);
|
return std::move(directAllocation);
|
||||||
|
@ -63,7 +63,8 @@ class ResourceAllocatorManager {
|
|||||||
ResultOrError<ResourceHeapAllocation> AllocateMemory(
|
ResultOrError<ResourceHeapAllocation> AllocateMemory(
|
||||||
D3D12_HEAP_TYPE heapType,
|
D3D12_HEAP_TYPE heapType,
|
||||||
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
const D3D12_RESOURCE_DESC& resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATES initialUsage);
|
D3D12_RESOURCE_STATES initialUsage,
|
||||||
|
uint32_t formatBytesPerBlock);
|
||||||
|
|
||||||
void DeallocateMemory(ResourceHeapAllocation& allocation);
|
void DeallocateMemory(ResourceHeapAllocation& allocation);
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ MaybeError StagingBuffer::Initialize() {
|
|||||||
resourceDescriptor.Flags = D3D12_RESOURCE_FLAG_NONE;
|
resourceDescriptor.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||||
|
|
||||||
DAWN_TRY_ASSIGN(mUploadHeap, mDevice->AllocateMemory(D3D12_HEAP_TYPE_UPLOAD, resourceDescriptor,
|
DAWN_TRY_ASSIGN(mUploadHeap, mDevice->AllocateMemory(D3D12_HEAP_TYPE_UPLOAD, resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATE_GENERIC_READ));
|
D3D12_RESOURCE_STATE_GENERIC_READ, 0));
|
||||||
|
|
||||||
// The mapped buffer can be accessed at any time, so it must be locked to ensure it is never
|
// The mapped buffer can be accessed at any time, so it must be locked to ensure it is never
|
||||||
// evicted. This buffer should already have been made resident when it was created.
|
// evicted. This buffer should already have been made resident when it was created.
|
||||||
|
@ -607,9 +607,13 @@ MaybeError Texture::InitializeAsInternalTexture() {
|
|||||||
resourceDescriptor.Flags = D3D12ResourceFlags(GetInternalUsage(), GetFormat());
|
resourceDescriptor.Flags = D3D12ResourceFlags(GetInternalUsage(), GetFormat());
|
||||||
mD3D12ResourceFlags = resourceDescriptor.Flags;
|
mD3D12ResourceFlags = resourceDescriptor.Flags;
|
||||||
|
|
||||||
|
uint32_t bytesPerBlock = 0;
|
||||||
|
if (GetFormat().IsColor()) {
|
||||||
|
bytesPerBlock = GetFormat().GetAspectInfo(wgpu::TextureAspect::All).block.byteSize;
|
||||||
|
}
|
||||||
DAWN_TRY_ASSIGN(mResourceAllocation,
|
DAWN_TRY_ASSIGN(mResourceAllocation,
|
||||||
device->AllocateMemory(D3D12_HEAP_TYPE_DEFAULT, resourceDescriptor,
|
device->AllocateMemory(D3D12_HEAP_TYPE_DEFAULT, resourceDescriptor,
|
||||||
D3D12_RESOURCE_STATE_COMMON));
|
D3D12_RESOURCE_STATE_COMMON, bytesPerBlock));
|
||||||
|
|
||||||
SetLabelImpl();
|
SetLabelImpl();
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ constexpr wgpu::TextureFormat kFormat = wgpu::TextureFormat::RGBA8Unorm;
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
enum class WriteType {
|
enum class WriteType {
|
||||||
|
ClearTexture,
|
||||||
WriteTexture, // Write the tested texture via writeTexture API
|
WriteTexture, // Write the tested texture via writeTexture API
|
||||||
B2TCopy, // Write the tested texture via B2T copy
|
B2TCopy, // Write the tested texture via B2T copy
|
||||||
RenderConstant, // Write the tested texture via rendering the whole rectangle with solid color
|
RenderConstant, // Write the tested texture via rendering the whole rectangle with solid color
|
||||||
@ -40,6 +41,9 @@ enum class WriteType {
|
|||||||
|
|
||||||
std::ostream& operator<<(std::ostream& o, WriteType writeType) {
|
std::ostream& operator<<(std::ostream& o, WriteType writeType) {
|
||||||
switch (writeType) {
|
switch (writeType) {
|
||||||
|
case WriteType::ClearTexture:
|
||||||
|
o << "ClearTexture";
|
||||||
|
break;
|
||||||
case WriteType::WriteTexture:
|
case WriteType::WriteTexture:
|
||||||
o << "WriteTexture";
|
o << "WriteTexture";
|
||||||
break;
|
break;
|
||||||
@ -109,7 +113,7 @@ class TextureCorruptionTests : public DawnTestWithParams<TextureCorruptionTestsP
|
|||||||
// lead to precision loss or rendering a solid color is easier to implement and
|
// lead to precision loss or rendering a solid color is easier to implement and
|
||||||
// compare.
|
// compare.
|
||||||
data[i * elementNumPerRow + j] = 0xFFFFFFFF;
|
data[i * elementNumPerRow + j] = 0xFFFFFFFF;
|
||||||
} else {
|
} else if (type != WriteType::ClearTexture) {
|
||||||
data[i * elementNumPerRow + j] = srcValue;
|
data[i * elementNumPerRow + j] = srcValue;
|
||||||
srcValue++;
|
srcValue++;
|
||||||
}
|
}
|
||||||
@ -275,5 +279,6 @@ DAWN_INSTANTIATE_TEST_P(TextureCorruptionTests,
|
|||||||
{D3D12Backend()},
|
{D3D12Backend()},
|
||||||
{100u, 200u, 300u, 400u, 500u, 600u, 700u, 800u, 900u, 1000u, 1200u},
|
{100u, 200u, 300u, 400u, 500u, 600u, 700u, 800u, 900u, 1000u, 1200u},
|
||||||
{100u, 200u},
|
{100u, 200u},
|
||||||
{WriteType::WriteTexture, WriteType::B2TCopy, WriteType::RenderConstant,
|
{WriteType::ClearTexture, WriteType::WriteTexture, WriteType::B2TCopy,
|
||||||
WriteType::RenderFromTextureSample, WriteType::RenderFromTextureLoad});
|
WriteType::RenderConstant, WriteType::RenderFromTextureSample,
|
||||||
|
WriteType::RenderFromTextureLoad});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user