Initialize padding on buffer allocations

Buffer allocations in Dawn may be padded. Depending on
how the backend implements robust vertex buffer access,
this padding could be visible to shaders. This commit
updates buffer creation to clear all padding bytes
immediately after creation; this clear is not counted
as a lazy clear. It also adds a test which reads off
the end of a padded vertex buffer to check that the
padding bytes are initialized.
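
(Condensed sketch of the pattern each backend now
follows; names mirror the Dawn internals in the diffs
below, and the exact clear offset and alignment vary
per backend:)

    // After allocating, clear [GetSize(), GetAllocatedSize()) to zero.
    if (GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) &&
        !mappedAtCreation) {
        uint64_t paddingBytes = GetAllocatedSize() - GetSize();
        if (paddingBytes > 0) {
            // Not counted as a lazy clear; the padding is invisible to the API.
            DAWN_TRY(ClearBuffer(commandContext, 0, /*offset=*/GetSize(),
                                 /*size=*/paddingBytes));
        }
    }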

Also: Update Metal buffers to always allocate enough
space for Tint's vertex pulling, and pass the full
allocated size to Tint. While writing the test in this
commit, a bug was found where Tint assumes there are at
least 4 bytes in the vertex pulling buffer. The WebGPU
API currently allows zero-sized bindings, so Dawn needs
to always allocate enough space for one element.
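
For example, this zero-sized binding at the very end of
a vertex buffer is valid WebGPU (illustrative sketch;
`pass` is assumed to be a render pass encoder), yet
Tint's vertex pulling may still load a full 4-byte
element through it:

    wgpu::BufferDescriptor desc;
    desc.size = 8;
    desc.usage = wgpu::BufferUsage::Vertex;
    wgpu::Buffer buffer = device.CreateBuffer(&desc);
    // offset == buffer size, size == 0: an empty but legal binding range.
    pass.SetVertexBuffer(0, buffer, /*offset=*/8, /*size=*/0);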

Also: Update Vulkan vertex/index buffers to allocate
at least one more byte than requested because
vkCmdBindVertexBuffers and vkCmdBindIndexBuffer disallow
an offset equal to the buffer size. We need at least
one more byte to make zero-sized bindings at the end
of the buffer valid.
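
The resulting Vulkan sizing logic, condensed from the
diff below:

    uint64_t size = GetSize();
    if (GetUsage() & (wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Index)) {
        size += 1;  // offset == buffer size must stay strictly in bounds
    }
    // vkCmdFillBuffer also needs a non-zero size that is a multiple of 4.
    mAllocatedSize = Align(std::max(size, uint64_t(4u)), 4u);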

Lastly: Update the helper so that a T2B copy of a
single row which touches all bytes in a buffer marks
the buffer as fully initialized. This small
optimization is necessary to write the test.
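
A standalone model of the updated check (types and names
simplified for illustration; the real helper uses
ComputeRequiredBytesInCopy and Buffer::IsFullBufferRange
as shown in the first diff below):

    #include <cstdint>

    struct Copy {
        uint64_t offset, bytesPerRow, rowsPerImage;      // buffer data layout
        uint64_t widthInBlocks, heightInBlocks, slices;  // copy extent
        uint64_t bytesPerBlock;
    };

    // True iff the copy starts at offset 0, leaves no gaps between rows
    // or slices, and its written bytes cover the entire buffer.
    bool FullyInitializes(const Copy& c, uint64_t bufferSize) {
        if (c.offset > 0) return false;
        const bool multiSlice = c.slices > 1;
        const bool multiRow = multiSlice || c.heightInBlocks > 1;
        if (multiSlice && c.rowsPerImage > c.heightInBlocks) return false;
        const uint64_t filledRow = c.widthInBlocks * c.bytesPerBlock;
        if (multiRow && c.bytesPerRow > filledRow) return false;
        // Standard WebGPU bytes-in-copy formula.
        const uint64_t written = c.bytesPerRow * c.rowsPerImage * (c.slices - 1) +
                                 c.bytesPerRow * (c.heightInBlocks - 1) + filledRow;
        return written == bufferSize;
    }

A single-row copy (heightInBlocks == 1, slices == 1)
skips both gap checks, so a bytesPerRow larger than the
bytes actually written no longer causes a false negative.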

Bug: dawn:837
Change-Id: I829f4764509c4ec784c5aeaaf40b6dcbd4be6866
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/62161
Commit-Queue: Austin Eng <enga@chromium.org>
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Austin Eng 2021-08-19 18:02:36 +00:00, committed by Dawn LUCI CQ
parent 663a16e2fe
commit ddd0a0a856
13 changed files with 376 additions and 43 deletions

View File

@@ -17,6 +17,7 @@
 #include "common/BitSetIterator.h"
 #include "dawn_native/Buffer.h"
 #include "dawn_native/CommandEncoder.h"
+#include "dawn_native/CommandValidation.h"
 #include "dawn_native/Commands.h"
 #include "dawn_native/Format.h"
 #include "dawn_native/Texture.h"

@@ -171,27 +172,37 @@ namespace dawn_native {
         ASSERT(copy != nullptr);

         if (copy->destination.offset > 0) {
+            // The copy doesn't touch the start of the buffer.
             return false;
         }

         const TextureBase* texture = copy->source.texture.Get();
         const TexelBlockInfo& blockInfo =
             texture->GetFormat().GetAspectInfo(copy->source.aspect).block;
+        const uint64_t widthInBlocks = copy->copySize.width / blockInfo.width;
         const uint64_t heightInBlocks = copy->copySize.height / blockInfo.height;
+        const bool multiSlice = copy->copySize.depthOrArrayLayers > 1;
+        const bool multiRow = multiSlice || heightInBlocks > 1;

-        if (copy->destination.rowsPerImage > heightInBlocks) {
+        if (multiSlice && copy->destination.rowsPerImage > heightInBlocks) {
+            // There are gaps between slices that aren't overwritten
             return false;
         }

-        const uint64_t copyTextureDataSizePerRow =
-            copy->copySize.width / blockInfo.width * blockInfo.byteSize;
-        if (copy->destination.bytesPerRow > copyTextureDataSizePerRow) {
+        const uint64_t copyTextureDataSizePerRow = widthInBlocks * blockInfo.byteSize;
+        if (multiRow && copy->destination.bytesPerRow > copyTextureDataSizePerRow) {
+            // There are gaps between rows that aren't overwritten
             return false;
         }

-        const uint64_t overwrittenRangeSize =
-            copyTextureDataSizePerRow * heightInBlocks * copy->copySize.depthOrArrayLayers;
-        if (copy->destination.buffer->GetSize() > overwrittenRangeSize) {
+        // After the above checks, we're sure the copy has no gaps.
+        // Now, compute the total number of bytes written.
+        const uint64_t writtenBytes =
+            ComputeRequiredBytesInCopy(blockInfo, copy->copySize, copy->destination.bytesPerRow,
+                                       copy->destination.rowsPerImage)
+                .AcquireSuccess();
+        if (!copy->destination.buffer->IsFullBufferRange(copy->destination.offset, writtenBytes)) {
+            // The written bytes don't cover the whole buffer.
             return false;
         }

View File

@@ -168,6 +168,21 @@ namespace dawn_native { namespace d3d12 {
             DAWN_TRY(ClearBuffer(commandRecordingContext, uint8_t(1u)));
         }

+        // Initialize the padding bytes to zero.
+        if (GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) &&
+            !mappedAtCreation) {
+            uint32_t paddingBytes = GetAllocatedSize() - GetSize();
+            if (paddingBytes > 0) {
+                CommandRecordingContext* commandRecordingContext;
+                DAWN_TRY_ASSIGN(commandRecordingContext,
+                                ToBackend(GetDevice())->GetPendingCommandContext());
+
+                uint32_t clearSize = paddingBytes;
+                uint64_t clearOffset = GetSize();
+                DAWN_TRY(ClearBuffer(commandRecordingContext, 0, clearOffset, clearSize));
+            }
+        }
+
         return {};
     }

@@ -444,29 +459,33 @@ namespace dawn_native { namespace d3d12 {
         return {};
     }

-    MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) {
+    MaybeError Buffer::ClearBuffer(CommandRecordingContext* commandContext,
+                                   uint8_t clearValue,
+                                   uint64_t offset,
+                                   uint64_t size) {
         Device* device = ToBackend(GetDevice());
+        size = size > 0 ? size : GetAllocatedSize();

         // The state of the buffers on UPLOAD heap must always be GENERIC_READ and cannot be
         // changed away, so we can only clear such buffer with buffer mapping.
         if (D3D12HeapType(GetUsage()) == D3D12_HEAP_TYPE_UPLOAD) {
-            DAWN_TRY(MapInternal(true, 0, size_t(GetAllocatedSize()), "D3D12 map at clear buffer"));
-            memset(mMappedData, clearValue, GetAllocatedSize());
+            DAWN_TRY(MapInternal(true, static_cast<size_t>(offset), static_cast<size_t>(size),
+                                 "D3D12 map at clear buffer"));
+            memset(mMappedData, clearValue, size);
             UnmapImpl();
         } else {
             // TODO(crbug.com/dawn/852): use ClearUnorderedAccessView*() when the buffer usage
             // includes STORAGE.
             DynamicUploader* uploader = device->GetDynamicUploader();
             UploadHandle uploadHandle;
-            DAWN_TRY_ASSIGN(uploadHandle, uploader->Allocate(GetAllocatedSize(),
-                                                             device->GetPendingCommandSerial(),
-                                                             kCopyBufferToBufferOffsetAlignment));
+            DAWN_TRY_ASSIGN(uploadHandle,
+                            uploader->Allocate(size, device->GetPendingCommandSerial(),
+                                               kCopyBufferToBufferOffsetAlignment));

-            memset(uploadHandle.mappedBuffer, clearValue, GetAllocatedSize());
+            memset(uploadHandle.mappedBuffer, clearValue, size);

             device->CopyFromStagingToBufferImpl(commandContext, uploadHandle.stagingBuffer,
-                                                uploadHandle.startOffset, this, 0,
-                                                GetAllocatedSize());
+                                                uploadHandle.startOffset, this, offset, size);
         }

         return {};

View File

@@ -68,7 +68,10 @@ namespace dawn_native { namespace d3d12 {
                                    wgpu::BufferUsage newUsage);

         MaybeError InitializeToZero(CommandRecordingContext* commandContext);
-        MaybeError ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue);
+        MaybeError ClearBuffer(CommandRecordingContext* commandContext,
+                               uint8_t clearValue,
+                               uint64_t offset = 0,
+                               uint64_t size = 0);

         ResourceHeapAllocation mResourceAllocation;
         bool mFixedResourceState = false;

View File

@@ -52,7 +52,10 @@ namespace dawn_native { namespace metal {
         MaybeError MapAtCreationImpl() override;

         void InitializeToZero(CommandRecordingContext* commandContext);
-        void ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue);
+        void ClearBuffer(CommandRecordingContext* commandContext,
+                         uint8_t clearValue,
+                         uint64_t offset = 0,
+                         uint64_t size = 0);

         NSPRef<id<MTLBuffer>> mMtlBuffer;
     };

View File

@@ -45,10 +45,6 @@ namespace dawn_native { namespace metal {
             storageMode = MTLResourceStorageModePrivate;
         }

-        if (GetSize() > std::numeric_limits<NSUInteger>::max()) {
-            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-        }
-
         uint32_t alignment = 1;
 #ifdef DAWN_PLATFORM_MACOS
         // [MTLBlitCommandEncoder fillBuffer] requires the size to be a multiple of 4 on MacOS.

@@ -64,13 +60,25 @@ namespace dawn_native { namespace metal {
             alignment = kMinUniformOrStorageBufferAlignment;
         }

-        // Allocate at least 4 bytes so clamped accesses are always in bounds.
-        NSUInteger currentSize = static_cast<NSUInteger>(std::max(GetSize(), uint64_t(4u)));
+        // The vertex pulling transform requires at least 4 bytes in the buffer.
+        // 0-sized vertex buffer bindings are allowed, so we always need an additional 4 bytes
+        // after the end.
+        NSUInteger extraBytes = 0u;
+        if ((GetUsage() & wgpu::BufferUsage::Vertex) != 0) {
+            extraBytes = 4u;
+        }
+
+        if (GetSize() > std::numeric_limits<NSUInteger>::max() - extraBytes) {
+            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+        }
+
+        NSUInteger currentSize =
+            std::max(static_cast<NSUInteger>(GetSize()) + extraBytes, NSUInteger(4));
+
         if (currentSize > std::numeric_limits<NSUInteger>::max() - alignment) {
             // Alignment would overlow.
             return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
         }
-        currentSize = Align(currentSize, kMinUniformOrStorageBufferAlignment);
+        currentSize = Align(currentSize, alignment);

         if (@available(iOS 12, macOS 10.14, *)) {
             NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];

@@ -109,6 +117,19 @@ namespace dawn_native { namespace metal {
             ClearBuffer(commandContext, uint8_t(1u));
         }

+        // Initialize the padding bytes to zero.
+        if (GetDevice()->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) &&
+            !mappedAtCreation) {
+            uint32_t paddingBytes = GetAllocatedSize() - GetSize();
+            if (paddingBytes > 0) {
+                uint32_t clearSize = Align(paddingBytes, 4);
+                uint64_t clearOffset = GetAllocatedSize() - clearSize;
+
+                CommandRecordingContext* commandContext =
+                    ToBackend(GetDevice())->GetPendingCommandContext();
+                ClearBuffer(commandContext, 0, clearOffset, clearSize);
+            }
+        }
+
         return {};
     }

@@ -197,11 +218,15 @@ namespace dawn_native { namespace metal {
             GetDevice()->IncrementLazyClearCountForTesting();
         }

-    void Buffer::ClearBuffer(CommandRecordingContext* commandContext, uint8_t clearValue) {
+    void Buffer::ClearBuffer(CommandRecordingContext* commandContext,
+                             uint8_t clearValue,
+                             uint64_t offset,
+                             uint64_t size) {
         ASSERT(commandContext != nullptr);
-        ASSERT(GetAllocatedSize() > 0);
+        size = size > 0 ? size : GetAllocatedSize();
+        ASSERT(size > 0);
         [commandContext->EnsureBlit() fillBuffer:mMtlBuffer.Get()
-                                           range:NSMakeRange(0, GetAllocatedSize())
+                                           range:NSMakeRange(offset, size)
                                            value:clearValue];
     }

View File

@@ -548,7 +548,8 @@ namespace dawn_native { namespace metal {
             mVertexBufferOffsets[slot] = offset;

             ASSERT(buffer->GetSize() < std::numeric_limits<uint32_t>::max());
-            mVertexBufferBindingSizes[slot] = static_cast<uint32_t>(buffer->GetSize() - offset);
+            mVertexBufferBindingSizes[slot] =
+                static_cast<uint32_t>(buffer->GetAllocatedSize() - offset);
             mDirtyVertexBuffers.set(slot);
         }

View File

@@ -49,6 +49,7 @@ namespace dawn_native { namespace opengl {
             device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, clearValues.data(),
                                   GL_STATIC_DRAW);
         } else {
+            // Buffers start zeroed if you pass nullptr to glBufferData.
             device->gl.BufferData(GL_ARRAY_BUFFER, mAllocatedSize, nullptr, GL_STATIC_DRAW);
         }
     }

View File

@@ -137,16 +137,34 @@ namespace dawn_native { namespace vulkan {
     }

     MaybeError Buffer::Initialize(bool mappedAtCreation) {
+        // vkCmdFillBuffer requires the size to be a multiple of 4.
+        constexpr size_t kAlignment = 4u;
+
+        uint32_t extraBytes = 0u;
+        if (GetUsage() & (wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Index)) {
+            // vkCmdSetIndexBuffer and vkCmdSetVertexBuffer are invalid if the offset
+            // is equal to the whole buffer size. Allocate at least one more byte so it
+            // is valid to setVertex/IndexBuffer with a zero-sized range at the end
+            // of the buffer with (offset=buffer.size, size=0).
+            extraBytes = 1u;
+        }
+
+        uint64_t size = GetSize();
+        if (size > std::numeric_limits<uint64_t>::max() - extraBytes) {
+            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
+        }
+
+        size += extraBytes;
+
         // Allocate at least 4 bytes so clamped accesses are always in bounds.
         // Also, Vulkan requires the size to be non-zero.
-        uint64_t size = std::max(GetSize(), uint64_t(4u));
+        size = std::max(size, uint64_t(4u));

-        // vkCmdFillBuffer requires the size to be a multiple of 4.
-        size_t alignment = 4u;
-        if (size > std::numeric_limits<uint64_t>::max() - alignment) {
+        if (size > std::numeric_limits<uint64_t>::max() - kAlignment) {
             // Alignment would overlow.
             return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
         }
-        mAllocatedSize = Align(size, alignment);
+        mAllocatedSize = Align(size, kAlignment);

         // Avoid passing ludicrously large sizes to drivers because it causes issues: drivers add
         // some constants to the size passed and align it, but for values close to the maximum

@@ -200,6 +218,17 @@ namespace dawn_native { namespace vulkan {
             ClearBuffer(device->GetPendingRecordingContext(), 0x01010101);
         }

+        // Initialize the padding bytes to zero.
+        if (device->IsToggleEnabled(Toggle::LazyClearResourceOnFirstUse) && !mappedAtCreation) {
+            uint32_t paddingBytes = GetAllocatedSize() - GetSize();
+            if (paddingBytes > 0) {
+                uint32_t clearSize = Align(paddingBytes, 4);
+                uint64_t clearOffset = GetAllocatedSize() - clearSize;
+
+                CommandRecordingContext* recordingContext = device->GetPendingRecordingContext();
+                ClearBuffer(recordingContext, 0, clearOffset, clearSize);
+            }
+        }
+
         return {};
     }

@@ -354,17 +383,21 @@ namespace dawn_native { namespace vulkan {
         SetIsDataInitialized();
     }

-    void Buffer::ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue) {
+    void Buffer::ClearBuffer(CommandRecordingContext* recordingContext,
+                             uint32_t clearValue,
+                             uint64_t offset,
+                             uint64_t size) {
         ASSERT(recordingContext != nullptr);
-        ASSERT(GetAllocatedSize() > 0);
+        size = size > 0 ? size : GetAllocatedSize();
+        ASSERT(size > 0);

         TransitionUsageNow(recordingContext, wgpu::BufferUsage::CopyDst);

         Device* device = ToBackend(GetDevice());

         // VK_WHOLE_SIZE doesn't work on old Windows Intel Vulkan drivers, so we don't use it.
         // Note: Allocated size must be a multiple of 4.
-        ASSERT(GetAllocatedSize() % 4 == 0);
-        device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, 0, GetAllocatedSize(),
+        ASSERT(size % 4 == 0);
+        device->fn.CmdFillBuffer(recordingContext->commandBuffer, mHandle, offset, size,
                                  clearValue);
     }

 }}  // namespace dawn_native::vulkan

View File

@@ -55,7 +55,10 @@ namespace dawn_native { namespace vulkan {
         MaybeError Initialize(bool mappedAtCreation);

         void InitializeToZero(CommandRecordingContext* recordingContext);
-        void ClearBuffer(CommandRecordingContext* recordingContext, uint32_t clearValue);
+        void ClearBuffer(CommandRecordingContext* recordingContext,
+                         uint32_t clearValue,
+                         uint64_t offset = 0,
+                         uint64_t size = 0);

         MaybeError MapAsyncImpl(wgpu::MapMode mode, size_t offset, size_t size) override;
         void UnmapImpl() override;

View File

@@ -14,6 +14,7 @@
 #include "tests/DawnTest.h"

+#include "common/Math.h"
 #include "utils/ComboRenderPipelineDescriptor.h"
 #include "utils/TestUtils.h"
 #include "utils/WGPUHelpers.h"

@@ -201,8 +202,10 @@ class BufferZeroInitTest : public DawnTest {
         EXPECT_PIXEL_RGBA8_EQ(kExpectedColor, outputTexture, 0u, 0u);
     }

-    wgpu::RenderPipeline CreateRenderPipelineForTest(const char* vertexShader,
-                                                     uint32_t vertexBufferCount = 1u) {
+    wgpu::RenderPipeline CreateRenderPipelineForTest(
+        const char* vertexShader,
+        uint32_t vertexBufferCount = 1u,
+        wgpu::VertexFormat vertexFormat = wgpu::VertexFormat::Float32x4) {
         constexpr wgpu::TextureFormat kColorAttachmentFormat = wgpu::TextureFormat::RGBA8Unorm;
         wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, vertexShader);

@@ -219,9 +222,9 @@ class BufferZeroInitTest : public DawnTest {
         descriptor.cFragment.module = fsModule;
         descriptor.primitive.topology = wgpu::PrimitiveTopology::PointList;
         descriptor.vertex.bufferCount = vertexBufferCount;
-        descriptor.cBuffers[0].arrayStride = 4 * sizeof(float);
+        descriptor.cBuffers[0].arrayStride = Align(utils::VertexFormatSize(vertexFormat), 4);
         descriptor.cBuffers[0].attributeCount = 1;
-        descriptor.cAttributes[0].format = wgpu::VertexFormat::Float32x4;
+        descriptor.cAttributes[0].format = vertexFormat;
         descriptor.cTargets[0].format = kColorAttachmentFormat;
         return device.CreateRenderPipeline(&descriptor);
     }

@@ -1128,6 +1131,114 @@ TEST_P(BufferZeroInitTest, SetVertexBuffer) {
     }
 }

+// Test for crbug.com/dawn/837.
+// Test that the padding after a buffer allocation is initialized to 0.
+// This test makes an unaligned vertex buffer which should be padded in the backend
+// allocation. It then tries to index off the end of the vertex buffer in an indexed
+// draw call. A backend which implements robust buffer access via clamping should
+// still see zeros at the end of the buffer.
+TEST_P(BufferZeroInitTest, PaddingInitialized) {
+    DAWN_SUPPRESS_TEST_IF(IsANGLE());  // TODO(crbug.com/dawn/1084).
+
+    constexpr wgpu::TextureFormat kColorAttachmentFormat = wgpu::TextureFormat::RGBA8Unorm;
+    // A small sub-4-byte format means a single vertex can fit entirely within the padded buffer,
+    // touching some of the padding. Test a small format, as well as larger formats.
+    for (wgpu::VertexFormat vertexFormat :
+         {wgpu::VertexFormat::Unorm8x2, wgpu::VertexFormat::Float16x2,
+          wgpu::VertexFormat::Float32x2}) {
+        wgpu::RenderPipeline renderPipeline =
+            CreateRenderPipelineForTest(R"(
+                struct VertexOut {
+                    [[location(0)]] color : vec4<f32>;
+                    [[builtin(position)]] position : vec4<f32>;
+                };
+
+                [[stage(vertex)]] fn main([[location(0)]] pos : vec2<f32>) -> VertexOut {
+                    var output : VertexOut;
+                    if (all(pos == vec2<f32>(0.0, 0.0))) {
+                        output.color = vec4<f32>(0.0, 1.0, 0.0, 1.0);
+                    } else {
+                        output.color = vec4<f32>(1.0, 0.0, 0.0, 1.0);
+                    }
+                    output.position = vec4<f32>(0.0, 0.0, 0.0, 1.0);
+                    return output;
+                })",
+                                        /* vertexBufferCount */ 1u, vertexFormat);
+
+        // Create an index buffer that indexes off the end of the vertex buffer.
+        wgpu::Buffer indexBuffer =
+            utils::CreateBufferFromData<uint32_t>(device, wgpu::BufferUsage::Index, {1});
+
+        const uint32_t vertexFormatSize = utils::VertexFormatSize(vertexFormat);
+
+        // Create an 8-bit texture to use to initialize buffer contents.
+        wgpu::TextureDescriptor initTextureDesc = {};
+        initTextureDesc.size = {vertexFormatSize + 4, 1, 1};
+        initTextureDesc.format = wgpu::TextureFormat::R8Unorm;
+        initTextureDesc.usage = wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst;
+        wgpu::ImageCopyTexture zeroTextureSrc =
+            utils::CreateImageCopyTexture(device.CreateTexture(&initTextureDesc), 0, {0, 0, 0});
+        {
+            wgpu::TextureDataLayout layout =
+                utils::CreateTextureDataLayout(0, wgpu::kCopyStrideUndefined);
+            std::vector<uint8_t> data(initTextureDesc.size.width);
+            queue.WriteTexture(&zeroTextureSrc, data.data(), data.size(), &layout,
+                               &initTextureDesc.size);
+        }
+
+        for (uint32_t extraBytes : {0, 1, 2, 3, 4}) {
+            // Create a vertex buffer to hold a single vertex attribute.
+            // Uniform usage is added to force even more padding on D3D12.
+            // The buffer is internally padded and allocated as a larger buffer.
+            const uint32_t vertexBufferSize = vertexFormatSize + extraBytes;
+            for (uint32_t vertexBufferOffset = 0; vertexBufferOffset <= vertexBufferSize;
+                 vertexBufferOffset += 4u) {
+                wgpu::Buffer vertexBuffer = CreateBuffer(
+                    vertexBufferSize, wgpu::BufferUsage::Vertex | wgpu::BufferUsage::Uniform |
+                                          wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst);
+
+                // "Fully" initialize the buffer with a copy from an 8-bit texture, touching
+                // everything except the padding. From the point-of-view of the API, all
+                // |vertexBufferSize| bytes are initialized. Note: Uses CopyTextureToBuffer because
+                // it does not require 4-byte alignment.
+                {
+                    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+                    wgpu::ImageCopyBuffer dst =
+                        utils::CreateImageCopyBuffer(vertexBuffer, 0, wgpu::kCopyStrideUndefined);
+                    wgpu::Extent3D extent = {vertexBufferSize, 1, 1};
+                    encoder.CopyTextureToBuffer(&zeroTextureSrc, &dst, &extent);
+
+                    wgpu::CommandBuffer commandBuffer = encoder.Finish();
+                    EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commandBuffer));
+                }
+
+                wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+
+                wgpu::Texture colorAttachment =
+                    CreateAndInitializeTexture({1, 1, 1}, kColorAttachmentFormat);
+                utils::ComboRenderPassDescriptor renderPassDescriptor(
+                    {colorAttachment.CreateView()});
+
+                wgpu::RenderPassEncoder renderPass =
+                    encoder.BeginRenderPass(&renderPassDescriptor);
+                renderPass.SetVertexBuffer(0, vertexBuffer, vertexBufferOffset);
+                renderPass.SetIndexBuffer(indexBuffer, wgpu::IndexFormat::Uint32);
+                renderPass.SetPipeline(renderPipeline);
+                renderPass.DrawIndexed(1);
+                renderPass.EndPass();
+
+                wgpu::CommandBuffer commandBuffer = encoder.Finish();
+                EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &commandBuffer));
+
+                constexpr RGBA8 kExpectedPixelValue = {0, 255, 0, 255};
+                EXPECT_PIXEL_RGBA8_EQ(kExpectedPixelValue, colorAttachment, 0, 0);
+            }
+        }
+    }
+}
+
 // Test the buffer will be lazily initialized correctly when its first use is in SetIndexBuffer.
 TEST_P(BufferZeroInitTest, SetIndexBuffer) {
     // Bind the whole buffer as an index buffer.

View File

@@ -2060,6 +2060,84 @@ TEST_P(CompressedTextureZeroInitTest, HalfCopyTextureToTextureMipLevel) {
                       kViewMipLevel, 0, true);
 }

+// Test uploading then reading back from a 2D array compressed texture.
+// This is a regression test for a bug where the final destination buffer
+// was considered fully initialized even though there was a 256-byte
+// stride between images.
+TEST_P(CompressedTextureZeroInitTest, Copy2DArrayCompressedB2T2B) {
+    // TODO(crbug.com/dawn/643): diagnose and fix this failure on OpenGL.
+    DAWN_SUPPRESS_TEST_IF(IsOpenGL() || IsOpenGLES());
+
+    // create srcTexture with data
+    wgpu::TextureDescriptor textureDescriptor = CreateTextureDescriptor(
+        4, 5, wgpu::TextureUsage::CopySrc | wgpu::TextureUsage::CopyDst, utils::kBCFormats[0]);
+    textureDescriptor.size = {8, 8, 5};
+    wgpu::Texture srcTexture = device.CreateTexture(&textureDescriptor);
+
+    uint32_t mipLevel = 2;
+    wgpu::Extent3D copyExtent3D = {4, 4, 5};
+    uint32_t copyWidthInBlock = copyExtent3D.width / kFormatBlockByteSize;
+    uint32_t copyHeightInBlock = copyExtent3D.height / kFormatBlockByteSize;
+    uint32_t copyRowsPerImage = copyHeightInBlock;
+    uint32_t copyBytesPerRow =
+        Align(copyWidthInBlock * utils::GetTexelBlockSizeInBytes(textureDescriptor.format),
+              kTextureBytesPerRowAlignment);
+
+    // Generate data to upload
+    std::vector<uint8_t> data(utils::RequiredBytesInCopy(copyBytesPerRow, copyRowsPerImage,
+                                                         copyExtent3D, textureDescriptor.format));
+    for (size_t i = 0; i < data.size(); ++i) {
+        data[i] = i % 255;
+    }
+
+    // Copy texture data from a staging buffer to the destination texture.
+    wgpu::Buffer stagingBuffer =
+        utils::CreateBufferFromData(device, data.data(), data.size(), wgpu::BufferUsage::CopySrc);
+    wgpu::ImageCopyBuffer imageCopyBufferSrc =
+        utils::CreateImageCopyBuffer(stagingBuffer, 0, copyBytesPerRow, copyRowsPerImage);
+
+    wgpu::ImageCopyTexture imageCopyTexture =
+        utils::CreateImageCopyTexture(srcTexture, mipLevel, {0, 0, 0});
+
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyBufferToTexture(&imageCopyBufferSrc, &imageCopyTexture, &copyExtent3D);
+        wgpu::CommandBuffer copy = encoder.Finish();
+        EXPECT_LAZY_CLEAR(0u, queue.Submit(1, &copy));
+    }
+
+    // Create a buffer to read back the data. It is the same size as the upload buffer.
+    wgpu::BufferDescriptor readbackDesc = {};
+    readbackDesc.size = data.size();
+    readbackDesc.usage = wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
+    wgpu::Buffer readbackBuffer = device.CreateBuffer(&readbackDesc);
+
+    // Copy the texture to the readback buffer.
+    wgpu::ImageCopyBuffer imageCopyBufferDst =
+        utils::CreateImageCopyBuffer(readbackBuffer, 0, copyBytesPerRow, copyRowsPerImage);
+    {
+        wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
+        encoder.CopyTextureToBuffer(&imageCopyTexture, &imageCopyBufferDst, &copyExtent3D);
+
+        wgpu::CommandBuffer copy = encoder.Finish();
+        // Expect a lazy clear because the padding in the copy is not touched.
+        EXPECT_LAZY_CLEAR(1u, queue.Submit(1, &copy));
+    }
+
+    // Generate expected data. It is the same as the upload data, but padding is zero.
+    std::vector<uint8_t> expected(data.size(), 0);
+    for (uint32_t z = 0; z < copyExtent3D.depthOrArrayLayers; ++z) {
+        for (uint32_t y = 0; y < copyHeightInBlock; ++y) {
+            memcpy(&expected[copyBytesPerRow * y + copyBytesPerRow * copyRowsPerImage * z],
+                   &data[copyBytesPerRow * y + copyBytesPerRow * copyRowsPerImage * z],
+                   copyWidthInBlock * utils::GetTexelBlockSizeInBytes(textureDescriptor.format));
+        }
+    }
+    // Check final contents
+    EXPECT_BUFFER_U8_RANGE_EQ(expected.data(), readbackBuffer, 0, expected.size());
+}
+
 DAWN_INSTANTIATE_TEST(CompressedTextureZeroInitTest,
                       D3D12Backend({"nonzero_clear_resources_on_creation_for_testing"}),
                       MetalBackend({"nonzero_clear_resources_on_creation_for_testing"}),

View File

@@ -134,4 +134,47 @@ namespace utils {
         device.GetQueue().WriteTexture(&imageCopyTexture, data.data(), 1, &textureDataLayout,
                                        &copyExtent);
     }

+    uint32_t VertexFormatSize(wgpu::VertexFormat format) {
+        switch (format) {
+            case wgpu::VertexFormat::Uint8x2:
+            case wgpu::VertexFormat::Sint8x2:
+            case wgpu::VertexFormat::Unorm8x2:
+            case wgpu::VertexFormat::Snorm8x2:
+                return 2;
+            case wgpu::VertexFormat::Uint8x4:
+            case wgpu::VertexFormat::Sint8x4:
+            case wgpu::VertexFormat::Unorm8x4:
+            case wgpu::VertexFormat::Snorm8x4:
+            case wgpu::VertexFormat::Uint16x2:
+            case wgpu::VertexFormat::Sint16x2:
+            case wgpu::VertexFormat::Unorm16x2:
+            case wgpu::VertexFormat::Snorm16x2:
+            case wgpu::VertexFormat::Float16x2:
+            case wgpu::VertexFormat::Float32:
+            case wgpu::VertexFormat::Uint32:
+            case wgpu::VertexFormat::Sint32:
+                return 4;
+            case wgpu::VertexFormat::Uint16x4:
+            case wgpu::VertexFormat::Sint16x4:
+            case wgpu::VertexFormat::Unorm16x4:
+            case wgpu::VertexFormat::Snorm16x4:
+            case wgpu::VertexFormat::Float16x4:
+            case wgpu::VertexFormat::Float32x2:
+            case wgpu::VertexFormat::Uint32x2:
+            case wgpu::VertexFormat::Sint32x2:
+                return 8;
+            case wgpu::VertexFormat::Float32x3:
+            case wgpu::VertexFormat::Uint32x3:
+            case wgpu::VertexFormat::Sint32x3:
+                return 12;
+            case wgpu::VertexFormat::Float32x4:
+            case wgpu::VertexFormat::Uint32x4:
+            case wgpu::VertexFormat::Sint32x4:
+                return 16;
+            case wgpu::VertexFormat::Undefined:
+                UNREACHABLE();
+        }
+    }
+
 }  // namespace utils

View File

@@ -60,6 +60,8 @@ namespace utils {
     // in it will contain 1 byte of data.
     void UnalignDynamicUploader(wgpu::Device device);

+    uint32_t VertexFormatSize(wgpu::VertexFormat format);
+
 }  // namespace utils

 #endif  // UTILS_TESTHELPERS_H_