Fix query index of availability in timestamp shader

Currently we use the offset to calculate the index of the queries in the
timestamp compute shader, which is incorrect. The offset is the buffer
offset at which we start writing the query results, and it has nothing to
do with the query index. In the query availability detection, the query
index should be based on the parameter firstQuery.

Add new test for resolving a timestamp query twice to the same
destination buffer with potentially overlapping ranges.

Bug: dawn:434
Change-Id: I2b5c5b192cf5d987ac48187e8240a25937957f51
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/50760
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Hao Li <hao.x.li@intel.com>
This commit is contained in:
Hao Li 2021-05-18 01:13:08 +00:00 committed by Commit Bot service account
parent 21ce5d2965
commit 880a3d6311
5 changed files with 82 additions and 20 deletions

View File

@ -424,6 +424,7 @@ namespace dawn_native {
MaybeError EncodeTimestampsToNanosecondsConversion(CommandEncoder* encoder, MaybeError EncodeTimestampsToNanosecondsConversion(CommandEncoder* encoder,
QuerySetBase* querySet, QuerySetBase* querySet,
uint32_t firstQuery,
uint32_t queryCount, uint32_t queryCount,
BufferBase* destination, BufferBase* destination,
uint64_t destinationOffset) { uint64_t destinationOffset) {
@ -447,7 +448,8 @@ namespace dawn_native {
availability.size() * sizeof(uint32_t))); availability.size() * sizeof(uint32_t)));
// Timestamp params uniform buffer // Timestamp params uniform buffer
TimestampParams params = {queryCount, static_cast<uint32_t>(destinationOffset), TimestampParams params = {firstQuery, queryCount,
static_cast<uint32_t>(destinationOffset),
device->GetTimestampPeriodInNS()}; device->GetTimestampPeriodInNS()};
BufferDescriptor parmsDesc = {}; BufferDescriptor parmsDesc = {};
@ -882,8 +884,8 @@ namespace dawn_native {
// Encode internal compute pipeline for timestamp query // Encode internal compute pipeline for timestamp query
if (querySet->GetQueryType() == wgpu::QueryType::Timestamp) { if (querySet->GetQueryType() == wgpu::QueryType::Timestamp) {
DAWN_TRY(EncodeTimestampsToNanosecondsConversion(this, querySet, queryCount, destination, DAWN_TRY(EncodeTimestampsToNanosecondsConversion(
destinationOffset)); this, querySet, firstQuery, queryCount, destination, destinationOffset));
} }
return {}; return {};

View File

@ -28,9 +28,10 @@ namespace dawn_native {
namespace { namespace {
// Assert the offsets in dawn_native::TimestampParams are same with the ones in the shader // Assert the offsets in dawn_native::TimestampParams are same with the ones in the shader
static_assert(offsetof(dawn_native::TimestampParams, count) == 0, ""); static_assert(offsetof(dawn_native::TimestampParams, first) == 0, "");
static_assert(offsetof(dawn_native::TimestampParams, offset) == 4, ""); static_assert(offsetof(dawn_native::TimestampParams, count) == 4, "");
static_assert(offsetof(dawn_native::TimestampParams, period) == 8, ""); static_assert(offsetof(dawn_native::TimestampParams, offset) == 8, "");
static_assert(offsetof(dawn_native::TimestampParams, period) == 12, "");
static const char sConvertTimestampsToNanoseconds[] = R"( static const char sConvertTimestampsToNanoseconds[] = R"(
struct Timestamp { struct Timestamp {
@ -47,6 +48,7 @@ namespace dawn_native {
}; };
[[block]] struct TimestampParams { [[block]] struct TimestampParams {
first : u32;
count : u32; count : u32;
offset : u32; offset : u32;
period : f32; period : f32;
@ -70,7 +72,7 @@ namespace dawn_native {
var timestamp : Timestamp = timestamps.t[index]; var timestamp : Timestamp = timestamps.t[index];
// Return 0 for the unavailable value. // Return 0 for the unavailable value.
if (availability.v[index] == 0u) { if (availability.v[GlobalInvocationID.x + params.first] == 0u) {
timestamps.t[index].low = 0u; timestamps.t[index].low = 0u;
timestamps.t[index].high = 0u; timestamps.t[index].high = 0u;
return; return;

View File

@ -24,6 +24,7 @@ namespace dawn_native {
class CommandEncoder; class CommandEncoder;
struct TimestampParams { struct TimestampParams {
uint32_t first;
uint32_t count; uint32_t count;
uint32_t offset; uint32_t offset;
float period; float period;

View File

@ -755,10 +755,10 @@ TEST_P(TimestampQueryTests, ResolveWithoutWritten) {
// Test resolving timestamp query to one slot in the buffer // Test resolving timestamp query to one slot in the buffer
TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) { TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
// TODO(hao.x.li@intel.com): Fail to resolve query to buffer with offset on Windows Vulkan and // TODO(hao.x.li@intel.com): Fails on Intel Windows Vulkan due to a driver issue that
// Metal on Intel platforms, need investigation. // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
DAWN_SKIP_TEST_IF(IsWindows() && IsIntel() && IsVulkan()); // the issue is fixed.
DAWN_SKIP_TEST_IF(IsIntel() && IsMetal()); DAWN_SKIP_TEST_IF(IsWindows() && IsVulkan() && IsIntel());
// TODO(hao.x.li@intel.com): Crash occurs if we only call WriteTimestamp in a command encoder // TODO(hao.x.li@intel.com): Crash occurs if we only call WriteTimestamp in a command encoder
// without any copy commands on Metal on AMD GPU. See https://crbug.com/dawn/545. // without any copy commands on Metal on AMD GPU. See https://crbug.com/dawn/545.
@ -774,7 +774,6 @@ TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
encoder.WriteTimestamp(querySet, 0); encoder.WriteTimestamp(querySet, 0);
encoder.WriteTimestamp(querySet, 1);
encoder.ResolveQuerySet(querySet, 0, 1, destination, 0); encoder.ResolveQuerySet(querySet, 0, 1, destination, 0);
wgpu::CommandBuffer commands = encoder.Finish(); wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
@ -789,7 +788,6 @@ TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t));
wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
encoder.WriteTimestamp(querySet, 0); encoder.WriteTimestamp(querySet, 0);
encoder.WriteTimestamp(querySet, 1);
encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t)); encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t));
wgpu::CommandBuffer commands = encoder.Finish(); wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands); queue.Submit(1, &commands);
@ -799,6 +797,31 @@ TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) {
} }
} }
// Resolves one timestamp query set into a single destination buffer twice within the same
// command encoder. The first resolve covers queries [0, 2) at byte offset 0 and the second
// covers queries [1, 3) at byte offset sizeof(uint64_t), so the destination ranges overlap.
TEST_P(TimestampQueryTests, ResolveTwiceToSameBuffer) {
    // TODO(hao.x.li@intel.com): Fails on Intel Windows Vulkan due to a driver issue that
    // vkCmdFillBuffer and vkCmdCopyQueryPoolResults are not executed in order, skip it until
    // the issue is fixed.
    DAWN_SKIP_TEST_IF(IsWindows() && IsVulkan() && IsIntel());

    constexpr uint32_t kQueryCount = 3;
    // Byte size needed to hold one 64-bit result per query.
    constexpr auto kResolveSize = kQueryCount * sizeof(uint64_t);

    wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount);
    wgpu::Buffer destination = CreateResolveBuffer(kResolveSize);

    wgpu::CommandEncoder encoder = device.CreateCommandEncoder();

    // Write every query in the set before resolving any of them.
    for (uint32_t queryIndex = 0; queryIndex < kQueryCount; ++queryIndex) {
        encoder.WriteTimestamp(querySet, queryIndex);
    }

    // Two resolves into the same buffer; the second starts one result slot further in.
    encoder.ResolveQuerySet(querySet, 0, 2, destination, 0);
    encoder.ResolveQuerySet(querySet, 1, 2, destination, sizeof(uint64_t));

    wgpu::CommandBuffer commands = encoder.Finish();
    queue.Submit(1, &commands);

    // Check the whole destination buffer with the timestamp expectation.
    EXPECT_BUFFER(destination, 0, kResolveSize, new TimestampExpectation);
}
DAWN_INSTANTIATE_TEST(TimestampQueryTests, DAWN_INSTANTIATE_TEST(TimestampQueryTests,
D3D12Backend(), D3D12Backend(),
MetalBackend(), MetalBackend(),

View File

@ -125,12 +125,13 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
wgpu::Buffer timestampsBuffer = device.CreateBuffer(&timestampsDesc); wgpu::Buffer timestampsBuffer = device.CreateBuffer(&timestampsDesc);
auto PrepareExpectedResults = [&](uint32_t offset) -> std::vector<uint64_t> { auto PrepareExpectedResults = [&](uint32_t first, uint32_t count,
uint32_t offset) -> std::vector<uint64_t> {
ASSERT(offset % sizeof(uint64_t) == 0); ASSERT(offset % sizeof(uint64_t) == 0);
std::vector<uint64_t> expected; std::vector<uint64_t> expected;
for (size_t i = 0; i < kTimestampCount; i++) { for (size_t i = 0; i < kTimestampCount; i++) {
// The data before offset remains as it is // The data out of the range [first, first + count) remains as it is
if (i < offset / sizeof(uint64_t)) { if (i < first || i >= first + count) {
expected.push_back(timestamps[i]); expected.push_back(timestamps[i]);
continue; continue;
} }
@ -149,7 +150,9 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
}; };
// Convert timestamps in timestamps buffer with offset 0 // Convert timestamps in timestamps buffer with offset 0
// Test for ResolveQuerySet(querySet, 0, kTimestampCount, timestampsBuffer, 0)
{ {
constexpr uint32_t kFirst = 0u;
constexpr uint32_t kOffset = 0u; constexpr uint32_t kOffset = 0u;
// Write original timestamps to timestamps buffer // Write original timestamps to timestamps buffer
@ -157,7 +160,7 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
kTimestampCount * sizeof(uint64_t)); kTimestampCount * sizeof(uint64_t));
// The params uniform buffer // The params uniform buffer
dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod}; dawn_native::TimestampParams params = {kFirst, kTimestampCount, kOffset, kPeriod};
wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params), wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
wgpu::BufferUsage::Uniform); wgpu::BufferUsage::Uniform);
@ -168,13 +171,15 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
queue.Submit(1, &commands); queue.Submit(1, &commands);
// Expected results: Timestamp * period // Expected results: Timestamp * period
std::vector<uint64_t> expected = PrepareExpectedResults(kOffset); std::vector<uint64_t> expected = PrepareExpectedResults(0, kTimestampCount, kOffset);
EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
new InternalShaderExpectation(expected.data(), kTimestampCount)); new InternalShaderExpectation(expected.data(), kTimestampCount));
} }
// Convert timestamps in timestamps buffer with offset 8 // Convert timestamps in timestamps buffer with offset 8
// Test for ResolveQuerySet(querySet, 1, kTimestampCount - 1, timestampsBuffer, 8)
{ {
constexpr uint32_t kFirst = 1u;
constexpr uint32_t kOffset = 8u; constexpr uint32_t kOffset = 8u;
// Write original timestamps to timestamps buffer // Write original timestamps to timestamps buffer
@ -182,7 +187,7 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
kTimestampCount * sizeof(uint64_t)); kTimestampCount * sizeof(uint64_t));
// The params uniform buffer // The params uniform buffer
dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod}; dawn_native::TimestampParams params = {kFirst, kTimestampCount - kFirst, kOffset, kPeriod};
wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params), wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
wgpu::BufferUsage::Uniform); wgpu::BufferUsage::Uniform);
@ -193,7 +198,36 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
queue.Submit(1, &commands); queue.Submit(1, &commands);
// Expected results: Timestamp * period // Expected results: Timestamp * period
std::vector<uint64_t> expected = PrepareExpectedResults(kOffset); std::vector<uint64_t> expected =
PrepareExpectedResults(kFirst, kTimestampCount - kFirst, kOffset);
EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
new InternalShaderExpectation(expected.data(), kTimestampCount));
}
// Convert partial timestamps in timestamps buffer with offset 8
// Test for ResolveQuerySet(querySet, 1, 3, timestampsBuffer, 8)
{
constexpr uint32_t kFirst = 1u;
constexpr uint32_t kCount = 3u;
constexpr uint32_t kOffset = 8u;
// Write original timestamps to timestamps buffer
queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(),
kTimestampCount * sizeof(uint64_t));
// The params uniform buffer
dawn_native::TimestampParams params = {kFirst, kCount, kOffset, kPeriod};
wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
wgpu::BufferUsage::Uniform);
wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer,
paramsBuffer);
wgpu::CommandBuffer commands = encoder.Finish();
queue.Submit(1, &commands);
// Expected results: Timestamp * period
std::vector<uint64_t> expected = PrepareExpectedResults(kFirst, kCount, kOffset);
EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
new InternalShaderExpectation(expected.data(), kTimestampCount)); new InternalShaderExpectation(expected.data(), kTimestampCount));
} }