diff --git a/src/dawn_native/CommandEncoder.cpp b/src/dawn_native/CommandEncoder.cpp index f90e525315..a256382868 100644 --- a/src/dawn_native/CommandEncoder.cpp +++ b/src/dawn_native/CommandEncoder.cpp @@ -382,10 +382,9 @@ namespace dawn_native { "set"); } - // The destinationOffset must be a multiple of 8 bytes on D3D12 and Vulkan - if (destinationOffset % 8 != 0) { + if (destinationOffset % 256 != 0) { return DAWN_VALIDATION_ERROR( - "The alignment offset into the destination buffer must be a multiple of 8 " + "The alignment offset into the destination buffer must be a multiple of 256 " "bytes"); } diff --git a/src/tests/end2end/BufferZeroInitTests.cpp b/src/tests/end2end/BufferZeroInitTests.cpp index 04c961b839..1fcc0d7eee 100644 --- a/src/tests/end2end/BufferZeroInitTests.cpp +++ b/src/tests/end2end/BufferZeroInitTests.cpp @@ -1362,9 +1362,9 @@ TEST_P(BufferZeroInitTest, ResolveQuerySet) { // destinationOffset > 0 and destinationOffset + 8 * queryCount <= kBufferSize { constexpr uint32_t kQueryCount = 1; - constexpr uint64_t kDestinationOffset = 8u; + constexpr uint64_t kDestinationOffset = 256u; - wgpu::Buffer destination = CreateBuffer(kBufferSize, kBufferUsage); + wgpu::Buffer destination = CreateBuffer(kBufferSize + kDestinationOffset, kBufferUsage); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); encoder.WriteTimestamp(querySet, 0); encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, kDestinationOffset); diff --git a/src/tests/end2end/QueryTests.cpp b/src/tests/end2end/QueryTests.cpp index 5e0b8e0c62..bf3be4431f 100644 --- a/src/tests/end2end/QueryTests.cpp +++ b/src/tests/end2end/QueryTests.cpp @@ -31,6 +31,8 @@ class QueryTests : public DawnTest { // Clear the content of the result buffer into 0xFFFFFFFF. 
constexpr static uint64_t kSentinelValue = ~uint64_t(0u); constexpr static uint64_t kZero = 0u; +constexpr uint64_t kMinDestinationOffset = 256; +constexpr uint64_t kMinCount = kMinDestinationOffset / sizeof(uint64_t); class OcclusionExpectation : public detail::Expectation { public: @@ -405,12 +407,15 @@ TEST_P(OcclusionQueryTests, ResolveToBufferWithOffset) { wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); + constexpr uint64_t kBufferSize = kQueryCount * sizeof(uint64_t) + kMinDestinationOffset; + constexpr uint64_t kCount = kQueryCount + kMinCount; + // Resolve the query result to first slot in the buffer, other slots should not be written. { - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + wgpu::Buffer destination = CreateResolveBuffer(kBufferSize); // Set sentinel values to check the query is resolved to the correct slot of the buffer. - std::vector sentinelValues(kQueryCount, kSentinelValue); - queue.WriteBuffer(destination, 0, sentinelValues.data(), kQueryCount * sizeof(uint64_t)); + std::vector sentinelValues(kCount, kSentinelValue); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kBufferSize); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); encoder.ResolveQuerySet(querySet, 0, 1, destination, 0); @@ -419,23 +424,24 @@ TEST_P(OcclusionQueryTests, ResolveToBufferWithOffset) { EXPECT_BUFFER(destination, 0, sizeof(uint64_t), new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); - EXPECT_BUFFER_U64_RANGE_EQ(&kSentinelValue, destination, sizeof(uint64_t), 1); + EXPECT_BUFFER_U64_RANGE_EQ(sentinelValues.data(), destination, sizeof(uint64_t), + kCount - 1); } // Resolve the query result to second slot in the buffer, the first one should not be written. 
{ - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + wgpu::Buffer destination = CreateResolveBuffer(kBufferSize); // Set sentinel values to check the query is resolved to the correct slot of the buffer. - std::vector sentinelValues(kQueryCount, kSentinelValue); - queue.WriteBuffer(destination, 0, sentinelValues.data(), kQueryCount * sizeof(uint64_t)); + std::vector sentinelValues(kCount, kSentinelValue); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kBufferSize); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t)); + encoder.ResolveQuerySet(querySet, 0, 1, destination, kMinDestinationOffset); wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); - EXPECT_BUFFER_U64_RANGE_EQ(&kSentinelValue, destination, 0, 1); - EXPECT_BUFFER(destination, sizeof(uint64_t), sizeof(uint64_t), + EXPECT_BUFFER_U64_RANGE_EQ(sentinelValues.data(), destination, 0, kMinCount); + EXPECT_BUFFER(destination, kMinDestinationOffset, sizeof(uint64_t), new OcclusionExpectation(OcclusionExpectation::Result::NonZero)); } } @@ -769,35 +775,44 @@ TEST_P(TimestampQueryTests, ResolveToBufferWithOffset) { DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); constexpr uint32_t kQueryCount = 2; - constexpr uint64_t kZero = 0; + constexpr uint64_t kBufferSize = kQueryCount * sizeof(uint64_t) + kMinDestinationOffset; + constexpr uint64_t kCount = kQueryCount + kMinCount; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); // Resolve the query result to first slot in the buffer, other slots should not be written { - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + wgpu::Buffer destination = CreateResolveBuffer(kBufferSize); + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); encoder.WriteTimestamp(querySet, 0); encoder.ResolveQuerySet(querySet, 0, 1, destination, 0); wgpu::CommandBuffer 
commands = encoder.Finish(); queue.Submit(1, &commands); + std::vector zeros(kCount - 1, kZero); EXPECT_BUFFER(destination, 0, sizeof(uint64_t), new TimestampExpectation); - EXPECT_BUFFER_U64_RANGE_EQ(&kZero, destination, sizeof(uint64_t), 1); + EXPECT_BUFFER_U64_RANGE_EQ(zeros.data(), destination, sizeof(uint64_t), kCount - 1); } // Resolve the query result to the buffer with offset, the slots before the offset // should not be written { - wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); + wgpu::Buffer destination = CreateResolveBuffer(kBufferSize); + // Set sentinel values to check the query is resolved to the correct slot of the buffer. + std::vector sentinelValues(kCount, kZero); + queue.WriteBuffer(destination, 0, sentinelValues.data(), kBufferSize); + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); encoder.WriteTimestamp(querySet, 0); - encoder.ResolveQuerySet(querySet, 0, 1, destination, sizeof(uint64_t)); + encoder.ResolveQuerySet(querySet, 0, 1, destination, kMinDestinationOffset); wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); - EXPECT_BUFFER_U64_RANGE_EQ(&kZero, destination, 0, 1); - EXPECT_BUFFER(destination, sizeof(uint64_t), sizeof(uint64_t), new TimestampExpectation); + std::vector zeros(kMinCount, kZero); + EXPECT_BUFFER_U64_RANGE_EQ(zeros.data(), destination, 0, kMinCount); + EXPECT_BUFFER(destination, kMinDestinationOffset, sizeof(uint64_t), + new TimestampExpectation); } } @@ -809,17 +824,17 @@ TEST_P(TimestampQueryTests, ResolveTwiceToSameBuffer) { // the issue is fixed. 
DAWN_SUPPRESS_TEST_IF(IsWindows() && IsVulkan() && IsIntel()); - constexpr uint32_t kQueryCount = 3; + constexpr uint32_t kQueryCount = kMinCount + 2; wgpu::QuerySet querySet = CreateQuerySetForTimestamp(kQueryCount); wgpu::Buffer destination = CreateResolveBuffer(kQueryCount * sizeof(uint64_t)); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - encoder.WriteTimestamp(querySet, 0); - encoder.WriteTimestamp(querySet, 1); - encoder.WriteTimestamp(querySet, 2); - encoder.ResolveQuerySet(querySet, 0, 2, destination, 0); - encoder.ResolveQuerySet(querySet, 1, 2, destination, sizeof(uint64_t)); + for (uint32_t i = 0; i < kQueryCount; i++) { + encoder.WriteTimestamp(querySet, i); + } + encoder.ResolveQuerySet(querySet, 0, kMinCount + 1, destination, 0); + encoder.ResolveQuerySet(querySet, kMinCount, 2, destination, kMinDestinationOffset); wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); diff --git a/src/tests/unittests/validation/QueryValidationTests.cpp b/src/tests/unittests/validation/QueryValidationTests.cpp index f7f8ce6467..2517887ac0 100644 --- a/src/tests/unittests/validation/QueryValidationTests.cpp +++ b/src/tests/unittests/validation/QueryValidationTests.cpp @@ -555,7 +555,8 @@ TEST_F(ResolveQuerySetValidationTest, ResolveInvalidQuerySetAndIndexCount) { // Test resolve query set with invalid query set, first query and query count TEST_F(ResolveQuerySetValidationTest, ResolveToInvalidBufferAndOffset) { constexpr uint32_t kQueryCount = 4; - constexpr uint64_t kBufferSize = kQueryCount * sizeof(uint64_t); + constexpr uint64_t kBufferSize = + (kQueryCount - 1) * sizeof(uint64_t) + 256 /*destinationOffset*/; wgpu::QuerySet querySet = CreateQuerySet(device, wgpu::QueryType::Occlusion, kQueryCount); wgpu::Buffer destination = CreateBuffer(device, kBufferSize, wgpu::BufferUsage::QueryResolve); @@ -563,7 +564,7 @@ TEST_F(ResolveQuerySetValidationTest, ResolveToInvalidBufferAndOffset) { // Success { wgpu::CommandEncoder encoder = 
device.CreateCommandEncoder(); - encoder.ResolveQuerySet(querySet, 1, kQueryCount - 1, destination, 8); + encoder.ResolveQuerySet(querySet, 1, kQueryCount - 1, destination, 256); wgpu::CommandBuffer commands = encoder.Finish(); wgpu::Queue queue = device.GetQueue(); @@ -580,17 +581,17 @@ TEST_F(ResolveQuerySetValidationTest, ResolveToInvalidBufferAndOffset) { ASSERT_DEVICE_ERROR(encoder.Finish()); } - // Fail to resolve query set to a buffer if offset is not a multiple of 8 bytes + // Fail to resolve query set to a buffer if offset is not a multiple of 256 bytes { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 4); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 128); ASSERT_DEVICE_ERROR(encoder.Finish()); } // Fail to resolve query set to a buffer if the data size overflow the buffer { wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 8); + encoder.ResolveQuerySet(querySet, 0, kQueryCount, destination, 256); ASSERT_DEVICE_ERROR(encoder.Finish()); } diff --git a/src/tests/white_box/QueryInternalShaderTests.cpp b/src/tests/white_box/QueryInternalShaderTests.cpp index 4930f1e378..e488954fd0 100644 --- a/src/tests/white_box/QueryInternalShaderTests.cpp +++ b/src/tests/white_box/QueryInternalShaderTests.cpp @@ -71,7 +71,101 @@ namespace { } // anonymous namespace -class QueryInternalShaderTests : public DawnTest {}; +constexpr static uint64_t kSentinelValue = ~uint64_t(0u); + +// A gpu frequency on Intel D3D12 (ticks/second) +constexpr uint64_t kGPUFrequency = 12000048u; +constexpr uint64_t kNsPerSecond = 1000000000u; +// Timestamp period in nanoseconds +constexpr float kPeriod = static_cast(kNsPerSecond) / kGPUFrequency; + +class QueryInternalShaderTests : public DawnTest { + protected: + // Original timestamp values in query set for testing + const std::vector querySetValues = { + kSentinelValue, 
// garbage data which is not written at beginning + 10079569507, // t0 + 10394415012, // t1 + kSentinelValue, // garbage data which is not written between timestamps + 11713454943, // t2 + 38912556941, // t3 (big value) + 10080295766, // t4 (reset) + 12159966783, // t5 (after reset) + 12651224612, // t6 + 39872473956, // t7 + }; + + const uint32_t kQueryCount = querySetValues.size(); + + // Timestamps available state + const std::vector<uint32_t> availabilities = {0, 1, 1, 0, 1, 1, 1, 1, 1, 1}; + + const std::vector<uint64_t> GetExpectedResults(const std::vector<uint64_t>& origin, + uint32_t start, + uint32_t firstQuery, + uint32_t queryCount) { + std::vector<uint64_t> expected(origin.begin(), origin.end()); + for (size_t i = 0; i < queryCount; i++) { + if (availabilities[firstQuery + i] == 0) { + // Not an available timestamp, write 0 + expected[start + i] = 0u; + } else { + // Maybe the timestamp * period is larger than the maximum of uint64, so cast the + // delta value to double (higher precision than float) + expected[start + i] = + static_cast<uint64_t>(static_cast<double>(origin[start + i]) * kPeriod); + } + } + return expected; + } + + void RunTest(uint32_t firstQuery, uint32_t queryCount, uint32_t destinationOffset) { + ASSERT(destinationOffset % 256 == 0); + + uint64_t size = queryCount * sizeof(uint64_t) + destinationOffset; + + // The resolve buffer storing original timestamps and the converted values + wgpu::BufferDescriptor timestampsDesc; + timestampsDesc.size = size; + timestampsDesc.usage = wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopySrc | + wgpu::BufferUsage::CopyDst; + wgpu::Buffer timestampsBuffer = device.CreateBuffer(&timestampsDesc); + + // Set sentinel values to check the slots before the destination offset should not be + // converted + std::vector<uint64_t> timestampValues(size / sizeof(uint64_t), 1u); + uint32_t start = destinationOffset / sizeof(uint64_t); + for (uint32_t i = 0; i < queryCount; i++) { + timestampValues[start + i] = querySetValues[firstQuery + i]; + } + // Write sentinel values and
original timestamps to timestamps buffer + queue.WriteBuffer(timestampsBuffer, 0, timestampValues.data(), size); + + // The buffer indicating which values are available timestamps + wgpu::Buffer availabilityBuffer = + utils::CreateBufferFromData(device, availabilities.data(), + kQueryCount * sizeof(uint32_t), wgpu::BufferUsage::Storage); + + // The params uniform buffer + dawn_native::TimestampParams params = {firstQuery, queryCount, destinationOffset, kPeriod}; + wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params), + wgpu::BufferUsage::Uniform); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, + paramsBuffer); + wgpu::CommandBuffer commands = encoder.Finish(); + queue.Submit(1, &commands); + + const std::vector<uint64_t> expected = + GetExpectedResults(timestampValues, start, firstQuery, queryCount); + + EXPECT_BUFFER(timestampsBuffer, 0, size, + new InternalShaderExpectation(expected.data(), size / sizeof(uint64_t))); + } + + private: +}; // Test the accuracy of timestamp compute shader which uses unsigned 32-bit integers to simulate // unsigned 64-bit integers (timestamps) multiplied by float (period). 
@@ -91,146 +185,17 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) { DAWN_TEST_UNSUPPORTED_IF(UsesWire()); - constexpr uint32_t kTimestampCount = 10u; - // A gpu frequency on Intel D3D12 (ticks/second) - constexpr uint64_t kGPUFrequency = 12000048u; - constexpr uint64_t kNsPerSecond = 1000000000u; - // Timestamp period in nanoseconds - constexpr float kPeriod = static_cast(kNsPerSecond) / kGPUFrequency; - - // Original timestamp values for testing - std::vector timestamps = { - 1, // garbage data which is not written at beginning - 10079569507, // t0 - 10394415012, // t1 - 1, // garbage data which is not written between timestamps - 11713454943, // t2 - 38912556941, // t3 (big value) - 10080295766, // t4 (reset) - 12159966783, // t5 (after reset) - 12651224612, // t6 - 39872473956, // t7 - }; - - // The buffer indicating which values are available timestamps - std::vector availabilities = {0, 1, 1, 0, 1, 1, 1, 1, 1, 1}; - wgpu::Buffer availabilityBuffer = - utils::CreateBufferFromData(device, availabilities.data(), - kTimestampCount * sizeof(uint32_t), wgpu::BufferUsage::Storage); - - // The resolve buffer storing original timestamps and the converted values - wgpu::BufferDescriptor timestampsDesc; - timestampsDesc.size = kTimestampCount * sizeof(uint64_t); - timestampsDesc.usage = - wgpu::BufferUsage::QueryResolve | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; - wgpu::Buffer timestampsBuffer = device.CreateBuffer(×tampsDesc); - - auto PrepareExpectedResults = [&](uint32_t first, uint32_t count, - uint32_t offset) -> std::vector { - ASSERT(offset % sizeof(uint64_t) == 0); - std::vector expected; - for (size_t i = 0; i < kTimestampCount; i++) { - // The data out of the rang [first, first + count] remains as it is - if (i < first || i >= first + count) { - expected.push_back(timestamps[i]); - continue; - } - - if (availabilities[i] == 0) { - // Not a available timestamp, write 0 - expected.push_back(0u); - } else { - // Maybe the timestamp * 
period is larger than the maximum of uint64, so cast the - // delta value to double (higher precision than float) - expected.push_back( - static_cast(static_cast(timestamps[i]) * kPeriod)); - } - } - return expected; - }; - // Convert timestamps in timestamps buffer with offset 0 - // Test for ResolveQuerySet(querySet, 0, kTimestampCount, timestampsBuffer, 0) - { - constexpr uint32_t kFirst = 0u; - constexpr uint32_t kOffset = 0u; + // Test for ResolveQuerySet(querySet, 0, kQueryCount, timestampsBuffer, 0) + RunTest(0, kQueryCount, 0); - // Write orignal timestamps to timestamps buffer - queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(), - kTimestampCount * sizeof(uint64_t)); + // Convert timestamps in timestamps buffer with offset 256 + // Test for ResolveQuerySet(querySet, 1, kQueryCount - 1, timestampsBuffer, 256) + RunTest(1, kQueryCount - 1, 256); - // The params uniform buffer - dawn_native::TimestampParams params = {kFirst, kTimestampCount, kOffset, kPeriod}; - wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, ¶ms, sizeof(params), - wgpu::BufferUsage::Uniform); - - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, - paramsBuffer); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); - - // Expected results: Timestamp * period - std::vector expected = PrepareExpectedResults(0, kTimestampCount, kOffset); - EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), - new InternalShaderExpectation(expected.data(), kTimestampCount)); - } - - // Convert timestamps in timestamps buffer with offset 8 - // Test for ResolveQuerySet(querySet, 1, kTimestampCount - 1, timestampsBuffer, 8) - { - constexpr uint32_t kFirst = 1u; - constexpr uint32_t kOffset = 8u; - - // Write orignal timestamps to timestamps buffer - queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(), - kTimestampCount * sizeof(uint64_t)); - - 
// The params uniform buffer - dawn_native::TimestampParams params = {kFirst, kTimestampCount - kFirst, kOffset, kPeriod}; - wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, ¶ms, sizeof(params), - wgpu::BufferUsage::Uniform); - - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, - paramsBuffer); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); - - // Expected results: Timestamp * period - std::vector expected = - PrepareExpectedResults(kFirst, kTimestampCount - kFirst, kOffset); - EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), - new InternalShaderExpectation(expected.data(), kTimestampCount)); - } - - // Convert partial timestamps in timestamps buffer with offset 8 - // Test for ResolveQuerySet(querySet, 1, 3, timestampsBuffer, 8) - { - constexpr uint32_t kFirst = 1u; - constexpr uint32_t kCount = 3u; - constexpr uint32_t kOffset = 8u; - - // Write orignal timestamps to timestamps buffer - queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(), - kTimestampCount * sizeof(uint64_t)); - - // The params uniform buffer - dawn_native::TimestampParams params = {kFirst, kCount, kOffset, kPeriod}; - wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, ¶ms, sizeof(params), - wgpu::BufferUsage::Uniform); - - wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, - paramsBuffer); - wgpu::CommandBuffer commands = encoder.Finish(); - queue.Submit(1, &commands); - - // Expected results: Timestamp * period - std::vector expected = PrepareExpectedResults(kFirst, kCount, kOffset); - EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), - new InternalShaderExpectation(expected.data(), kTimestampCount)); - } + // Convert partial timestamps in timestamps buffer with offset 256 + // Test for 
ResolveQuerySet(querySet, 1, 4, timestampsBuffer, 256) + RunTest(1, 4, 256); } DAWN_INSTANTIATE_TEST(QueryInternalShaderTests, D3D12Backend(), MetalBackend(), VulkanBackend());