diff --git a/src/dawn_native/QueryHelper.cpp b/src/dawn_native/QueryHelper.cpp index 96a7ef7635..43b82606b7 100644 --- a/src/dawn_native/QueryHelper.cpp +++ b/src/dawn_native/QueryHelper.cpp @@ -14,8 +14,6 @@ #include "dawn_native/QueryHelper.h" -#include - #include "dawn_native/BindGroup.h" #include "dawn_native/BindGroupLayout.h" #include "dawn_native/Buffer.h" @@ -30,10 +28,9 @@ namespace dawn_native { namespace { // Assert the offsets in dawn_native::TimestampParams are same with the ones in the shader - static_assert(offsetof(dawn_native::TimestampParams, inputByteOffset) == 0, ""); - static_assert(offsetof(dawn_native::TimestampParams, outputByteOffset) == 4, ""); - static_assert(offsetof(dawn_native::TimestampParams, count) == 8, ""); - static_assert(offsetof(dawn_native::TimestampParams, period) == 12, ""); + static_assert(offsetof(dawn_native::TimestampParams, count) == 0, ""); + static_assert(offsetof(dawn_native::TimestampParams, offset) == 4, ""); + static_assert(offsetof(dawn_native::TimestampParams, period) == 8, ""); static const char sConvertTimestampsToNanoseconds[] = R"( struct Timestamp { @@ -50,19 +47,16 @@ namespace dawn_native { }; [[block]] struct TimestampParams { - [[offset(0)]] inputByteOffset : u32; - [[offset(4)]] outputByteOffset : u32; - [[offset(8)]] count : u32; - [[offset(12)]] period : f32; + [[offset(0)]] count : u32; + [[offset(4)]] offset : u32; + [[offset(8)]] period : f32; }; [[set(0), binding(0)]] - var input : [[access(read)]] TimestampArr; + var timestamps : [[access(read_write)]] TimestampArr; [[set(0), binding(1)]] var availability : [[access(read)]] AvailabilityArr; - [[set(0), binding(2)]] - var output : [[access(read_write)]] TimestampArr; - [[set(0), binding(3)]] var params : TimestampParams; + [[set(0), binding(2)]] var params : TimestampParams; [[builtin(global_invocation_id)]] var GlobalInvocationID : vec3; @@ -72,21 +66,18 @@ namespace dawn_native { fn main() -> void { if (GlobalInvocationID.x >= params.count) { return; } - var inputIndex : u32 = GlobalInvocationID.x + - params.inputByteOffset / sizeofTimestamp; - var outputIndex : u32 = GlobalInvocationID.x + - params.outputByteOffset / sizeofTimestamp; + var index : u32 = GlobalInvocationID.x + params.offset / sizeofTimestamp; - var timestamp : Timestamp = input.t[inputIndex]; + var timestamp : Timestamp = timestamps.t[index]; # Return 0 for the unavailable value. - if (availability.v[inputIndex] == 0u) { - output.t[outputIndex].low = 0u; - output.t[outputIndex].high = 0u; + if (availability.v[index] == 0u) { + timestamps.t[index].low = 0u; + timestamps.t[index].high = 0u; return; } - # Multiply input values by the period and store into output. + # Multiply the values in timestamps buffer by the period. var period : f32 = params.period; var w : u32 = 0u; @@ -94,7 +85,7 @@ namespace dawn_native { # directly do the multiplication, otherwise, use two u32 to represent the high # 16-bits and low 16-bits of this u32, then multiply them by the period separately. if (timestamp.low <= u32(f32(0xFFFFFFFFu) / period)) { - output.t[outputIndex].low = u32(round(f32(timestamp.low) * period)); + timestamps.t[index].low = u32(round(f32(timestamp.low) * period)); } else { var lo : u32 = timestamp.low & 0xFFFF; var hi : u32 = timestamp.low >> 16; @@ -105,12 +96,12 @@ namespace dawn_native { var result : u32 = t1 << 16; result = result | (t0 & 0xFFFF); - output.t[outputIndex].low = result; + timestamps.t[index].low = result; } # Get the nearest integer to the float result. For high 32-bits, the round # function will greatly help reduce the accuracy loss of the final result. - output.t[outputIndex].high = u32(round(f32(timestamp.high) * period)) + w; + timestamps.t[index].high = u32(round(f32(timestamp.high) * period)) + w; } )"; @@ -145,9 +136,8 @@ namespace dawn_native { } // anonymous namespace void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder, - BufferBase* input, + BufferBase* timestamps, BufferBase* availability, - BufferBase* output, BufferBase* params) { DeviceBase* device = encoder->GetDevice(); @@ -157,25 +147,22 @@ namespace dawn_native { Ref layout = AcquireRef(pipeline->GetBindGroupLayout(0)); // Prepare bind group descriptor - std::array bindGroupEntries = {}; + std::array bindGroupEntries = {}; BindGroupDescriptor bgDesc = {}; bgDesc.layout = layout.Get(); - bgDesc.entryCount = 4; + bgDesc.entryCount = 3; bgDesc.entries = bindGroupEntries.data(); // Set bind group entries. bindGroupEntries[0].binding = 0; - bindGroupEntries[0].buffer = input; - bindGroupEntries[0].size = input->GetSize(); + bindGroupEntries[0].buffer = timestamps; + bindGroupEntries[0].size = timestamps->GetSize(); bindGroupEntries[1].binding = 1; bindGroupEntries[1].buffer = availability; bindGroupEntries[1].size = availability->GetSize(); bindGroupEntries[2].binding = 2; - bindGroupEntries[2].buffer = output; - bindGroupEntries[2].size = output->GetSize(); - bindGroupEntries[3].binding = 3; - bindGroupEntries[3].buffer = params; - bindGroupEntries[3].size = params->GetSize(); + bindGroupEntries[2].buffer = params; + bindGroupEntries[2].size = params->GetSize(); // Create bind group after all binding entries are set. Ref bindGroup = AcquireRef(device->CreateBindGroup(&bgDesc)); @@ -185,7 +172,7 @@ namespace dawn_native { Ref pass = AcquireRef(encoder->BeginComputePass(&passDesc)); pass->SetPipeline(pipeline); pass->SetBindGroup(0, bindGroup.Get()); - pass->Dispatch(static_cast(ceil((input->GetSize() / sizeof(uint64_t) + 7) / 8))); + pass->Dispatch(static_cast((timestamps->GetSize() / sizeof(uint64_t) + 7) / 8)); pass->EndPass(); } diff --git a/src/dawn_native/QueryHelper.h b/src/dawn_native/QueryHelper.h index 733475be3f..4f05f09cb2 100644 --- a/src/dawn_native/QueryHelper.h +++ b/src/dawn_native/QueryHelper.h @@ -24,16 +24,14 @@ namespace dawn_native { class CommandEncoder; struct TimestampParams { - uint32_t inputByteOffset; - uint32_t outputByteOffset; uint32_t count; + uint32_t offset; float period; }; void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder, - BufferBase* input, + BufferBase* timestamps, BufferBase* availability, - BufferBase* output, BufferBase* params); } // namespace dawn_native diff --git a/src/tests/white_box/QueryInternalShaderTests.cpp b/src/tests/white_box/QueryInternalShaderTests.cpp index 996e1c724c..29a417ea8b 100644 --- a/src/tests/white_box/QueryInternalShaderTests.cpp +++ b/src/tests/white_box/QueryInternalShaderTests.cpp @@ -22,15 +22,13 @@ namespace { void EncodeConvertTimestampsToNanoseconds(wgpu::CommandEncoder encoder, - wgpu::Buffer input, + wgpu::Buffer timestamps, wgpu::Buffer availability, - wgpu::Buffer output, wgpu::Buffer params) { dawn_native::EncodeConvertTimestampsToNanoseconds( reinterpret_cast(encoder.Get()), - reinterpret_cast(input.Get()), + reinterpret_cast(timestamps.Get()), reinterpret_cast(availability.Get()), - reinterpret_cast(output.Get()), reinterpret_cast(params.Get())); } @@ -78,14 +76,14 @@ class QueryInternalShaderTests : public DawnTest {}; // Test the accuracy of timestamp compute shader which uses unsigned 32-bit integers to simulate // unsigned 64-bit integers (timestamps) multiplied by float (period). // The arguments pass to timestamp internal pipeline: -// - The input buffer passes the original timestamps resolved from query set (created by manual -//  here). -// - The availability buffer passes the data of which slot in input buffer is an initialized +// - The timestamps buffer contains the original timestamps resolved from query set (created +// manually here), and will be used to store the results processed by the compute shader. +// Expect 0 for unavailable timestamps and nanoseconds for available timestamps in an expected +// error tolerance ratio. +// - The availability buffer passes the data of which slot in timestamps buffer is an initialized //  timestamp. -// - The output buffer stores the converted results, expect 0 for unavailable timestamps and -//  nanoseconds for available timestamps in an expected error rate. -// - The params buffer passes the offset of input and output buffers, the count of timestamps and -// the timestamp period (here use GPU frequency (HZ) on Intel D3D12 to calculate the period in +// - The params buffer passes the timestamp count, the offset in timestamps buffer and the +// timestamp period (here use GPU frequency (HZ) on Intel D3D12 to calculate the period in // ns for testing). TEST_P(QueryInternalShaderTests, TimestampComputeShader) { DAWN_SKIP_TEST_IF(UsesWire()); @@ -100,103 +98,105 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) { constexpr uint64_t kNsPerSecond = 1000000000u; // Timestamp period in nanoseconds constexpr float kPeriod = static_cast(kNsPerSecond) / kGPUFrequency; - constexpr uint64_t kOne = 1u; // Original timestamp values for testing - std::array timestamps; - timestamps[0] = 0; // not written at beginning - timestamps[1] = 10079569507; // t0 - timestamps[2] = 10394415012; // t1 - timestamps[3] = 0; // not written between timestamps - timestamps[4] = 11713454943; // t2 - timestamps[5] = 38912556941; // t3 (big value) - timestamps[6] = 10080295766; // t4 (reset) - timestamps[7] = 12159966783; // t5 (after reset) - timestamps[8] = 12651224612; // t6 - timestamps[9] = 39872473956; // t7 + std::vector timestamps = { + 1, // garbage data which is not written at beginning + 10079569507, // t0 + 10394415012, // t1 + 1, // garbage data which is not written between timestamps + 11713454943, // t2 + 38912556941, // t3 (big value) + 10080295766, // t4 (reset) + 12159966783, // t5 (after reset) + 12651224612, // t6 + 39872473956, // t7 + }; - // Expected results: Timestamp value * kNsPerSecond / kGPUFrequency - std::array expected; - // The availablility state of each timestamp - std::array availabilities; + // The buffer indicating which values are available timestamps + std::vector availabilities = {0, 1, 1, 0, 1, 1, 1, 1, 1, 1}; + wgpu::Buffer availabilityBuffer = + utils::CreateBufferFromData(device, availabilities.data(), + kTimestampCount * sizeof(uint32_t), wgpu::BufferUsage::Storage); - for (size_t i = 0; i < kTimestampCount; i++) { - if (timestamps[i] == 0) { - // Not a timestamp value, keep original value - expected[i] = 0u; - availabilities[i] = 0u; - } else { - // Maybe the timestamp * 10^9 is larger than the maximum of uint64, so cast the delta - // value to double (higher precision than float) - expected[i] = static_cast(static_cast(timestamps[i]) * kNsPerSecond / - kGPUFrequency); - availabilities[i] = 1u; - } - } - - // The input storage buffer - wgpu::Buffer inputBuffer = - utils::CreateBufferFromData(device, timestamps.data(), sizeof(timestamps), - wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc); - EXPECT_BUFFER_U64_RANGE_EQ(timestamps.data(), inputBuffer, 0, kTimestampCount); - - // To indicate which value is available - wgpu::Buffer availabilityBuffer = utils::CreateBufferFromData( - device, availabilities.data(), sizeof(availabilities), wgpu::BufferUsage::Storage); - - // The output storage buffer - wgpu::BufferDescriptor outputDesc; - outputDesc.size = kTimestampCount * sizeof(uint64_t); - outputDesc.usage = + // The resolve buffer storing original timestamps and the converted values + wgpu::BufferDescriptor timestampsDesc; + timestampsDesc.size = kTimestampCount * sizeof(uint64_t); + timestampsDesc.usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; - wgpu::Buffer outputBuffer = device.CreateBuffer(&outputDesc); + wgpu::Buffer timestampsBuffer = device.CreateBuffer(×tampsDesc); - std::array ones; - ones.fill(kOne); + auto PrepareExpectedResults = [&](uint32_t offset) -> std::vector { + ASSERT(offset % sizeof(uint64_t) == 0); + std::vector expected; + for (size_t i = 0; i < kTimestampCount; i++) { + // The data before offset remains as it is + if (i < offset / sizeof(uint64_t)) { + expected.push_back(timestamps[i]); + continue; + } - // Convert timestamps to output buffer with offset 0 + if (availabilities[i] == 0) { + // Not a available timestamp, write 0 + expected.push_back(0u); + } else { + // Maybe the timestamp * period is larger than the maximum of uint64, so cast the + // delta value to double (higher precision than float) + expected.push_back( + static_cast(static_cast(timestamps[i]) * kPeriod)); + } + } + return expected; + }; + + // Convert timestamps in timestamps buffer with offset 0 { - queue.WriteBuffer(outputBuffer, 0, ones.data(), sizeof(ones)); - constexpr uint32_t kOffset = 0u; + + // Write orignal timestamps to timestamps buffer + queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(), + kTimestampCount * sizeof(uint64_t)); + // The params uniform buffer - dawn_native::TimestampParams params = {kOffset, kOffset, kTimestampCount, kPeriod}; + dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod}; wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, ¶ms, sizeof(params), wgpu::BufferUsage::Uniform); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - - EncodeConvertTimestampsToNanoseconds(encoder, inputBuffer, availabilityBuffer, outputBuffer, + EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, paramsBuffer); - wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); - EXPECT_BUFFER(outputBuffer, kOffset, kTimestampCount * sizeof(uint64_t), + // Expected results: Timestamp * period + std::vector expected = PrepareExpectedResults(kOffset); + EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), new InternalShaderExpectation(expected.data(), kTimestampCount)); } - // Convert timestamps to output buffer with offset 8 from input buffer with offset 8 + // Convert timestamps in timestamps buffer with offset 8 { - queue.WriteBuffer(outputBuffer, 0, ones.data(), sizeof(ones)); - constexpr uint32_t kOffset = 8u; + + // Write orignal timestamps to timestamps buffer + queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(), + kTimestampCount * sizeof(uint64_t)); + // The params uniform buffer - dawn_native::TimestampParams params = {kOffset, kOffset, kTimestampCount, kPeriod}; + dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod}; wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, ¶ms, sizeof(params), wgpu::BufferUsage::Uniform); wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); - - EncodeConvertTimestampsToNanoseconds(encoder, inputBuffer, availabilityBuffer, outputBuffer, + EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer, paramsBuffer); - wgpu::CommandBuffer commands = encoder.Finish(); queue.Submit(1, &commands); - EXPECT_BUFFER_U64_RANGE_EQ(&kOne, outputBuffer, 0, 1); - EXPECT_BUFFER(outputBuffer, kOffset, (kTimestampCount - 1) * sizeof(uint64_t), - new InternalShaderExpectation(expected.data() + 1, kTimestampCount - 1)); + // Expected results: Timestamp * period + std::vector expected = PrepareExpectedResults(kOffset); + EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t), + new InternalShaderExpectation(expected.data(), kTimestampCount)); } }