Query API: Test the accuracy of the timestamp compute shader

Because uint64 is not supported on all GPU drivers, the timestamp compute shader uses uint32 and float arithmetic to emulate uint64 multiplication, so its results lose some accuracy compared with the expected results computed with uint64. This test checks that the accuracy loss is less than 0.2%. Bug: dawn:434 Change-Id: I6f5c842b6915f101441886bdfa4f9feb2827d174 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/34120 Commit-Queue: Hao Li <hao.x.li@intel.com> Reviewed-by: Corentin Wallez <cwallez@chromium.org>
2025-12-20 18:29:23 +00:00 · 2020-12-22 06:55:36 +00:00
parent 3272f9da37
commit afcef3ee90
8 changed files with 446 additions and 2 deletions
--- a/src/dawn_native/BUILD.gn
+++ b/src/dawn_native/BUILD.gn
@@ -235,6 +235,8 @@ source_set("dawn_native_sources") {
    "PooledResourceMemoryAllocator.h",
    "ProgrammablePassEncoder.cpp",
    "ProgrammablePassEncoder.h",
+    "QueryHelper.cpp",
+    "QueryHelper.h",
    "QuerySet.cpp",
    "QuerySet.h",
    "Queue.cpp",
--- a/src/dawn_native/CMakeLists.txt
+++ b/src/dawn_native/CMakeLists.txt
@@ -122,6 +122,8 @@ target_sources(dawn_native PRIVATE
    "PooledResourceMemoryAllocator.h"
    "ProgrammablePassEncoder.cpp"
    "ProgrammablePassEncoder.h"
+    "QueryHelper.cpp"
+    "QueryHelper.h"
    "QuerySet.cpp"
    "QuerySet.h"
    "Queue.cpp"
--- a/src/dawn_native/ComputePassEncoder.h
+++ b/src/dawn_native/ComputePassEncoder.h
@@ -32,7 +32,7 @@ namespace dawn_native {

        void EndPass();

-        void Dispatch(uint32_t x, uint32_t y, uint32_t z);
+        void Dispatch(uint32_t x, uint32_t y = 1, uint32_t z = 1);
        void DispatchIndirect(BufferBase* indirectBuffer, uint64_t indirectOffset);
        void SetPipeline(ComputePipelineBase* pipeline);

--- a/src/dawn_native/InternalPipelineStore.h
+++ b/src/dawn_native/InternalPipelineStore.h
@@ -28,6 +28,9 @@ namespace dawn_native {
        Ref<RenderPipelineBase> copyTextureForBrowserPipeline;
        Ref<ShaderModuleBase> copyTextureForBrowserVS;
        Ref<ShaderModuleBase> copyTextureForBrowserFS;
+
+        Ref<ComputePipelineBase> timestampComputePipeline;
+        Ref<ShaderModuleBase> timestampCS;
    };
 }  // namespace dawn_native

--- a/src/dawn_native/QueryHelper.cpp
+++ b/src/dawn_native/QueryHelper.cpp
@@ -0,0 +1,190 @@
+// Copyright 2020 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dawn_native/QueryHelper.h"
+
+#include "dawn_native/BindGroup.h"
+#include "dawn_native/BindGroupLayout.h"
+#include "dawn_native/Buffer.h"
+#include "dawn_native/CommandEncoder.h"
+#include "dawn_native/ComputePassEncoder.h"
+#include "dawn_native/ComputePipeline.h"
+#include "dawn_native/Device.h"
+#include "dawn_native/InternalPipelineStore.h"
+
+namespace dawn_native {
+
+    namespace {
+
+        // The uniform data written from C++ must use the same byte offsets as the TimestampParams
+        // block declared in the shader below; fail the build if the struct layout ever drifts.
+        static_assert(offsetof(TimestampParams, inputByteOffset) == 0, "");
+        static_assert(offsetof(TimestampParams, outputByteOffset) == 4, "");
+        static_assert(offsetof(TimestampParams, count) == 8, "");
+        static_assert(offsetof(TimestampParams, period) == 12, "");
+
+        // WGSL compute shader that rescales raw 64-bit timestamps by params.period. Every
+        // invocation handles one timestamp, stored as two u32 halves (low, high):
+        //  - invocations whose index is >= params.count exit immediately;
+        //  - entries whose availability word is 0 are written out as 0;
+        //  - otherwise both halves are multiplied by the period in f32, and the part of the
+        //    low-half product that does not fit in 32 bits is carried into the high half.
+        // The u32/f32 arithmetic is approximate, so the output can differ slightly from an exact
+        // 64-bit multiplication.
+        constexpr char sConvertTimestampsToNanoseconds[] = R"(
+            struct Timestamp {
+                [[offset(0)]] low  : u32;
+                [[offset(4)]] high : u32;
+            };
+
+            [[block]] struct TimestampArr {
+                [[offset(0)]] t : [[stride(8)]] array<Timestamp>;
+            };
+
+            [[block]] struct AvailabilityArr {
+                [[offset(0)]] v : [[stride(4)]] array<u32>;
+            };
+
+            [[block]] struct TimestampParams {
+                [[offset(0)]]  inputByteOffset  : u32;
+                [[offset(4)]]  outputByteOffset : u32;
+                [[offset(8)]]  count            : u32;
+                [[offset(12)]] period           : f32;
+            };
+
+            [[set(0), binding(0)]]
+                var<storage_buffer> input : [[access(read)]] TimestampArr;
+            [[set(0), binding(1)]]
+                var<storage_buffer> availability : [[access(read)]] AvailabilityArr;
+            [[set(0), binding(2)]]
+                var<storage_buffer> output : [[access(read_write)]] TimestampArr;
+            [[set(0), binding(3)]] var<uniform> params : TimestampParams;
+
+            [[builtin(global_invocation_id)]] var<in> GlobalInvocationID : vec3<u32>;
+
+            const sizeofTimestamp : u32 = 8u;
+
+            [[stage(compute), workgroup_size(8, 1, 1)]]
+            fn main() -> void {
+                if (GlobalInvocationID.x >= params.count) { return; }
+
+                var inputIndex : u32 = GlobalInvocationID.x +
+                                       params.inputByteOffset / sizeofTimestamp;
+                var outputIndex : u32 = GlobalInvocationID.x +
+                                        params.outputByteOffset / sizeofTimestamp;
+
+                var timestamp : Timestamp = input.t[inputIndex];
+
+                # Return 0 for the unavailable value.
+                if (availability.v[inputIndex] == 0u) {
+                    output.t[outputIndex].low = 0u;
+                    output.t[outputIndex].high = 0u;
+                    return;
+                }
+
+                # Multiply input values by the period and store into output.
+                var period : f32 = params.period;
+                var w : u32 = 0u;
+
+                # If the product of low 32-bits and the period does not exceed the maximum of u32,
+                # directly do the multiplication, otherwise, use two u32 to represent the high
+                # 16-bits and low 16-bits of this u32, then multiply them by the period separately.
+                if (timestamp.low <= u32(f32(0xFFFFFFFFu) / period)) {
+                    output.t[outputIndex].low = u32(round(f32(timestamp.low) * period));
+                } else {
+                    var lo : u32 = timestamp.low & 0xFFFF;
+                    var hi : u32 = timestamp.low >> 16;
+
+                    var t0 : u32 = u32(round(f32(lo) * period));
+                    var t1 : u32 = u32(round(f32(hi) * period)) + (t0 >> 16);
+                    w = t1 >> 16;
+
+                    var result : u32 = t1 << 16;
+                    result = result | (t0 & 0xFFFF);
+                    output.t[outputIndex].low = result;
+                }
+
+                # Get the nearest integer to the float result. For high 32-bits, the round
+                # function will greatly help reduce the accuracy loss of the final result.
+                output.t[outputIndex].high = u32(round(f32(timestamp.high) * period)) + w;
+            }
+        )";
+
+        // Returns the compute pipeline that runs sConvertTimestampsToNanoseconds. The pipeline
+        // and its shader module are built on first use and cached in the device's
+        // InternalPipelineStore, so later calls reuse them.
+        ComputePipelineBase* GetOrCreateTimestampComputePipeline(DeviceBase* device) {
+            InternalPipelineStore* store = device->GetInternalPipelineStore();
+
+            // Fast path: an earlier call already built the pipeline.
+            if (store->timestampComputePipeline.Get() != nullptr) {
+                return store->timestampComputePipeline.Get();
+            }
+
+            // The shader module is cached on its own, so only create it when it is missing.
+            if (store->timestampCS == nullptr) {
+                ShaderModuleWGSLDescriptor wgslDesc;
+                wgslDesc.source = sConvertTimestampsToNanoseconds;
+
+                ShaderModuleDescriptor moduleDesc;
+                moduleDesc.nextInChain = reinterpret_cast<ChainedStruct*>(&wgslDesc);
+
+                store->timestampCS = AcquireRef(device->CreateShaderModule(&moduleDesc));
+            }
+
+            // A null layout requests a layout generated from the shader module.
+            ComputePipelineDescriptor pipelineDesc = {};
+            pipelineDesc.layout = nullptr;
+            pipelineDesc.computeStage.module = store->timestampCS.Get();
+            pipelineDesc.computeStage.entryPoint = "main";
+
+            store->timestampComputePipeline =
+                AcquireRef(device->CreateComputePipeline(&pipelineDesc));
+            return store->timestampComputePipeline.Get();
+        }
+
+    }  // anonymous namespace
+
+    // Records into |encoder| a compute pass that runs the timestamp conversion shader.
+    //
+    // Each buffer is bound with its full size to bind group 0:
+    //   binding 0 - |input|: raw timestamps, read-only in the shader.
+    //   binding 1 - |availability|: one u32 per timestamp; a zero makes the shader write 0.
+    //   binding 2 - |output|: receives the converted timestamps.
+    //   binding 3 - |params|: uniform buffer laid out like TimestampParams.
+    void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
+                                              BufferBase* input,
+                                              BufferBase* availability,
+                                              BufferBase* output,
+                                              BufferBase* params) {
+        DeviceBase* device = encoder->GetDevice();
+
+        ComputePipelineBase* pipeline = GetOrCreateTimestampComputePipeline(device);
+
+        // Prepare bind group layout.
+        Ref<BindGroupLayoutBase> layout = AcquireRef(pipeline->GetBindGroupLayout(0));
+
+        // Set bind group entries; the binding numbers match the declarations in the shader.
+        std::array<BindGroupEntry, 4> bindGroupEntries = {};
+        bindGroupEntries[0].binding = 0;
+        bindGroupEntries[0].buffer = input;
+        bindGroupEntries[0].size = input->GetSize();
+        bindGroupEntries[1].binding = 1;
+        bindGroupEntries[1].buffer = availability;
+        bindGroupEntries[1].size = availability->GetSize();
+        bindGroupEntries[2].binding = 2;
+        bindGroupEntries[2].buffer = output;
+        bindGroupEntries[2].size = output->GetSize();
+        bindGroupEntries[3].binding = 3;
+        bindGroupEntries[3].buffer = params;
+        bindGroupEntries[3].size = params->GetSize();
+
+        // Prepare bind group descriptor. The entry count is taken from the array so the two
+        // cannot get out of sync.
+        BindGroupDescriptor bgDesc = {};
+        bgDesc.layout = layout.Get();
+        bgDesc.entryCount = static_cast<uint32_t>(bindGroupEntries.size());
+        bgDesc.entries = bindGroupEntries.data();
+
+        // Create bind group after all binding entries are set.
+        Ref<BindGroupBase> bindGroup = AcquireRef(device->CreateBindGroup(&bgDesc));
+
+        // Create compute encoder and issue dispatch.
+        ComputePassDescriptor passDesc = {};
+        Ref<ComputePassEncoder> pass = AcquireRef(encoder->BeginComputePass(&passDesc));
+        pass->SetPipeline(pipeline);
+        pass->SetBindGroup(0, bindGroup.Get());
+
+        // One invocation per 64-bit timestamp in |input|, rounded up to whole workgroups of 8
+        // (the workgroup_size declared by the shader). The integer arithmetic below already
+        // rounds up, so no floating-point ceil() is needed.
+        constexpr uint64_t kWorkgroupSize = 8;
+        const uint64_t timestampCount = input->GetSize() / sizeof(uint64_t);
+        const uint64_t workgroupCount = (timestampCount + kWorkgroupSize - 1) / kWorkgroupSize;
+        pass->Dispatch(static_cast<uint32_t>(workgroupCount));
+        pass->EndPass();
+    }
+
+}  // namespace dawn_native
--- a/src/dawn_native/QueryHelper.h
+++ b/src/dawn_native/QueryHelper.h
@@ -0,0 +1,41 @@
+// Copyright 2020 The Dawn Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DAWNNATIVE_QUERYHELPER_H_
+#define DAWNNATIVE_QUERYHELPER_H_
+
+#include "dawn_native/ObjectBase.h"
+
+namespace dawn_native {
+
+    class BufferBase;
+    class DeviceBase;
+    class CommandEncoder;
+
+    struct TimestampParams {  // Mirrors the TimestampParams uniform block of the shader.
+        uint32_t inputByteOffset;   // Byte offset of the first timestamp read from the input.
+        uint32_t outputByteOffset;  // Byte offset of the first timestamp written to the output.
+        uint32_t count;             // Number of timestamps to convert.
+        float period;               // Factor each timestamp is multiplied by.
+    };
+
+    // Records into |encoder| a compute pass that multiplies the timestamps in |input| by the
+    // period stored in |params| and writes the results to |output|. Timestamps whose entry in
+    // |availability| is zero are written as 0. |params| holds data laid out as TimestampParams.
+    void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
+                                              BufferBase* input,
+                                              BufferBase* availability,
+                                              BufferBase* output,
+                                              BufferBase* params);
+
+}  // namespace dawn_native
+
+#endif  // DAWNNATIVE_QUERYHELPER_H_