diff --git a/src/dawn_native/QueryHelper.cpp b/src/dawn_native/QueryHelper.cpp
index 96a7ef7635..43b82606b7 100644
--- a/src/dawn_native/QueryHelper.cpp
+++ b/src/dawn_native/QueryHelper.cpp
@@ -14,8 +14,6 @@
 
 #include "dawn_native/QueryHelper.h"
 
-#include <cmath>
-
 #include "dawn_native/BindGroup.h"
 #include "dawn_native/BindGroupLayout.h"
 #include "dawn_native/Buffer.h"
@@ -30,10 +28,9 @@ namespace dawn_native {
     namespace {
 
         // Assert the offsets in dawn_native::TimestampParams are same with the ones in the shader
-        static_assert(offsetof(dawn_native::TimestampParams, inputByteOffset) == 0, "");
-        static_assert(offsetof(dawn_native::TimestampParams, outputByteOffset) == 4, "");
-        static_assert(offsetof(dawn_native::TimestampParams, count) == 8, "");
-        static_assert(offsetof(dawn_native::TimestampParams, period) == 12, "");
+        static_assert(offsetof(dawn_native::TimestampParams, count) == 0, "");
+        static_assert(offsetof(dawn_native::TimestampParams, offset) == 4, "");
+        static_assert(offsetof(dawn_native::TimestampParams, period) == 8, "");
 
         static const char sConvertTimestampsToNanoseconds[] = R"(
             struct Timestamp {
@@ -50,19 +47,16 @@ namespace dawn_native {
             };
 
             [[block]] struct TimestampParams {
-                [[offset(0)]]  inputByteOffset  : u32;
-                [[offset(4)]]  outputByteOffset : u32;
-                [[offset(8)]]  count            : u32;
-                [[offset(12)]] period           : f32;
+                [[offset(0)]]  count  : u32;
+                [[offset(4)]]  offset : u32;
+                [[offset(8)]]  period : f32;
             };
 
             [[set(0), binding(0)]]
-                var<storage_buffer> input : [[access(read)]] TimestampArr;
+                var<storage_buffer> timestamps : [[access(read_write)]] TimestampArr;
             [[set(0), binding(1)]]
                 var<storage_buffer> availability : [[access(read)]] AvailabilityArr;
-            [[set(0), binding(2)]]
-                var<storage_buffer> output : [[access(read_write)]] TimestampArr;
-            [[set(0), binding(3)]] var<uniform> params : TimestampParams;
+            [[set(0), binding(2)]] var<uniform> params : TimestampParams;
 
             [[builtin(global_invocation_id)]] var<in> GlobalInvocationID : vec3<u32>;
 
@@ -72,21 +66,18 @@ namespace dawn_native {
             fn main() -> void {
                 if (GlobalInvocationID.x >= params.count) { return; }
 
-                var inputIndex : u32 = GlobalInvocationID.x +
-                                       params.inputByteOffset / sizeofTimestamp;
-                var outputIndex : u32 = GlobalInvocationID.x +
-                                        params.outputByteOffset / sizeofTimestamp;
+                var index : u32 = GlobalInvocationID.x + params.offset / sizeofTimestamp;
 
-                var timestamp : Timestamp = input.t[inputIndex];
+                var timestamp : Timestamp = timestamps.t[index];
 
                 # Return 0 for the unavailable value.
-                if (availability.v[inputIndex] == 0u) {
-                    output.t[outputIndex].low = 0u;
-                    output.t[outputIndex].high = 0u;
+                if (availability.v[index] == 0u) {
+                    timestamps.t[index].low = 0u;
+                    timestamps.t[index].high = 0u;
                     return;
                 }
 
-                # Multiply input values by the period and store into output.
+                # Multiply the values in timestamps buffer by the period.
                 var period : f32 = params.period;
                 var w : u32 = 0u;
 
@@ -94,7 +85,7 @@ namespace dawn_native {
                 # directly do the multiplication, otherwise, use two u32 to represent the high
                 # 16-bits and low 16-bits of this u32, then multiply them by the period separately.
                 if (timestamp.low <= u32(f32(0xFFFFFFFFu) / period)) {
-                    output.t[outputIndex].low = u32(round(f32(timestamp.low) * period));
+                    timestamps.t[index].low = u32(round(f32(timestamp.low) * period));
                 } else {
                     var lo : u32 = timestamp.low & 0xFFFF;
                     var hi : u32 = timestamp.low >> 16;
@@ -105,12 +96,12 @@ namespace dawn_native {
 
                     var result : u32 = t1 << 16;
                     result = result | (t0 & 0xFFFF);
-                    output.t[outputIndex].low = result;
+                    timestamps.t[index].low = result;
                 }
 
                 # Get the nearest integer to the float result. For high 32-bits, the round
                 # function will greatly help reduce the accuracy loss of the final result.
-                output.t[outputIndex].high = u32(round(f32(timestamp.high) * period)) + w;
+                timestamps.t[index].high = u32(round(f32(timestamp.high) * period)) + w;
             }
         )";
 
@@ -145,9 +136,8 @@ namespace dawn_native {
     }  // anonymous namespace
 
     void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
-                                              BufferBase* input,
+                                              BufferBase* timestamps,
                                               BufferBase* availability,
-                                              BufferBase* output,
                                               BufferBase* params) {
         DeviceBase* device = encoder->GetDevice();
 
@@ -157,25 +147,22 @@ namespace dawn_native {
         Ref<BindGroupLayoutBase> layout = AcquireRef(pipeline->GetBindGroupLayout(0));
 
         // Prepare bind group descriptor
-        std::array<BindGroupEntry, 4> bindGroupEntries = {};
+        std::array<BindGroupEntry, 3> bindGroupEntries = {};
         BindGroupDescriptor bgDesc = {};
         bgDesc.layout = layout.Get();
-        bgDesc.entryCount = 4;
+        bgDesc.entryCount = 3;
         bgDesc.entries = bindGroupEntries.data();
 
         // Set bind group entries.
         bindGroupEntries[0].binding = 0;
-        bindGroupEntries[0].buffer = input;
-        bindGroupEntries[0].size = input->GetSize();
+        bindGroupEntries[0].buffer = timestamps;
+        bindGroupEntries[0].size = timestamps->GetSize();
         bindGroupEntries[1].binding = 1;
         bindGroupEntries[1].buffer = availability;
         bindGroupEntries[1].size = availability->GetSize();
         bindGroupEntries[2].binding = 2;
-        bindGroupEntries[2].buffer = output;
-        bindGroupEntries[2].size = output->GetSize();
-        bindGroupEntries[3].binding = 3;
-        bindGroupEntries[3].buffer = params;
-        bindGroupEntries[3].size = params->GetSize();
+        bindGroupEntries[2].buffer = params;
+        bindGroupEntries[2].size = params->GetSize();
 
         // Create bind group after all binding entries are set.
         Ref<BindGroupBase> bindGroup = AcquireRef(device->CreateBindGroup(&bgDesc));
@@ -185,7 +172,7 @@ namespace dawn_native {
         Ref<ComputePassEncoder> pass = AcquireRef(encoder->BeginComputePass(&passDesc));
         pass->SetPipeline(pipeline);
         pass->SetBindGroup(0, bindGroup.Get());
-        pass->Dispatch(static_cast<uint32_t>(ceil((input->GetSize() / sizeof(uint64_t) + 7) / 8)));
+        pass->Dispatch(static_cast<uint32_t>((timestamps->GetSize() / sizeof(uint64_t) + 7) / 8));
         pass->EndPass();
     }
 
diff --git a/src/dawn_native/QueryHelper.h b/src/dawn_native/QueryHelper.h
index 733475be3f..4f05f09cb2 100644
--- a/src/dawn_native/QueryHelper.h
+++ b/src/dawn_native/QueryHelper.h
@@ -24,16 +24,14 @@ namespace dawn_native {
     class CommandEncoder;
 
     struct TimestampParams {
-        uint32_t inputByteOffset;
-        uint32_t outputByteOffset;
         uint32_t count;
+        uint32_t offset;
         float period;
     };
 
     void EncodeConvertTimestampsToNanoseconds(CommandEncoder* encoder,
-                                              BufferBase* input,
+                                              BufferBase* timestamps,
                                               BufferBase* availability,
-                                              BufferBase* output,
                                               BufferBase* params);
 
 }  // namespace dawn_native
diff --git a/src/tests/white_box/QueryInternalShaderTests.cpp b/src/tests/white_box/QueryInternalShaderTests.cpp
index 996e1c724c..29a417ea8b 100644
--- a/src/tests/white_box/QueryInternalShaderTests.cpp
+++ b/src/tests/white_box/QueryInternalShaderTests.cpp
@@ -22,15 +22,13 @@
 namespace {
 
     void EncodeConvertTimestampsToNanoseconds(wgpu::CommandEncoder encoder,
-                                              wgpu::Buffer input,
+                                              wgpu::Buffer timestamps,
                                               wgpu::Buffer availability,
-                                              wgpu::Buffer output,
                                               wgpu::Buffer params) {
         dawn_native::EncodeConvertTimestampsToNanoseconds(
             reinterpret_cast<dawn_native::CommandEncoder*>(encoder.Get()),
-            reinterpret_cast<dawn_native::BufferBase*>(input.Get()),
+            reinterpret_cast<dawn_native::BufferBase*>(timestamps.Get()),
             reinterpret_cast<dawn_native::BufferBase*>(availability.Get()),
-            reinterpret_cast<dawn_native::BufferBase*>(output.Get()),
             reinterpret_cast<dawn_native::BufferBase*>(params.Get()));
     }
 
@@ -78,14 +76,14 @@ class QueryInternalShaderTests : public DawnTest {};
 // Test the accuracy of timestamp compute shader which uses unsigned 32-bit integers to simulate
 // unsigned 64-bit integers (timestamps) multiplied by float (period).
 // The arguments pass to timestamp internal pipeline:
-// - The input buffer passes the original timestamps resolved from query set (created by manual
-//   here).
-// - The availability buffer passes the data of which slot in input buffer is an initialized
+// - The timestamps buffer contains the original timestamps resolved from query set (created
+//   manually here), and will be used to store the results processed by the compute shader.
+//   Expect 0 for unavailable timestamps and nanoseconds for available timestamps, within an
+//   expected error tolerance ratio.
+// - The availability buffer indicates which slot in the timestamps buffer holds an initialized
 //   timestamp.
-// - The output buffer stores the converted results, expect 0 for unavailable timestamps and
-//   nanoseconds for available timestamps in an expected error rate.
-// - The params buffer passes the offset of input and output buffers, the count of timestamps and
-//   the timestamp period (here use GPU frequency (HZ) on Intel D3D12 to calculate the period in
+// - The params buffer passes the timestamp count, the offset in timestamps buffer and the
+//   timestamp period (here use GPU frequency (HZ) on Intel D3D12 to calculate the period in
 //   ns for testing).
 TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
     DAWN_SKIP_TEST_IF(UsesWire());
@@ -100,103 +98,105 @@ TEST_P(QueryInternalShaderTests, TimestampComputeShader) {
     constexpr uint64_t kNsPerSecond = 1000000000u;
     // Timestamp period in nanoseconds
     constexpr float kPeriod = static_cast<float>(kNsPerSecond) / kGPUFrequency;
-    constexpr uint64_t kOne = 1u;
 
     // Original timestamp values for testing
-    std::array<uint64_t, kTimestampCount> timestamps;
-    timestamps[0] = 0;            // not written at beginning
-    timestamps[1] = 10079569507;  // t0
-    timestamps[2] = 10394415012;  // t1
-    timestamps[3] = 0;            // not written between timestamps
-    timestamps[4] = 11713454943;  // t2
-    timestamps[5] = 38912556941;  // t3 (big value)
-    timestamps[6] = 10080295766;  // t4 (reset)
-    timestamps[7] = 12159966783;  // t5 (after reset)
-    timestamps[8] = 12651224612;  // t6
-    timestamps[9] = 39872473956;  // t7
+    std::vector<uint64_t> timestamps = {
+        1,            // garbage data which is not written at beginning
+        10079569507,  // t0
+        10394415012,  // t1
+        1,            // garbage data which is not written between timestamps
+        11713454943,  // t2
+        38912556941,  // t3 (big value)
+        10080295766,  // t4 (reset)
+        12159966783,  // t5 (after reset)
+        12651224612,  // t6
+        39872473956,  // t7
+    };
 
-    // Expected results: Timestamp value * kNsPerSecond / kGPUFrequency
-    std::array<uint64_t, kTimestampCount> expected;
-    // The availablility state of each timestamp
-    std::array<uint32_t, kTimestampCount> availabilities;
+    // The buffer indicating which values are available timestamps
+    std::vector<uint32_t> availabilities = {0, 1, 1, 0, 1, 1, 1, 1, 1, 1};
+    wgpu::Buffer availabilityBuffer =
+        utils::CreateBufferFromData(device, availabilities.data(),
+                                    kTimestampCount * sizeof(uint32_t), wgpu::BufferUsage::Storage);
 
-    for (size_t i = 0; i < kTimestampCount; i++) {
-        if (timestamps[i] == 0) {
-            // Not a timestamp value, keep original value
-            expected[i] = 0u;
-            availabilities[i] = 0u;
-        } else {
-            // Maybe the timestamp * 10^9 is larger than the maximum of uint64, so cast the delta
-            // value to double (higher precision than float)
-            expected[i] = static_cast<uint64_t>(static_cast<double>(timestamps[i]) * kNsPerSecond /
-                                                kGPUFrequency);
-            availabilities[i] = 1u;
-        }
-    }
-
-    // The input storage buffer
-    wgpu::Buffer inputBuffer =
-        utils::CreateBufferFromData(device, timestamps.data(), sizeof(timestamps),
-                                    wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc);
-    EXPECT_BUFFER_U64_RANGE_EQ(timestamps.data(), inputBuffer, 0, kTimestampCount);
-
-    // To indicate which value is available
-    wgpu::Buffer availabilityBuffer = utils::CreateBufferFromData(
-        device, availabilities.data(), sizeof(availabilities), wgpu::BufferUsage::Storage);
-
-    // The output storage buffer
-    wgpu::BufferDescriptor outputDesc;
-    outputDesc.size = kTimestampCount * sizeof(uint64_t);
-    outputDesc.usage =
+    // The resolve buffer storing original timestamps and the converted values
+    wgpu::BufferDescriptor timestampsDesc;
+    timestampsDesc.size = kTimestampCount * sizeof(uint64_t);
+    timestampsDesc.usage =
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst;
-    wgpu::Buffer outputBuffer = device.CreateBuffer(&outputDesc);
+    wgpu::Buffer timestampsBuffer = device.CreateBuffer(&timestampsDesc);
 
-    std::array<uint64_t, kTimestampCount> ones;
-    ones.fill(kOne);
+    auto PrepareExpectedResults = [&](uint32_t offset) -> std::vector<uint64_t> {
+        ASSERT(offset % sizeof(uint64_t) == 0);
+        std::vector<uint64_t> expected;
+        for (size_t i = 0; i < kTimestampCount; i++) {
+            // The data before offset remains as it is
+            if (i < offset / sizeof(uint64_t)) {
+                expected.push_back(timestamps[i]);
+                continue;
+            }
 
-    // Convert timestamps to output buffer with offset 0
+            if (availabilities[i] == 0) {
+                // Not an available timestamp, write 0
+                expected.push_back(0u);
+            } else {
+                // Maybe the timestamp * period is larger than the maximum of uint64, so cast the
+                // delta value to double (higher precision than float)
+                expected.push_back(
+                    static_cast<uint64_t>(static_cast<double>(timestamps[i]) * kPeriod));
+            }
+        }
+        return expected;
+    };
+
+    // Convert timestamps in timestamps buffer with offset 0
     {
-        queue.WriteBuffer(outputBuffer, 0, ones.data(), sizeof(ones));
-
         constexpr uint32_t kOffset = 0u;
+
+        // Write original timestamps to timestamps buffer
+        queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(),
+                          kTimestampCount * sizeof(uint64_t));
+
         // The params uniform buffer
-        dawn_native::TimestampParams params = {kOffset, kOffset, kTimestampCount, kPeriod};
+        dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod};
         wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
                                                                 wgpu::BufferUsage::Uniform);
 
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
-
-        EncodeConvertTimestampsToNanoseconds(encoder, inputBuffer, availabilityBuffer, outputBuffer,
+        EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer,
                                              paramsBuffer);
-
         wgpu::CommandBuffer commands = encoder.Finish();
         queue.Submit(1, &commands);
 
-        EXPECT_BUFFER(outputBuffer, kOffset, kTimestampCount * sizeof(uint64_t),
+        // Expected results: Timestamp * period
+        std::vector<uint64_t> expected = PrepareExpectedResults(kOffset);
+        EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
                       new InternalShaderExpectation(expected.data(), kTimestampCount));
     }
 
-    // Convert timestamps to output buffer with offset 8 from input buffer with offset 8
+    // Convert timestamps in timestamps buffer with offset 8
     {
-        queue.WriteBuffer(outputBuffer, 0, ones.data(), sizeof(ones));
-
         constexpr uint32_t kOffset = 8u;
+
+        // Write original timestamps to timestamps buffer
+        queue.WriteBuffer(timestampsBuffer, 0, timestamps.data(),
+                          kTimestampCount * sizeof(uint64_t));
+
         // The params uniform buffer
-        dawn_native::TimestampParams params = {kOffset, kOffset, kTimestampCount, kPeriod};
+        dawn_native::TimestampParams params = {kTimestampCount, kOffset, kPeriod};
         wgpu::Buffer paramsBuffer = utils::CreateBufferFromData(device, &params, sizeof(params),
                                                                 wgpu::BufferUsage::Uniform);
 
         wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
-
-        EncodeConvertTimestampsToNanoseconds(encoder, inputBuffer, availabilityBuffer, outputBuffer,
+        EncodeConvertTimestampsToNanoseconds(encoder, timestampsBuffer, availabilityBuffer,
                                              paramsBuffer);
-
         wgpu::CommandBuffer commands = encoder.Finish();
         queue.Submit(1, &commands);
 
-        EXPECT_BUFFER_U64_RANGE_EQ(&kOne, outputBuffer, 0, 1);
-        EXPECT_BUFFER(outputBuffer, kOffset, (kTimestampCount - 1) * sizeof(uint64_t),
-                      new InternalShaderExpectation(expected.data() + 1, kTimestampCount - 1));
+        // Expected results: Timestamp * period
+        std::vector<uint64_t> expected = PrepareExpectedResults(kOffset);
+        EXPECT_BUFFER(timestampsBuffer, 0, kTimestampCount * sizeof(uint64_t),
+                      new InternalShaderExpectation(expected.data(), kTimestampCount));
     }
 }