From cfc9c6e322f48f9abf66985e5480ae4ea3296fe6 Mon Sep 17 00:00:00 2001 From: Austin Eng Date: Mon, 11 Nov 2019 23:15:56 +0000 Subject: [PATCH] Make perf test tracing thread-safe The Metal driver calls command buffer completion callbacks on a separate thread so enqueueing these trace events needs to be made thread-safe. In the future, Dawn will probably have other threads that also require thread-safe tracing. In this CL, each thread records trace events into its own buffer, and all buffers concatenated when trace events are acquired. Bug: dawn:254, dawn:208 Change-Id: I0f1abd404568d838091066a8f27a3bf98fa764b9 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/13080 Reviewed-by: Corentin Wallez Commit-Queue: Austin Eng --- src/tests/perf_tests/DawnPerfTest.cpp | 7 +++ src/tests/perf_tests/DawnPerfTestPlatform.cpp | 52 +++++++++++++++++-- src/tests/perf_tests/DawnPerfTestPlatform.h | 13 ++++- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/src/tests/perf_tests/DawnPerfTest.cpp b/src/tests/perf_tests/DawnPerfTest.cpp index c9d683a9cb..20068ee947 100644 --- a/src/tests/perf_tests/DawnPerfTest.cpp +++ b/src/tests/perf_tests/DawnPerfTest.cpp @@ -65,6 +65,7 @@ namespace { } value["ph"] = &phase[0]; value["id"] = traceEvent.id; + value["tid"] = traceEvent.threadId; value["ts"] = microseconds; value["pid"] = "Dawn"; @@ -265,6 +266,9 @@ void DawnPerfTestBase::DoRunLoop(double maxRunTime) { } // Wait for all GPU commands to complete. + // TODO(enga): When Dawn has multiple backgrounds threads, add a Device::WaitForIdleForTesting() + // which waits for all threads to stop doing work. When we output results, there should + // be no additional incoming trace events. while (signaledFenceValue != fence.GetCompletedValue()) { mTest->WaitABit(); } @@ -273,6 +277,9 @@ void DawnPerfTestBase::DoRunLoop(double maxRunTime) { } void DawnPerfTestBase::OutputResults() { + // TODO(enga): When Dawn has multiple backgrounds threads, add a Device::WaitForIdleForTesting() + // which waits for all threads to stop doing work. When we output results, there should + // be no additional incoming trace events. DawnPerfTestPlatform* platform = reinterpret_cast(gTestEnv->GetInstance()->GetPlatform()); diff --git a/src/tests/perf_tests/DawnPerfTestPlatform.cpp b/src/tests/perf_tests/DawnPerfTestPlatform.cpp index 850b9302b1..016211fad9 100644 --- a/src/tests/perf_tests/DawnPerfTestPlatform.cpp +++ b/src/tests/perf_tests/DawnPerfTestPlatform.cpp @@ -15,6 +15,7 @@ #include "tests/perf_tests/DawnPerfTestPlatform.h" #include "common/Assert.h" +#include "common/HashUtils.h" #include "dawn_platform/tracing/TraceEvent.h" #include "tests/perf_tests/DawnPerfTest.h" #include "utils/Timer.h" @@ -67,6 +68,22 @@ double DawnPerfTestPlatform::MonotonicallyIncreasingTime() { return mTimer->GetAbsoluteTime() - origin; } +std::vector* DawnPerfTestPlatform::GetLocalTraceEventBuffer() { + // Cache the pointer to the vector in thread_local storage + thread_local std::vector* traceEventBuffer = nullptr; + + if (traceEventBuffer == nullptr) { + auto buffer = std::make_unique>(); + traceEventBuffer = buffer.get(); + + // Add a new buffer to the map + std::lock_guard guard(mTraceEventBufferMapMutex); + mTraceEventBuffers[std::this_thread::get_id()] = std::move(buffer); + } + + return traceEventBuffer; +} + // TODO(enga): Simplify this API. uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase, const unsigned char* categoryGroupEnabled, @@ -90,8 +107,13 @@ uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase, const TraceCategoryInfo* info = reinterpret_cast(categoryGroupEnabled); - mTraceEventBuffer.emplace_back(phase, info->category, name, id, timestamp); - return static_cast(mTraceEventBuffer.size()); + std::vector* buffer = GetLocalTraceEventBuffer(); + buffer->emplace_back(phase, info->category, name, id, timestamp); + + size_t hash = 0; + HashCombine(&hash, buffer->size()); + HashCombine(&hash, std::this_thread::get_id()); + return static_cast(hash); } void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) { @@ -99,7 +121,27 @@ void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) { } std::vector DawnPerfTestPlatform::AcquireTraceEventBuffer() { - std::vector buffer = mTraceEventBuffer; - mTraceEventBuffer.clear(); - return buffer; + std::vector traceEventBuffer; + { + // AcquireTraceEventBuffer should only be called when Dawn is completely idle. There should + // be no threads inserting trace events. + // Right now, this is safe because AcquireTraceEventBuffer is called after waiting on a + // fence for all GPU commands to finish executing. When Dawn has multiple background threads + // for other work (creation, validation, submission, residency, etc), we will need to ensure + // all work on those threads is stopped as well. + std::lock_guard guard(mTraceEventBufferMapMutex); + for (auto it = mTraceEventBuffers.begin(); it != mTraceEventBuffers.end(); ++it) { + std::ostringstream stream; + stream << it->first; + std::string threadId = stream.str(); + + std::transform(it->second->begin(), it->second->end(), + std::back_inserter(traceEventBuffer), [&threadId](TraceEvent ev) { + ev.threadId = threadId; + return ev; + }); + it->second->clear(); + } + } + return traceEventBuffer; } diff --git a/src/tests/perf_tests/DawnPerfTestPlatform.h b/src/tests/perf_tests/DawnPerfTestPlatform.h index b835e3480e..3fef9e92b6 100644 --- a/src/tests/perf_tests/DawnPerfTestPlatform.h +++ b/src/tests/perf_tests/DawnPerfTestPlatform.h @@ -18,6 +18,9 @@ #include #include +#include +#include +#include #include namespace utils { @@ -44,6 +47,7 @@ class DawnPerfTestPlatform : public dawn_platform::Platform { dawn_platform::TraceCategory category; const char* name = nullptr; uint64_t id = 0; + std::string threadId; double timestamp = 0; }; @@ -59,6 +63,8 @@ class DawnPerfTestPlatform : public dawn_platform::Platform { double MonotonicallyIncreasingTime() override; + std::vector* GetLocalTraceEventBuffer(); + uint64_t AddTraceEvent(char phase, const unsigned char* categoryGroupEnabled, const char* name, @@ -74,7 +80,12 @@ class DawnPerfTestPlatform : public dawn_platform::Platform { std::unique_ptr mTimer; // Trace event record. - std::vector mTraceEventBuffer; + // Each uses their own trace event buffer, but the PerfTestPlatform owns all of them in + // this map. The map stores all of them so we can iterate through them and flush when + // AcquireTraceEventBuffer is called. + std::unordered_map>> + mTraceEventBuffers; + std::mutex mTraceEventBufferMapMutex; }; #endif // TESTS_PERFTESTS_DAWNPERFTESTPLATFORM_H_