Make perf test tracing thread-safe

The Metal driver calls command buffer completion callbacks on a
separate thread so enqueueing these trace events needs to be made
thread-safe. In the future, Dawn will probably have other threads
that also require thread-safe tracing.

In this CL, each thread records trace events into its own buffer,
and all buffers concatenated when trace events are acquired.

Bug: dawn:254, dawn:208
Change-Id: I0f1abd404568d838091066a8f27a3bf98fa764b9
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/13080
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
Austin Eng 2019-11-11 23:15:56 +00:00 committed by Commit Bot service account
parent c51616d790
commit cfc9c6e322
3 changed files with 66 additions and 6 deletions

View File

@ -65,6 +65,7 @@ namespace {
} }
value["ph"] = &phase[0]; value["ph"] = &phase[0];
value["id"] = traceEvent.id; value["id"] = traceEvent.id;
value["tid"] = traceEvent.threadId;
value["ts"] = microseconds; value["ts"] = microseconds;
value["pid"] = "Dawn"; value["pid"] = "Dawn";
@ -265,6 +266,9 @@ void DawnPerfTestBase::DoRunLoop(double maxRunTime) {
} }
// Wait for all GPU commands to complete. // Wait for all GPU commands to complete.
// TODO(enga): When Dawn has multiple backgrounds threads, add a Device::WaitForIdleForTesting()
// which waits for all threads to stop doing work. When we output results, there should
// be no additional incoming trace events.
while (signaledFenceValue != fence.GetCompletedValue()) { while (signaledFenceValue != fence.GetCompletedValue()) {
mTest->WaitABit(); mTest->WaitABit();
} }
@ -273,6 +277,9 @@ void DawnPerfTestBase::DoRunLoop(double maxRunTime) {
} }
void DawnPerfTestBase::OutputResults() { void DawnPerfTestBase::OutputResults() {
// TODO(enga): When Dawn has multiple backgrounds threads, add a Device::WaitForIdleForTesting()
// which waits for all threads to stop doing work. When we output results, there should
// be no additional incoming trace events.
DawnPerfTestPlatform* platform = DawnPerfTestPlatform* platform =
reinterpret_cast<DawnPerfTestPlatform*>(gTestEnv->GetInstance()->GetPlatform()); reinterpret_cast<DawnPerfTestPlatform*>(gTestEnv->GetInstance()->GetPlatform());

View File

@ -15,6 +15,7 @@
#include "tests/perf_tests/DawnPerfTestPlatform.h" #include "tests/perf_tests/DawnPerfTestPlatform.h"
#include "common/Assert.h" #include "common/Assert.h"
#include "common/HashUtils.h"
#include "dawn_platform/tracing/TraceEvent.h" #include "dawn_platform/tracing/TraceEvent.h"
#include "tests/perf_tests/DawnPerfTest.h" #include "tests/perf_tests/DawnPerfTest.h"
#include "utils/Timer.h" #include "utils/Timer.h"
@ -67,6 +68,22 @@ double DawnPerfTestPlatform::MonotonicallyIncreasingTime() {
return mTimer->GetAbsoluteTime() - origin; return mTimer->GetAbsoluteTime() - origin;
} }
std::vector<DawnPerfTestPlatform::TraceEvent>* DawnPerfTestPlatform::GetLocalTraceEventBuffer() {
// Cache the pointer to the vector in thread_local storage
thread_local std::vector<TraceEvent>* traceEventBuffer = nullptr;
if (traceEventBuffer == nullptr) {
auto buffer = std::make_unique<std::vector<TraceEvent>>();
traceEventBuffer = buffer.get();
// Add a new buffer to the map
std::lock_guard<std::mutex> guard(mTraceEventBufferMapMutex);
mTraceEventBuffers[std::this_thread::get_id()] = std::move(buffer);
}
return traceEventBuffer;
}
// TODO(enga): Simplify this API. // TODO(enga): Simplify this API.
uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase, uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase,
const unsigned char* categoryGroupEnabled, const unsigned char* categoryGroupEnabled,
@ -90,8 +107,13 @@ uint64_t DawnPerfTestPlatform::AddTraceEvent(char phase,
const TraceCategoryInfo* info = const TraceCategoryInfo* info =
reinterpret_cast<const TraceCategoryInfo*>(categoryGroupEnabled); reinterpret_cast<const TraceCategoryInfo*>(categoryGroupEnabled);
mTraceEventBuffer.emplace_back(phase, info->category, name, id, timestamp); std::vector<TraceEvent>* buffer = GetLocalTraceEventBuffer();
return static_cast<uint64_t>(mTraceEventBuffer.size()); buffer->emplace_back(phase, info->category, name, id, timestamp);
size_t hash = 0;
HashCombine(&hash, buffer->size());
HashCombine(&hash, std::this_thread::get_id());
return static_cast<uint64_t>(hash);
} }
void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) { void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) {
@ -99,7 +121,27 @@ void DawnPerfTestPlatform::EnableTraceEventRecording(bool enable) {
} }
std::vector<DawnPerfTestPlatform::TraceEvent> DawnPerfTestPlatform::AcquireTraceEventBuffer() { std::vector<DawnPerfTestPlatform::TraceEvent> DawnPerfTestPlatform::AcquireTraceEventBuffer() {
std::vector<DawnPerfTestPlatform::TraceEvent> buffer = mTraceEventBuffer; std::vector<TraceEvent> traceEventBuffer;
mTraceEventBuffer.clear(); {
return buffer; // AcquireTraceEventBuffer should only be called when Dawn is completely idle. There should
// be no threads inserting trace events.
// Right now, this is safe because AcquireTraceEventBuffer is called after waiting on a
// fence for all GPU commands to finish executing. When Dawn has multiple background threads
// for other work (creation, validation, submission, residency, etc), we will need to ensure
// all work on those threads is stopped as well.
std::lock_guard<std::mutex> guard(mTraceEventBufferMapMutex);
for (auto it = mTraceEventBuffers.begin(); it != mTraceEventBuffers.end(); ++it) {
std::ostringstream stream;
stream << it->first;
std::string threadId = stream.str();
std::transform(it->second->begin(), it->second->end(),
std::back_inserter(traceEventBuffer), [&threadId](TraceEvent ev) {
ev.threadId = threadId;
return ev;
});
it->second->clear();
}
}
return traceEventBuffer;
} }

View File

@ -18,6 +18,9 @@
#include <dawn_platform/DawnPlatform.h> #include <dawn_platform/DawnPlatform.h>
#include <memory> #include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector> #include <vector>
namespace utils { namespace utils {
@ -44,6 +47,7 @@ class DawnPerfTestPlatform : public dawn_platform::Platform {
dawn_platform::TraceCategory category; dawn_platform::TraceCategory category;
const char* name = nullptr; const char* name = nullptr;
uint64_t id = 0; uint64_t id = 0;
std::string threadId;
double timestamp = 0; double timestamp = 0;
}; };
@ -59,6 +63,8 @@ class DawnPerfTestPlatform : public dawn_platform::Platform {
double MonotonicallyIncreasingTime() override; double MonotonicallyIncreasingTime() override;
std::vector<TraceEvent>* GetLocalTraceEventBuffer();
uint64_t AddTraceEvent(char phase, uint64_t AddTraceEvent(char phase,
const unsigned char* categoryGroupEnabled, const unsigned char* categoryGroupEnabled,
const char* name, const char* name,
@ -74,7 +80,12 @@ class DawnPerfTestPlatform : public dawn_platform::Platform {
std::unique_ptr<utils::Timer> mTimer; std::unique_ptr<utils::Timer> mTimer;
// Trace event record. // Trace event record.
std::vector<TraceEvent> mTraceEventBuffer; // Each uses their own trace event buffer, but the PerfTestPlatform owns all of them in
// this map. The map stores all of them so we can iterate through them and flush when
// AcquireTraceEventBuffer is called.
std::unordered_map<std::thread::id, std::unique_ptr<std::vector<TraceEvent>>>
mTraceEventBuffers;
std::mutex mTraceEventBufferMapMutex;
}; };
#endif // TESTS_PERFTESTS_DAWNPERFTESTPLATFORM_H_ #endif // TESTS_PERFTESTS_DAWNPERFTESTPLATFORM_H_