RefCounted: use more precise barriers
This improves the DrawCallPerfRun/Vulkan_NoReuseBindGroups benchmark by 2% on an Intel processor, but should be a bigger improvement on ARM. The change was inspired by the Boost documentation at https://www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html, Chromium's base::AtomicRefCount implementation, and Rust's std::sync::Arc implementation. BUG=dawn:304 Change-Id: I7ca71f34af20fd267cf2efc63871ff330b1dcc7c Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/14482 Commit-Queue: Corentin Wallez <cwallez@chromium.org> Reviewed-by: Austin Eng <enga@chromium.org> Reviewed-by: David Turner <digit@google.com>
This commit is contained in:
parent
73c5573f1f
commit
ab4485f86c
|
@ -43,14 +43,36 @@ namespace dawn_native {
|
||||||
|
|
||||||
void RefCounted::Reference() {
|
void RefCounted::Reference() {
|
||||||
ASSERT((mRefCount & ~kPayloadMask) != 0);
|
ASSERT((mRefCount & ~kPayloadMask) != 0);
|
||||||
mRefCount += kRefCountIncrement;
|
|
||||||
|
// The relaxed ordering guarantees only the atomicity of the update, which is enough here
|
||||||
|
// because the reference we are copying from still exists and makes sure other threads
|
||||||
|
// don't delete `this`.
|
||||||
|
// See the explanation in the Boost documentation:
|
||||||
|
// https://www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html
|
||||||
|
mRefCount.fetch_add(kRefCountIncrement, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RefCounted::Release() {
|
void RefCounted::Release() {
|
||||||
ASSERT((mRefCount & ~kPayloadMask) != 0);
|
ASSERT((mRefCount & ~kPayloadMask) != 0);
|
||||||
|
|
||||||
mRefCount -= kRefCountIncrement;
|
// The release ordering here is to make sure all accesses to the object on a thread A
|
||||||
if (mRefCount < kRefCountIncrement) {
|
// happen-before the object is deleted on a thread B. The release memory order ensures that
|
||||||
|
// all accesses on thread A happen-before the refcount is decreased and the atomic variable
|
||||||
|
// makes sure the refcount decrease in A happens-before the refcount decrease in B. Finally
|
||||||
|
// the acquire fence in the destruction case makes sure the refcount decrease in B
|
||||||
|
// happens-before the `delete this`.
|
||||||
|
//
|
||||||
|
// See the explanation in the Boost documentation:
|
||||||
|
// https://www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html
|
||||||
|
uint64_t previousRefCount =
|
||||||
|
mRefCount.fetch_sub(kRefCountIncrement, std::memory_order_release);
|
||||||
|
|
||||||
|
// Check that the previous reference count was strictly less than 2, ignoring payload bits.
|
||||||
|
if (previousRefCount < 2 * kRefCountIncrement) {
|
||||||
|
// Note that on ARM64 this will generate a `dmb ish` instruction which is a global
|
||||||
|
// memory barrier, when an acquire load on mRefCount (using the `ldar` instruction)
|
||||||
|
// should be enough and could end up being faster.
|
||||||
|
std::atomic_thread_fence(std::memory_order_acquire);
|
||||||
delete this;
|
delete this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue