Revert "Disable frontend cache when implicit device sync is on."
This reverts commit 8cc6205bf7
.
Reason for revert: Graphite actually reuses the bind groups between
draw calls using different pipelines and this change prevents it
from happening.
Original change's description:
> Disable frontend cache when implicit device sync is on.
>
> Normal behavior of ApiObjectBase's APIRelease() which only locks the
> device when last ref dropped is not thread safe if the object is cached
> as raw pointers by the device. Example of cached objects: bind group
> layout, pipeline, sampler, shader module.
>
> The following scenario could happen:
> - thread A:
> - shaderModuleA.APIRealease()
> - shaderModuleA.refCount.Decrement() == true (ref count has reached zero)
> - going to call shaderModuleA.LockAndDeleteThis().
> - thread B:
> - device.CreateShaderModule().
> - lock()
> - device.GetOrCreateShaderModule()
> - shaderModuleA is in the cache, so return it.
> - unlock()
> - thread A:
> - starting to call shaderModuleA.LockAndDeleteThis()
> - lock()
> - erase shaderModuleA from the cache.
> - delete shaderModuleA.
> - unlock()
>
> This CL disables caching when ImplicitDeviceSynchronization is turned on
> until we find a better solution.
>
> Bug: dawn:1769
> Change-Id: Ideb2a717ece0a40e18bd1c2bef00817262bd25da
> Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/127900
> Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
> Reviewed-by: Austin Eng <enga@chromium.org>
> Kokoro: Kokoro <noreply+kokoro@google.com>
TBR=cwallez@chromium.org,enga@chromium.org,noreply+kokoro@google.com,dawn-scoped@luci-project-accounts.iam.gserviceaccount.com,lehoangquyen@chromium.org
Change-Id: Ib13bba8005402d06963865fae919388a91e718f0
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: dawn:1769
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/128440
Reviewed-by: Austin Eng <enga@chromium.org>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Quyen Le <lehoangquyen@chromium.org>
This commit is contained in:
parent
cf54bb8c2a
commit
87f51237b4
|
@ -1,31 +0,0 @@
|
|||
# Implicit Device Synchronization
|
||||
|
||||
`implicit-device-sync` is an experimental feature that offers multithreading support for `dawn_native`.
|
||||
|
||||
Additional functionality:
|
||||
- `wgpu::Device` and most of its child objects are safe to be used on multiple threads, including:
|
||||
- `wgpu::Queue`.
|
||||
- `wgpu::BindGroup`.
|
||||
- `wgpu::BindGroupLayout`.
|
||||
- `wgpu::Buffer`. See Notes.
|
||||
- `wgpu::Texture`.
|
||||
- `wgpu::TextureView`.
|
||||
- `wgpu::ComputePipeline`.
|
||||
- `wgpu::RenderPipeline`.
|
||||
- `wgpu::PipelineLayout`.
|
||||
- `wgpu::Sampler`.
|
||||
- `wgpu::ShaderModule`.
|
||||
- `wgpu::SwapChain`.
|
||||
- These objects are *not* safe to be used concurrently:
|
||||
- `wgpu:CommandEncoder`.
|
||||
- `wgpu:ComputePassEncoder`.
|
||||
- `wgpu:RenderPassEncoder`.
|
||||
- `wgpu:RenderBundleEncoder`.
|
||||
- Except that the creation, referencing, releasing and destruction of these objects are guaranteed to be thread safe.
|
||||
|
||||
|
||||
Notes:
|
||||
- This feature is experimental, meaning currently it has some limitations:
|
||||
- For `wgpu::Buffer` to be safe on multiple threads. Proper manual synchronization must be done by users to ensure that the buffer is not currently being mapped and being used by any render/compute pass encoder at the same time on different threads.
|
||||
- Enabling this feature will disable the compatibility between `wgpu::BindGroupLayout`. That means the two different `wgpu::BindGroupLayout` objects created with equivalent `wgpu::BindGroupLayoutDescriptor` won't be considered "group-equivalent" anymore. You can only use a `wgpu::BindGroup` with a `wgpu::RenderPipeline`/`wgpu::ComputePipeline` if they are both created with the same `wgpu::BindGroupLayout`.
|
||||
- Consider using `wgpu::RenderPipeline::GetBindGroupLayout()`/`wgpu::ComputePipeline::GetBindGroupLayout()` when creating a `wgpu::BindGroup`.
|
|
@ -818,14 +818,9 @@ ResultOrError<Ref<BindGroupLayoutBase>> DeviceBase::GetOrCreateBindGroupLayout(
|
|||
result = *iter;
|
||||
} else {
|
||||
DAWN_TRY_ASSIGN(result, CreateBindGroupLayoutImpl(descriptor, pipelineCompatibilityToken));
|
||||
result->SetIsCachedReference();
|
||||
result->SetContentHash(blueprintHash);
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
|
||||
// would have been racing with the object's ref counting since it only stores raw pointers
|
||||
// to the objects.
|
||||
if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
result->SetIsCachedReference();
|
||||
mCaches->bindGroupLayouts.insert(result.Get());
|
||||
}
|
||||
mCaches->bindGroupLayouts.insert(result.Get());
|
||||
}
|
||||
|
||||
return std::move(result);
|
||||
|
@ -875,13 +870,6 @@ Ref<RenderPipelineBase> DeviceBase::GetCachedRenderPipeline(
|
|||
Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
|
||||
Ref<ComputePipelineBase> computePipeline) {
|
||||
ASSERT(IsLockedByCurrentThreadIfNeeded());
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache would
|
||||
// have been racing with the object's ref counting since it only stores raw pointers to the
|
||||
// objects.
|
||||
if (HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
return computePipeline;
|
||||
}
|
||||
|
||||
auto [cachedPipeline, inserted] = mCaches->computePipelines.insert(computePipeline.Get());
|
||||
if (inserted) {
|
||||
computePipeline->SetIsCachedReference();
|
||||
|
@ -894,13 +882,6 @@ Ref<ComputePipelineBase> DeviceBase::AddOrGetCachedComputePipeline(
|
|||
Ref<RenderPipelineBase> DeviceBase::AddOrGetCachedRenderPipeline(
|
||||
Ref<RenderPipelineBase> renderPipeline) {
|
||||
ASSERT(IsLockedByCurrentThreadIfNeeded());
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache would
|
||||
// have been racing with the object's ref counting since it only stores raw pointers to the
|
||||
// objects.
|
||||
if (HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
return renderPipeline;
|
||||
}
|
||||
|
||||
auto [cachedPipeline, inserted] = mCaches->renderPipelines.insert(renderPipeline.Get());
|
||||
if (inserted) {
|
||||
renderPipeline->SetIsCachedReference();
|
||||
|
@ -958,14 +939,9 @@ ResultOrError<Ref<PipelineLayoutBase>> DeviceBase::GetOrCreatePipelineLayout(
|
|||
result = *iter;
|
||||
} else {
|
||||
DAWN_TRY_ASSIGN(result, CreatePipelineLayoutImpl(descriptor));
|
||||
result->SetIsCachedReference();
|
||||
result->SetContentHash(blueprintHash);
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
|
||||
// would have been racing with the object's ref counting since it only stores raw pointers
|
||||
// to the objects.
|
||||
if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
result->SetIsCachedReference();
|
||||
mCaches->pipelineLayouts.insert(result.Get());
|
||||
}
|
||||
mCaches->pipelineLayouts.insert(result.Get());
|
||||
}
|
||||
|
||||
return std::move(result);
|
||||
|
@ -996,14 +972,9 @@ ResultOrError<Ref<SamplerBase>> DeviceBase::GetOrCreateSampler(
|
|||
result = *iter;
|
||||
} else {
|
||||
DAWN_TRY_ASSIGN(result, CreateSamplerImpl(descriptor));
|
||||
result->SetIsCachedReference();
|
||||
result->SetContentHash(blueprintHash);
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
|
||||
// would have been racing with the object's ref counting since it only stores raw pointers
|
||||
// to the objects.
|
||||
if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
result->SetIsCachedReference();
|
||||
mCaches->samplers.insert(result.Get());
|
||||
}
|
||||
mCaches->samplers.insert(result.Get());
|
||||
}
|
||||
|
||||
return std::move(result);
|
||||
|
@ -1041,14 +1012,9 @@ ResultOrError<Ref<ShaderModuleBase>> DeviceBase::GetOrCreateShaderModule(
|
|||
}
|
||||
DAWN_TRY_ASSIGN(result,
|
||||
CreateShaderModuleImpl(descriptor, parseResult, compilationMessages));
|
||||
result->SetIsCachedReference();
|
||||
result->SetContentHash(blueprintHash);
|
||||
// TODO(crbug.com/dawn/1769): We disable caching if multithreading is enabled. The cache
|
||||
// would have been racing with the object's ref counting since it only stores raw pointers
|
||||
// to the objects.
|
||||
if (!HasFeature(Feature::ImplicitDeviceSynchronization)) {
|
||||
result->SetIsCachedReference();
|
||||
mCaches->shaderModules.insert(result.Get());
|
||||
}
|
||||
mCaches->shaderModules.insert(result.Get());
|
||||
}
|
||||
|
||||
return std::move(result);
|
||||
|
|
|
@ -91,108 +91,6 @@ class MultithreadTests : public DawnTest {
|
|||
dawn::Mutex mutex;
|
||||
};
|
||||
|
||||
class MultithreadCachingTests : public MultithreadTests {
|
||||
protected:
|
||||
wgpu::ShaderModule CreateComputeShaderModule() const {
|
||||
return utils::CreateShaderModule(device, R"(
|
||||
struct SSBO {
|
||||
value : u32
|
||||
}
|
||||
@group(0) @binding(0) var<storage, read_write> ssbo : SSBO;
|
||||
|
||||
@compute @workgroup_size(1) fn main() {
|
||||
ssbo.value = 1;
|
||||
})");
|
||||
}
|
||||
|
||||
wgpu::BindGroupLayout CreateComputeBindGroupLayout() const {
|
||||
return utils::MakeBindGroupLayout(
|
||||
device, {
|
||||
{0, wgpu::ShaderStage::Compute, wgpu::BufferBindingType::Storage},
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Test that creating a same shader module (which will return the cached shader module) and release
|
||||
// it on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedShaderModulesInParallel) {
|
||||
RunInParallel(100, [this](uint32_t) {
|
||||
wgpu::ShaderModule csModule = CreateComputeShaderModule();
|
||||
EXPECT_NE(nullptr, csModule.Get());
|
||||
});
|
||||
}
|
||||
|
||||
// Test that creating a same compute pipeline (which will return the cached pipeline) and release it
|
||||
// on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedComputePipelinesInParallel) {
|
||||
wgpu::ShaderModule csModule = CreateComputeShaderModule();
|
||||
wgpu::BindGroupLayout bglayout = CreateComputeBindGroupLayout();
|
||||
wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bglayout});
|
||||
|
||||
wgpu::ComputePipelineDescriptor csDesc;
|
||||
csDesc.compute.module = csModule;
|
||||
csDesc.compute.entryPoint = "main";
|
||||
csDesc.layout = pipelineLayout;
|
||||
|
||||
RunInParallel(100, [&, this](uint32_t) {
|
||||
wgpu::ComputePipeline pipeline = device.CreateComputePipeline(&csDesc);
|
||||
EXPECT_NE(nullptr, pipeline.Get());
|
||||
});
|
||||
}
|
||||
|
||||
// Test that creating a same bind group layout (which will return the cached layout) and
|
||||
// release it on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedBindGroupLayoutsInParallel) {
|
||||
RunInParallel(100, [&, this](uint32_t) {
|
||||
wgpu::BindGroupLayout layout = CreateComputeBindGroupLayout();
|
||||
EXPECT_NE(nullptr, layout.Get());
|
||||
});
|
||||
}
|
||||
|
||||
// Test that creating a same pipeline layout (which will return the cached layout) and
|
||||
// release it on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedPipelineLayoutsInParallel) {
|
||||
wgpu::BindGroupLayout bglayout = CreateComputeBindGroupLayout();
|
||||
|
||||
RunInParallel(100, [&, this](uint32_t) {
|
||||
wgpu::PipelineLayout pipelineLayout = utils::MakePipelineLayout(device, {bglayout});
|
||||
EXPECT_NE(nullptr, pipelineLayout.Get());
|
||||
});
|
||||
}
|
||||
|
||||
// Test that creating a same render pipeline (which will return the cached pipeline) and release it
|
||||
// on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedRenderPipelinesInParallel) {
|
||||
utils::ComboRenderPipelineDescriptor renderPipelineDescriptor;
|
||||
wgpu::ShaderModule vsModule = utils::CreateShaderModule(device, R"(
|
||||
@vertex fn main() -> @builtin(position) vec4f {
|
||||
return vec4f(0.0, 0.0, 0.0, 1.0);
|
||||
})");
|
||||
wgpu::ShaderModule fsModule = utils::CreateShaderModule(device, R"(
|
||||
@fragment fn main() -> @location(0) vec4f {
|
||||
return vec4f(0.0, 1.0, 0.0, 1.0);
|
||||
})");
|
||||
renderPipelineDescriptor.vertex.module = vsModule;
|
||||
renderPipelineDescriptor.cFragment.module = fsModule;
|
||||
renderPipelineDescriptor.cTargets[0].format = wgpu::TextureFormat::RGBA8Unorm;
|
||||
renderPipelineDescriptor.primitive.topology = wgpu::PrimitiveTopology::PointList;
|
||||
|
||||
RunInParallel(100, [&, this](uint32_t) {
|
||||
wgpu::RenderPipeline pipeline = device.CreateRenderPipeline(&renderPipelineDescriptor);
|
||||
EXPECT_NE(nullptr, pipeline.Get());
|
||||
});
|
||||
}
|
||||
|
||||
// Test that creating a same sampler pipeline (which will return the cached sampler) and release it
|
||||
// on multiple threads won't race.
|
||||
TEST_P(MultithreadCachingTests, RefAndReleaseCachedSamplersInParallel) {
|
||||
wgpu::SamplerDescriptor desc = {};
|
||||
RunInParallel(100, [&, this](uint32_t) {
|
||||
wgpu::Sampler sampler = device.CreateSampler(&desc);
|
||||
EXPECT_NE(nullptr, sampler.Get());
|
||||
});
|
||||
}
|
||||
|
||||
class MultithreadEncodingTests : public MultithreadTests {};
|
||||
|
||||
// Test that encoding render passes in parallel should work
|
||||
|
@ -490,13 +388,6 @@ TEST_P(MultithreadTimestampQueryTests, ResolveQuerySets_InParallel) {
|
|||
|
||||
} // namespace
|
||||
|
||||
DAWN_INSTANTIATE_TEST(MultithreadCachingTests,
|
||||
D3D12Backend(),
|
||||
MetalBackend(),
|
||||
OpenGLBackend(),
|
||||
OpenGLESBackend(),
|
||||
VulkanBackend());
|
||||
|
||||
DAWN_INSTANTIATE_TEST(MultithreadEncodingTests,
|
||||
D3D12Backend(),
|
||||
MetalBackend(),
|
||||
|
|
Loading…
Reference in New Issue