Initial support for asynchronous shader pipeline compilation

This commit is contained in:
Jack Andersen 2019-06-15 20:24:28 -10:00
parent 29a67b9ea8
commit 7eda81e55e
6 changed files with 342 additions and 231 deletions

View File

@ -46,7 +46,7 @@ public:
ObjToken<IShaderPipeline> newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo);
const AdditionalPipelineInfo& additionalInfo, bool asynchronous = true);
ObjToken<IShaderDataBinding> newShaderDataBinding(
const ObjToken<IShaderPipeline>& pipeline, const ObjToken<IGraphicsBuffer>& vbo,

View File

@ -145,7 +145,9 @@ struct IShaderStage : IObj {};
/** Opaque token for referencing a complete graphics pipeline state necessary
* to rasterize geometry (shaders and blending modes mainly) */
struct IShaderPipeline : IObj {};
struct IShaderPipeline : IObj {
virtual bool isReady() const = 0;
};
/** Opaque token serving as indirection table for shader resources
* and IShaderPipeline reference. Each renderable surface-material holds one
@ -241,12 +243,14 @@ struct IGraphicsDataFactory {
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation,
const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo) = 0;
const AdditionalPipelineInfo& additionalInfo,
bool asynchronous = true) = 0;
ObjToken<IShaderPipeline> newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo) {
return newShaderPipeline(vertex, fragment, {}, {}, {}, vtxFmt, additionalInfo);
const AdditionalPipelineInfo& additionalInfo,
bool asynchronous = true) {
return newShaderPipeline(vertex, fragment, {}, {}, {}, vtxFmt, additionalInfo, asynchronous);
}
virtual ObjToken<IShaderDataBinding> newShaderDataBinding(
@ -273,6 +277,7 @@ struct IGraphicsDataFactory {
virtual ObjToken<IGraphicsBufferD> newPoolBuffer(BufferUse use, size_t stride, size_t count __BooTraceArgs) = 0;
virtual void setDisplayGamma(float gamma) = 0;
virtual bool isTessellationSupported(uint32_t& maxPatchSizeOut) = 0;
virtual void waitUntilShadersReady() = 0;
};
using GraphicsDataFactoryContext = IGraphicsDataFactory::Context;

View File

@ -155,7 +155,7 @@ public:
ObjToken<IShaderPipeline> newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo);
const AdditionalPipelineInfo& additionalInfo, bool asynchronous = true);
boo::ObjToken<IShaderDataBinding> newShaderDataBinding(
const boo::ObjToken<IShaderPipeline>& pipeline, const boo::ObjToken<IGraphicsBuffer>& vbo,

View File

@ -7,6 +7,10 @@
#include <vector>
#include <mutex>
#include <cassert>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <queue>
#include "boo/graphicsdev/IGraphicsDataFactory.hpp"
#include "../Common.hpp"
@ -196,4 +200,68 @@ struct GraphicsDataNode : ListNode<GraphicsDataNode<NodeCls, DataCls>, ObjToken<
void UpdateGammaLUT(ITextureD* tex, float gamma);
/** Generic work-queue for asynchronously building shader pipelines on supported backends
*/
template <class ShaderPipelineType>
class PipelineCompileQueue {
  /** One unit of work: compile a single pipeline on a worker thread.
   * Holds an owning token so the pipeline stays alive until compiled. */
  struct Task {
    ObjToken<IShaderPipeline> m_pipeline;
    explicit Task(ObjToken<IShaderPipeline> pipeline) : m_pipeline(pipeline) {}
    void run() {
      m_pipeline.cast<ShaderPipelineType>()->compile();
    }
  };
  std::queue<Task> m_tasks;           // pending tasks; guarded by m_mt
  size_t m_outstandingTasks = 0;      // queued + in-flight tasks; guarded by m_mt
  std::vector<std::thread> m_threads; // worker pool
  std::mutex m_mt;
  std::condition_variable m_cv;       // signals workers: new task or shutdown
  std::condition_variable m_backcv;   // signals waiters: a task finished
  bool m_running = true;              // guarded by m_mt; cleared only at destruction

  /** Worker loop: pop a task under the lock, run it unlocked, then
   * decrement the outstanding count and wake any waitUntilReady() callers. */
  void worker() {
    std::unique_lock<std::mutex> lk(m_mt);
    while (m_running) {
      m_cv.wait(lk, [this]() { return !m_tasks.empty() || !m_running; });
      if (!m_running)
        break;
      Task t = std::move(m_tasks.front());
      m_tasks.pop();
      lk.unlock();
      t.run(); // compile outside the lock so workers run concurrently
      lk.lock();
      --m_outstandingTasks;
      m_backcv.notify_all();
    }
  }

public:
  /** Enqueue a pipeline for asynchronous compilation. Thread-safe. */
  void addPipeline(ObjToken<IShaderPipeline> pipeline) {
    std::lock_guard<std::mutex> lk(m_mt);
    m_tasks.emplace(pipeline);
    ++m_outstandingTasks;
    m_cv.notify_one();
  }

  /** Block until every queued pipeline has finished compiling
   * (or the queue is shutting down). */
  void waitUntilReady() {
    std::unique_lock<std::mutex> lk(m_mt);
    m_backcv.wait(lk, [this]() { return m_outstandingTasks == 0 || !m_running; });
  }

  PipelineCompileQueue() {
    unsigned int numThreads = std::thread::hardware_concurrency();
    /* hardware_concurrency() may legally return 0; without this clamp the
     * pool would have no workers and every queued task (and waitUntilReady)
     * would stall forever. Leave one core free for the render thread when
     * more than one is available. */
    numThreads = numThreads > 1 ? numThreads - 1 : 1;
    m_threads.reserve(numThreads);
    for (unsigned int i = 0; i < numThreads; ++i)
      m_threads.emplace_back(std::bind(&PipelineCompileQueue::worker, this));
  }

  ~PipelineCompileQueue() {
    {
      /* m_running must be cleared while holding m_mt: workers read it under
       * the lock inside the wait predicate. An unlocked write is a data race
       * and allows a lost wakeup — a worker could test the predicate, see
       * m_running == true, then block after notify_all() already fired,
       * making the join() below deadlock. */
      std::lock_guard<std::mutex> lk(m_mt);
      m_running = false;
    }
    m_cv.notify_all();
    for (auto& t : m_threads)
      t.join();
  }
};
} // namespace boo

View File

@ -134,6 +134,8 @@ public:
maxPatchSizeOut = m_maxPatchSize;
return m_hasTessellation;
}
void waitUntilShadersReady() {}
};
static const GLenum USE_TABLE[] = {GL_INVALID_ENUM, GL_ARRAY_BUFFER, GL_ELEMENT_ARRAY_BUFFER, GL_UNIFORM_BUFFER};
@ -962,6 +964,8 @@ public:
return m_prog;
}
bool isReady() const { return true; }
};
ObjToken<IShaderStage> GLDataFactory::Context::newShaderStage(const uint8_t* data, size_t size, PipelineStage stage) {
@ -979,7 +983,7 @@ ObjToken<IShaderStage> GLDataFactory::Context::newShaderStage(const uint8_t* dat
ObjToken<IShaderPipeline> GLDataFactory::Context::newShaderPipeline(
ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment, ObjToken<IShaderStage> geometry,
ObjToken<IShaderStage> control, ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo) {
const AdditionalPipelineInfo& additionalInfo, bool asynchronous) {
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
if (control || evaluation) {

View File

@ -72,10 +72,13 @@ class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFacto
friend struct VulkanPool;
friend struct VulkanDescriptorPool;
friend struct VulkanShaderDataBinding;
IGraphicsContext* m_parent;
VulkanContext* m_ctx;
VulkanDescriptorPool* m_descPoolHead = nullptr;
PipelineCompileQueue<class VulkanShaderPipeline> m_pipelineQueue;
float m_gamma = 1.f;
ObjToken<IShaderPipeline> m_gammaShader;
ObjToken<ITextureD> m_gammaLUT;
@ -90,7 +93,7 @@ class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFacto
const VertexElementDescriptor vfmt[] = {{VertexSemantic::Position4}, {VertexSemantic::UV4}};
AdditionalPipelineInfo info = {
BlendFactor::One, BlendFactor::Zero, Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None};
m_gammaShader = ctx.newShaderPipeline(vertexShader, fragmentShader, vfmt, info);
m_gammaShader = ctx.newShaderPipeline(vertexShader, fragmentShader, vfmt, info, false);
m_gammaLUT = ctx.newDynamicTexture(256, 256, TextureFormat::I16, TextureClampMode::ClampToEdge);
setDisplayGamma(1.f);
const struct Vert {
@ -140,6 +143,10 @@ public:
maxPatchSizeOut = m_ctx->m_gpuProps.limits.maxTessellationPatchSize;
return true;
}
void waitUntilShadersReady() {
m_pipelineQueue.waitUntilReady();
}
};
static inline void ThrowIfFailed(VkResult res) {
@ -489,6 +496,9 @@ void VulkanContext::initDevice() {
tessellationDescriptorBit = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
features.tessellationShader = VK_TRUE;
}
if (!m_features.dualSrcBlend)
Log.report(logvisor::Fatal, "Vulkan device does not support dual-source blending");
features.dualSrcBlend = VK_TRUE;
uint32_t extCount = 0;
vk::EnumerateDeviceExtensionProperties(m_gpus[0], nullptr, &extCount, nullptr);
@ -2258,6 +2268,7 @@ public:
class VulkanShaderPipeline : public GraphicsDataNode<IShaderPipeline> {
protected:
friend class VulkanDataFactory;
friend class VulkanDataFactoryImpl;
friend struct VulkanShaderDataBinding;
VulkanContext* m_ctx;
VkPipelineCache m_pipelineCache;
@ -2277,12 +2288,13 @@ protected:
bool m_overwriteAlpha;
CullMode m_culling;
uint32_t m_patchSize;
mutable VkPipeline m_pipeline = VK_NULL_HANDLE;
bool m_asynchronous;
mutable std::atomic<VkPipeline> m_pipeline = VK_NULL_HANDLE;
VulkanShaderPipeline(const boo::ObjToken<BaseGraphicsData>& parent, VulkanContext* ctx, ObjToken<IShaderStage> vertex,
ObjToken<IShaderStage> fragment, ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, VkPipelineCache pipelineCache, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& info)
const AdditionalPipelineInfo& info, bool asynchronous)
: GraphicsDataNode<IShaderPipeline>(parent)
, m_ctx(ctx)
, m_pipelineCache(pipelineCache)
@ -2301,7 +2313,8 @@ protected:
, m_alphaWrite(info.alphaWrite)
, m_overwriteAlpha(info.overwriteAlpha)
, m_culling(info.culling)
, m_patchSize(info.patchSize) {
, m_patchSize(info.patchSize)
, m_asynchronous(asynchronous) {
if (control && evaluation)
m_prim = Primitive::Patches;
}
@ -2316,6 +2329,15 @@ public:
VulkanShaderPipeline& operator=(const VulkanShaderPipeline&) = delete;
VulkanShaderPipeline(const VulkanShaderPipeline&) = delete;
VkPipeline bind(VkRenderPass rPass = 0) const {
compile(rPass);
while (m_pipeline == VK_NULL_HANDLE) {}
return m_pipeline;
}
mutable std::atomic_bool m_startCompile = {};
void compile(VkRenderPass rPass = 0) const {
bool falseCmp = false;
if (m_startCompile.compare_exchange_strong(falseCmp, true)) {
if (!m_pipeline) {
if (!rPass)
rPass = m_ctx->m_pass;
@ -2399,7 +2421,7 @@ public:
assemblyInfo.pNext = nullptr;
assemblyInfo.flags = 0;
assemblyInfo.topology = PRIMITIVE_TABLE[int(m_prim)];
assemblyInfo.primitiveRestartEnable = VK_TRUE;
assemblyInfo.primitiveRestartEnable = m_prim == Primitive::TriStrips;
VkPipelineTessellationStateCreateInfo tessInfo = {};
tessInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
@ -2525,8 +2547,10 @@ public:
pipelineCreateInfo.layout = m_ctx->m_pipelinelayout;
pipelineCreateInfo.renderPass = rPass;
VkPipeline p;
ThrowIfFailed(
vk::CreateGraphicsPipelines(m_ctx->m_dev, m_pipelineCache, 1, &pipelineCreateInfo, nullptr, &m_pipeline));
vk::CreateGraphicsPipelines(m_ctx->m_dev, m_pipelineCache, 1, &pipelineCreateInfo, nullptr, &p));
m_pipeline = p;
m_vertex.reset();
m_fragment.reset();
@ -2534,8 +2558,10 @@ public:
m_control.reset();
m_evaluation.reset();
}
return m_pipeline;
}
}
bool isReady() const { return m_pipeline != VK_NULL_HANDLE; }
};
static const VkDescriptorBufferInfo* GetBufferGPUResource(const IGraphicsBuffer* buf, int idx) {
@ -3690,7 +3716,7 @@ ObjToken<IShaderStage> VulkanDataFactory::Context::newShaderStage(const uint8_t*
ObjToken<IShaderPipeline> VulkanDataFactory::Context::newShaderPipeline(
ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment, ObjToken<IShaderStage> geometry,
ObjToken<IShaderStage> control, ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo) {
const AdditionalPipelineInfo& additionalInfo, bool asynchronous) {
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
if (control || evaluation) {
@ -3702,7 +3728,7 @@ ObjToken<IShaderPipeline> VulkanDataFactory::Context::newShaderPipeline(
}
return {new VulkanShaderPipeline(m_data, factory.m_ctx, vertex, fragment, geometry, control, evaluation,
VK_NULL_HANDLE, vtxFmt, additionalInfo)};
VK_NULL_HANDLE, vtxFmt, additionalInfo, asynchronous)};
}
boo::ObjToken<IShaderDataBinding> VulkanDataFactory::Context::newShaderDataBinding(
@ -3724,6 +3750,14 @@ void VulkanDataFactoryImpl::commitTransaction(
VulkanData* data = ctx.m_data.cast<VulkanData>();
/* Start asynchronous shader compiles */
if (data->m_SPs)
for (IShaderPipeline& p : *data->m_SPs) {
auto& cp = static_cast<VulkanShaderPipeline&>(p);
if (cp.m_asynchronous)
m_pipelineQueue.addPipeline({&p});
}
/* size up resources */
VkDeviceSize constantMemSizes[3] = {};
VkDeviceSize texMemSize = 0;