#include "../mac/CocoaCommon.hpp" #if BOO_HAS_METAL #include "logvisor/logvisor.hpp" #include "boo/IApplication.hpp" #include "boo/graphicsdev/Metal.hpp" #include "boo/IGraphicsContext.hpp" #include "Common.hpp" #include #include #include #include "xxhash.h" #if !__has_feature(objc_arc) #error ARC Required #endif #define MAX_UNIFORM_COUNT 8 #define MAX_TEXTURE_COUNT 8 static const char* GammaVS = "#include \n" "using namespace metal;\n" "struct VertData\n" "{\n" " float4 posIn [[ attribute(0) ]];\n" " float4 uvIn [[ attribute(1) ]];\n" "};\n" "\n" "struct VertToFrag\n" "{\n" " float4 pos [[ position ]];\n" " float2 uv;\n" "};\n" "\n" "vertex VertToFrag vmain(VertData v [[ stage_in ]])\n" "{\n" " VertToFrag vtf;\n" " vtf.uv = v.uvIn.xy;\n" " vtf.pos = v.posIn;\n" " return vtf;\n" "}\n"; static const char* GammaFS = "#include \n" "using namespace metal;\n" "struct VertToFrag\n" "{\n" " float4 pos [[ position ]];\n" " float2 uv;\n" "};\n" "\n" "fragment float4 fmain(VertToFrag vtf [[ stage_in ]],\n" " sampler clampSamp [[ sampler(3) ]],\n" " texture2d screenTex [[ texture(0) ]],\n" " texture2d gammaLUT [[ texture(1) ]])\n" "{\n" " uint4 tex = uint4(saturate(screenTex.sample(clampSamp, vtf.uv)) * float4(65535.0));\n" " float4 colorOut;\n" " for (int i=0 ; i<3 ; ++i)\n" " colorOut[i] = gammaLUT.read(uint2(tex[i] % 256, tex[i] / 256)).r;\n" " return colorOut;\n" "}\n"; namespace boo { static logvisor::Module Log("boo::Metal"); struct MetalCommandQueue; class MetalDataFactoryImpl; struct MetalShareableShader : IShareableShader { id m_shader; MetalShareableShader(MetalDataFactoryImpl& fac, uint64_t srcKey, uint64_t binKey, id s) : IShareableShader(fac, srcKey, binKey), m_shader(s) {} }; class MetalDataFactoryImpl : public MetalDataFactory, public GraphicsDataFactoryHead { friend struct MetalCommandQueue; friend class MetalDataFactory::Context; IGraphicsContext* m_parent; std::unordered_map> m_sharedShaders; struct MetalContext* m_ctx; bool m_hasTessellation = false; float 
m_gamma = 1.f; ObjToken m_gammaShader; ObjToken m_gammaLUT; ObjToken m_gammaVBO; ObjToken m_gammaVFMT; ObjToken m_gammaBinding; void SetupGammaResources() { m_hasTessellation = [m_ctx->m_dev supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v2]; commitTransaction([this](IGraphicsDataFactory::Context& ctx) { const VertexElementDescriptor vfmt[] = { {nullptr, nullptr, VertexSemantic::Position4}, {nullptr, nullptr, VertexSemantic::UV4} }; m_gammaVFMT = ctx.newVertexFormat(2, vfmt); m_gammaShader = static_cast(ctx).newShaderPipeline(GammaVS, GammaFS, nullptr, nullptr, m_gammaVFMT, BlendFactor::One, BlendFactor::Zero, Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None, true, false); m_gammaLUT = ctx.newDynamicTexture(256, 256, TextureFormat::I16, TextureClampMode::ClampToEdge); setDisplayGamma(1.f); const struct Vert { float pos[4]; float uv[4]; } verts[4] = { {{-1.f, 1.f, 0.f, 1.f}, {0.f, 0.f, 0.f, 0.f}}, {{ 1.f, 1.f, 0.f, 1.f}, {1.f, 0.f, 0.f, 0.f}}, {{-1.f, -1.f, 0.f, 1.f}, {0.f, 1.f, 0.f, 0.f}}, {{ 1.f, -1.f, 0.f, 1.f}, {1.f, 1.f, 0.f, 0.f}} }; m_gammaVBO = ctx.newStaticBuffer(BufferUse::Vertex, verts, 32, 4); ObjToken texs[] = {{}, m_gammaLUT.get()}; m_gammaBinding = ctx.newShaderDataBinding(m_gammaShader, m_gammaVFMT, m_gammaVBO.get(), {}, {}, 0, nullptr, nullptr, 2, texs, nullptr, nullptr); return true; } BooTrace); } public: std::unordered_map m_sourceToBinary; char m_libfile[MAXPATHLEN]; bool m_hasCompiler = false; MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx) : m_parent(parent), m_ctx(ctx) { snprintf(m_libfile, MAXPATHLEN, "%sboo_metal_shader.metallib", getenv("TMPDIR")); for (auto& arg : APP->getArgs()) if (arg == "--metal-compile") { m_hasCompiler = CheckForMetalCompiler(); break; } } ~MetalDataFactoryImpl() = default; Platform platform() const { return Platform::Metal; } const char* platformName() const { return "Metal"; } void commitTransaction(const std::function& __BooTraceArgs); ObjToken newPoolBuffer(BufferUse 
use, size_t stride, size_t count __BooTraceArgs);

    /**
     * Removes a shareable shader from the factory's cache.
     *
     * m_sharedShaders is populated and searched by the binary hash
     * (PrepareShaderStage emplaces with binHash), so the entry must be erased
     * with binKey; erasing with srcKey never matched the stored entry.
     *
     * @param srcKey hash of the shader source (not used for the lookup)
     * @param binKey hash of the shader blob; key of the entry to drop
     */
    void _unregisterShareableShader(uint64_t srcKey, uint64_t binKey)
    {
        m_sharedShaders.erase(binKey);
    }

    /**
     * Probes for the offline Metal compiler by running
     * `xcrun -sdk macosx metal` without any input.
     *
     * @return true only when the child exits normally with status 1
     *         (presumably the compiler's "no input" status — confirm);
     *         false when fork/waitpid fails or any other status is reported,
     *         including the 72 used when the tool or xcrun itself is missing.
     */
    static bool CheckForMetalCompiler()
    {
        pid_t pid = fork();
        if (pid < 0)
            return false; /* waitpid(-1, ...) would otherwise wait on an unrelated child */
        if (!pid)
        {
            execlp("xcrun", "xcrun", "-sdk", "macosx", "metal", NULL);
            /* xcrun returns 72 if metal command not found;
             * emulate that if xcrun not found.
             * _exit keeps the child from running the parent's atexit handlers */
            _exit(72);
        }

        int status, ret;
        while ((ret = waitpid(pid, &status, 0)) < 0 && errno == EINTR) {}
        if (ret < 0)
            return false;
        return WIFEXITED(status) && WEXITSTATUS(status) == 1;
    }

    /**
     * Builds the cacheable blob for a shader source and returns its hash.
     *
     * Blob layout: the first byte is 0 when the remainder is the source text
     * followed by a NUL terminator (no offline compiler available), or 1 when
     * the remainder is a metallib binary produced by piping the source through
     * `xcrun metal` and `xcrun metallib` (the latter writes to m_libfile).
     *
     * NOTE(review): element type restored as uint8_t (the blob is hashed and
     * copied as raw bytes) — confirm against the declaration in the header.
     *
     * @param blobOut receives the blob; left empty when 0 is returned
     * @param source  NUL-terminated Metal shader source
     * @param srcKey  hash of the source; mapped to the result in m_sourceToBinary
     * @return XXH64 hash of the blob, or 0 if the offline tool chain failed
     */
    uint64_t CompileLib(std::vector<uint8_t>& blobOut, const char* source, uint64_t srcKey)
    {
        if (!m_hasCompiler)
        {
            /* Cache the source if there's no compiler */
            size_t sourceLen = strlen(source);

            /* First byte unset to indicate source data */
            blobOut.resize(sourceLen + 2);
            memcpy(&blobOut[1], source, sourceLen);
        }
        else
        {
            /* Cache the binary otherwise */
            int compilerOut[2];
            int compilerIn[2];
            if (pipe(compilerOut) < 0)
            {
                fprintf(stderr, "pipe fail %s\n", strerror(errno));
                return 0;
            }
            if (pipe(compilerIn) < 0)
            {
                fprintf(stderr, "pipe fail %s\n", strerror(errno));
                close(compilerOut[0]);
                close(compilerOut[1]);
                return 0;
            }

            /* Pipe source write to compiler */
            pid_t compilerPid = fork();
            if (compilerPid < 0)
            {
                fprintf(stderr, "fork fail %s\n", strerror(errno));
                close(compilerOut[0]);
                close(compilerOut[1]);
                close(compilerIn[0]);
                close(compilerIn[1]);
                return 0;
            }
            if (!compilerPid)
            {
                dup2(compilerIn[0], STDIN_FILENO);
                dup2(compilerOut[1], STDOUT_FILENO);

                close(compilerOut[0]);
                close(compilerOut[1]);
                close(compilerIn[0]);
                close(compilerIn[1]);

                execlp("xcrun", "xcrun", "-sdk", "macosx", "metal", "-o", "/dev/stdout",
                       "-Wno-unused-variable", "-Wno-unused-const-variable", "-Wno-unused-function",
                       "-x", "metal", "-", NULL);
                fprintf(stderr, "execlp fail %s\n", strerror(errno));
                /* _exit: stdout now is the pipe; do not flush inherited stdio buffers into it */
                _exit(1);
            }
            close(compilerIn[0]);
            close(compilerOut[1]);

            /* Pipe compiler to linker */
            pid_t linkerPid = fork();
            if (linkerPid < 0)
            {
                fprintf(stderr, "fork fail %s\n", strerror(errno));
                close(compilerOut[0]);
                close(compilerIn[1]);
                /* With both pipe ends closed the compiler child terminates; reap it */
                int discardStat;
                while (waitpid(compilerPid, &discardStat, 0) < 0 && errno == EINTR) {}
                return 0;
            }
            if (!linkerPid)
            {
                dup2(compilerOut[0], STDIN_FILENO);

                close(compilerOut[0]);
                close(compilerIn[1]);

                /* metallib doesn't like outputting to a pipe, so temp file will have to do */
                execlp("xcrun", "xcrun", "-sdk", "macosx", "metallib", "-", "-o", m_libfile, NULL);
                fprintf(stderr, "execlp fail %s\n", strerror(errno));
                _exit(1);
            }
            close(compilerOut[0]);

            /* Stream in source */
            const char* inPtr = source;
            size_t inRem = strlen(source);
            while (inRem)
            {
                ssize_t writeRes = write(compilerIn[1], inPtr, inRem);
                if (writeRes < 0)
                {
                    if (errno == EINTR)
                        continue; /* interrupted before any byte was written; retry */
                    fprintf(stderr, "write fail %s\n", strerror(errno));
                    break;
                }
                inPtr += writeRes;
                inRem -= writeRes;
            }
            close(compilerIn[1]);

            /* Wait for completion; both children are always reaped */
            int compilerStat = 0, linkerStat = 0;
            pid_t compilerRes, linkerRes;
            while ((compilerRes = waitpid(compilerPid, &compilerStat, 0)) < 0 && errno == EINTR) {}
            while ((linkerRes = waitpid(linkerPid, &linkerStat, 0)) < 0 && errno == EINTR) {}
            if (compilerRes < 0 || linkerRes < 0)
            {
                fprintf(stderr, "waitpid fail %s\n", strerror(errno));
                return 0;
            }

            /* A child killed by a signal must not be mistaken for exit status 0 */
            if (!WIFEXITED(compilerStat) || WEXITSTATUS(compilerStat) ||
                !WIFEXITED(linkerStat) || WEXITSTATUS(linkerStat))
                return 0;

            /* Copy temp file into buffer with first byte set to indicate binary data */
            FILE* fin = fopen(m_libfile, "rb");
            if (!fin)
            {
                fprintf(stderr, "fopen fail %s\n", strerror(errno));
                return 0;
            }
            fseek(fin, 0, SEEK_END);
            long libLen = ftell(fin);
            if (libLen < 0)
            {
                fprintf(stderr, "ftell fail %s\n", strerror(errno));
                fclose(fin);
                return 0;
            }
            fseek(fin, 0, SEEK_SET);
            blobOut.resize(libLen + 1);
            blobOut[0] = 1;
            size_t readLen = fread(blobOut.data() + 1, 1, libLen, fin);
            fclose(fin);
            if (readLen != size_t(libLen))
            {
                /* Never hand a truncated library to the caller */
                fprintf(stderr, "fread fail: short read of metallib\n");
                blobOut.clear();
                return 0;
            }
        }

        XXH64_state_t hashState;
        XXH64_reset(&hashState, 0);
        XXH64_update(&hashState, blobOut.data(), blobOut.size());
        uint64_t binKey = XXH64_digest(&hashState);
        m_sourceToBinary[srcKey] = binKey;
        return binKey;
    }

    /**
     * Compiles shader source at runtime into a Metal library object.
     *
     * When srcKey is non-zero the returned key hashes a zero byte followed by
     * the source including its NUL terminator — the same byte sequence the
     * no-compiler branch of the blob overload stores, so both overloads yield
     * the same key for the same source.
     *
     * @param libOut   receives the result of newLibraryWithSource (nil on failure)
     * @param source   NUL-terminated Metal shader source
     * @param srcKey   hash of the source, or 0 to skip key generation
     * @param compOpts compile options forwarded to Metal
     * @param err      receives the compile error, if any
     * @return the key recorded in m_sourceToBinary, or 0 when srcKey is 0
     */
    uint64_t CompileLib(__strong id& libOut, const char* source, uint64_t srcKey,
                        MTLCompileOptions* compOpts, NSError * _Nullable *err)
    {
        libOut = [m_ctx->m_dev newLibraryWithSource:@(source)
                                            options:compOpts
                                              error:err];

        if (srcKey)
        {
            XXH64_state_t hashState;
            XXH64_reset(&hashState, 0);
            uint8_t zero = 0;
            XXH64_update(&hashState, &zero, 1);
            XXH64_update(&hashState, source, strlen(source) + 1);
            uint64_t binKey = XXH64_digest(&hashState);
            m_sourceToBinary[srcKey] = binKey;
            return binKey;
        }

        return 0;
    }

    /* Resolves one shader stage to a shared shader token: hashes the source
     * (or the supplied blob), then reuses a cached entry or builds a new one.
     * NOTE(review): blob element type restored as uint8_t — confirm. */
    MetalShareableShader::Token PrepareShaderStage(const char* source, std::vector<uint8_t>* blobOut,
                                                   NSString* funcName)
    {
        MTLCompileOptions* compOpts = [MTLCompileOptions new];
        compOpts.languageVersion = MTLLanguageVersion1_2;
        NSError* err = nullptr;

        XXH64_state_t hashState;
        uint64_t srcHash = 0;
        uint64_t binHash = 0;
        XXH64_reset(&hashState, 0);
        if (source)
        {
            XXH64_update(&hashState, source, strlen(source));
            srcHash = XXH64_digest(&hashState);
            /* Reuse the binary key recorded by an earlier CompileLib for this source */
            auto binSearch = m_sourceToBinary.find(srcHash);
            if (binSearch != m_sourceToBinary.cend())
                binHash =
binSearch->second; } else if (blobOut && !blobOut->empty()) { XXH64_update(&hashState, blobOut->data(), blobOut->size()); binHash = XXH64_digest(&hashState); } if (blobOut && blobOut->empty()) binHash = CompileLib(*blobOut, source, srcHash); MetalShareableShader::Token shader; auto search = binHash ? m_sharedShaders.find(binHash) : m_sharedShaders.end(); if (search != m_sharedShaders.end()) { return search->second->lock(); } else { id shaderLib; if (blobOut && !blobOut->empty()) { if ((*blobOut)[0] == 1) { dispatch_data_t data = dispatch_data_create(blobOut->data() + 1, blobOut->size() - 1, nullptr, nullptr); shaderLib = [m_ctx->m_dev newLibraryWithData:data error:&err]; if (!shaderLib) Log.report(logvisor::Fatal, "error loading library: %s", [[err localizedDescription] UTF8String]); } else { CompileLib(shaderLib, (char*)blobOut->data() + 1, 0, compOpts, &err); } } else binHash = CompileLib(shaderLib, source, srcHash, compOpts, &err); if (!shaderLib) { printf("%s\n", source); Log.report(logvisor::Fatal, "error compiling shader: %s", [[err localizedDescription] UTF8String]); } id func = [shaderLib newFunctionWithName:funcName]; auto it = m_sharedShaders.emplace(std::make_pair(binHash, std::make_unique(*this, srcHash, binHash, func))).first; return it->second->lock(); } } void setDisplayGamma(float gamma) { if (m_ctx->m_pixelFormat == MTLPixelFormatRGBA16Float) m_gamma = gamma * 2.2f; else m_gamma = gamma; if (m_gamma != 1.f) UpdateGammaLUT(m_gammaLUT.get(), m_gamma); } bool isTessellationSupported(uint32_t& maxPatchSize) { maxPatchSize = 32; return m_hasTessellation; } }; #define MTL_STATIC MTLResourceCPUCacheModeWriteCombined|MTLResourceStorageModeManaged #define MTL_DYNAMIC MTLResourceCPUCacheModeWriteCombined|MTLResourceStorageModeManaged class MetalGraphicsBufferS : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; MetalGraphicsBufferS(const ObjToken& parent, BufferUse use, MetalContext* ctx, const void* data, size_t 
stride, size_t count) : GraphicsDataNode(parent), m_stride(stride), m_count(count), m_sz(stride * count) { m_buf = [ctx->m_dev newBufferWithBytes:data length:m_sz options:MTL_STATIC]; } public: size_t m_stride; size_t m_count; size_t m_sz; id m_buf; ~MetalGraphicsBufferS() = default; }; template class MetalGraphicsBufferD : public GraphicsDataNode { friend class MetalDataFactory; friend class MetalDataFactoryImpl; friend struct MetalCommandQueue; MetalCommandQueue* m_q; std::unique_ptr m_cpuBuf; int m_validSlots = 0; MetalGraphicsBufferD(const ObjToken& parent, MetalCommandQueue* q, BufferUse use, MetalContext* ctx, size_t stride, size_t count) : GraphicsDataNode(parent), m_q(q), m_stride(stride), m_count(count), m_sz(stride * count) { m_cpuBuf.reset(new uint8_t[m_sz]); m_bufs[0] = [ctx->m_dev newBufferWithLength:m_sz options:MTL_DYNAMIC]; m_bufs[1] = [ctx->m_dev newBufferWithLength:m_sz options:MTL_DYNAMIC]; } public: size_t m_stride; size_t m_count; size_t m_sz; id m_bufs[2]; MetalGraphicsBufferD() = default; void update(int b) { int slot = 1 << b; if ((slot & m_validSlots) == 0) { id res = m_bufs[b]; memcpy(res.contents, m_cpuBuf.get(), m_sz); [res didModifyRange:NSMakeRange(0, m_sz)]; m_validSlots |= slot; } } void load(const void* data, size_t sz) { size_t bufSz = std::min(sz, m_sz); memcpy(m_cpuBuf.get(), data, bufSz); m_validSlots = 0; } void* map(size_t sz) { if (sz > m_sz) return nullptr; return m_cpuBuf.get(); } void unmap() { m_validSlots = 0; } }; class MetalTextureS : public GraphicsDataNode { friend class MetalDataFactory; MetalTextureS(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t mips, TextureFormat fmt, const void* data, size_t sz) : GraphicsDataNode(parent) { MTLPixelFormat pfmt = MTLPixelFormatRGBA8Unorm; NSUInteger ppitchNum = 4; NSUInteger ppitchDenom = 1; NSUInteger bytesPerRow = width * ppitchNum; switch (fmt) { case TextureFormat::I8: pfmt = MTLPixelFormatR8Unorm; ppitchNum = 1; bytesPerRow = width * 
ppitchNum; break; case TextureFormat::I16: pfmt = MTLPixelFormatR16Unorm; ppitchNum = 2; bytesPerRow = width * ppitchNum; break; case TextureFormat::DXT1: pfmt = MTLPixelFormatBC1_RGBA; ppitchNum = 1; ppitchDenom = 2; bytesPerRow = width * 8 / 4; // Metal wants this in blocks, not bytes default: break; } @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pfmt width:width height:height mipmapped:(mips>1)?YES:NO]; desc.usage = MTLTextureUsageShaderRead; desc.mipmapLevelCount = mips; m_tex = [ctx->m_dev newTextureWithDescriptor:desc]; const uint8_t* dataIt = reinterpret_cast(data); for (size_t i=0 ; i 1) { width /= 2; bytesPerRow /= 2; } if (height > 1) height /= 2; } } } public: id m_tex; ~MetalTextureS() = default; }; class MetalTextureSA : public GraphicsDataNode { friend class MetalDataFactory; MetalTextureSA(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t layers, size_t mips, TextureFormat fmt, const void* data, size_t sz) : GraphicsDataNode(parent) { MTLPixelFormat pfmt = MTLPixelFormatRGBA8Unorm; NSUInteger ppitch = 4; switch (fmt) { case TextureFormat::I8: pfmt = MTLPixelFormatR8Unorm; ppitch = 1; break; case TextureFormat::I16: pfmt = MTLPixelFormatR16Unorm; ppitch = 2; break; default: break; } @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pfmt width:width height:height mipmapped:(mips>1)?YES:NO]; desc.textureType = MTLTextureType2DArray; desc.arrayLength = layers; desc.mipmapLevelCount = mips; desc.usage = MTLTextureUsageShaderRead; m_tex = [ctx->m_dev newTextureWithDescriptor:desc]; const uint8_t* dataIt = reinterpret_cast(data); for (size_t i=0 ; i 1) width /= 2; if (height > 1) height /= 2; } } } public: id m_tex; ~MetalTextureSA() = default; }; class MetalTextureD : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; MetalCommandQueue* m_q; size_t m_width = 0; size_t m_height 
= 0; std::unique_ptr m_cpuBuf; size_t m_cpuSz; size_t m_pxPitch; int m_validSlots = 0; MetalTextureD(const ObjToken& parent, MetalCommandQueue* q, MetalContext* ctx, size_t width, size_t height, TextureFormat fmt) : GraphicsDataNode(parent), m_q(q), m_width(width), m_height(height) { MTLPixelFormat format; switch (fmt) { case TextureFormat::RGBA8: format = MTLPixelFormatRGBA8Unorm; m_pxPitch = 4; break; case TextureFormat::I8: format = MTLPixelFormatR8Unorm; m_pxPitch = 1; break; case TextureFormat::I16: format = MTLPixelFormatR16Unorm; m_pxPitch = 2; break; default: Log.report(logvisor::Fatal, "unsupported tex format"); } m_cpuSz = width * height * m_pxPitch; m_cpuBuf.reset(new uint8_t[m_cpuSz]); @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format width:width height:height mipmapped:NO]; desc.usage = MTLTextureUsageShaderRead; m_texs[0] = [ctx->m_dev newTextureWithDescriptor:desc]; m_texs[1] = [ctx->m_dev newTextureWithDescriptor:desc]; } } public: id m_texs[2]; ~MetalTextureD() = default; void update(int b) { int slot = 1 << b; if ((slot & m_validSlots) == 0) { id res = m_texs[b]; [res replaceRegion:MTLRegionMake2D(0, 0, m_width, m_height) mipmapLevel:0 withBytes:m_cpuBuf.get() bytesPerRow:m_width*m_pxPitch]; m_validSlots |= slot; } } void load(const void* data, size_t sz) { size_t bufSz = std::min(sz, m_cpuSz); memcpy(m_cpuBuf.get(), data, bufSz); m_validSlots = 0; } void* map(size_t sz) { if (sz > m_cpuSz) return nullptr; return m_cpuBuf.get(); } void unmap() { m_validSlots = 0; } }; #define MAX_BIND_TEXS 4 class MetalTextureR : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; size_t m_width = 0; size_t m_height = 0; size_t m_samples = 0; size_t m_colorBindCount; size_t m_depthBindCount; void Setup(MetalContext* ctx) { if (m_colorBindCount > MAX_BIND_TEXS) Log.report(logvisor::Fatal, "too many color bindings for render texture"); if (m_depthBindCount > 
MAX_BIND_TEXS)
            Log.report(logvisor::Fatal, "too many depth bindings for render texture");

        @autoreleasepool
        {
            MTLTextureDescriptor* desc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:ctx->m_pixelFormat
                                                               width:m_width height:m_height
                                                           mipmapped:NO];
            desc.storageMode = MTLStorageModePrivate;

            /* Render targets: one color texture in the context's pixel format and
             * one 32-bit float depth texture, multisampled when m_samples > 1 */
            if (m_samples > 1)
            {
                desc.textureType = MTLTextureType2DMultisample;
                desc.sampleCount = m_samples;
                desc.usage = MTLTextureUsageRenderTarget;
                m_colorTex = [ctx->m_dev newTextureWithDescriptor:desc];

                desc.pixelFormat = MTLPixelFormatDepth32Float;
                m_depthTex = [ctx->m_dev newTextureWithDescriptor:desc];
            }
            else
            {
                desc.textureType = MTLTextureType2D;
                desc.sampleCount = 1;
                desc.usage = MTLTextureUsageRenderTarget;
                m_colorTex = [ctx->m_dev newTextureWithDescriptor:desc];

                desc.pixelFormat = MTLPixelFormatDepth32Float;
                m_depthTex = [ctx->m_dev newTextureWithDescriptor:desc];
            }

            /* Bind textures are always single-sampled and shader-readable */
            desc.textureType = MTLTextureType2D;
            desc.sampleCount = 1;
            desc.usage = MTLTextureUsageShaderRead;
            if (m_colorBindCount)
            {
                desc.pixelFormat = ctx->m_pixelFormat;
                /* NOTE(review): loop header reconstructed from the brace structure
                 * and the use of m_colorBindTex[i] below — confirm against upstream */
                for (int i=0 ; i<m_colorBindCount ; ++i)
                {
                    m_colorBindTex[i] = [ctx->m_dev newTextureWithDescriptor:desc];
                    if (m_samples > 1)
                    {
                        /* Pass that resolves the multisampled color target into bind texture i */
                        m_blitColor[i] = [MTLRenderPassDescriptor renderPassDescriptor];
                        m_blitColor[i].colorAttachments[0].texture = m_colorTex;
                        m_blitColor[i].colorAttachments[0].loadAction = MTLLoadActionLoad;
                        m_blitColor[i].colorAttachments[0].storeAction = MTLStoreActionMultisampleResolve;
                        m_blitColor[i].colorAttachments[0].resolveTexture = m_colorBindTex[i];
                    }
                }
            }

            if (m_depthBindCount)
            {
                desc.pixelFormat = MTLPixelFormatDepth32Float;
                /* NOTE(review): loop header reconstructed from the brace structure
                 * and the use of m_depthBindTex[i] below — confirm against upstream */
                for (int i=0 ; i<m_depthBindCount ; ++i)
                {
                    m_depthBindTex[i] = [ctx->m_dev newTextureWithDescriptor:desc];
                    if (m_samples > 1)
                    {
                        /* Pass that resolves the multisampled depth target into bind texture i.
                         * The source must be the depth texture: the resolve destination is
                         * Depth32Float, and m_colorTex has the color pixel format */
                        m_blitDepth[i] = [MTLRenderPassDescriptor renderPassDescriptor];
                        m_blitDepth[i].depthAttachment.texture = m_depthTex;
                        m_blitDepth[i].depthAttachment.loadAction = MTLLoadActionLoad;
                        m_blitDepth[i].depthAttachment.storeAction = MTLStoreActionMultisampleResolve;
                        m_blitDepth[i].depthAttachment.resolveTexture = m_depthBindTex[i];
                    }
                }
            }

            /* Pass that preserves both color and depth contents */
            {
                m_passDesc = [MTLRenderPassDescriptor renderPassDescriptor];

                m_passDesc.colorAttachments[0].texture = m_colorTex;
                m_passDesc.colorAttachments[0].loadAction = MTLLoadActionLoad;
                m_passDesc.colorAttachments[0].storeAction = MTLStoreActionStore;

                m_passDesc.depthAttachment.texture = m_depthTex;
                m_passDesc.depthAttachment.loadAction = MTLLoadActionLoad;
                m_passDesc.depthAttachment.storeAction = MTLStoreActionStore;
                m_passDesc.depthAttachment.clearDepth = 0.f;
            }

            /* Pass that keeps color and clears depth to 0.0 */
            {
                m_clearDepthPassDesc = [MTLRenderPassDescriptor renderPassDescriptor];

                m_clearDepthPassDesc.colorAttachments[0].texture = m_colorTex;
                m_clearDepthPassDesc.colorAttachments[0].loadAction = MTLLoadActionLoad;
                m_clearDepthPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore;

                m_clearDepthPassDesc.depthAttachment.texture = m_depthTex;
                m_clearDepthPassDesc.depthAttachment.loadAction = MTLLoadActionClear;
                m_clearDepthPassDesc.depthAttachment.storeAction = MTLStoreActionStore;
                m_clearDepthPassDesc.depthAttachment.clearDepth = 0.f;
            }

            /* Pass that clears color to transparent black and keeps depth */
            {
                m_clearColorPassDesc = [MTLRenderPassDescriptor renderPassDescriptor];

                m_clearColorPassDesc.colorAttachments[0].texture = m_colorTex;
                m_clearColorPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear;
                m_clearColorPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore;
                /* The clear color belongs to this descriptor; it was previously written to
                 * m_clearDepthPassDesc, whose color attachment uses MTLLoadActionLoad */
                m_clearColorPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0);

                m_clearColorPassDesc.depthAttachment.texture = m_depthTex;
                m_clearColorPassDesc.depthAttachment.loadAction = MTLLoadActionLoad;
                m_clearColorPassDesc.depthAttachment.storeAction = MTLStoreActionStore;
                m_clearColorPassDesc.depthAttachment.clearDepth = 0.f;
            }

            /* Pass that clears both color and depth */
            {
                m_clearBothPassDesc = [MTLRenderPassDescriptor renderPassDescriptor];

                m_clearBothPassDesc.colorAttachments[0].texture = m_colorTex;
                m_clearBothPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear;
                m_clearBothPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore;
                m_clearBothPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0);
m_clearBothPassDesc.depthAttachment.texture = m_depthTex; m_clearBothPassDesc.depthAttachment.loadAction = MTLLoadActionClear; m_clearBothPassDesc.depthAttachment.storeAction = MTLStoreActionStore; m_clearBothPassDesc.depthAttachment.clearDepth = 0.f; } } } MetalTextureR(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t samples, size_t colorBindCount, size_t depthBindCount) : GraphicsDataNode(parent), m_width(width), m_height(height), m_samples(samples), m_colorBindCount(colorBindCount), m_depthBindCount(depthBindCount) { if (samples == 0) m_samples = 1; Setup(ctx); } public: size_t samples() const {return m_samples;} id m_colorTex; id m_depthTex; id m_colorBindTex[MAX_BIND_TEXS] = {}; id m_depthBindTex[MAX_BIND_TEXS] = {}; MTLRenderPassDescriptor* m_passDesc; MTLRenderPassDescriptor* m_clearDepthPassDesc; MTLRenderPassDescriptor* m_clearColorPassDesc; MTLRenderPassDescriptor* m_clearBothPassDesc; MTLRenderPassDescriptor* m_blitColor[MAX_BIND_TEXS] = {}; MTLRenderPassDescriptor* m_blitDepth[MAX_BIND_TEXS] = {}; ~MetalTextureR() = default; void resize(MetalContext* ctx, size_t width, size_t height) { if (width < 1) width = 1; if (height < 1) height = 1; m_width = width; m_height = height; Setup(ctx); } }; static const size_t SEMANTIC_SIZE_TABLE[] = { 0, 12, 16, 12, 16, 16, 4, 8, 16, 16, 16 }; static const MTLVertexFormat SEMANTIC_TYPE_TABLE[] = { MTLVertexFormatInvalid, MTLVertexFormatFloat3, MTLVertexFormatFloat4, MTLVertexFormatFloat3, MTLVertexFormatFloat4, MTLVertexFormatFloat4, MTLVertexFormatUChar4Normalized, MTLVertexFormatFloat2, MTLVertexFormatFloat4, MTLVertexFormatFloat4, MTLVertexFormatFloat4 }; struct MetalVertexFormat : GraphicsDataNode { size_t m_elementCount; MTLVertexDescriptor* m_vdesc; size_t m_stride = 0; size_t m_instStride = 0; MetalVertexFormat(const ObjToken& parent, size_t elementCount, const VertexElementDescriptor* elements) : GraphicsDataNode(parent), m_elementCount(elementCount) { for (size_t i=0 ; isemantic 
& VertexSemantic::SemanticMask); if ((elemin->semantic & VertexSemantic::Instanced) != VertexSemantic::None) m_instStride += SEMANTIC_SIZE_TABLE[semantic]; else m_stride += SEMANTIC_SIZE_TABLE[semantic]; } m_vdesc = [MTLVertexDescriptor vertexDescriptor]; MTLVertexBufferLayoutDescriptor* layoutDesc = m_vdesc.layouts[0]; layoutDesc.stride = m_stride; layoutDesc.stepFunction = MTLVertexStepFunctionPerVertex; layoutDesc.stepRate = 1; layoutDesc = m_vdesc.layouts[1]; layoutDesc.stride = m_instStride; layoutDesc.stepFunction = MTLVertexStepFunctionPerInstance; layoutDesc.stepRate = 1; size_t offset = 0; size_t instOffset = 0; for (size_t i=0 ; isemantic & VertexSemantic::SemanticMask); if ((elemin->semantic & VertexSemantic::Instanced) != VertexSemantic::None) { attrDesc.offset = instOffset; attrDesc.bufferIndex = 1; instOffset += SEMANTIC_SIZE_TABLE[semantic]; } else { attrDesc.offset = offset; attrDesc.bufferIndex = 0; offset += SEMANTIC_SIZE_TABLE[semantic]; } attrDesc.format = SEMANTIC_TYPE_TABLE[semantic]; } } MTLStageInputOutputDescriptor* makeTessellationComputeLayout() { MTLStageInputOutputDescriptor* ret = [MTLStageInputOutputDescriptor stageInputOutputDescriptor]; MTLBufferLayoutDescriptor* layoutDesc = ret.layouts[0]; layoutDesc.stride = m_stride; layoutDesc.stepFunction = MTLStepFunctionThreadPositionInGridX; layoutDesc.stepRate = 1; for (size_t i=0 ; i= 101200 MTLBlendFactorSource1Color, MTLBlendFactorOneMinusSource1Color, #else MTLBlendFactorSourceColor, MTLBlendFactorOneMinusSourceColor, #endif }; static const MTLPrimitiveType PRIMITIVE_TABLE[] = { MTLPrimitiveTypeTriangle, MTLPrimitiveTypeTriangleStrip, MTLPrimitiveTypePoint /* Actually patches */ }; #define COLOR_WRITE_MASK (MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue) class MetalShaderPipeline : public GraphicsDataNode { protected: friend class MetalDataFactory; friend struct MetalCommandQueue; friend struct MetalShaderDataBinding; MTLCullMode m_cullMode = MTLCullModeNone; 
MTLPrimitiveType m_drawPrim; MetalShareableShader::Token m_vert; MetalShareableShader::Token m_frag; MetalShaderPipeline(const ObjToken& parent, MetalShareableShader::Token&& vert, MetalShareableShader::Token&& frag) : GraphicsDataNode(parent), m_vert(std::move(vert)), m_frag(std::move(frag)) {} virtual void setupExtraStages(MetalContext* ctx, MTLRenderPipelineDescriptor* desc, MetalVertexFormat& cVtxFmt) {} virtual void draw(MetalCommandQueue& q, size_t start, size_t count); virtual void drawIndexed(MetalCommandQueue& q, size_t start, size_t count); virtual void drawInstances(MetalCommandQueue& q, size_t start, size_t count, size_t instCount); virtual void drawInstancesIndexed(MetalCommandQueue& q, size_t start, size_t count, size_t instCount); void setup(MetalContext* ctx, const ObjToken& vtxFmt, NSUInteger targetSamples, BlendFactor srcFac, BlendFactor dstFac, Primitive prim, ZTest depthTest, bool depthWrite, bool colorWrite, bool alphaWrite, bool overwriteAlpha, CullMode culling, bool depthAttachment = true) { m_drawPrim = PRIMITIVE_TABLE[int(prim)]; switch (culling) { case CullMode::None: default: m_cullMode = MTLCullModeNone; break; case CullMode::Backface: m_cullMode = MTLCullModeBack; break; case CullMode::Frontface: m_cullMode = MTLCullModeFront; break; } MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new]; desc.vertexFunction = m_vert.get().m_shader; desc.fragmentFunction = m_frag.get().m_shader; MetalVertexFormat& cVtxFmt = *vtxFmt.cast(); desc.vertexDescriptor = cVtxFmt.m_vdesc; setupExtraStages(ctx, desc, cVtxFmt); desc.sampleCount = targetSamples; desc.colorAttachments[0].pixelFormat = ctx->m_pixelFormat; desc.colorAttachments[0].writeMask = (colorWrite ? COLOR_WRITE_MASK : 0) | (alphaWrite ? 
MTLColorWriteMaskAlpha : 0); desc.colorAttachments[0].blendingEnabled = dstFac != BlendFactor::Zero; if (srcFac == BlendFactor::Subtract || dstFac == BlendFactor::Subtract) { desc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; desc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOne; desc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationReverseSubtract; if (overwriteAlpha) { desc.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne; desc.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorZero; desc.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd; } else { desc.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorSourceAlpha; desc.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorOne; desc.colorAttachments[0].alphaBlendOperation = MTLBlendOperationReverseSubtract; } } else { desc.colorAttachments[0].sourceRGBBlendFactor = BLEND_FACTOR_TABLE[int(srcFac)]; desc.colorAttachments[0].destinationRGBBlendFactor = BLEND_FACTOR_TABLE[int(dstFac)]; desc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd; if (overwriteAlpha) { desc.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne; desc.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorZero; } else { desc.colorAttachments[0].sourceAlphaBlendFactor = BLEND_FACTOR_TABLE[int(srcFac)]; desc.colorAttachments[0].destinationAlphaBlendFactor = BLEND_FACTOR_TABLE[int(dstFac)]; } desc.colorAttachments[0].alphaBlendOperation = MTLBlendOperationAdd; } desc.depthAttachmentPixelFormat = depthAttachment ? 
MTLPixelFormatDepth32Float : MTLPixelFormatInvalid; desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; NSError* err = nullptr; m_state = [ctx->m_dev newRenderPipelineStateWithDescriptor:desc error:&err]; if (err) Log.report(logvisor::Fatal, "error making shader pipeline: %s", [[err localizedDescription] UTF8String]); MTLDepthStencilDescriptor* dsDesc = [MTLDepthStencilDescriptor new]; switch (depthTest) { case ZTest::None: default: dsDesc.depthCompareFunction = MTLCompareFunctionAlways; break; case ZTest::LEqual: dsDesc.depthCompareFunction = MTLCompareFunctionGreaterEqual; break; case ZTest::Greater: dsDesc.depthCompareFunction = MTLCompareFunctionLess; break; case ZTest::GEqual: dsDesc.depthCompareFunction = MTLCompareFunctionLessEqual; break; case ZTest::Equal: dsDesc.depthCompareFunction = MTLCompareFunctionEqual; break; } dsDesc.depthWriteEnabled = depthWrite; m_dsState = [ctx->m_dev newDepthStencilStateWithDescriptor:dsDesc]; } public: id m_state; id m_dsState; ~MetalShaderPipeline() = default; MetalShaderPipeline& operator=(const MetalShaderPipeline&) = delete; MetalShaderPipeline(const MetalShaderPipeline&) = delete; void bind(id enc) { [enc setRenderPipelineState:m_state]; [enc setDepthStencilState:m_dsState]; [enc setCullMode:m_cullMode]; } }; class MetalTessellationShaderPipeline : public MetalShaderPipeline { friend class MetalDataFactory; friend struct MetalCommandQueue; friend struct MetalShaderDataBinding; MetalShareableShader::Token m_compute; uint32_t m_patchSize; MetalTessellationShaderPipeline( const ObjToken& parent, MetalShareableShader::Token&& compute, MetalShareableShader::Token&& frag, MetalShareableShader::Token&& evaluation, uint32_t patchSize) : MetalShaderPipeline(parent, std::move(evaluation), std::move(frag)), m_compute(std::move(compute)), m_patchSize(patchSize) {} void setupExtraStages(MetalContext* ctx, MTLRenderPipelineDescriptor* desc, MetalVertexFormat& cVtxFmt) { desc.maxTessellationFactor = 16; 
desc.tessellationFactorScaleEnabled = NO; desc.tessellationFactorFormat = MTLTessellationFactorFormatHalf; desc.tessellationControlPointIndexType = MTLTessellationControlPointIndexTypeNone; desc.tessellationFactorStepFunction = MTLTessellationFactorStepFunctionPerPatch; desc.tessellationOutputWindingOrder = MTLWindingClockwise; desc.tessellationPartitionMode = MTLTessellationPartitionModeInteger; desc.vertexDescriptor = cVtxFmt.makeTessellationVertexLayout(); MTLComputePipelineDescriptor* compDesc = [MTLComputePipelineDescriptor new]; compDesc.computeFunction = m_compute.get().m_shader; compDesc.stageInputDescriptor = cVtxFmt.makeTessellationComputeLayout(); NSError* err = nullptr; m_computeState = [ctx->m_dev newComputePipelineStateWithDescriptor:compDesc options:MTLPipelineOptionNone reflection:nil error:&err]; if (err) Log.report(logvisor::Fatal, "error making compute pipeline: %s", [[err localizedDescription] UTF8String]); } void draw(MetalCommandQueue& q, size_t start, size_t count); void drawIndexed(MetalCommandQueue& q, size_t start, size_t count); void drawInstances(MetalCommandQueue& q, size_t start, size_t count, size_t instCount); void drawInstancesIndexed(MetalCommandQueue& q, size_t start, size_t count, size_t instCount); public: id m_computeState; ~MetalTessellationShaderPipeline() = default; }; static id GetBufferGPUResource(const ObjToken& buf, int idx) { if (buf->dynamic()) { const MetalGraphicsBufferD* cbuf = buf.cast>(); return cbuf->m_bufs[idx]; } else { const MetalGraphicsBufferS* cbuf = buf.cast(); return cbuf->m_buf; } } static id GetTextureGPUResource(const ObjToken& tex, int idx, int bindIdx, bool depth) { switch (tex->type()) { case TextureType::Dynamic: { const MetalTextureD* ctex = tex.cast(); return ctex->m_texs[idx]; } case TextureType::Static: { const MetalTextureS* ctex = tex.cast(); return ctex->m_tex; } case TextureType::StaticArray: { const MetalTextureSA* ctex = tex.cast(); return ctex->m_tex; } case TextureType::Render: { const 
MetalTextureR* ctex = tex.cast(); return depth ? ctex->m_depthBindTex[bindIdx] : ctex->m_colorBindTex[bindIdx]; } default: break; } return nullptr; } struct MetalShaderDataBinding : GraphicsDataNode { ObjToken m_pipeline; ObjToken m_vbuf; ObjToken m_instVbo; ObjToken m_ibuf; std::vector> m_ubufs; std::vector m_ubufOffs; std::vector m_fubufs; struct BoundTex { ObjToken tex; int idx; bool depth; }; std::vector m_texs; size_t m_baseVert; size_t m_baseInst; MetalShaderDataBinding(const ObjToken& d, MetalContext* ctx, const ObjToken& pipeline, const ObjToken& vbuf, const ObjToken& instVbo, const ObjToken& ibuf, size_t ubufCount, const ObjToken* ubufs, const PipelineStage* ubufStages, const size_t* ubufOffs, const size_t* ubufSizes, size_t texCount, const ObjToken* texs, const int* texBindIdxs, const bool* depthBind, size_t baseVert, size_t baseInst) : GraphicsDataNode(d), m_pipeline(pipeline), m_vbuf(vbuf), m_instVbo(instVbo), m_ibuf(ibuf), m_baseVert(baseVert), m_baseInst(baseInst) { if (ubufCount && ubufStages) { m_fubufs.reserve(ubufCount); for (size_t i=0 ; i enc, int b) { m_pipeline.cast()->bind(enc); if (m_vbuf) { id buf = GetBufferGPUResource(m_vbuf, b); [enc setVertexBuffer:buf offset:0 atIndex:0]; } if (m_instVbo) { id buf = GetBufferGPUResource(m_instVbo, b); [enc setVertexBuffer:buf offset:0 atIndex:1]; } if (m_ubufOffs.size()) for (size_t i=0 ; i enc, int b) { if (m_vbuf) { id buf = GetBufferGPUResource(m_vbuf, b); [enc setBuffer:buf offset:0 atIndex:0]; } if (m_instVbo) { id buf = GetBufferGPUResource(m_instVbo, b); [enc setBuffer:buf offset:0 atIndex:1]; } } }; struct MetalCommandQueue : IGraphicsCommandQueue { Platform platform() const { return IGraphicsDataFactory::Platform::Metal; } const char* platformName() const { return "Metal"; } MetalContext* m_ctx; IWindow* m_parentWindow; IGraphicsContext* m_parent; id m_cmdBuf; id m_enc; id m_samplers[5]; bool m_running = true; int m_fillBuf = 0; int m_drawBuf = 0; MetalCommandQueue(MetalContext* ctx, IWindow* 
parentWindow, IGraphicsContext* parent) : m_ctx(ctx), m_parentWindow(parentWindow), m_parent(parent) { @autoreleasepool { /* Obtain an initial command buffer, then build the five sampler states by mutating one descriptor between creations. */ m_cmdBuf = [ctx->m_q commandBuffer]; MTLSamplerDescriptor* sampDesc = [MTLSamplerDescriptor new]; /* Sampler 0: repeat addressing, linear min/mag/mip filtering, the context's anisotropy, opaque white border colour. */ sampDesc.rAddressMode = MTLSamplerAddressModeRepeat; sampDesc.sAddressMode = MTLSamplerAddressModeRepeat; sampDesc.tAddressMode = MTLSamplerAddressModeRepeat; sampDesc.minFilter = MTLSamplerMinMagFilterLinear; sampDesc.magFilter = MTLSamplerMinMagFilterLinear; sampDesc.mipFilter = MTLSamplerMipFilterLinear; sampDesc.maxAnisotropy = ctx->m_anisotropy; sampDesc.borderColor = MTLSamplerBorderColorOpaqueWhite; m_samplers[0] = [ctx->m_dev newSamplerStateWithDescriptor:sampDesc]; /* Sampler 1: clamp to border colour (still opaque white). */ sampDesc.rAddressMode = MTLSamplerAddressModeClampToBorderColor; sampDesc.sAddressMode = MTLSamplerAddressModeClampToBorderColor; sampDesc.tAddressMode = MTLSamplerAddressModeClampToBorderColor; m_samplers[1] = [ctx->m_dev newSamplerStateWithDescriptor:sampDesc]; /* Sampler 2: clamp to border colour, now opaque black. */ sampDesc.rAddressMode = MTLSamplerAddressModeClampToBorderColor; sampDesc.sAddressMode = MTLSamplerAddressModeClampToBorderColor; sampDesc.tAddressMode = MTLSamplerAddressModeClampToBorderColor; sampDesc.borderColor = MTLSamplerBorderColorOpaqueBlack; m_samplers[2] = [ctx->m_dev newSamplerStateWithDescriptor:sampDesc]; /* Sampler 3: clamp to edge, filtering unchanged. */ sampDesc.rAddressMode = MTLSamplerAddressModeClampToEdge; sampDesc.sAddressMode = MTLSamplerAddressModeClampToEdge; sampDesc.tAddressMode = MTLSamplerAddressModeClampToEdge; m_samplers[3] = [ctx->m_dev newSamplerStateWithDescriptor:sampDesc]; /* Sampler 4: clamp to edge with nearest min/mag filtering (mip filter is left as set earlier). */ sampDesc.rAddressMode = MTLSamplerAddressModeClampToEdge; sampDesc.sAddressMode = MTLSamplerAddressModeClampToEdge; sampDesc.tAddressMode = MTLSamplerAddressModeClampToEdge; sampDesc.minFilter = MTLSamplerMinMagFilterNearest; sampDesc.magFilter = MTLSamplerMinMagFilterNearest; m_samplers[4] = [ctx->m_dev newSamplerStateWithDescriptor:sampDesc]; } } /* Asks the parent context's data factory to set up its gamma resources. */ void startRenderer() { static_cast(m_parent->getDataFactory())->SetupGammaResources(); } /* Clears the running flag, then waits on the current command buffer when a submission is in progress and the buffer's status is not NotEnqueued. */ void stopRenderer() { m_running 
= false; if (m_inProgress && m_cmdBuf.status != MTLCommandBufferStatusNotEnqueued) [m_cmdBuf waitUntilCompleted]; } /* Stops the renderer on destruction if it is still marked running. */ ~MetalCommandQueue() { if (m_running) stopRenderer(); } /* Binding used by the draw calls; set by setShaderDataBinding. */ MetalShaderDataBinding* m_boundData = nullptr; /* Binds the given data binding on the active encoder with the fill buffer index, remembers it for later draws, and sets all five samplers for the fragment and vertex stages. */ void setShaderDataBinding(const ObjToken& binding) { @autoreleasepool { MetalShaderDataBinding* cbind = binding.cast(); cbind->bind(m_enc, m_fillBuf); m_boundData = cbind; [m_enc setFragmentSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; [m_enc setVertexSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; } } ObjToken m_boundTarget; /* Ends the active encoder and opens a new render encoder on the target, picking the pass descriptor that matches the clear flags. If the target is the one already bound, the cached viewport and scissor are re-applied when non-empty; otherwise the target is recorded as the bound one. */ void _setRenderTarget(const ObjToken& target, bool clearColor, bool clearDepth) { @autoreleasepool { MetalTextureR* ctarget = target.cast(); [m_enc endEncoding]; if (clearColor && clearDepth) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearBothPassDesc]; else if (clearColor) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearColorPassDesc]; else if (clearDepth) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearDepthPassDesc]; else m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_passDesc]; [m_enc setFrontFacingWinding:MTLWindingCounterClockwise]; if (ctarget == m_boundTarget.get()) { if (m_boundVp.width || m_boundVp.height) [m_enc setViewport:m_boundVp]; if (m_boundScissor.width || m_boundScissor.height) [m_enc setScissorRect:m_boundScissor]; } else m_boundTarget = target; } } /* Switches render target without clearing colour or depth. */ void setRenderTarget(const ObjToken& target) { _setRenderTarget(target, false, false); } MTLViewport m_boundVp = {}; /* Caches and applies the viewport; the depth range is passed as (1 - zfar, 1 - znear). */ void setViewport(const SWindowRect& rect, float znear, float zfar) { m_boundVp = MTLViewport{double(rect.location[0]), double(rect.location[1]), double(rect.size[0]), double(rect.size[1]), 1.f - zfar, 1.f - znear}; [m_enc setViewport:m_boundVp]; } MTLScissorRect m_boundScissor = {}; /* Clips the rect to the bound target's dimensions, flips Y against the target height, then caches and applies it. Does nothing when no target is bound. */ void setScissor(const SWindowRect& rect) { if (m_boundTarget) { MetalTextureR* ctarget = m_boundTarget.cast(); SWindowRect intersectRect = rect.intersect(SWindowRect(0, 0, 
ctarget->m_width, ctarget->m_height)); m_boundScissor = MTLScissorRect{NSUInteger(intersectRect.location[0]), NSUInteger(ctarget->m_height - intersectRect.location[1] - intersectRect.size[1]), NSUInteger(intersectRect.size[0]), NSUInteger(intersectRect.size[1])}; [m_enc setScissorRect:m_boundScissor]; } } /* Pending render-texture resizes keyed by texture; applied and cleared in execute(). */ std::unordered_map> m_texResizes; /* Only records the requested size; the resize itself is deferred. */ void resizeRenderTexture(const ObjToken& tex, size_t width, size_t height) { MetalTextureR* ctex = tex.cast(); m_texResizes[ctex] = std::make_pair(width, height); } /* Invokes the handler immediately. */ void schedulePostFrameHandler(std::function&& func) { func(); } /* No-op here. */ void flushBufferUpdates() {} /* NOTE(review): m_clearColor is stored by setClearColor but not read anywhere in the visible part of this file - confirm where it is consumed. */ float m_clearColor[4] = {0.f,0.f,0.f,0.f}; /* Copies the four RGBA components. */ void setClearColor(const float rgba[4]) { m_clearColor[0] = rgba[0]; m_clearColor[1] = rgba[1]; m_clearColor[2] = rgba[2]; m_clearColor[3] = rgba[3]; } /* Restarts the render pass on the bound target with the requested clear flags; does nothing when no target is bound. */ void clearTarget(bool render=true, bool depth=true) { if (!m_boundTarget) return; _setRenderTarget(m_boundTarget, render, depth); } /* The four draw entry points forward to the pipeline of the currently bound data binding, passing this queue along. */ void draw(size_t start, size_t count) { m_boundData->m_pipeline.cast()->draw(*this, start, count); } void drawIndexed(size_t start, size_t count) { m_boundData->m_pipeline.cast()->drawIndexed(*this, start, count); } void drawInstances(size_t start, size_t count, size_t instCount) { m_boundData->m_pipeline.cast()->drawInstances(*this, start, count, instCount); } void drawInstancesIndexed(size_t start, size_t count, size_t instCount) { m_boundData->m_pipeline.cast()->drawInstancesIndexed(*this, start, count, instCount); } /* Copies the render texture's colour and/or depth into its bind textures at bindIdx. With more than one sample, the per-index m_blitColor / m_blitDepth render passes are encoded (each encoder is opened and ended immediately); otherwise a blit encoder copies the rect clipped to the texture, with Y flipped unless tlOrigin is set. */ void _resolveBindTexture(MetalTextureR* tex, const SWindowRect& rect, bool tlOrigin, int bindIdx, bool color, bool depth) { if (tex->samples() > 1) { if (color && tex->m_colorBindTex[bindIdx]) [[m_cmdBuf renderCommandEncoderWithDescriptor:tex->m_blitColor[bindIdx]] endEncoding]; if (depth && tex->m_depthBindTex[bindIdx]) [[m_cmdBuf renderCommandEncoderWithDescriptor:tex->m_blitDepth[bindIdx]] endEncoding]; } else { SWindowRect intersectRect = rect.intersect(SWindowRect(0, 0, tex->m_width, tex->m_height)); NSUInteger y = tlOrigin ? 
intersectRect.location[1] : int(tex->m_height) - intersectRect.location[1] - intersectRect.size[1]; /* The same origin is used for both the source and the destination of each copy. */ MTLOrigin origin = {NSUInteger(intersectRect.location[0]), y, 0}; id blitEnc = [m_cmdBuf blitCommandEncoder]; /* Colour copy, only when requested and a colour bind texture exists at bindIdx. */ if (color && tex->m_colorBindTex[bindIdx]) { [blitEnc copyFromTexture:tex->m_colorTex sourceSlice:0 sourceLevel:0 sourceOrigin:origin sourceSize:MTLSizeMake(intersectRect.size[0], intersectRect.size[1], 1) toTexture:tex->m_colorBindTex[bindIdx] destinationSlice:0 destinationLevel:0 destinationOrigin:origin]; } /* Depth copy, under the equivalent condition. */ if (depth && tex->m_depthBindTex[bindIdx]) { [blitEnc copyFromTexture:tex->m_depthTex sourceSlice:0 sourceLevel:0 sourceOrigin:origin sourceSize:MTLSizeMake(intersectRect.size[0], intersectRect.size[1], 1) toTexture:tex->m_depthBindTex[bindIdx] destinationSlice:0 destinationLevel:0 destinationOrigin:origin]; } [blitEnc endEncoding]; } } /* Ends the active render encoder, performs the resolve/copy above, then reopens a render encoder on the same texture (using the depth-clearing pass when clearDepth is set) and restores winding plus the cached viewport and scissor when they are non-empty. */ void resolveBindTexture(const ObjToken& texture, const SWindowRect& rect, bool tlOrigin, int bindIdx, bool color, bool depth, bool clearDepth) { MetalTextureR* tex = texture.cast(); @autoreleasepool { [m_enc endEncoding]; _resolveBindTexture(tex, rect, tlOrigin, bindIdx, color, depth); m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:clearDepth ? 
tex->m_clearDepthPassDesc : tex->m_passDesc]; [m_enc setFrontFacingWinding:MTLWindingCounterClockwise]; if (m_boundVp.width || m_boundVp.height) [m_enc setViewport:m_boundVp]; if (m_boundScissor.width || m_boundScissor.height) [m_enc setScissorRect:m_boundScissor]; } } /* Texture queued for presentation; consumed and reset in execute(). */ ObjToken m_needsDisplay; /* Only records the source texture; presentation itself happens in execute(). */ void resolveDisplay(const ObjToken& source) { m_needsDisplay = source; } id m_tessFactorBuffer = nullptr; /* Returns a private-storage buffer large enough for patchCount MTLQuadTessellationFactorsHalf entries. It is first created at twice the needed length; when later found too small it is replaced by a buffer of twice the new requirement, and the old contents are copied across with a blit encoder on the current command buffer. */ id ensureTessFactorBuffer(size_t patchCount) { size_t targetLength = sizeof(MTLQuadTessellationFactorsHalf) * patchCount; if (!m_tessFactorBuffer) { m_tessFactorBuffer = [m_ctx->m_dev newBufferWithLength:targetLength * 2 options:MTLResourceStorageModePrivate]; } else if (m_tessFactorBuffer.length < targetLength) { targetLength *= 2; id newBuf = [m_ctx->m_dev newBufferWithLength:targetLength options:MTLResourceStorageModePrivate]; id enc = [m_cmdBuf blitCommandEncoder]; [enc copyFromBuffer:m_tessFactorBuffer sourceOffset:0 toBuffer:newBuf destinationOffset:0 size:m_tessFactorBuffer.length]; [enc endEncoding]; m_tessFactorBuffer = newBuf; } return m_tessFactorBuffer; } /* Runs the given compute pipeline over the patch range to fill the tessellation factor buffer, then reopens the render pass on the bound target and rebinds data, samplers and the factor buffer so a patch draw can follow. */ void dispatchTessKernel(id computeState, size_t patchStart, size_t patchCount, uint32_t patchSize) { struct KernelPatchInfo { uint32_t numPatches; // total number of patches to process. // we need this because this value may // not be a multiple of threadgroup size. 
uint16_t numPatchesInThreadGroup; /* NOTE(review): on this single physical line the line comment that follows lexically hides everything after it; the original layout presumably had line breaks here - confirm before building. */ // number of patches processed by a // thread-group uint16_t numControlPointsPerPatch; } patchInfo = {uint32_t(patchCount), 32, uint16_t(patchSize)}; /* Close the render encoder before opening the compute encoder. */ [m_enc endEncoding]; m_enc = nullptr; /* The factor buffer is sized for patchStart + patchCount entries. */ id tessFactorBuf = ensureTessFactorBuffer(patchStart + patchCount); id computeEnc = [m_cmdBuf computeCommandEncoder]; [computeEnc setComputePipelineState:computeState]; m_boundData->bindCompute(computeEnc, m_fillBuf); [computeEnc setStageInRegion:MTLRegionMake1D(patchStart, patchCount)]; /* Patch info goes to buffer index 2; the factor buffer goes to index 3, offset to this range's first patch. */ [computeEnc setBytes:&patchInfo length:sizeof(patchInfo) atIndex:2]; [computeEnc setBuffer:tessFactorBuf offset:patchStart * sizeof(MTLQuadTessellationFactorsHalf) atIndex:3]; /* One thread per patch, 32 threads per threadgroup (matching numPatchesInThreadGroup above). */ [computeEnc dispatchThreads:MTLSizeMake(patchCount, 1, 1) threadsPerThreadgroup:MTLSizeMake(32, 1, 1)]; [computeEnc endEncoding]; /* Resume rendering on the bound target without clearing, then restore binding state. */ _setRenderTarget(m_boundTarget, false, false); m_boundData->bind(m_enc, m_fillBuf); [m_enc setFragmentSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; [m_enc setVertexSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; [m_enc setTessellationFactorBuffer:m_tessFactorBuffer offset:0 instanceStride:0]; } /* Set just before a command buffer is committed; cleared by that buffer's completion handler. */ bool m_inProgress = false; /* Cached render pass descriptors for the resolve and gamma presentation paths, keyed by values derived from texture addresses. */ std::unordered_map m_resolvePasses; std::unordered_map m_gammaPasses; /* Per-frame submission. Does nothing once the queue is stopped. Otherwise: updates dynamic buffers and textures for the fill index while holding the factory's data mutex, ends the active encoder, then either abandons the frame (previous submission still in progress, or texture resizes pending) or presents the pending display texture, swaps the fill/draw indices and commits the command buffer. */ void execute() { if (!m_running) return; @autoreleasepool { /* Update dynamic data here */ MetalDataFactoryImpl* gfxF = static_cast(m_parent->getDataFactory()); std::unique_lock datalk(gfxF->m_dataMutex); if (gfxF->m_dataHead) { for (BaseGraphicsData& d : *gfxF->m_dataHead) { if (d.m_DBufs) for (IGraphicsBufferD& b : *d.m_DBufs) static_cast&>(b).update(m_fillBuf); if (d.m_DTexs) for (ITextureD& t : *d.m_DTexs) static_cast(t).update(m_fillBuf); } } /* Pool-owned dynamic buffers are updated the same way. */ if (gfxF->m_poolHead) { for (BaseGraphicsPool& p : *gfxF->m_poolHead) { if (p.m_DBufs) for (IGraphicsBufferD& b : *p.m_DBufs) static_cast&>(b).update(m_fillBuf); } } datalk.unlock(); [m_enc endEncoding]; m_enc = nullptr; /* Abandon if in progress (renderer too slow) */ if (m_inProgress) { m_cmdBuf = [m_ctx->m_q 
commandBuffer]; return; } /* Perform texture resizes */ /* Applying resizes also discards this frame: the command buffer is replaced and execute returns. */ if (m_texResizes.size()) { for (const auto& resize : m_texResizes) resize.first->resize(m_ctx, resize.second.first, resize.second.second); m_texResizes.clear(); m_cmdBuf = [m_ctx->m_q commandBuffer]; return; } /* Wrap up and present if needed */ if (m_needsDisplay) { MetalContext::Window& w = m_ctx->m_windows[m_parentWindow]; { std::unique_lock lk(w.m_resizeLock); /* A pending layer resize is applied here and this frame's presentation is dropped. NOTE(review): unlike the two abandon paths above, this return neither commits nor replaces m_cmdBuf - confirm that is intended. */ if (w.m_needsResize) { w.m_metalLayer.drawableSize = w.m_size; w.m_needsResize = NO; m_needsDisplay.reset(); return; } } id drawable = [w.m_metalLayer nextDrawable]; if (drawable) { MetalTextureR* src = m_needsDisplay.cast(); id dest = drawable.texture; /* Only present when the source colour texture and the drawable texture have matching dimensions. */ if (src->m_colorTex.width == dest.width && src->m_colorTex.height == dest.height) { /* Gamma path: copy colour into bind texture 0, then draw a 4-vertex triangle strip into the drawable using the factory's gamma binding. The pass descriptor is cached per drawable texture address. */ if (gfxF->m_gamma != 1.f) { SWindowRect rect(0, 0, src->m_width, src->m_height); _resolveBindTexture(src, rect, true, 0, true, false); uintptr_t key = uintptr_t(dest); auto passSearch = m_gammaPasses.find(key); if (passSearch == m_gammaPasses.end()) { MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; desc.colorAttachments[0].texture = dest; desc.colorAttachments[0].loadAction = MTLLoadActionLoad; desc.colorAttachments[0].storeAction = MTLStoreActionStore; passSearch = m_gammaPasses.insert(std::make_pair(key, desc)).first; } id enc = [m_cmdBuf renderCommandEncoderWithDescriptor:passSearch->second]; MetalShaderDataBinding* gammaBinding = gfxF->m_gammaBinding.cast(); /* Temporarily point the gamma binding's first texture at the source; it is reset right after the draw. Note this bind uses the draw buffer index. */ gammaBinding->m_texs[0].tex = m_needsDisplay.get(); gammaBinding->bind(enc, m_drawBuf); [enc setFragmentSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; [enc setVertexSamplerStates:m_samplers withRange:NSMakeRange(0, 5)]; [enc drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4]; gammaBinding->m_texs[0].tex.reset(); [enc endEncoding]; } else { /* Non-gamma path: a source with more than one sample is resolved into the drawable through a cached pass (key is the XOR of the two texture addresses); otherwise the colour texture is blitted. */ if (src->samples() > 1) { uintptr_t key = uintptr_t(src->m_colorTex) ^ uintptr_t(dest); auto passSearch = m_resolvePasses.find(key); if (passSearch == 
m_resolvePasses.end()) { /* Build and cache a pass that loads the source colour texture and stores via multisample resolve into the drawable. */ MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; desc.colorAttachments[0].texture = src->m_colorTex; desc.colorAttachments[0].loadAction = MTLLoadActionLoad; desc.colorAttachments[0].storeAction = MTLStoreActionMultisampleResolve; desc.colorAttachments[0].resolveTexture = dest; passSearch = m_resolvePasses.insert(std::make_pair(key, desc)).first; } /* An encoder that is opened and immediately ended is enough to run the pass. */ [[m_cmdBuf renderCommandEncoderWithDescriptor:passSearch->second] endEncoding]; } else { /* Whole-texture copy sized by the drawable's dimensions. */ id blitEnc = [m_cmdBuf blitCommandEncoder]; [blitEnc copyFromTexture:src->m_colorTex sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(dest.width, dest.height, 1) toTexture:dest destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; [blitEnc endEncoding]; } } [m_cmdBuf presentDrawable:drawable]; } } m_needsDisplay.reset(); } /* The buffer index just filled becomes the draw index and the fill index toggles; then register the completion handler, commit, and start a fresh command buffer. */ m_drawBuf = m_fillBuf; m_fillBuf ^= 1; [m_cmdBuf addCompletedHandler:^(id buf) {m_inProgress = false;}]; m_inProgress = true; [m_cmdBuf commit]; m_cmdBuf = [m_ctx->m_q commandBuffer]; } } }; /* Non-indexed draw on the queue's encoder, offset by the bound data's base vertex. */ void MetalShaderPipeline::draw(MetalCommandQueue& q, size_t start, size_t count) { [q.m_enc drawPrimitives:m_drawPrim vertexStart:start + q.m_boundData->m_baseVert vertexCount:count]; } /* Indexed draw with 32-bit indices (hence the start*4 byte offset), one instance, base instance 0. */ void MetalShaderPipeline::drawIndexed(MetalCommandQueue& q, size_t start, size_t count) { [q.m_enc drawIndexedPrimitives:m_drawPrim indexCount:count indexType:MTLIndexTypeUInt32 indexBuffer:GetBufferGPUResource(q.m_boundData->m_ibuf, q.m_fillBuf) indexBufferOffset:start*4 instanceCount:1 baseVertex:q.m_boundData->m_baseVert baseInstance:0]; } /* Instanced non-indexed draw; also applies the bound data's base instance. */ void MetalShaderPipeline::drawInstances(MetalCommandQueue& q, size_t start, size_t count, size_t instCount) { [q.m_enc drawPrimitives:m_drawPrim vertexStart:start + q.m_boundData->m_baseVert vertexCount:count instanceCount:instCount baseInstance:q.m_boundData->m_baseInst]; } /* Instanced indexed draw using the bound data's base vertex and base instance. */ void MetalShaderPipeline::drawInstancesIndexed(MetalCommandQueue& q, size_t start, size_t count, size_t instCount) 
{ [q.m_enc drawIndexedPrimitives:m_drawPrim indexCount:count indexType:MTLIndexTypeUInt32 indexBuffer:GetBufferGPUResource(q.m_boundData->m_ibuf, q.m_fillBuf) indexBufferOffset:start*4 instanceCount:instCount baseVertex:q.m_boundData->m_baseVert baseInstance:q.m_boundData->m_baseInst]; } /* Tessellation draws first dispatch the factor kernel for the patch range through the queue, then issue the patch draw on the encoder the queue reopened. Here start/count are passed as patch start and patch count. */ void MetalTessellationShaderPipeline::draw(MetalCommandQueue& q, size_t start, size_t count) { q.dispatchTessKernel(m_computeState, start, count, m_patchSize); [q.m_enc drawPatches:m_patchSize patchStart:start patchCount:count patchIndexBuffer:nullptr patchIndexBufferOffset:0 instanceCount:1 baseInstance:0]; } /* Indexed patches: the bound index buffer is supplied as the control point index buffer with a start*4 byte offset, and patchStart is passed as 0. */ void MetalTessellationShaderPipeline::drawIndexed(MetalCommandQueue& q, size_t start, size_t count) { q.dispatchTessKernel(m_computeState, start, count, m_patchSize); [q.m_enc drawIndexedPatches:m_patchSize patchStart:0 patchCount:count patchIndexBuffer:nullptr patchIndexBufferOffset:0 controlPointIndexBuffer:GetBufferGPUResource(q.m_boundData->m_ibuf, q.m_fillBuf) controlPointIndexBufferOffset:start*4 instanceCount:1 baseInstance:0]; } /* Instanced patch draw; baseInstance is passed as 0 rather than the bound data's base instance. */ void MetalTessellationShaderPipeline::drawInstances(MetalCommandQueue& q, size_t start, size_t count, size_t instCount) { q.dispatchTessKernel(m_computeState, start, count, m_patchSize); [q.m_enc drawPatches:m_patchSize patchStart:start patchCount:count patchIndexBuffer:nullptr patchIndexBufferOffset:0 instanceCount:instCount baseInstance:0]; } /* Instanced, indexed patch draw; same index-buffer handling as drawIndexed. */ void MetalTessellationShaderPipeline::drawInstancesIndexed(MetalCommandQueue& q, size_t start, size_t count, size_t instCount) { q.dispatchTessKernel(m_computeState, start, count, m_patchSize); [q.m_enc drawIndexedPatches:m_patchSize patchStart:0 patchCount:count patchIndexBuffer:nullptr patchIndexBufferOffset:0 controlPointIndexBuffer:GetBufferGPUResource(q.m_boundData->m_ibuf, q.m_fillBuf) controlPointIndexBufferOffset:start*4 instanceCount:instCount baseInstance:0]; } /* Creates the BaseGraphicsData that every resource made through this context is constructed with. */ MetalDataFactory::Context::Context(MetalDataFactory& parent __BooTraceArgs) : m_parent(parent), m_data(new 
BaseGraphicsData(static_cast(parent) __BooTraceArgsUse)) {} MetalDataFactory::Context::~Context() {} /* Resource factory methods: each constructs the Metal-backed object inside an autorelease pool, passing this context's m_data and the factory's MetalContext, and returns it wrapped in a token. */ ObjToken MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalGraphicsBufferS(m_data, use, factory.m_ctx, data, stride, count)}; } } /* Dynamic buffers additionally receive the parent context's command queue. */ ObjToken MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); return {new MetalGraphicsBufferD(m_data, q, use, factory.m_ctx, stride, count)}; } } /* NOTE(review): clampMode is accepted by the texture factories below but is not forwarded to any of the constructors. */ ObjToken MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt, TextureClampMode clampMode, const void* data, size_t sz) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalTextureS(m_data, factory.m_ctx, width, height, mips, fmt, data, sz)}; } } /* Array variant; also forwards the layer count. */ ObjToken MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips, TextureFormat fmt, TextureClampMode clampMode, const void* data, size_t sz) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalTextureSA(m_data, factory.m_ctx, width, height, layers, mips, fmt, data, sz)}; } } /* Dynamic textures, like dynamic buffers, receive the command queue. */ ObjToken MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt, TextureClampMode clampMode) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); return {new MetalTextureD(m_data, q, factory.m_ctx, width, height, fmt)}; } } /* Render textures are created with the MetalContext's sample count and the requested numbers of colour and depth bind textures. */ ObjToken MetalDataFactory::Context::newRenderTexture(size_t width, size_t height, TextureClampMode clampMode, size_t colorBindCount, size_t depthBindCount) { @autoreleasepool { MetalDataFactoryImpl& factory = 
static_cast(m_parent); return {new MetalTextureR(m_data, factory.m_ctx, width, height, factory.m_ctx->m_sampleCount, colorBindCount, depthBindCount)}; } } /* Builds a vertex format from the element descriptors; baseVert and baseInst are not used here. */ ObjToken MetalDataFactory::Context::newVertexFormat(size_t elementCount, const VertexElementDescriptor* elements, size_t baseVert, size_t baseInst) { @autoreleasepool { return {new struct MetalVertexFormat(m_data, elementCount, elements)}; } } /* Prepares the vertex stage (entry point "vmain") and fragment stage (entry point "fmain") through the factory, hands both to a new MetalShaderPipeline, then configures it. The MetalContext's sample count is passed only when depthAttachment is set; otherwise 1 is used. */ ObjToken MetalDataFactory::Context::newShaderPipeline(const char* vertSource, const char* fragSource, std::vector* vertBlobOut, std::vector* fragBlobOut, const ObjToken& vtxFmt, BlendFactor srcFac, BlendFactor dstFac, Primitive prim, ZTest depthTest, bool depthWrite, bool colorWrite, bool alphaWrite, CullMode culling, bool overwriteAlpha, bool depthAttachment) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); MetalShareableShader::Token vertShader = factory.PrepareShaderStage(vertSource, vertBlobOut, @"vmain"); MetalShareableShader::Token fragShader = factory.PrepareShaderStage(fragSource, fragBlobOut, @"fmain"); MetalShaderPipeline* ret = new MetalShaderPipeline(m_data, std::move(vertShader), std::move(fragShader)); ret->setup(factory.m_ctx, vtxFmt, depthAttachment ? 
factory.m_ctx->m_sampleCount : 1, srcFac, dstFac, prim, depthTest, depthWrite, colorWrite, alphaWrite, overwriteAlpha, culling, depthAttachment); return {ret}; } } /* Tessellation variant of newShaderPipeline. Reports a Fatal-level log message when the factory's m_hasTessellation flag is false, prepares the compute ("cmain"), fragment ("fmain") and evaluation ("emain") stages, and sets the pipeline up with Primitive::Patches and the given patch size. Sample count selection follows the same depthAttachment rule. */ ObjToken MetalDataFactory::Context::newTessellationShaderPipeline( const char* computeSource, const char* fragSource, const char* evaluationSource, std::vector* computeBlobOut, std::vector* fragBlobOut, std::vector* evaluationBlobOut, const ObjToken& vtxFmt, BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize, ZTest depthTest, bool depthWrite, bool colorWrite, bool alphaWrite, CullMode culling, bool overwriteAlpha, bool depthAttachment) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); if (!factory.m_hasTessellation) Log.report(logvisor::Fatal, "Device does not support tessellation"); MetalShareableShader::Token computeShader = factory.PrepareShaderStage(computeSource, computeBlobOut, @"cmain"); MetalShareableShader::Token fragShader = factory.PrepareShaderStage(fragSource, fragBlobOut, @"fmain"); MetalShareableShader::Token evaluationShader = factory.PrepareShaderStage(evaluationSource, evaluationBlobOut, @"emain"); MetalTessellationShaderPipeline* ret = new MetalTessellationShaderPipeline(m_data, std::move(computeShader), std::move(fragShader), std::move(evaluationShader), patchSize); ret->setup(factory.m_ctx, vtxFmt, depthAttachment ? 
factory.m_ctx->m_sampleCount : 1, srcFac, dstFac, Primitive::Patches, depthTest, depthWrite, colorWrite, alphaWrite, overwriteAlpha, culling, depthAttachment); return {ret}; } } /* Forwards the arguments to a new MetalShaderDataBinding; vtxFormat is accepted but not passed along. */ ObjToken MetalDataFactory::Context::newShaderDataBinding(const ObjToken& pipeline, const ObjToken& vtxFormat, const ObjToken& vbo, const ObjToken& instVbo, const ObjToken& ibo, size_t ubufCount, const ObjToken* ubufs, const PipelineStage* ubufStages, const size_t* ubufOffs, const size_t* ubufSizes, size_t texCount, const ObjToken* texs, const int* texBindIdxs, const bool* depthBind, size_t baseVert, size_t baseInst) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalShaderDataBinding(m_data, factory.m_ctx, pipeline, vbo, instVbo, ibo, ubufCount, ubufs, ubufStages, ubufOffs, ubufSizes, texCount, texs, texBindIdxs, depthBind, baseVert, baseInst)}; } } /* Runs the transaction callback against a freshly constructed Context. */ void MetalDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans __BooTraceArgs) { MetalDataFactory::Context ctx(*this __BooTraceArgsUse); trans(ctx); } /* Creates a dynamic buffer attached to a new BaseGraphicsPool instead of a transaction's data object. */ ObjToken MetalDataFactoryImpl::newPoolBuffer(BufferUse use, size_t stride, size_t count __BooTraceArgs) { ObjToken pool(new BaseGraphicsPool(*this __BooTraceArgsUse)); MetalCommandQueue* q = static_cast(m_parent->getCommandQueue()); return {new MetalGraphicsBufferD(pool, q, use, m_ctx, stride, count)}; } /* Construction helpers returning the command queue and the data factory as unique_ptr. */ std::unique_ptr _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, IGraphicsContext* parent) { return std::make_unique(ctx, parentWindow, parent); } std::unique_ptr _NewMetalDataFactory(IGraphicsContext* parent, MetalContext* ctx) { return std::make_unique(parent, ctx); } } #endif