#include "../mac/CocoaCommon.hpp" #if BOO_HAS_METAL #include "logvisor/logvisor.hpp" #include "boo/IApplication.hpp" #include "boo/graphicsdev/Metal.hpp" #include "boo/IGraphicsContext.hpp" #include "Common.hpp" #include #include #include #include "xxhash.h" #if !__has_feature(objc_arc) #error ARC Required #endif #define MAX_UNIFORM_COUNT 8 #define MAX_TEXTURE_COUNT 8 namespace boo { static logvisor::Module Log("boo::Metal"); struct MetalCommandQueue; class MetalDataFactoryImpl; /* Shareable shader entry: stores the shader object `s` alongside the source/binary hash keys handed to IShareableShader */ struct MetalShareableShader : IShareableShader { id m_shader; MetalShareableShader(MetalDataFactoryImpl& fac, uint64_t srcKey, uint64_t binKey, id s) : IShareableShader(fac, srcKey, binKey), m_shader(s) {} }; /* Metal implementation of the data factory: owns the shared-shader cache, the source-hash -> binary-hash map and the temp-file path used when compiling shader libraries offline */ class MetalDataFactoryImpl : public MetalDataFactory, public GraphicsDataFactoryHead { friend struct MetalCommandQueue; friend class MetalDataFactory::Context; IGraphicsContext* m_parent; std::unordered_map> m_sharedShaders; struct MetalContext* m_ctx; uint32_t m_sampleCount; public: std::unordered_map m_sourceToBinary; char m_libfile[MAXPATHLEN]; bool m_hasCompiler = false; /* Builds the temp metallib path and probes for the offline compiler only when "--metal-compile" is among the application args */ MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) : m_parent(parent), m_ctx(ctx), m_sampleCount(sampleCount) { /* TMPDIR may be unset; passing a null pointer to %s is undefined behaviour, so fall back to /tmp/ */ const char* tmpDir = getenv("TMPDIR"); snprintf(m_libfile, MAXPATHLEN, "%sboo_metal_shader.metallib", tmpDir ? tmpDir : "/tmp/"); for (auto& arg : APP->getArgs()) if (arg == "--metal-compile") { m_hasCompiler = CheckForMetalCompiler(); break; } } ~MetalDataFactoryImpl() = default; Platform platform() const { return Platform::Metal; } const char* platformName() const { return "Metal"; } void commitTransaction(const std::function&); ObjToken newPoolBuffer(BufferUse use, size_t stride, size_t count); /* Drops a shader from the shared cache. m_sharedShaders is looked up and populated with the binary hash in newShaderPipeline, so the entry must be erased by binKey (erasing by srcKey never matched) */ void _unregisterShareableShader(uint64_t srcKey, uint64_t binKey) { m_sharedShaders.erase(binKey); } /* Runs `xcrun -sdk macosx metal` in a child process and reports whether its exit status indicates the compiler is present */ static bool CheckForMetalCompiler() { pid_t pid = fork(); /* fork failed: waitpid(-1, ...) below would wait on an arbitrary child */ if (pid < 0) return false; if (!pid) { execlp("xcrun", "xcrun", "-sdk", "macosx", "metal", NULL); /* xcrun returns 72 if metal command not found; * emulate that if xcrun not found. _exit avoids running the parent's atexit handlers and re-flushing its stdio buffers in the child */ _exit(72); 
} int status, ret; while ((ret = waitpid(pid, &status, 0)) < 0 && errno == EINTR) {} if (ret < 0) return false; /* Only trust the exit code when the child actually exited (not killed by a signal) */ return WIFEXITED(status) && WEXITSTATUS(status) == 1; } /* Fills blobOut with a cacheable form of `source`: without a compiler, a 0 byte followed by the NUL-terminated source; with a compiler, a 1 byte followed by the metallib produced by piping the source through `metal` and `metallib`. Records srcKey -> binKey in m_sourceToBinary and returns the XXH64 hash of the blob, or 0 on failure (blobOut is left empty in that case) */ uint64_t CompileLib(std::vector& blobOut, const char* source, uint64_t srcKey) { if (!m_hasCompiler) { /* Cache the source if there's no compiler */ size_t sourceLen = strlen(source); /* First byte unset to indicate source data */ blobOut.resize(sourceLen + 2); memcpy(&blobOut[1], source, sourceLen); } else { /* Cache the binary otherwise */ int compilerOut[2]; int compilerIn[2]; if (pipe(compilerOut) < 0) { fprintf(stderr, "pipe fail %s\n", strerror(errno)); return 0; } if (pipe(compilerIn) < 0) { fprintf(stderr, "pipe fail %s\n", strerror(errno)); close(compilerOut[0]); close(compilerOut[1]); return 0; } /* Pipe source write to compiler */ pid_t compilerPid = fork(); if (compilerPid < 0) { fprintf(stderr, "fork fail %s\n", strerror(errno)); close(compilerOut[0]); close(compilerOut[1]); close(compilerIn[0]); close(compilerIn[1]); return 0; } if (!compilerPid) { dup2(compilerIn[0], STDIN_FILENO); dup2(compilerOut[1], STDOUT_FILENO); close(compilerOut[0]); close(compilerOut[1]); close(compilerIn[0]); close(compilerIn[1]); execlp("xcrun", "xcrun", "-sdk", "macosx", "metal", "-o", "/dev/stdout", "-Wno-unused-variable", "-Wno-unused-const-variable", "-Wno-unused-function", "-x", "metal", "-", NULL); fprintf(stderr, "execlp fail %s\n", strerror(errno)); /* _exit: do not run the parent's atexit handlers or flush its stdio buffers from the child */ _exit(1); } close(compilerIn[0]); close(compilerOut[1]); /* Pipe compiler to linker */ pid_t linkerPid = fork(); if (linkerPid < 0) { fprintf(stderr, "fork fail %s\n", strerror(errno)); /* Closing both ends lets the compiler child see EOF and terminate so it can be reaped */ close(compilerOut[0]); close(compilerIn[1]); waitpid(compilerPid, nullptr, 0); return 0; } if (!linkerPid) { dup2(compilerOut[0], STDIN_FILENO); close(compilerOut[0]); close(compilerIn[1]); /* metallib doesn't like outputting to a pipe, so temp file will have to do */ execlp("xcrun", "xcrun", "-sdk", "macosx", "metallib", "-", "-o", m_libfile, NULL); fprintf(stderr, "execlp fail %s\n", strerror(errno)); _exit(1); } close(compilerOut[0]); /* Stream in source */ const char* inPtr = source; size_t inRem = strlen(source); while (inRem) { ssize_t writeRes = write(compilerIn[1], inPtr, inRem); if (writeRes < 0) { fprintf(stderr, "write fail %s\n", strerror(errno)); break; } inPtr += writeRes; inRem -= writeRes; } close(compilerIn[1]); /* Wait for completion; wait on both children unconditionally so neither is left unreaped when the first wait fails */ int compilerStat = 0, linkerStat = 0; pid_t compilerRes = waitpid(compilerPid, &compilerStat, 0); pid_t linkerRes = waitpid(linkerPid, &linkerStat, 0); if (compilerRes < 0 || linkerRes < 0) { fprintf(stderr, "waitpid fail %s\n", 
strerror(errno)); return 0; } /* A child killed by a signal has no meaningful exit code; treat it as failure */ if (!WIFEXITED(compilerStat) || WEXITSTATUS(compilerStat) || !WIFEXITED(linkerStat) || WEXITSTATUS(linkerStat)) return 0; /* Copy temp file into buffer with first byte set to indicate binary data */ FILE* fin = fopen(m_libfile, "rb"); if (!fin) { fprintf(stderr, "fopen fail %s\n", strerror(errno)); return 0; } fseek(fin, 0, SEEK_END); long libLen = ftell(fin); fseek(fin, 0, SEEK_SET); if (libLen < 0) { fclose(fin); return 0; } blobOut.resize(libLen + 1); blobOut[0] = 1; size_t readLen = fread(blobOut.data() + 1, 1, libLen, fin); fclose(fin); /* A short read would cache a corrupt library; report failure with an empty blob instead */ if (readLen != size_t(libLen)) { blobOut.clear(); return 0; } } XXH64_state_t hashState; XXH64_reset(&hashState, 0); XXH64_update(&hashState, blobOut.data(), blobOut.size()); uint64_t binKey = XXH64_digest(&hashState); m_sourceToBinary[srcKey] = binKey; return binKey; } /* Compiles `source` at runtime into libOut via newLibraryWithSource. When srcKey is non-zero, hashes a 0 byte followed by the NUL-terminated source (the same bytes the source-form blob above contains), records srcKey -> binKey and returns binKey; otherwise returns 0 */ uint64_t CompileLib(__strong id& libOut, const char* source, uint64_t srcKey, MTLCompileOptions* compOpts, NSError * _Nullable *err) { libOut = [m_ctx->m_dev newLibraryWithSource:@(source) options:compOpts error:err]; if (srcKey) { XXH64_state_t hashState; XXH64_reset(&hashState, 0); uint8_t zero = 0; XXH64_update(&hashState, &zero, 1); XXH64_update(&hashState, source, strlen(source) + 1); uint64_t binKey = XXH64_digest(&hashState); m_sourceToBinary[srcKey] = binKey; return binKey; } return 0; } }; #define MTL_STATIC MTLResourceCPUCacheModeWriteCombined|MTLResourceStorageModeManaged #define MTL_DYNAMIC MTLResourceCPUCacheModeWriteCombined|MTLResourceStorageModeManaged /* Static buffer: one Metal buffer of stride * count bytes created from `data` at construction */ class MetalGraphicsBufferS : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; MetalGraphicsBufferS(const ObjToken& parent, BufferUse use, MetalContext* ctx, const void* data, size_t stride, size_t count) : GraphicsDataNode(parent), m_stride(stride), m_count(count), m_sz(stride * count) { m_buf = [ctx->m_dev newBufferWithBytes:data length:m_sz options:MTL_STATIC]; } public: size_t m_stride; size_t m_count; size_t m_sz; id m_buf; ~MetalGraphicsBufferS() = default; }; /* Dynamic buffer: a CPU staging copy plus two Metal buffers (members continue on the following line) */ template class MetalGraphicsBufferD : public GraphicsDataNode { friend class MetalDataFactory; friend class MetalDataFactoryImpl; friend struct MetalCommandQueue; MetalCommandQueue* m_q; std::unique_ptr m_cpuBuf; int 
m_validSlots = 0; /* Allocates the CPU staging buffer and both Metal buffers, each stride * count bytes */ MetalGraphicsBufferD(const ObjToken& parent, MetalCommandQueue* q, BufferUse use, MetalContext* ctx, size_t stride, size_t count) : GraphicsDataNode(parent), m_q(q), m_stride(stride), m_count(count), m_sz(stride * count) { m_cpuBuf.reset(new uint8_t[m_sz]); m_bufs[0] = [ctx->m_dev newBufferWithLength:m_sz options:MTL_DYNAMIC]; m_bufs[1] = [ctx->m_dev newBufferWithLength:m_sz options:MTL_DYNAMIC]; } public: size_t m_stride; size_t m_count; size_t m_sz; id m_bufs[2]; /* NOTE(review): sibling classes declare `~Class() = default;` here; this looks like a destructor missing its `~` - confirm */ MetalGraphicsBufferD() = default; /* Copies the CPU staging buffer into Metal buffer `b` unless bit `b` of m_validSlots is already set, then sets that bit */ void update(int b) { int slot = 1 << b; if ((slot & m_validSlots) == 0) { id res = m_bufs[b]; memcpy(res.contents, m_cpuBuf.get(), m_sz); [res didModifyRange:NSMakeRange(0, m_sz)]; m_validSlots |= slot; } } /* Copies at most m_sz bytes of `data` into the staging buffer and marks both slots stale */ void load(const void* data, size_t sz) { size_t bufSz = std::min(sz, m_sz); memcpy(m_cpuBuf.get(), data, bufSz); m_validSlots = 0; } /* Returns the staging buffer for direct writes, or nullptr when `sz` exceeds the buffer size */ void* map(size_t sz) { if (sz > m_sz) return nullptr; return m_cpuBuf.get(); } /* Marks both slots stale so the next update() re-uploads the staging buffer */ void unmap() { m_validSlots = 0; } }; /* Static 2D texture created from caller-supplied pixel data */ class MetalTextureS : public GraphicsDataNode { friend class MetalDataFactory; MetalTextureS(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t mips, TextureFormat fmt, const void* data, size_t sz) : GraphicsDataNode(parent) { /* Defaults describe RGBA8: pitch numerator 4, denominator 1 */ MTLPixelFormat pfmt = MTLPixelFormatRGBA8Unorm; NSUInteger ppitchNum = 4; NSUInteger ppitchDenom = 1; switch (fmt) { case TextureFormat::I8: pfmt = MTLPixelFormatR8Unorm; ppitchNum = 1; break; /* DXT1 has no break and falls through into `default: break;`, which has no further effect */ case TextureFormat::DXT1: pfmt = MTLPixelFormatBC1_RGBA; ppitchNum = 1; ppitchDenom = 2; default: break; } @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pfmt width:width height:height mipmapped:(mips>1)?YES:NO]; desc.usage = MTLTextureUsageShaderRead; desc.mipmapLevelCount = mips; m_tex = [ctx->m_dev newTextureWithDescriptor:desc]; /* NOTE(review): the loop below is truncated in this copy of the file (header and most of its body are missing); only the halving of width/height down to 1 is visible */ const uint8_t* dataIt = reinterpret_cast(data); for (size_t i=0 ; i 1) width /= 2; if (height > 1) height /= 2; } } } public: id m_tex; ~MetalTextureS() = default; }; class 
MetalTextureSA : public GraphicsDataNode { /* Static 2D array texture with `layers` slices, created from caller-supplied pixel data */ friend class MetalDataFactory; MetalTextureSA(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t layers, size_t mips, TextureFormat fmt, const void* data, size_t sz) : GraphicsDataNode(parent) { /* Default is RGBA8 with a pitch factor of 4; I8 is the only other format handled here */ MTLPixelFormat pfmt = MTLPixelFormatRGBA8Unorm; NSUInteger ppitch = 4; switch (fmt) { case TextureFormat::I8: pfmt = MTLPixelFormatR8Unorm; ppitch = 1; break; default: break; } @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pfmt width:width height:height mipmapped:(mips>1)?YES:NO]; desc.textureType = MTLTextureType2DArray; desc.arrayLength = layers; desc.mipmapLevelCount = mips; desc.usage = MTLTextureUsageShaderRead; m_tex = [ctx->m_dev newTextureWithDescriptor:desc]; /* NOTE(review): the loop below is truncated in this copy of the file; only the halving of width/height down to 1 is visible */ const uint8_t* dataIt = reinterpret_cast(data); for (size_t i=0 ; i 1) width /= 2; if (height > 1) height /= 2; } } } public: id m_tex; ~MetalTextureSA() = default; }; /* Dynamic 2D texture: a CPU staging copy plus two Metal textures selected by slot index */ class MetalTextureD : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; MetalCommandQueue* m_q; size_t m_width = 0; size_t m_height = 0; std::unique_ptr m_cpuBuf; size_t m_cpuSz; size_t m_pxPitch; int m_validSlots = 0; MetalTextureD(const ObjToken& parent, MetalCommandQueue* q, MetalContext* ctx, size_t width, size_t height, TextureFormat fmt) : GraphicsDataNode(parent), m_q(q), m_width(width), m_height(height) { /* NOTE(review): `format` and m_pxPitch are left unset on the default path; presumably a Fatal report does not return - confirm */ MTLPixelFormat format; switch (fmt) { case TextureFormat::RGBA8: format = MTLPixelFormatRGBA8Unorm; m_pxPitch = 4; break; case TextureFormat::I8: format = MTLPixelFormatR8Unorm; m_pxPitch = 1; break; default: Log.report(logvisor::Fatal, "unsupported tex format"); } /* Staging buffer holds width * height pixels at m_pxPitch bytes each */ m_cpuSz = width * height * m_pxPitch; m_cpuBuf.reset(new uint8_t[m_cpuSz]); @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format width:width height:height mipmapped:NO]; desc.usage = MTLTextureUsageShaderRead; m_texs[0] = [ctx->m_dev 
newTextureWithDescriptor:desc]; m_texs[1] = [ctx->m_dev newTextureWithDescriptor:desc]; } } public: id m_texs[2]; ~MetalTextureD() = default; /* Uploads the CPU staging buffer into texture `b` unless bit `b` of m_validSlots is already set, then sets that bit */ void update(int b) { int slot = 1 << b; if ((slot & m_validSlots) == 0) { id res = m_texs[b]; [res replaceRegion:MTLRegionMake2D(0, 0, m_width, m_height) mipmapLevel:0 withBytes:m_cpuBuf.get() bytesPerRow:m_width*m_pxPitch]; m_validSlots |= slot; } } /* Copies at most m_cpuSz bytes of `data` into the staging buffer and marks both slots stale */ void load(const void* data, size_t sz) { size_t bufSz = std::min(sz, m_cpuSz); memcpy(m_cpuBuf.get(), data, bufSz); m_validSlots = 0; } /* Returns the staging buffer for direct writes, or nullptr when `sz` exceeds its size */ void* map(size_t sz) { if (sz > m_cpuSz) return nullptr; return m_cpuBuf.get(); } /* Marks both slots stale so the next update() re-uploads */ void unmap() { m_validSlots = 0; } }; #define MAX_BIND_TEXS 4 /* Render target: colour and depth textures, optional bindable copies of each, and four render pass descriptors differing only in load actions */ class MetalTextureR : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; size_t m_width = 0; size_t m_height = 0; size_t m_samples = 0; size_t m_colorBindCount; size_t m_depthBindCount; /* (Re)creates every texture and pass descriptor for the current m_width/m_height/m_samples; called from the constructor and from resize() */ void Setup(MetalContext* ctx) { if (m_colorBindCount > MAX_BIND_TEXS) Log.report(logvisor::Fatal, "too many color bindings for render texture"); if (m_depthBindCount > MAX_BIND_TEXS) Log.report(logvisor::Fatal, "too many depth bindings for render texture"); @autoreleasepool { MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm width:m_width height:m_height mipmapped:NO]; desc.storageMode = MTLStorageModePrivate; /* Render targets: multisampled when m_samples > 1, plain 2D otherwise; the depth target reuses the descriptor with a Depth32Float pixel format */ if (m_samples > 1) { desc.textureType = MTLTextureType2DMultisample; desc.sampleCount = m_samples; desc.usage = MTLTextureUsageRenderTarget; m_colorTex = [ctx->m_dev newTextureWithDescriptor:desc]; desc.pixelFormat = MTLPixelFormatDepth32Float; m_depthTex = [ctx->m_dev newTextureWithDescriptor:desc]; } else { desc.textureType = MTLTextureType2D; desc.sampleCount = 1; desc.usage = MTLTextureUsageRenderTarget; m_colorTex = [ctx->m_dev newTextureWithDescriptor:desc]; desc.pixelFormat = MTLPixelFormatDepth32Float; m_depthTex = [ctx->m_dev newTextureWithDescriptor:desc]; } /* Bindable copies are always single-sample 2D textures */ desc.textureType = MTLTextureType2D; 
desc.sampleCount = 1; desc.usage = MTLTextureUsageShaderRead; /* NOTE(review): the two bind-texture loops below are truncated in this copy of the file and are kept verbatim */ if (m_colorBindCount) { desc.pixelFormat = MTLPixelFormatBGRA8Unorm; for (int i=0 ; im_dev newTextureWithDescriptor:desc]; } if (m_depthBindCount) { desc.pixelFormat = MTLPixelFormatDepth32Float; for (int i=0 ; im_dev newTextureWithDescriptor:desc]; } /* Pass that preserves both colour and depth */ { m_passDesc = [MTLRenderPassDescriptor renderPassDescriptor]; m_passDesc.colorAttachments[0].texture = m_colorTex; m_passDesc.colorAttachments[0].loadAction = MTLLoadActionLoad; m_passDesc.colorAttachments[0].storeAction = MTLStoreActionStore; m_passDesc.depthAttachment.texture = m_depthTex; m_passDesc.depthAttachment.loadAction = MTLLoadActionLoad; m_passDesc.depthAttachment.storeAction = MTLStoreActionStore; m_passDesc.depthAttachment.clearDepth = 0.f; } /* Pass that clears depth only */ { m_clearDepthPassDesc = [MTLRenderPassDescriptor renderPassDescriptor]; m_clearDepthPassDesc.colorAttachments[0].texture = m_colorTex; m_clearDepthPassDesc.colorAttachments[0].loadAction = MTLLoadActionLoad; m_clearDepthPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore; m_clearDepthPassDesc.depthAttachment.texture = m_depthTex; m_clearDepthPassDesc.depthAttachment.loadAction = MTLLoadActionClear; m_clearDepthPassDesc.depthAttachment.storeAction = MTLStoreActionStore; m_clearDepthPassDesc.depthAttachment.clearDepth = 0.f; } /* Pass that clears colour only. The clear colour must be set on this descriptor; it was previously assigned to m_clearDepthPassDesc, leaving this pass on the attachment's default clear colour instead of the transparent black used by the clear-both pass */ { m_clearColorPassDesc = [MTLRenderPassDescriptor renderPassDescriptor]; m_clearColorPassDesc.colorAttachments[0].texture = m_colorTex; m_clearColorPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear; m_clearColorPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore; m_clearColorPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0); m_clearColorPassDesc.depthAttachment.texture = m_depthTex; m_clearColorPassDesc.depthAttachment.loadAction = MTLLoadActionLoad; m_clearColorPassDesc.depthAttachment.storeAction = MTLStoreActionStore; m_clearColorPassDesc.depthAttachment.clearDepth = 0.f; } /* Pass that clears both colour and depth */ { m_clearBothPassDesc = 
[MTLRenderPassDescriptor renderPassDescriptor]; m_clearBothPassDesc.colorAttachments[0].texture = m_colorTex; m_clearBothPassDesc.colorAttachments[0].loadAction = MTLLoadActionClear; m_clearBothPassDesc.colorAttachments[0].storeAction = MTLStoreActionStore; m_clearBothPassDesc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 0.0); m_clearBothPassDesc.depthAttachment.texture = m_depthTex; m_clearBothPassDesc.depthAttachment.loadAction = MTLLoadActionClear; m_clearBothPassDesc.depthAttachment.storeAction = MTLStoreActionStore; m_clearBothPassDesc.depthAttachment.clearDepth = 0.f; } } } /* Stores the dimensions and bind counts, treats a sample count of 0 as 1, then builds all GPU resources via Setup() */ MetalTextureR(const ObjToken& parent, MetalContext* ctx, size_t width, size_t height, size_t samples, size_t colorBindCount, size_t depthBindCount) : GraphicsDataNode(parent), m_width(width), m_height(height), m_samples(samples), m_colorBindCount(colorBindCount), m_depthBindCount(depthBindCount) { if (samples == 0) m_samples = 1; Setup(ctx); } public: size_t samples() const {return m_samples;} id m_colorTex; id m_depthTex; id m_colorBindTex[MAX_BIND_TEXS] = {}; id m_depthBindTex[MAX_BIND_TEXS] = {}; MTLRenderPassDescriptor* m_passDesc; MTLRenderPassDescriptor* m_clearDepthPassDesc; MTLRenderPassDescriptor* m_clearColorPassDesc; MTLRenderPassDescriptor* m_clearBothPassDesc; ~MetalTextureR() = default; /* Clamps each dimension to at least 1 and rebuilds all textures and pass descriptors */ void resize(MetalContext* ctx, size_t width, size_t height) { if (width < 1) width = 1; if (height < 1) height = 1; m_width = width; m_height = height; Setup(ctx); } }; /* Byte size per vertex semantic; indexed by the masked semantic value in MetalVertexFormat */ static const size_t SEMANTIC_SIZE_TABLE[] = { 0, 12, 16, 12, 16, 16, 4, 8, 16, 16, 16 }; /* Metal vertex format per vertex semantic; same indexing as SEMANTIC_SIZE_TABLE */ static const MTLVertexFormat SEMANTIC_TYPE_TABLE[] = { MTLVertexFormatInvalid, MTLVertexFormatFloat3, MTLVertexFormatFloat4, MTLVertexFormatFloat3, MTLVertexFormatFloat4, MTLVertexFormatFloat4, MTLVertexFormatUChar4Normalized, MTLVertexFormatFloat2, MTLVertexFormatFloat4, MTLVertexFormatFloat4, MTLVertexFormatFloat4 }; /* Vertex layout: wraps an MTLVertexDescriptor with per-vertex and per-instance strides */ struct MetalVertexFormat : GraphicsDataNode { size_t m_elementCount; MTLVertexDescriptor* m_vdesc; 
size_t m_stride = 0; size_t m_instStride = 0; /* Accumulates per-vertex and per-instance strides from the element semantics, then fills layouts 0 (per-vertex) and 1 (per-instance) and one attribute per element. NOTE(review): both element loops are truncated in this copy of the file and are kept verbatim */ MetalVertexFormat(const ObjToken& parent, size_t elementCount, const VertexElementDescriptor* elements) : GraphicsDataNode(parent), m_elementCount(elementCount) { for (size_t i=0 ; isemantic & VertexSemantic::SemanticMask); if ((elemin->semantic & VertexSemantic::Instanced) != VertexSemantic::None) m_instStride += SEMANTIC_SIZE_TABLE[semantic]; else m_stride += SEMANTIC_SIZE_TABLE[semantic]; } m_vdesc = [MTLVertexDescriptor vertexDescriptor]; MTLVertexBufferLayoutDescriptor* layoutDesc = m_vdesc.layouts[0]; layoutDesc.stride = m_stride; layoutDesc.stepFunction = MTLVertexStepFunctionPerVertex; layoutDesc.stepRate = 1; layoutDesc = m_vdesc.layouts[1]; layoutDesc.stride = m_instStride; layoutDesc.stepFunction = MTLVertexStepFunctionPerInstance; layoutDesc.stepRate = 1; /* Running byte offsets within the per-vertex and per-instance buffers */ size_t offset = 0; size_t instOffset = 0; for (size_t i=0 ; isemantic & VertexSemantic::SemanticMask); if ((elemin->semantic & VertexSemantic::Instanced) != VertexSemantic::None) { attrDesc.offset = instOffset; attrDesc.bufferIndex = 1; instOffset += SEMANTIC_SIZE_TABLE[semantic]; } else { attrDesc.offset = offset; attrDesc.bufferIndex = 0; offset += SEMANTIC_SIZE_TABLE[semantic]; } attrDesc.format = SEMANTIC_TYPE_TABLE[semantic]; } } }; /* Metal blend factor per BlendFactor value (indexed with int(srcFac)/int(dstFac) in MetalShaderPipeline); the last two entries use the Source1 factors only when built against a 10.12+ SDK */ static const MTLBlendFactor BLEND_FACTOR_TABLE[] = { MTLBlendFactorZero, MTLBlendFactorOne, MTLBlendFactorSourceColor, MTLBlendFactorOneMinusSourceColor, MTLBlendFactorDestinationColor, MTLBlendFactorOneMinusDestinationColor, MTLBlendFactorSourceAlpha, MTLBlendFactorOneMinusSourceAlpha, MTLBlendFactorDestinationAlpha, MTLBlendFactorOneMinusDestinationAlpha, #if __MAC_OS_X_VERSION_MAX_ALLOWED >= 101200 MTLBlendFactorSource1Color, MTLBlendFactorOneMinusSource1Color, #else MTLBlendFactorSourceColor, MTLBlendFactorOneMinusSourceColor, #endif }; /* Metal primitive type per Primitive value (indexed with int(prim)) */ static const MTLPrimitiveType PRIMITIVE_TABLE[] = { MTLPrimitiveTypeTriangle, MTLPrimitiveTypeTriangleStrip }; #define COLOR_WRITE_MASK (MTLColorWriteMaskRed | MTLColorWriteMaskGreen | 
MTLColorWriteMaskBlue) /* Render pipeline: holds the pipeline state, depth-stencil state, cull mode and draw primitive built from a vertex/fragment shader pair */ class MetalShaderPipeline : public GraphicsDataNode { friend class MetalDataFactory; friend struct MetalCommandQueue; friend struct MetalShaderDataBinding; MTLCullMode m_cullMode = MTLCullModeNone; MTLPrimitiveType m_drawPrim; MetalShareableShader::Token m_vert; MetalShareableShader::Token m_frag; /* Translates the abstract pipeline parameters into Metal descriptors and creates m_state and m_dsState */ MetalShaderPipeline(const ObjToken& parent, MetalContext* ctx, MetalShareableShader::Token&& vert, MetalShareableShader::Token&& frag, const ObjToken& vtxFmt, NSUInteger targetSamples, BlendFactor srcFac, BlendFactor dstFac, Primitive prim, ZTest depthTest, bool depthWrite, bool colorWrite, bool alphaWrite, CullMode culling) : GraphicsDataNode(parent), m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vert(std::move(vert)), m_frag(std::move(frag)) { /* Unknown cull modes are treated as no culling */ switch (culling) { case CullMode::None: default: m_cullMode = MTLCullModeNone; break; case CullMode::Backface: m_cullMode = MTLCullModeBack; break; case CullMode::Frontface: m_cullMode = MTLCullModeFront; break; } MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new]; desc.vertexFunction = m_vert.get().m_shader; desc.fragmentFunction = m_frag.get().m_shader; desc.vertexDescriptor = vtxFmt.cast()->m_vdesc; desc.sampleCount = targetSamples; desc.colorAttachments[0].pixelFormat = MTLPixelFormatBGRA8Unorm; /* RGB and alpha channel writes are enabled independently */ desc.colorAttachments[0].writeMask = (colorWrite ? COLOR_WRITE_MASK : 0) | (alphaWrite ? 
MTLColorWriteMaskAlpha : 0); /* Blending is enabled whenever the destination factor is anything other than Zero */ desc.colorAttachments[0].blendingEnabled = dstFac != BlendFactor::Zero; /* Subtract on either factor selects a fixed subtractive RGB blend; otherwise the factors come from BLEND_FACTOR_TABLE with additive blending */ if (srcFac == BlendFactor::Subtract || dstFac == BlendFactor::Subtract) { desc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorDestinationColor; desc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorSourceColor; desc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationSubtract; } else { desc.colorAttachments[0].sourceRGBBlendFactor = BLEND_FACTOR_TABLE[int(srcFac)]; desc.colorAttachments[0].destinationRGBBlendFactor = BLEND_FACTOR_TABLE[int(dstFac)]; desc.colorAttachments[0].rgbBlendOperation = MTLBlendOperationAdd; } desc.colorAttachments[0].sourceAlphaBlendFactor = MTLBlendFactorOne; desc.colorAttachments[0].destinationAlphaBlendFactor = MTLBlendFactorZero; desc.depthAttachmentPixelFormat = MTLPixelFormatDepth32Float; desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; NSError* err = nullptr; m_state = [ctx->m_dev newRenderPipelineStateWithDescriptor:desc error:&err]; /* Failure is signalled by a nil return value; the error out-parameter alone is not a reliable indicator */ if (!m_state) Log.report(logvisor::Fatal, "error making shader pipeline: %s", [[err localizedDescription] UTF8String]); MTLDepthStencilDescriptor* dsDesc = [MTLDepthStencilDescriptor new]; /* Each ZTest value maps to the opposite-direction Metal compare function (e.g. LEqual -> GreaterEqual); unknown values always pass */ switch (depthTest) { case ZTest::None: default: dsDesc.depthCompareFunction = MTLCompareFunctionAlways; break; case ZTest::LEqual: dsDesc.depthCompareFunction = MTLCompareFunctionGreaterEqual; break; case ZTest::Greater: dsDesc.depthCompareFunction = MTLCompareFunctionLess; break; case ZTest::GEqual: dsDesc.depthCompareFunction = MTLCompareFunctionLessEqual; break; case ZTest::Equal: dsDesc.depthCompareFunction = MTLCompareFunctionEqual; break; } dsDesc.depthWriteEnabled = depthWrite; m_dsState = [ctx->m_dev newDepthStencilStateWithDescriptor:dsDesc]; } public: id m_state; id m_dsState; ~MetalShaderPipeline() = default; MetalShaderPipeline& operator=(const MetalShaderPipeline&) = delete; MetalShaderPipeline(const MetalShaderPipeline&) = delete; /* Applies the pipeline state, depth-stencil state and cull mode to the encoder */ void bind(id enc) { [enc 
setRenderPipelineState:m_state]; [enc setDepthStencilState:m_dsState]; [enc setCullMode:m_cullMode]; } }; /* Returns the Metal buffer behind `buf`: slot `idx` of a dynamic buffer, or the single buffer of a static one */ static id GetBufferGPUResource(const ObjToken& buf, int idx) { if (buf->dynamic()) { const MetalGraphicsBufferD* cbuf = buf.cast>(); return cbuf->m_bufs[idx]; } else { const MetalGraphicsBufferS* cbuf = buf.cast(); return cbuf->m_buf; } } /* Same lookup as above, additionally writing the buffer's element stride to strideOut */ static id GetBufferGPUResource(const ObjToken& buf, int idx, size_t& strideOut) { if (buf->dynamic()) { const MetalGraphicsBufferD* cbuf = buf.cast>(); strideOut = cbuf->m_stride; return cbuf->m_bufs[idx]; } else { const MetalGraphicsBufferS* cbuf = buf.cast(); strideOut = cbuf->m_stride; return cbuf->m_buf; } } /* Returns the Metal texture behind `tex` by texture type: slot `idx` for dynamic textures, the single texture for static and static-array textures, and for render textures the depth or colour bind texture at bindIdx. Returns nullptr for any other type */ static id GetTextureGPUResource(const ObjToken& tex, int idx, int bindIdx, bool depth) { switch (tex->type()) { case TextureType::Dynamic: { const MetalTextureD* ctex = tex.cast(); return ctex->m_texs[idx]; } case TextureType::Static: { const MetalTextureS* ctex = tex.cast(); return ctex->m_tex; } case TextureType::StaticArray: { const MetalTextureSA* ctex = tex.cast(); return ctex->m_tex; } case TextureType::Render: { const MetalTextureR* ctex = tex.cast(); return depth ? 
ctex->m_depthBindTex[bindIdx] : ctex->m_colorBindTex[bindIdx]; } default: break; } return nullptr; } /* Groups everything needed for a draw: pipeline, vertex/instance/index buffers, uniform buffers with offsets, textures and base vertex/instance */ struct MetalShaderDataBinding : GraphicsDataNode { ObjToken m_pipeline; ObjToken m_vbuf; ObjToken m_instVbo; ObjToken m_ibuf; std::vector> m_ubufs; std::vector m_ubufOffs; std::vector m_fubufs; struct BoundTex { ObjToken tex; int idx; bool depth; }; std::vector m_texs; size_t m_baseVert; size_t m_baseInst; /* NOTE(review): a large span is missing from this copy of the file between the loop header below and the tail of the bind method (and again before the MetalCommandQueue members); the surviving text is kept verbatim */ MetalShaderDataBinding(const ObjToken& d, MetalContext* ctx, const ObjToken& pipeline, const ObjToken& vbuf, const ObjToken& instVbo, const ObjToken& ibuf, size_t ubufCount, const ObjToken* ubufs, const PipelineStage* ubufStages, const size_t* ubufOffs, const size_t* ubufSizes, size_t texCount, const ObjToken* texs, const int* texBindIdxs, const bool* depthBind, size_t baseVert, size_t baseInst) : GraphicsDataNode(d), m_pipeline(pipeline), m_vbuf(vbuf), m_instVbo(instVbo), m_ibuf(ibuf), m_baseVert(baseVert), m_baseInst(baseInst) { if (ubufCount && ubufStages) { m_fubufs.reserve(ubufCount); for (size_t i=0 ; i enc, int b) { m_pipeline.cast()->bind(enc); size_t stride; /* Vertex data goes to buffer index 0 and instance data to index 1, each offset by stride * base element */ if (m_vbuf) { id buf = GetBufferGPUResource(m_vbuf, b, stride); [enc setVertexBuffer:buf offset:stride * m_baseVert atIndex:0]; } if (m_instVbo) { id buf = GetBufferGPUResource(m_instVbo, b, stride); [enc setVertexBuffer:buf offset:stride * m_baseInst atIndex:1]; } if (m_ubufOffs.size()) for (size_t i=0 ; i m_cmdBuf; id m_enc; bool m_running = true; int m_fillBuf = 0; int m_drawBuf = 0; /* Stores the context/window/parent pointers and obtains the first command buffer from the context's queue */ MetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, IGraphicsContext* parent) : m_ctx(ctx), m_parentWindow(parentWindow), m_parent(parent) { @autoreleasepool { m_cmdBuf = [ctx->m_q commandBuffer]; } } /* Clears m_running and, if a committed buffer is in flight, blocks until it completes */ void stopRenderer() { m_running = false; if (m_inProgress && m_cmdBuf.status != MTLCommandBufferStatusNotEnqueued) [m_cmdBuf waitUntilCompleted]; } ~MetalCommandQueue() { if (m_running) stopRenderer(); } MetalShaderDataBinding* m_boundData = nullptr; MTLPrimitiveType m_currentPrimitive = MTLPrimitiveTypeTriangle; 
/* Binds `binding` on the current encoder using the fill slot and remembers it and its pipeline's primitive type for later draw calls */ void setShaderDataBinding(const ObjToken& binding) { @autoreleasepool { MetalShaderDataBinding* cbind = binding.cast(); cbind->bind(m_enc, m_fillBuf); m_boundData = cbind; m_currentPrimitive = cbind->m_pipeline.cast()->m_drawPrim; } } ObjToken m_boundTarget; /* Ends the current encoder and starts a new one on `target`, choosing the pass descriptor that matches the requested clears. The stored viewport/scissor are re-applied only when the target is unchanged; otherwise the target becomes the new bound target */ void _setRenderTarget(const ObjToken& target, bool clearColor, bool clearDepth) { @autoreleasepool { MetalTextureR* ctarget = target.cast(); [m_enc endEncoding]; if (clearColor && clearDepth) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearBothPassDesc]; else if (clearColor) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearColorPassDesc]; else if (clearDepth) m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_clearDepthPassDesc]; else m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:ctarget->m_passDesc]; [m_enc setFrontFacingWinding:MTLWindingCounterClockwise]; if (ctarget == m_boundTarget.get()) { if (m_boundVp.width || m_boundVp.height) [m_enc setViewport:m_boundVp]; if (m_boundScissor.width || m_boundScissor.height) [m_enc setScissorRect:m_boundScissor]; } else m_boundTarget = target; } } /* Switches render target without clearing */ void setRenderTarget(const ObjToken& target) { _setRenderTarget(target, false, false); } MTLViewport m_boundVp = {}; /* Stores and applies the viewport; the depth range is passed as (1 - zfar, 1 - znear) */ void setViewport(const SWindowRect& rect, float znear, float zfar) { m_boundVp = MTLViewport{double(rect.location[0]), double(rect.location[1]), double(rect.size[0]), double(rect.size[1]), 1.f - zfar, 1.f - znear}; [m_enc setViewport:m_boundVp]; } MTLScissorRect m_boundScissor = {}; /* Clips `rect` to the bound target, converts its y coordinate by subtracting from the target height, then stores and applies it; does nothing without a bound target */ void setScissor(const SWindowRect& rect) { if (m_boundTarget) { MetalTextureR* ctarget = m_boundTarget.cast(); SWindowRect intersectRect = rect.intersect(SWindowRect(0, 0, ctarget->m_width, ctarget->m_height)); m_boundScissor = MTLScissorRect{NSUInteger(intersectRect.location[0]), NSUInteger(ctarget->m_height - intersectRect.location[1] - intersectRect.size[1]), NSUInteger(intersectRect.size[0]), NSUInteger(intersectRect.size[1])}; [m_enc setScissorRect:m_boundScissor]; } } 
std::unordered_map> m_texResizes; /* Records a pending resize for `tex`; the resize itself is performed in execute() */ void resizeRenderTexture(const ObjToken& tex, size_t width, size_t height) { MetalTextureR* ctex = tex.cast(); m_texResizes[ctex] = std::make_pair(width, height); } /* Invokes the handler immediately */ void schedulePostFrameHandler(std::function&& func) { func(); } void flushBufferUpdates() {} float m_clearColor[4] = {0.f,0.f,0.f,0.f}; /* Stores the RGBA clear colour. NOTE(review): nothing visible in this file reads m_clearColor - confirm whether the pass descriptors should use it */ void setClearColor(const float rgba[4]) { m_clearColor[0] = rgba[0]; m_clearColor[1] = rgba[1]; m_clearColor[2] = rgba[2]; m_clearColor[3] = rgba[3]; } /* Restarts the pass on the bound target with the requested clears; no-op without a bound target */ void clearTarget(bool render=true, bool depth=true) { if (!m_boundTarget) return; _setRenderTarget(m_boundTarget, render, depth); } void draw(size_t start, size_t count) { [m_enc drawPrimitives:m_currentPrimitive vertexStart:start vertexCount:count]; } /* Indexed draws use 32-bit indices from the bound data's index buffer, hence the start*4 byte offset */ void drawIndexed(size_t start, size_t count) { [m_enc drawIndexedPrimitives:m_currentPrimitive indexCount:count indexType:MTLIndexTypeUInt32 indexBuffer:GetBufferGPUResource(m_boundData->m_ibuf, m_fillBuf) indexBufferOffset:start*4]; } void drawInstances(size_t start, size_t count, size_t instCount) { [m_enc drawPrimitives:m_currentPrimitive vertexStart:start vertexCount:count instanceCount:instCount]; } void drawInstancesIndexed(size_t start, size_t count, size_t instCount) { [m_enc drawIndexedPrimitives:m_currentPrimitive indexCount:count indexType:MTLIndexTypeUInt32 indexBuffer:GetBufferGPUResource(m_boundData->m_ibuf, m_fillBuf) indexBufferOffset:start*4 instanceCount:instCount]; } /* Copies the clipped region of the render target's colour and/or depth texture into the bind texture at bindIdx with a blit encoder, then reopens a non-clearing render encoder on the same texture and re-applies the stored viewport/scissor */ void resolveBindTexture(const ObjToken& texture, const SWindowRect& rect, bool tlOrigin, int bindIdx, bool color, bool depth) { MetalTextureR* tex = texture.cast(); @autoreleasepool { [m_enc endEncoding]; SWindowRect intersectRect = rect.intersect(SWindowRect(0, 0, tex->m_width, tex->m_height)); /* With a top-left origin y is used as given; otherwise it is subtracted from the texture height */ NSUInteger y = tlOrigin ? 
intersectRect.location[1] : int(tex->m_height) - intersectRect.location[1] - intersectRect.size[1]; MTLOrigin origin = {NSUInteger(intersectRect.location[0]), y, 0}; id blitEnc = [m_cmdBuf blitCommandEncoder]; if (color && tex->m_colorBindTex[bindIdx]) { [blitEnc copyFromTexture:tex->m_colorTex sourceSlice:0 sourceLevel:0 sourceOrigin:origin sourceSize:MTLSizeMake(intersectRect.size[0], intersectRect.size[1], 1) toTexture:tex->m_colorBindTex[bindIdx] destinationSlice:0 destinationLevel:0 destinationOrigin:origin]; } if (depth && tex->m_depthBindTex[bindIdx]) { [blitEnc copyFromTexture:tex->m_depthTex sourceSlice:0 sourceLevel:0 sourceOrigin:origin sourceSize:MTLSizeMake(intersectRect.size[0], intersectRect.size[1], 1) toTexture:tex->m_depthBindTex[bindIdx] destinationSlice:0 destinationLevel:0 destinationOrigin:origin]; } [blitEnc endEncoding]; m_enc = [m_cmdBuf renderCommandEncoderWithDescriptor:tex->m_passDesc]; [m_enc setFrontFacingWinding:MTLWindingCounterClockwise]; if (m_boundVp.width || m_boundVp.height) [m_enc setViewport:m_boundVp]; if (m_boundScissor.width || m_boundScissor.height) [m_enc setScissorRect:m_boundScissor]; } } ObjToken m_needsDisplay; /* Records `source` as the texture to present; the blit and present happen in execute() */ void resolveDisplay(const ObjToken& source) { m_needsDisplay = source; } bool m_inProgress = false; /* Finishes the frame: uploads dynamic buffers/textures for the fill slot, ends encoding, then either abandons the frame (previous one still in flight), applies pending texture resizes, or blits to the drawable and commits. Statement order matters throughout */ void execute() { if (!m_running) return; @autoreleasepool { /* Update dynamic data here */ MetalDataFactoryImpl* gfxF = static_cast(m_parent->getDataFactory()); std::unique_lock datalk(gfxF->m_dataMutex); if (gfxF->m_dataHead) { for (BaseGraphicsData& d : *gfxF->m_dataHead) { if (d.m_DBufs) for (IGraphicsBufferD& b : *d.m_DBufs) static_cast&>(b).update(m_fillBuf); if (d.m_DTexs) for (ITextureD& t : *d.m_DTexs) static_cast(t).update(m_fillBuf); } } if (gfxF->m_poolHead) { for (BaseGraphicsPool& p : *gfxF->m_poolHead) { if (p.m_DBufs) for (IGraphicsBufferD& b : *p.m_DBufs) static_cast&>(b).update(m_fillBuf); } } datalk.unlock(); [m_enc endEncoding]; m_enc = nullptr; /* Abandon if in progress (renderer too slow) */ 
if (m_inProgress) { m_cmdBuf = [m_ctx->m_q commandBuffer]; return; } /* Perform texture resizes; the recorded frame is discarded by replacing the command buffer */ if (m_texResizes.size()) { for (const auto& resize : m_texResizes) resize.first->resize(m_ctx, resize.second.first, resize.second.second); m_texResizes.clear(); m_cmdBuf = [m_ctx->m_q commandBuffer]; return; } /* Wrap up and present if needed */ if (m_needsDisplay) { MetalContext::Window& w = m_ctx->m_windows[m_parentWindow]; { std::unique_lock lk(w.m_resizeLock); /* A pending window resize updates the layer's drawable size and skips this frame. NOTE(review): this path returns without committing or replacing m_cmdBuf - confirm that is intended */ if (w.m_needsResize) { w.m_metalLayer.drawableSize = w.m_size; w.m_needsResize = NO; m_needsDisplay.reset(); return; } } id drawable = [w.m_metalLayer nextDrawable]; if (drawable) { MetalTextureR* src = m_needsDisplay.cast(); id dest = drawable.texture; /* Only blit and present when source and drawable dimensions match exactly */ if (src->m_colorTex.width == dest.width && src->m_colorTex.height == dest.height) { id blitEnc = [m_cmdBuf blitCommandEncoder]; [blitEnc copyFromTexture:src->m_colorTex sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(dest.width, dest.height, 1) toTexture:dest destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; [blitEnc endEncoding]; [m_cmdBuf presentDrawable:drawable]; } } m_needsDisplay.reset(); } /* Swap fill/draw slots, commit, and start a fresh command buffer; the completion handler clears m_inProgress */ m_drawBuf = m_fillBuf; m_fillBuf ^= 1; [m_cmdBuf addCompletedHandler:^(id buf) {m_inProgress = false;}]; m_inProgress = true; [m_cmdBuf commit]; m_cmdBuf = [m_ctx->m_q commandBuffer]; } } }; /* A Context creates a new BaseGraphicsData for the parent factory; resources made through it are constructed with m_data as their parent */ MetalDataFactory::Context::Context(MetalDataFactory& parent) : m_parent(parent), m_data(new BaseGraphicsData(static_cast(parent))) {} MetalDataFactory::Context::~Context() {} /* Creates a static buffer initialised from `data` */ ObjToken MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalGraphicsBufferS(m_data, use, factory.m_ctx, data, stride, count)}; } } /* Creates a dynamic buffer tied to the parent's command queue */ ObjToken MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) { @autoreleasepool { MetalDataFactoryImpl& factory = 
static_cast(m_parent); MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); return {new MetalGraphicsBufferD(m_data, q, use, factory.m_ctx, stride, count)}; } } /* Creates a static 2D texture from `data`; clampMode is not forwarded to the texture */ ObjToken MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt, TextureClampMode clampMode, const void* data, size_t sz) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalTextureS(m_data, factory.m_ctx, width, height, mips, fmt, data, sz)}; } } /* Creates a static 2D array texture with `layers` slices; clampMode is not forwarded */ ObjToken MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips, TextureFormat fmt, TextureClampMode clampMode, const void* data, size_t sz) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalTextureSA(m_data, factory.m_ctx, width, height, layers, mips, fmt, data, sz)}; } } /* Creates a dynamic texture tied to the parent's command queue; clampMode is not forwarded */ ObjToken MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt, TextureClampMode clampMode) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); return {new MetalTextureD(m_data, q, factory.m_ctx, width, height, fmt)}; } } /* Creates a render texture using the factory's sample count; clampMode is not forwarded */ ObjToken MetalDataFactory::Context::newRenderTexture(size_t width, size_t height, TextureClampMode clampMode, size_t colorBindCount, size_t depthBindCount) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalTextureR(m_data, factory.m_ctx, width, height, factory.m_sampleCount, colorBindCount, depthBindCount)}; } } /* Creates a vertex format from the element list; baseVert and baseInst are not used here */ ObjToken MetalDataFactory::Context::newVertexFormat(size_t elementCount, const VertexElementDescriptor* elements, size_t baseVert, size_t baseInst) { @autoreleasepool { return {new struct MetalVertexFormat(m_data, elementCount, elements)}; } } ObjToken MetalDataFactory::Context::newShaderPipeline(const char* vertSource, const char* fragSource, std::vector* vertBlobOut, std::vector* fragBlobOut, 
const ObjToken& vtxFmt, unsigned targetSamples, BlendFactor srcFac, BlendFactor dstFac, Primitive prim, ZTest depthTest, bool depthWrite, bool colorWrite, bool alphaWrite, CullMode culling) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); MTLCompileOptions* compOpts = [MTLCompileOptions new]; compOpts.languageVersion = MTLLanguageVersion1_2; NSError* err = nullptr; XXH64_state_t hashState; uint64_t srcHashes[2] = {}; uint64_t binHashes[2] = {}; XXH64_reset(&hashState, 0); if (vertSource) { XXH64_update(&hashState, vertSource, strlen(vertSource)); srcHashes[0] = XXH64_digest(&hashState); auto binSearch = factory.m_sourceToBinary.find(srcHashes[0]); if (binSearch != factory.m_sourceToBinary.cend()) binHashes[0] = binSearch->second; } else if (vertBlobOut && !vertBlobOut->empty()) { XXH64_update(&hashState, vertBlobOut->data(), vertBlobOut->size()); binHashes[0] = XXH64_digest(&hashState); } XXH64_reset(&hashState, 0); if (fragSource) { XXH64_update(&hashState, fragSource, strlen(fragSource)); srcHashes[1] = XXH64_digest(&hashState); auto binSearch = factory.m_sourceToBinary.find(srcHashes[1]); if (binSearch != factory.m_sourceToBinary.cend()) binHashes[1] = binSearch->second; } else if (fragBlobOut && !fragBlobOut->empty()) { XXH64_update(&hashState, fragBlobOut->data(), fragBlobOut->size()); binHashes[1] = XXH64_digest(&hashState); } if (vertBlobOut && vertBlobOut->empty()) binHashes[0] = factory.CompileLib(*vertBlobOut, vertSource, srcHashes[0]); if (fragBlobOut && fragBlobOut->empty()) binHashes[1] = factory.CompileLib(*fragBlobOut, fragSource, srcHashes[1]); MetalShareableShader::Token vertShader; MetalShareableShader::Token fragShader; auto vertFind = binHashes[0] ? 
factory.m_sharedShaders.find(binHashes[0]) : factory.m_sharedShaders.end(); if (vertFind != factory.m_sharedShaders.end()) { vertShader = vertFind->second->lock(); } else { id vertShaderLib; if (vertBlobOut && !vertBlobOut->empty()) { if ((*vertBlobOut)[0] == 1) { dispatch_data_t vertData = dispatch_data_create(vertBlobOut->data() + 1, vertBlobOut->size() - 1, nullptr, nullptr); vertShaderLib = [factory.m_ctx->m_dev newLibraryWithData:vertData error:&err]; if (!vertShaderLib) Log.report(logvisor::Fatal, "error loading vert library: %s", [[err localizedDescription] UTF8String]); } else { factory.CompileLib(vertShaderLib, (char*)vertBlobOut->data() + 1, 0, compOpts, &err); } } else binHashes[0] = factory.CompileLib(vertShaderLib, vertSource, srcHashes[0], compOpts, &err); if (!vertShaderLib) { printf("%s\n", vertSource); Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); } id vertFunc = [vertShaderLib newFunctionWithName:@"vmain"]; auto it = factory.m_sharedShaders.emplace(std::make_pair(binHashes[0], std::make_unique(factory, srcHashes[0], binHashes[0], vertFunc))).first; vertShader = it->second->lock(); } auto fragFind = binHashes[1] ? 
factory.m_sharedShaders.find(binHashes[1]) : factory.m_sharedShaders.end(); if (fragFind != factory.m_sharedShaders.end()) { fragShader = fragFind->second->lock(); } else { id fragShaderLib; if (fragBlobOut && !fragBlobOut->empty()) { if ((*fragBlobOut)[0] == 1) { dispatch_data_t fragData = dispatch_data_create(fragBlobOut->data() + 1, fragBlobOut->size() - 1, nullptr, nullptr); fragShaderLib = [factory.m_ctx->m_dev newLibraryWithData:fragData error:&err]; if (!fragShaderLib) Log.report(logvisor::Fatal, "error loading frag library: %s", [[err localizedDescription] UTF8String]); } else { factory.CompileLib(fragShaderLib, (char*)fragBlobOut->data() + 1, 0, compOpts, &err); } } else binHashes[1] = factory.CompileLib(fragShaderLib, fragSource, srcHashes[1], compOpts, &err); if (!fragShaderLib) { printf("%s\n", fragSource); Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]); } id fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; auto it = factory.m_sharedShaders.emplace(std::make_pair(binHashes[1], std::make_unique(factory, srcHashes[1], binHashes[1], fragFunc))).first; fragShader = it->second->lock(); } return {new MetalShaderPipeline(m_data, factory.m_ctx, std::move(vertShader), std::move(fragShader), vtxFmt, targetSamples, srcFac, dstFac, prim, depthTest, depthWrite, colorWrite, alphaWrite, culling)}; } } ObjToken MetalDataFactory::Context::newShaderDataBinding(const ObjToken& pipeline, const ObjToken& vtxFormat, const ObjToken& vbo, const ObjToken& instVbo, const ObjToken& ibo, size_t ubufCount, const ObjToken* ubufs, const PipelineStage* ubufStages, const size_t* ubufOffs, const size_t* ubufSizes, size_t texCount, const ObjToken* texs, const int* texBindIdxs, const bool* depthBind, size_t baseVert, size_t baseInst) { @autoreleasepool { MetalDataFactoryImpl& factory = static_cast(m_parent); return {new MetalShaderDataBinding(m_data, factory.m_ctx, pipeline, vbo, instVbo, ibo, ubufCount, ubufs, ubufStages, 
ubufOffs, ubufSizes, texCount, texs, texBindIdxs, depthBind, baseVert, baseInst)}; } } void MetalDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans) { MetalDataFactory::Context ctx(*this); trans(ctx); } ObjToken MetalDataFactoryImpl::newPoolBuffer(BufferUse use, size_t stride, size_t count) { ObjToken pool(new BaseGraphicsPool(*this)); MetalCommandQueue* q = static_cast(m_parent->getCommandQueue()); return {new MetalGraphicsBufferD(pool, q, use, m_ctx, stride, count)}; } IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, IGraphicsContext* parent) { return new struct MetalCommandQueue(ctx, parentWindow, parent); } IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) { return new class MetalDataFactoryImpl(parent, ctx, sampleCount); } } #endif