Populate some Metal limits up from the backend

Bug: dawn:685 Change-Id: Idd36423e0f1f65c46ff835bfe90632b68505050c Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/64983 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Austin Eng <enga@chromium.org>
2025-07-23 05:26:11 +00:00 · 2021-10-26 16:56:36 +00:00 · 2021-10-26 16:56:36 +00:00 · f45478d65e
commit f45478d65e
parent 167d299916
3 changed files with 236 additions and 22 deletions
--- a/src/dawn_native/metal/BackendMTL.mm
+++ b/src/dawn_native/metal/BackendMTL.mm
@ -21,6 +21,7 @@
 #include "common/SystemUtils.h"
 #include "dawn_native/Instance.h"
 #include "dawn_native/MetalBackend.h"
+#include "dawn_native/metal/BufferMTL.h"
 #include "dawn_native/metal/DeviceMTL.h"

 #if defined(DAWN_PLATFORM_MACOS)
@ -321,8 +322,211 @@ namespace dawn_native { namespace metal {
            return {};
        }

+        // GPU families distinguished when looking up Metal limits.
+        //
+        // The enumerators are used as indices into a 9-entry ityp::array keyed by
+        // MTLGPUFamily, so the values are spelled out: the Apple families first, in
+        // ascending order starting at 0, followed by the Mac families.
+        enum class MTLGPUFamily {
+            Apple1 = 0,
+            Apple2 = 1,
+            Apple3 = 2,
+            Apple4 = 3,
+            Apple5 = 4,
+            Apple6 = 5,
+            Apple7 = 6,
+            Mac1 = 7,
+            Mac2 = 8,
+        };
+
+        // Determines which MTLGPUFamily |mDevice| belongs to.
+        //
+        // When -[MTLDevice supportsFamily:] is available (macOS 10.15 / iOS 13.0) the
+        // families are probed from most to least capable (Mac2, Mac1, Apple7 ... Apple1)
+        // and the first match is returned. Otherwise, or when no family matched, the
+        // older -[MTLDevice supportsFeatureSet:] API is probed with the v1 feature set
+        // of each family for the platform being compiled.
+        //
+        // Returns an internal error if the device matches none of the probed families.
+        ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
+            // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc
+
+            // -supportsFamily: was introduced in macOS 10.15 and iOS 13.0. The guard must
+            // name the real iOS version: a lower one would also pass on iOS 11 and 12,
+            // where the selector does not exist.
+            if (@available(macOS 10.15, iOS 13.0, *)) {
+                if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
+                    return MTLGPUFamily::Mac2;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
+                    return MTLGPUFamily::Mac1;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
+                    return MTLGPUFamily::Apple7;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
+                    return MTLGPUFamily::Apple6;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
+                    return MTLGPUFamily::Apple5;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
+                    return MTLGPUFamily::Apple4;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
+                    return MTLGPUFamily::Apple3;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
+                    return MTLGPUFamily::Apple2;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
+                    return MTLGPUFamily::Apple1;
+                }
+            }
+
+            // Fallback for OS versions without -supportsFamily: (and for devices that
+            // matched nothing above): probe the legacy feature sets, newest first.
+#if TARGET_OS_OSX
+            if (@available(macOS 10.14, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
+                    return MTLGPUFamily::Mac2;
+                }
+            }
+            if (@available(macOS 10.11, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
+                    return MTLGPUFamily::Mac1;
+                }
+            }
+#elif TARGET_OS_IOS
+            // MTLFeatureSet_iOS_GPUFamily4_v1 was introduced in iOS 11.0.
+            if (@available(iOS 11.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
+                    return MTLGPUFamily::Apple4;
+                }
+            }
+            if (@available(iOS 9.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
+                    return MTLGPUFamily::Apple3;
+                }
+            }
+            if (@available(iOS 8.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
+                    return MTLGPUFamily::Apple2;
+                }
+            }
+            if (@available(iOS 8.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
+                    return MTLGPUFamily::Apple1;
+                }
+            }
+#endif
+            return DAWN_INTERNAL_ERROR("Unsupported Metal device");
+        }
+
        MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
+            // Fills |limits->v1| for this adapter: starts from GetDefaultLimits() and then
+            // overrides individual limits with values derived from a per-GPU-family table
+            // of Metal limits and from Buffer::QueryMaxBufferLength(). Fails if
+            // GetMTLGPUFamily() cannot classify the device.
+
+            // Metal-side limits; one member per row of kMTLLimits below.
+            struct MTLDeviceLimits {
+                uint32_t maxVertexAttribsPerDescriptor;
+                uint32_t maxBufferArgumentEntriesPerFunc;
+                uint32_t maxTextureArgumentEntriesPerFunc;
+                uint32_t maxSamplerStateArgumentEntriesPerFunc;
+                uint32_t maxThreadsPerThreadgroup;
+                uint32_t maxTotalThreadgroupMemory;
+                uint32_t maxFragmentInputComponents;
+                uint32_t max1DTextureSize;
+                uint32_t max2DTextureSize;
+                uint32_t max3DTextureSize;
+                uint32_t maxTextureArrayLayers;
+                uint32_t minBufferOffsetAlignment;
+            };
+
+            // One table row: which MTLDeviceLimits member to fill, and the value of that
+            // limit for each of the 9 MTLGPUFamily enumerators (indexed by the enum).
+            struct LimitsForFamily {
+                uint32_t MTLDeviceLimits::*limit;
+                ityp::array<MTLGPUFamily, uint32_t, 9> values;
+            };
+
+            // clang-format off
+            // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+            //                                                               Apple                                                      Mac
+            //                                                                   1,      2,      3,      4,      5,      6,      7,       1,      2
+            constexpr LimitsForFamily kMTLLimits[12] = {
+                {&MTLDeviceLimits::maxVertexAttribsPerDescriptor,         {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
+                {&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc,       {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
+                {&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc,      {    31u,    31u,    31u,    96u,    96u,   128u,   128u,    128u,   128u }},
+                {&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, {    16u,    16u,    16u,    16u,    16u,    16u,    16u,     16u,    16u }},
+                {&MTLDeviceLimits::maxThreadsPerThreadgroup,              {   512u,   512u,   512u,  1024u,  1024u,  1024u,  1024u,   1024u,  1024u }},
+                {&MTLDeviceLimits::maxTotalThreadgroupMemory,             { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u,  32768u, 32768u }},
+                {&MTLDeviceLimits::maxFragmentInputComponents,            {    60u,    60u,    60u,   124u,   124u,   124u,   124u,    124u,   124u }},
+                {&MTLDeviceLimits::max1DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
+                {&MTLDeviceLimits::max2DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
+                {&MTLDeviceLimits::max3DTextureSize,                      {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
+                {&MTLDeviceLimits::maxTextureArrayLayers,                 {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
+                {&MTLDeviceLimits::minBufferOffsetAlignment,              {     4u,     4u,     4u,     4u,     4u,     4u,     4u,    256u,   256u }},
+            };
+            // clang-format on
+
+            // NOTE(review): DAWN_TRY_ASSIGN is defined elsewhere; presumably it returns the
+            // error from GetMTLGPUFamily() to the caller on failure -- confirm.
+            MTLGPUFamily mtlGPUFamily;
+            DAWN_TRY_ASSIGN(mtlGPUFamily, GetMTLGPUFamily());
+
+            // Copy the detected family's column into mtlLimits. Every member of
+            // MTLDeviceLimits has exactly one row in kMTLLimits, so this loop assigns all
+            // of them even though mtlLimits is declared without an initializer.
+            MTLDeviceLimits mtlLimits;
+            for (const auto& limitsForFamily : kMTLLimits) {
+                mtlLimits.*limitsForFamily.limit = limitsForFamily.values[mtlGPUFamily];
+            }
+
            GetDefaultLimits(&limits->v1);
+
+            limits->v1.maxTextureDimension1D = mtlLimits.max1DTextureSize;
+            limits->v1.maxTextureDimension2D = mtlLimits.max2DTextureSize;
+            limits->v1.maxTextureDimension3D = mtlLimits.max3DTextureSize;
+            limits->v1.maxTextureArrayLayers = mtlLimits.maxTextureArrayLayers;
+
+            uint32_t maxBuffersPerStage = mtlLimits.maxBufferArgumentEntriesPerFunc;
+            maxBuffersPerStage -= 1;  // One slot is reserved to store buffer lengths.
+
+            // Buffer slots already claimed by the default limits. Storage, uniform and
+            // vertex buffers are summed because they are counted against the same
+            // per-function buffer limit here.
+            uint32_t baseMaxBuffersPerStage = limits->v1.maxStorageBuffersPerShaderStage +
+                                              limits->v1.maxUniformBuffersPerShaderStage +
+                                              limits->v1.maxVertexBuffers;
+
+            ASSERT(maxBuffersPerStage >= baseMaxBuffersPerStage);
+            {
+                // Hand out the slots left over beyond the defaults: one third each to
+                // storage and uniform buffers, and the remainder (including any rounding
+                // leftover) to vertex buffers.
+                uint32_t additional = maxBuffersPerStage - baseMaxBuffersPerStage;
+                limits->v1.maxStorageBuffersPerShaderStage += additional / 3;
+                limits->v1.maxUniformBuffersPerShaderStage += additional / 3;
+                limits->v1.maxVertexBuffers += (additional - 2 * (additional / 3));
+            }
+
+            uint32_t baseMaxTexturesPerStage = limits->v1.maxSampledTexturesPerShaderStage +
+                                               limits->v1.maxStorageTexturesPerShaderStage;
+
+            ASSERT(mtlLimits.maxTextureArgumentEntriesPerFunc >= baseMaxTexturesPerStage);
+            {
+                // Same idea for textures: half of the extra slots go to sampled textures,
+                // the remainder to storage textures.
+                uint32_t additional =
+                    mtlLimits.maxTextureArgumentEntriesPerFunc - baseMaxTexturesPerStage;
+                limits->v1.maxSampledTexturesPerShaderStage += additional / 2;
+                limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
+            }
+
+            limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
+
+            // Metal limits are per-function, so the layout limits are the same as the stage
+            // limits. Note: this should likely change if the implementation uses Metal argument
+            // buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
+            // buffers may be set directly.
+            //   Mac GPU families with tier 1 argument buffers support 64
+            //   buffers, 128 textures, and 16 samplers. Mac GPU families
+            //   with tier 2 argument buffers support 500000 buffers and
+            //   textures, and 1024 unique samplers
+            limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+                limits->v1.maxUniformBuffersPerShaderStage;
+            limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+                limits->v1.maxStorageBuffersPerShaderStage;
+
+            // The WebGPU limit is the limit across all vertex buffers, combined.
+            // NOTE(review): the Metal value is named "per descriptor"; confirm that scaling
+            // it by maxVertexBuffers (rather than using it directly) is intended.
+            limits->v1.maxVertexAttributes =
+                limits->v1.maxVertexBuffers * mtlLimits.maxVertexAttribsPerDescriptor;
+
+            limits->v1.maxInterStageShaderComponents = mtlLimits.maxFragmentInputComponents;
+
+            // The per-threadgroup thread count is used both as the total invocation limit
+            // and as the limit for each individual workgroup dimension.
+            limits->v1.maxComputeWorkgroupStorageSize = mtlLimits.maxTotalThreadgroupMemory;
+            limits->v1.maxComputeInvocationsPerWorkgroup = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeX = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeY = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeZ = mtlLimits.maxThreadsPerThreadgroup;
+
+            limits->v1.minUniformBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
+            limits->v1.minStorageBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
+
+            uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
+
+            // Metal has no documented limit on the size of a binding. Use the maximum
+            // buffer size.
+            // NOTE(review): maxBufferSize is 64-bit; confirm the two limit fields below are
+            // wide enough that this assignment cannot truncate.
+            limits->v1.maxUniformBufferBindingSize = maxBufferSize;
+            limits->v1.maxStorageBufferBindingSize = maxBufferSize;
+
+            // TODO(crbug.com/dawn/685):
+            // LIMITS NOT SET:
+            // - maxBindGroups
+            // - maxVertexBufferArrayStride
+
            return {};
        }

@ -360,7 +564,8 @@ namespace dawn_native { namespace metal {
        if (@available(iOS 8.0, *)) {
            supportedVersion = YES;
            // iOS only has a single device so MTLCopyAllDevices doesn't exist there.
-            std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
+            std::unique_ptr<Adapter> adapter =
+                std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
            if (!GetInstance()->ConsumedError(adapter->Initialize())) {
                adapters.push_back(std::move(adapter));
            }
--- a/src/dawn_native/metal/BufferMTL.h
+++ b/src/dawn_native/metal/BufferMTL.h
@ -39,6 +39,8 @@ namespace dawn_native { namespace metal {
        void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
                                                const CopyTextureToBufferCmd* copy);

+        static uint64_t QueryMaxBufferLength(id<MTLDevice> mtlDevice);
+
      private:
        using BufferBase::BufferBase;
        MaybeError Initialize(bool mappedAtCreation);
--- a/src/dawn_native/metal/BufferMTL.mm
+++ b/src/dawn_native/metal/BufferMTL.mm
@ -26,10 +26,6 @@ namespace dawn_native { namespace metal {
    // largest alignment of supported data types
    static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;

-    // The maximum buffer size if querying the maximum buffer size or recommended working set size
-    // is not available. This is a somewhat arbitrary limit of 1 GiB.
-    static constexpr uint32_t kMaxBufferSizeFallback = 1024u * 1024u * 1024u;
-
    // static
    ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
        Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
@ -37,6 +33,32 @@ namespace dawn_native { namespace metal {
        return std::move(buffer);
    }

+    // static
+    // Returns the largest buffer length, in bytes, to allow for |mtlDevice|.
+    //
+    // Uses -[MTLDevice maxBufferLength] where the OS provides it. On older OS versions
+    // the value comes from the fixed maximums listed in the Metal feature set tables.
+    // Every path returns a value, so callers never observe an unspecified result.
+    uint64_t Buffer::QueryMaxBufferLength(id<MTLDevice> mtlDevice) {
+        if (@available(iOS 12, tvOS 12, macOS 10.14, *)) {
+            return [mtlDevice maxBufferLength];
+        }
+
+        // Earlier versions of Metal had maximums defined in the Metal feature set tables
+        // https://metalbyexample.com/wp-content/uploads/Metal-Feature-Set-Tables-2018.pdf
+        constexpr uint64_t kMiB = 1024u * 1024u;
+#if defined(DAWN_PLATFORM_MACOS)
+        // macOS 10.12 and 10.13 have a 1 GiB limit.
+        if (@available(macOS 10.12, *)) {
+            return 1024u * kMiB;
+        }
+        // macOS 10.11 has a 256 MiB limit. This is also the unconditional fallback so
+        // that the function cannot fall off the end without returning a value.
+        return 256u * kMiB;
+#else
+        // iOS / tvOS: 256 MiB limit in versions without [MTLDevice maxBufferLength]
+        return 256u * kMiB;
+#endif
+    }
+
    MaybeError Buffer::Initialize(bool mappedAtCreation) {
        MTLResourceOptions storageMode;
        if (GetUsage() & kMappableBufferUsages) {
@ -80,25 +102,10 @@ namespace dawn_native { namespace metal {
        }
        currentSize = Align(currentSize, alignment);

-        if (@available(iOS 12, macOS 10.14, *)) {
-            NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
+        uint64_t maxBufferSize = QueryMaxBufferLength(ToBackend(GetDevice())->GetMTLDevice());
        if (currentSize > maxBufferSize) {
            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
        }
-#if defined(DAWN_PLATFORM_MACOS)
-        } else if (@available(macOS 10.12, *)) {
-            // |maxBufferLength| isn't always available on older systems. If available, use
-            // |recommendedMaxWorkingSetSize| instead. We can probably allocate more than this,
-            // but don't have a way to discover a better limit. MoltenVK also uses this heuristic.
-            uint64_t maxWorkingSetSize =
-                [ToBackend(GetDevice())->GetMTLDevice() recommendedMaxWorkingSetSize];
-            if (currentSize > maxWorkingSetSize) {
-                return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-            }
-#endif
-        } else if (currentSize > kMaxBufferSizeFallback) {
-            return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-        }

        mAllocatedSize = currentSize;
        mMtlBuffer.Acquire([ToBackend(GetDevice())->GetMTLDevice()