diff --git a/src/dawn_native/metal/BackendMTL.mm b/src/dawn_native/metal/BackendMTL.mm
index caea140c9d..9ebccdb517 100644
--- a/src/dawn_native/metal/BackendMTL.mm
+++ b/src/dawn_native/metal/BackendMTL.mm
@@ -21,6 +21,7 @@
 #include "common/SystemUtils.h"
 #include "dawn_native/Instance.h"
 #include "dawn_native/MetalBackend.h"
+#include "dawn_native/metal/BufferMTL.h"
 #include "dawn_native/metal/DeviceMTL.h"
 
 #if defined(DAWN_PLATFORM_MACOS)
@@ -321,8 +322,226 @@ namespace dawn_native { namespace metal {
             return {};
         }
 
+        // GPU families distinguished when selecting limits. The enumerator order matches the
+        // column order of the kMTLLimits table in InitializeSupportedLimitsImpl.
+        enum class MTLGPUFamily {
+            Apple1,
+            Apple2,
+            Apple3,
+            Apple4,
+            Apple5,
+            Apple6,
+            Apple7,
+            Mac1,
+            Mac2,
+        };
+
+        // Returns the first GPU family that |mDevice| reports support for, trying Mac2, Mac1,
+        // then Apple7 down to Apple1. Uses supportsFamily: where the OS provides it and the
+        // legacy feature-set queries otherwise. Fails when no known family matches.
+        ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
+            // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc
+
+            // supportsFamily: is available starting with macOS 10.15 and iOS 13.0.
+            if (@available(macOS 10.15, iOS 13.0, *)) {
+                if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
+                    return MTLGPUFamily::Mac2;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
+                    return MTLGPUFamily::Mac1;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
+                    return MTLGPUFamily::Apple7;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
+                    return MTLGPUFamily::Apple6;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
+                    return MTLGPUFamily::Apple5;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
+                    return MTLGPUFamily::Apple4;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
+                    return MTLGPUFamily::Apple3;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
+                    return MTLGPUFamily::Apple2;
+                }
+                if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
+                    return MTLGPUFamily::Apple1;
+                }
+            }
+
+            // Older OS versions only provide the feature-set queries.
+#if TARGET_OS_OSX
+            if (@available(macOS 10.14, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
+                    return MTLGPUFamily::Mac2;
+                }
+            }
+            if (@available(macOS 10.11, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
+                    return MTLGPUFamily::Mac1;
+                }
+            }
+#elif TARGET_OS_IOS
+            if (@available(iOS 11.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
+                    return MTLGPUFamily::Apple4;
+                }
+            }
+            if (@available(iOS 9.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
+                    return MTLGPUFamily::Apple3;
+                }
+            }
+            if (@available(iOS 8.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
+                    return MTLGPUFamily::Apple2;
+                }
+            }
+            if (@available(iOS 8.0, *)) {
+                if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
+                    return MTLGPUFamily::Apple1;
+                }
+            }
+#endif
+            return DAWN_INTERNAL_ERROR("Unsupported Metal device");
+        }
+
+        // Fills |limits| with the default limits, then overrides the ones that can be derived
+        // from the Metal GPU family of |mDevice|.
         MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
+            // Metal limits looked up from the per-family table below.
+            struct MTLDeviceLimits {
+                uint32_t maxVertexAttribsPerDescriptor;
+                uint32_t maxBufferArgumentEntriesPerFunc;
+                uint32_t maxTextureArgumentEntriesPerFunc;
+                uint32_t maxSamplerStateArgumentEntriesPerFunc;
+                uint32_t maxThreadsPerThreadgroup;
+                uint32_t maxTotalThreadgroupMemory;
+                uint32_t maxFragmentInputComponents;
+                uint32_t max1DTextureSize;
+                uint32_t max2DTextureSize;
+                uint32_t max3DTextureSize;
+                uint32_t maxTextureArrayLayers;
+                uint32_t minBufferOffsetAlignment;
+            };
+
+            // One table row: the MTLDeviceLimits member to fill and its value for each family.
+            struct LimitsForFamily {
+                uint32_t MTLDeviceLimits::*limit;
+                ityp::array<MTLGPUFamily, uint32_t, 9> values;
+            };
+
+            // clang-format off
+            // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+            //                                                          Apple                                                   Mac
+            //                                                               1,      2,      3,      4,      5,      6,      7,      1,      2
+            constexpr LimitsForFamily kMTLLimits[12] = {
+                {&MTLDeviceLimits::maxVertexAttribsPerDescriptor,         {    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u }},
+                {&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc,       {    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u }},
+                {&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc,      {    31u,    31u,    31u,    96u,    96u,   128u,   128u,   128u,   128u }},
+                {&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, {    16u,    16u,    16u,    16u,    16u,    16u,    16u,    16u,    16u }},
+                {&MTLDeviceLimits::maxThreadsPerThreadgroup,              {   512u,   512u,   512u,  1024u,  1024u,  1024u,  1024u,  1024u,  1024u }},
+                {&MTLDeviceLimits::maxTotalThreadgroupMemory,             { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u, 32768u, 32768u }},
+                {&MTLDeviceLimits::maxFragmentInputComponents,            {    60u,    60u,    60u,   124u,   124u,   124u,   124u,   124u,   124u }},
+                {&MTLDeviceLimits::max1DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
+                {&MTLDeviceLimits::max2DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
+                {&MTLDeviceLimits::max3DTextureSize,                      {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u }},
+                {&MTLDeviceLimits::maxTextureArrayLayers,                 {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u }},
+                {&MTLDeviceLimits::minBufferOffsetAlignment,              {     4u,     4u,     4u,     4u,     4u,     4u,     4u,   256u,   256u }},
+            };
+            // clang-format on
+
+            MTLGPUFamily mtlGPUFamily;
+            DAWN_TRY_ASSIGN(mtlGPUFamily, GetMTLGPUFamily());
+
+            // Copy the table column for the detected family into mtlLimits.
+            MTLDeviceLimits mtlLimits;
+            for (const auto& limitsForFamily : kMTLLimits) {
+                mtlLimits.*limitsForFamily.limit = limitsForFamily.values[mtlGPUFamily];
+            }
+
             GetDefaultLimits(&limits->v1);
+
+            limits->v1.maxTextureDimension1D = mtlLimits.max1DTextureSize;
+            limits->v1.maxTextureDimension2D = mtlLimits.max2DTextureSize;
+            limits->v1.maxTextureDimension3D = mtlLimits.max3DTextureSize;
+            limits->v1.maxTextureArrayLayers = mtlLimits.maxTextureArrayLayers;
+
+            uint32_t maxBuffersPerStage = mtlLimits.maxBufferArgumentEntriesPerFunc;
+            maxBuffersPerStage -= 1;  // One slot is reserved to store buffer lengths.
+
+            uint32_t baseMaxBuffersPerStage = limits->v1.maxStorageBuffersPerShaderStage +
+                                              limits->v1.maxUniformBuffersPerShaderStage +
+                                              limits->v1.maxVertexBuffers;
+
+            // Share the buffer slots beyond the defaults: storage and uniform buffers each get
+            // a third, and vertex buffers get the rest.
+            ASSERT(maxBuffersPerStage >= baseMaxBuffersPerStage);
+            {
+                uint32_t additional = maxBuffersPerStage - baseMaxBuffersPerStage;
+                limits->v1.maxStorageBuffersPerShaderStage += additional / 3;
+                limits->v1.maxUniformBuffersPerShaderStage += additional / 3;
+                limits->v1.maxVertexBuffers += (additional - 2 * (additional / 3));
+            }
+
+            uint32_t baseMaxTexturesPerStage = limits->v1.maxSampledTexturesPerShaderStage +
+                                               limits->v1.maxStorageTexturesPerShaderStage;
+
+            // Share the texture slots beyond the defaults between sampled and storage textures.
+            ASSERT(mtlLimits.maxTextureArgumentEntriesPerFunc >= baseMaxTexturesPerStage);
+            {
+                uint32_t additional =
+                    mtlLimits.maxTextureArgumentEntriesPerFunc - baseMaxTexturesPerStage;
+                limits->v1.maxSampledTexturesPerShaderStage += additional / 2;
+                limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
+            }
+
+            limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
+
+            // Metal limits are per-function, so the layout limits are the same as the stage
+            // limits. Note: this should likely change if the implementation uses Metal argument
+            // buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
+            // buffers may be set directly.
+            //   Mac GPU families with tier 1 argument buffers support 64
+            //   buffers, 128 textures, and 16 samplers. Mac GPU families
+            //   with tier 2 argument buffers support 500000 buffers and
+            //   textures, and 1024 unique samplers
+            limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
+                limits->v1.maxUniformBuffersPerShaderStage;
+            limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
+                limits->v1.maxStorageBuffersPerShaderStage;
+
+            // The WebGPU limit is the limit across all vertex buffers, combined.
+            limits->v1.maxVertexAttributes =
+                limits->v1.maxVertexBuffers * mtlLimits.maxVertexAttribsPerDescriptor;
+
+            limits->v1.maxInterStageShaderComponents = mtlLimits.maxFragmentInputComponents;
+
+            limits->v1.maxComputeWorkgroupStorageSize = mtlLimits.maxTotalThreadgroupMemory;
+            limits->v1.maxComputeInvocationsPerWorkgroup = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeX = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeY = mtlLimits.maxThreadsPerThreadgroup;
+            limits->v1.maxComputeWorkgroupSizeZ = mtlLimits.maxThreadsPerThreadgroup;
+
+            limits->v1.minUniformBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
+            limits->v1.minStorageBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
+
+            uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
+
+            // Metal has no documented limit on the size of a binding. Use the maximum
+            // buffer size.
+            limits->v1.maxUniformBufferBindingSize = maxBufferSize;
+            limits->v1.maxStorageBufferBindingSize = maxBufferSize;
+
+            // TODO(crbug.com/dawn/685):
+            // LIMITS NOT SET:
+            //  - maxBindGroups
+            //  - maxVertexBufferArrayStride
+
             return {};
         }
 
@@ -360,7 +579,8 @@ namespace dawn_native { namespace metal {
         if (@available(iOS 8.0, *)) {
             supportedVersion = YES;
             // iOS only has a single device so MTLCopyAllDevices doesn't exist there.
-            std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
+            std::unique_ptr<Adapter> adapter =
+                std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
             if (!GetInstance()->ConsumedError(adapter->Initialize())) {
                 adapters.push_back(std::move(adapter));
             }
diff --git a/src/dawn_native/metal/BufferMTL.h b/src/dawn_native/metal/BufferMTL.h
index 0c7c5e28b4..cf6dc6f6c6 100644
--- a/src/dawn_native/metal/BufferMTL.h
+++ b/src/dawn_native/metal/BufferMTL.h
@@ -39,6 +39,10 @@ namespace dawn_native { namespace metal {
         void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
                                                 const CopyTextureToBufferCmd* copy);
 
+        // Returns the maximum buffer length for |mtlDevice|: the value reported by the device
+        // when the OS exposes it, otherwise a fixed limit for older OS versions.
+        static uint64_t QueryMaxBufferLength(id<MTLDevice> mtlDevice);
+
       private:
         using BufferBase::BufferBase;
         MaybeError Initialize(bool mappedAtCreation);
diff --git a/src/dawn_native/metal/BufferMTL.mm b/src/dawn_native/metal/BufferMTL.mm
index c2c5a313ad..2b9c2fee01 100644
--- a/src/dawn_native/metal/BufferMTL.mm
+++ b/src/dawn_native/metal/BufferMTL.mm
@@ -26,10 +26,6 @@ namespace dawn_native { namespace metal {
     // largest alignment of supported data types
     static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;
 
-    // The maximum buffer size if querying the maximum buffer size or recommended working set size
-    // is not available. This is a somewhat arbitrary limit of 1 GiB.
-    static constexpr uint32_t kMaxBufferSizeFallback = 1024u * 1024u * 1024u;
-
     // static
     ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
         Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
@@ -37,6 +33,32 @@ namespace dawn_native { namespace metal {
         return std::move(buffer);
     }
 
+    // Returns the maximum buffer length, in bytes, for |mtlDevice|: the value reported by the
+    // device when the OS exposes it, otherwise a fixed limit for older OS versions.
+    // static
+    uint64_t Buffer::QueryMaxBufferLength(id<MTLDevice> mtlDevice) {
+        if (@available(iOS 12, tvOS 12, macOS 10.14, *)) {
+            return [mtlDevice maxBufferLength];
+        }
+
+        // |maxBufferLength| is unavailable on earlier OS versions, so use the maximums defined
+        // in the Metal feature set tables:
+        // https://metalbyexample.com/wp-content/uploads/Metal-Feature-Set-Tables-2018.pdf
+        constexpr uint64_t kMiB = 1024u * 1024u;
+#if defined(DAWN_PLATFORM_MACOS)
+        // macOS 10.12 and 10.13 have a 1 GiB limit.
+        if (@available(macOS 10.12, *)) {
+            return 1024u * kMiB;
+        }
+        // macOS 10.11 has a 256 MiB limit. Returning it unconditionally here guarantees that
+        // every path through this function returns a value.
+        return 256u * kMiB;
+#else
+        // iOS / tvOS: 256 MiB limit in versions without [MTLDevice maxBufferLength].
+        return 256u * kMiB;
+#endif
+    }
+
     MaybeError Buffer::Initialize(bool mappedAtCreation) {
         MTLResourceOptions storageMode;
         if (GetUsage() & kMappableBufferUsages) {
@@ -80,23 +102,8 @@ namespace dawn_native { namespace metal {
         }
         currentSize = Align(currentSize, alignment);
 
-        if (@available(iOS 12, macOS 10.14, *)) {
-            NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
-            if (currentSize > maxBufferSize) {
-                return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-            }
-#if defined(DAWN_PLATFORM_MACOS)
-        } else if (@available(macOS 10.12, *)) {
-            // |maxBufferLength| isn't always available on older systems. If available, use
-            // |recommendedMaxWorkingSetSize| instead. We can probably allocate more than this,
-            // but don't have a way to discover a better limit. MoltenVK also uses this heuristic.
-            uint64_t maxWorkingSetSize =
-                [ToBackend(GetDevice())->GetMTLDevice() recommendedMaxWorkingSetSize];
-            if (currentSize > maxWorkingSetSize) {
-                return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
-            }
-#endif
-        } else if (currentSize > kMaxBufferSizeFallback) {
+        uint64_t maxBufferSize = QueryMaxBufferLength(ToBackend(GetDevice())->GetMTLDevice());
+        if (currentSize > maxBufferSize) {
             return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
         }
 