Populate some Metal limits up from the backend

Bug: dawn:685
Change-Id: Idd36423e0f1f65c46ff835bfe90632b68505050c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/64983
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
Austin Eng 2021-10-26 16:56:36 +00:00 committed by Dawn LUCI CQ
parent 167d299916
commit f45478d65e
3 changed files with 236 additions and 22 deletions

View File

@ -21,6 +21,7 @@
#include "common/SystemUtils.h"
#include "dawn_native/Instance.h"
#include "dawn_native/MetalBackend.h"
#include "dawn_native/metal/BufferMTL.h"
#include "dawn_native/metal/DeviceMTL.h"
#if defined(DAWN_PLATFORM_MACOS)
@ -321,8 +322,211 @@ namespace dawn_native { namespace metal {
return {};
}
// GPU families distinguished when looking up Metal limits.
// NOTE: the enumerator order is significant. Values of this enum are used as the index
// into the per-family limit table in InitializeSupportedLimitsImpl, whose columns are
// laid out as Apple1..Apple7 followed by Mac1, Mac2. Keep both in sync when adding a family.
enum class MTLGPUFamily {
Apple1,
Apple2,
Apple3,
Apple4,
Apple5,
Apple6,
Apple7,
Mac1,
Mac2,
};
// Determines which MTLGPUFamily |mDevice| belongs to.
//
// On OS versions that provide -[MTLDevice supportsFamily:] the device is queried for each
// family, checking Mac2, Mac1 and then Apple7 down to Apple1, and the first match is
// returned. If that yields nothing (or the API is unavailable), the older
// -[MTLDevice supportsFeatureSet:] API is used as a fallback, again from the most to the
// least capable feature set of the current platform.
//
// Returns the detected family, or an internal error if the device matches none of them.
ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
    // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc

    // -[MTLDevice supportsFamily:] was introduced in macOS 10.15 and iOS 13.0. The iOS
    // version must be spelled 13.0: a guard such as "iOS 10.13" would also pass on
    // iOS 11 and 12, where the selector does not exist.
    if (@available(macOS 10.15, iOS 13.0, *)) {
        if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
            return MTLGPUFamily::Mac2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
            return MTLGPUFamily::Mac1;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
            return MTLGPUFamily::Apple7;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
            return MTLGPUFamily::Apple6;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
            return MTLGPUFamily::Apple5;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
            return MTLGPUFamily::Apple4;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
            return MTLGPUFamily::Apple3;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
            return MTLGPUFamily::Apple2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
            return MTLGPUFamily::Apple1;
        }
    }

    // Fallback for older OS versions: use the feature set API.
#if TARGET_OS_OSX
    if (@available(macOS 10.14, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Mac2;
        }
    }
    if (@available(macOS 10.11, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Mac1;
        }
    }
#elif TARGET_OS_IOS
    // MTLFeatureSet_iOS_GPUFamily4_v1 was introduced in iOS 11.0.
    if (@available(iOS 11.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
            return MTLGPUFamily::Apple4;
        }
    }
    if (@available(iOS 9.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
            return MTLGPUFamily::Apple3;
        }
    }
    if (@available(iOS 8.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Apple2;
        }
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Apple1;
        }
    }
#endif
    return DAWN_INTERNAL_ERROR("Unsupported Metal device");
}
// Fills |limits| with the limits supported by this adapter's Metal device.
//
// The GPU family is detected with GetMTLGPUFamily(), the matching column of a hard-coded
// copy of the Metal feature set table is loaded into a local MTLDeviceLimits, and those
// values are then translated into the fields of limits->v1.
//
// Returns an error if GetMTLGPUFamily() fails; otherwise returns success.
MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
// Metal-side limits, named after the rows of the feature set table below.
struct MTLDeviceLimits {
uint32_t maxVertexAttribsPerDescriptor;
uint32_t maxBufferArgumentEntriesPerFunc;
uint32_t maxTextureArgumentEntriesPerFunc;
uint32_t maxSamplerStateArgumentEntriesPerFunc;
uint32_t maxThreadsPerThreadgroup;
uint32_t maxTotalThreadgroupMemory;
uint32_t maxFragmentInputComponents;
uint32_t max1DTextureSize;
uint32_t max2DTextureSize;
uint32_t max3DTextureSize;
uint32_t maxTextureArrayLayers;
uint32_t minBufferOffsetAlignment;
};
// One table row: |limit| selects the MTLDeviceLimits member the row fills in, and
// |values| holds that member's value for each of the 9 MTLGPUFamily enumerators.
struct LimitsForFamily {
uint32_t MTLDeviceLimits::*limit;
ityp::array<MTLGPUFamily, uint32_t, 9> values;
};
// clang-format off
// https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
// Column order follows the MTLGPUFamily enum: Apple1..Apple7, then Mac1, Mac2.
// Apple Mac
// 1, 2, 3, 4, 5, 6, 7, 1, 2
constexpr LimitsForFamily kMTLLimits[12] = {
{&MTLDeviceLimits::maxVertexAttribsPerDescriptor, { 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u }},
{&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc, { 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u, 31u }},
{&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc, { 31u, 31u, 31u, 96u, 96u, 128u, 128u, 128u, 128u }},
{&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, { 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u }},
{&MTLDeviceLimits::maxThreadsPerThreadgroup, { 512u, 512u, 512u, 1024u, 1024u, 1024u, 1024u, 1024u, 1024u }},
{&MTLDeviceLimits::maxTotalThreadgroupMemory, { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u, 32768u, 32768u }},
{&MTLDeviceLimits::maxFragmentInputComponents, { 60u, 60u, 60u, 124u, 124u, 124u, 124u, 124u, 124u }},
{&MTLDeviceLimits::max1DTextureSize, { 8192u, 8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
{&MTLDeviceLimits::max2DTextureSize, { 8192u, 8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
{&MTLDeviceLimits::max3DTextureSize, { 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u }},
{&MTLDeviceLimits::maxTextureArrayLayers, { 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u, 2048u }},
{&MTLDeviceLimits::minBufferOffsetAlignment, { 4u, 4u, 4u, 4u, 4u, 4u, 4u, 256u, 256u }},
};
// clang-format on
MTLGPUFamily mtlGPUFamily;
DAWN_TRY_ASSIGN(mtlGPUFamily, GetMTLGPUFamily());
// Load the detected family's column into |mtlLimits|. The table has one row per
// MTLDeviceLimits member, so every member is assigned by this loop.
MTLDeviceLimits mtlLimits;
for (const auto& limitsForFamily : kMTLLimits) {
mtlLimits.*limitsForFamily.limit = limitsForFamily.values[mtlGPUFamily];
}
// NOTE(review): presumably populates limits->v1 with the baseline default limits, which
// the code below then overrides or raises - confirm against GetDefaultLimits.
GetDefaultLimits(&limits->v1);
limits->v1.maxTextureDimension1D = mtlLimits.max1DTextureSize;
limits->v1.maxTextureDimension2D = mtlLimits.max2DTextureSize;
limits->v1.maxTextureDimension3D = mtlLimits.max3DTextureSize;
limits->v1.maxTextureArrayLayers = mtlLimits.maxTextureArrayLayers;
uint32_t maxBuffersPerStage = mtlLimits.maxBufferArgumentEntriesPerFunc;
maxBuffersPerStage -= 1; // One slot is reserved to store buffer lengths.
// Storage, uniform and vertex buffers all draw from the same per-function buffer slots.
// The sum is taken from the values currently in limits->v1, before they are raised below.
uint32_t baseMaxBuffersPerStage = limits->v1.maxStorageBuffersPerShaderStage +
limits->v1.maxUniformBuffersPerShaderStage +
limits->v1.maxVertexBuffers;
ASSERT(maxBuffersPerStage >= baseMaxBuffersPerStage);
{
// Hand out the spare buffer slots in thirds: one third each to storage and uniform
// buffers, and the rest (including any division remainder) to vertex buffers.
uint32_t additional = maxBuffersPerStage - baseMaxBuffersPerStage;
limits->v1.maxStorageBuffersPerShaderStage += additional / 3;
limits->v1.maxUniformBuffersPerShaderStage += additional / 3;
limits->v1.maxVertexBuffers += (additional - 2 * (additional / 3));
}
// Sampled and storage textures share the per-function texture slots.
uint32_t baseMaxTexturesPerStage = limits->v1.maxSampledTexturesPerShaderStage +
limits->v1.maxStorageTexturesPerShaderStage;
ASSERT(mtlLimits.maxTextureArgumentEntriesPerFunc >= baseMaxTexturesPerStage);
{
// Split the spare texture slots in half; an odd remainder goes to storage textures.
uint32_t additional =
mtlLimits.maxTextureArgumentEntriesPerFunc - baseMaxTexturesPerStage;
limits->v1.maxSampledTexturesPerShaderStage += additional / 2;
limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
}
limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;
// Metal limits are per-function, so the layout limits are the same as the stage
// limits. Note: this should likely change if the implementation uses Metal argument
// buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
// buffers may be set directly.
// Mac GPU families with tier 1 argument buffers support 64
// buffers, 128 textures, and 16 samplers. Mac GPU families
// with tier 2 argument buffers support 500000 buffers and
// textures, and 1024 unique samplers
limits->v1.maxDynamicUniformBuffersPerPipelineLayout =
limits->v1.maxUniformBuffersPerShaderStage;
limits->v1.maxDynamicStorageBuffersPerPipelineLayout =
limits->v1.maxStorageBuffersPerShaderStage;
// The WebGPU limit is the limit across all vertex buffers, combined.
limits->v1.maxVertexAttributes =
limits->v1.maxVertexBuffers * mtlLimits.maxVertexAttribsPerDescriptor;
limits->v1.maxInterStageShaderComponents = mtlLimits.maxFragmentInputComponents;
limits->v1.maxComputeWorkgroupStorageSize = mtlLimits.maxTotalThreadgroupMemory;
// The per-threadgroup thread count is used both as the total invocation limit and as
// the limit for each individual workgroup dimension.
limits->v1.maxComputeInvocationsPerWorkgroup = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeX = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeY = mtlLimits.maxThreadsPerThreadgroup;
limits->v1.maxComputeWorkgroupSizeZ = mtlLimits.maxThreadsPerThreadgroup;
// The same Metal alignment value is used for both uniform and storage buffer offsets.
limits->v1.minUniformBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
limits->v1.minStorageBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
// Metal has no documented limit on the size of a binding. Use the maximum
// buffer size.
// NOTE(review): |maxBufferSize| is 64-bit; confirm the destination fields are wide
// enough that these assignments cannot truncate.
limits->v1.maxUniformBufferBindingSize = maxBufferSize;
limits->v1.maxStorageBufferBindingSize = maxBufferSize;
// TODO(crbug.com/dawn/685):
// LIMITS NOT SET:
// - maxBindGroups
// - maxVertexBufferArrayStride
return {};
}
@ -360,7 +564,8 @@ namespace dawn_native { namespace metal {
if (@available(iOS 8.0, *)) {
supportedVersion = YES;
// iOS only has a single device so MTLCopyAllDevices doesn't exist there.
std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
std::unique_ptr<Adapter> adapter =
std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
adapters.push_back(std::move(adapter));
}

View File

@ -39,6 +39,8 @@ namespace dawn_native { namespace metal {
void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
const CopyTextureToBufferCmd* copy);
// Returns the maximum buffer length, in bytes, to assume for |mtlDevice|: the device's
// reported maxBufferLength on OS versions where that property is available, and an
// OS-dependent constant otherwise.
static uint64_t QueryMaxBufferLength(id<MTLDevice> mtlDevice);
private:
using BufferBase::BufferBase;
MaybeError Initialize(bool mappedAtCreation);

View File

@ -26,10 +26,6 @@ namespace dawn_native { namespace metal {
// largest alignment of supported data types
static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;
// The maximum buffer size if querying the maximum buffer size or recommended working set size
// is not available. This is a somewhat arbitrary limit of 1 GiB.
static constexpr uint32_t kMaxBufferSizeFallback = 1024u * 1024u * 1024u;
// static
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
@ -37,6 +33,32 @@ namespace dawn_native { namespace metal {
return std::move(buffer);
}
// static
// Returns the maximum length, in bytes, of a buffer that can be created on |mtlDevice|.
//
// Where -[MTLDevice maxBufferLength] is available (iOS 12, tvOS 12, macOS 10.14) the
// device is queried directly. On older OS versions the limit comes from the Metal
// feature set tables instead: 1 GiB on macOS 10.12 and 10.13, and 256 MiB on
// macOS 10.11 and on older iOS / tvOS releases.
uint64_t Buffer::QueryMaxBufferLength(id<MTLDevice> mtlDevice) {
    if (@available(iOS 12, tvOS 12, macOS 10.14, *)) {
        return [mtlDevice maxBufferLength];
    }

    // Earlier versions of Metal had maximums defined in the Metal feature set tables
    // https://metalbyexample.com/wp-content/uploads/Metal-Feature-Set-Tables-2018.pdf
#if defined(DAWN_PLATFORM_MACOS)
    // macOS 10.12 and 10.13 have a 1 GiB limit.
    if (@available(macOS 10.12, *)) {
        return 1024 * 1024 * 1024;
    }
    // Anything older is macOS 10.11, the first macOS release with Metal, which has a
    // 256 MiB limit. Returning unconditionally here guarantees that every path through
    // the function returns a value.
    return 256 * 1024 * 1024;
#else
    // iOS / tvOS: 256 MiB limit in versions without [MTLDevice maxBufferLength].
    return 256 * 1024 * 1024;
#endif
}
MaybeError Buffer::Initialize(bool mappedAtCreation) {
MTLResourceOptions storageMode;
if (GetUsage() & kMappableBufferUsages) {
@ -80,23 +102,8 @@ namespace dawn_native { namespace metal {
}
currentSize = Align(currentSize, alignment);
if (@available(iOS 12, macOS 10.14, *)) {
NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
if (currentSize > maxBufferSize) {
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}
#if defined(DAWN_PLATFORM_MACOS)
} else if (@available(macOS 10.12, *)) {
// |maxBufferLength| isn't always available on older systems. If available, use
// |recommendedMaxWorkingSetSize| instead. We can probably allocate more than this,
// but don't have a way to discover a better limit. MoltenVK also uses this heuristic.
uint64_t maxWorkingSetSize =
[ToBackend(GetDevice())->GetMTLDevice() recommendedMaxWorkingSetSize];
if (currentSize > maxWorkingSetSize) {
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}
#endif
} else if (currentSize > kMaxBufferSizeFallback) {
uint64_t maxBufferSize = QueryMaxBufferLength(ToBackend(GetDevice())->GetMTLDevice());
if (currentSize > maxBufferSize) {
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
}