Populate some Metal limits up from the backend
Bug: dawn:685 Change-Id: Idd36423e0f1f65c46ff835bfe90632b68505050c Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/64983 Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
parent
167d299916
commit
f45478d65e
|
@ -21,6 +21,7 @@
|
||||||
#include "common/SystemUtils.h"
|
#include "common/SystemUtils.h"
|
||||||
#include "dawn_native/Instance.h"
|
#include "dawn_native/Instance.h"
|
||||||
#include "dawn_native/MetalBackend.h"
|
#include "dawn_native/MetalBackend.h"
|
||||||
|
#include "dawn_native/metal/BufferMTL.h"
|
||||||
#include "dawn_native/metal/DeviceMTL.h"
|
#include "dawn_native/metal/DeviceMTL.h"
|
||||||
|
|
||||||
#if defined(DAWN_PLATFORM_MACOS)
|
#if defined(DAWN_PLATFORM_MACOS)
|
||||||
|
@ -321,8 +322,211 @@ namespace dawn_native { namespace metal {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Metal GPU families known to this backend. The enumerators are declared in
// ascending order without explicit values, and that order is relied upon when a
// family is used as an index into the per-family limits table: Apple1..Apple7
// occupy indices 0..6, followed by Mac1 and Mac2 at indices 7 and 8.
enum class MTLGPUFamily {
    // Apple GPU families.
    Apple1,
    Apple2,
    Apple3,
    Apple4,
    Apple5,
    Apple6,
    Apple7,
    // Mac GPU families.
    Mac1,
    Mac2,
};
|
||||||
|
|
||||||
|
// Determines the most capable Metal GPU family that |mDevice| reports support for.
//
// Two query mechanisms are tried in order:
//   1. -[MTLDevice supportsFamily:] on OS versions that provide it. Families are
//      probed from most to least capable so the first match is the best one.
//   2. -[MTLDevice supportsFeatureSet:] with the legacy per-platform feature sets.
//
// Returns the detected family, or an internal error if neither mechanism
// recognizes the device.
ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
    // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc

    // -[MTLDevice supportsFamily:] is available starting with macOS 10.15 and iOS 13.0.
    // The iOS version must be spelled 13.0: a check for "iOS 10.13" would also pass on
    // iOS 11 and 12, where the selector does not exist.
    if (@available(macOS 10.15, iOS 13.0, *)) {
        if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
            return MTLGPUFamily::Mac2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
            return MTLGPUFamily::Mac1;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
            return MTLGPUFamily::Apple7;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
            return MTLGPUFamily::Apple6;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
            return MTLGPUFamily::Apple5;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
            return MTLGPUFamily::Apple4;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
            return MTLGPUFamily::Apple3;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
            return MTLGPUFamily::Apple2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
            return MTLGPUFamily::Apple1;
        }
    }

    // Fall back to the legacy feature-set queries. Each check is guarded by the OS
    // version that introduced the corresponding MTLFeatureSet constant.
#if TARGET_OS_OSX
    if (@available(macOS 10.14, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Mac2;
        }
    }
    if (@available(macOS 10.11, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Mac1;
        }
    }
#elif TARGET_OS_IOS
    // MTLFeatureSet_iOS_GPUFamily4_v1 was introduced in iOS 11.0 (not "iOS 10.11").
    if (@available(iOS 11.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
            return MTLGPUFamily::Apple4;
        }
    }
    if (@available(iOS 9.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
            return MTLGPUFamily::Apple3;
        }
    }
    if (@available(iOS 8.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Apple2;
        }
    }
    if (@available(iOS 8.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Apple1;
        }
    }
#endif
    return DAWN_INTERNAL_ERROR("Unsupported Metal device");
}
|
||||||
|
|
||||||
// Fills |limits| with the limits supported by this adapter.
//
// The Metal GPU family of the device is looked up first, then a table of
// per-family Metal limits is used to derive the WebGPU limits. Limits that are
// not derived from the table keep the values written by GetDefaultLimits().
// Returns an error if the GPU family cannot be determined.
MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
    // Raw Metal limits for one GPU family, named after the rows of the Metal
    // feature set tables.
    struct MTLDeviceLimits {
        uint32_t maxVertexAttribsPerDescriptor;
        uint32_t maxBufferArgumentEntriesPerFunc;
        uint32_t maxTextureArgumentEntriesPerFunc;
        uint32_t maxSamplerStateArgumentEntriesPerFunc;
        uint32_t maxThreadsPerThreadgroup;
        uint32_t maxTotalThreadgroupMemory;
        uint32_t maxFragmentInputComponents;
        uint32_t max1DTextureSize;
        uint32_t max2DTextureSize;
        uint32_t max3DTextureSize;
        uint32_t maxTextureArrayLayers;
        uint32_t minBufferOffsetAlignment;
    };

    // One table row: which MTLDeviceLimits member it fills, and the value of that
    // member for each of the 9 GPU families (indexed by MTLGPUFamily).
    struct LimitsForFamily {
        uint32_t MTLDeviceLimits::*limit;
        ityp::array<MTLGPUFamily, uint32_t, 9> values;
    };

    // clang-format off
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
    //                                                             |<------------------------ Apple ------------------------>|<----- Mac ----->|
    //                                                                   1,      2,      3,      4,      5,      6,      7,       1,      2
    constexpr LimitsForFamily kMTLLimits[12] = {
        {&MTLDeviceLimits::maxVertexAttribsPerDescriptor,         {    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u }},
        {&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc,       {    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u,    31u }},
        {&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc,      {    31u,    31u,    31u,    96u,    96u,   128u,   128u,   128u,   128u }},
        {&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, {    16u,    16u,    16u,    16u,    16u,    16u,    16u,    16u,    16u }},
        {&MTLDeviceLimits::maxThreadsPerThreadgroup,              {   512u,   512u,   512u,  1024u,  1024u,  1024u,  1024u,  1024u,  1024u }},
        {&MTLDeviceLimits::maxTotalThreadgroupMemory,             { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u, 32768u, 32768u }},
        {&MTLDeviceLimits::maxFragmentInputComponents,            {    60u,    60u,    60u,   124u,   124u,   124u,   124u,   124u,   124u }},
        {&MTLDeviceLimits::max1DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
        {&MTLDeviceLimits::max2DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u, 16384u }},
        {&MTLDeviceLimits::max3DTextureSize,                      {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u }},
        {&MTLDeviceLimits::maxTextureArrayLayers,                 {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u }},
        {&MTLDeviceLimits::minBufferOffsetAlignment,              {     4u,     4u,     4u,     4u,     4u,     4u,     4u,   256u,   256u }},
    };
    // clang-format on

    MTLGPUFamily gpuFamily;
    DAWN_TRY_ASSIGN(gpuFamily, GetMTLGPUFamily());

    // Pick the column of the table that matches the detected family. Every member
    // of |deviceLimits| has a row in the table, so all of them are written here.
    MTLDeviceLimits deviceLimits;
    for (const LimitsForFamily& row : kMTLLimits) {
        deviceLimits.*row.limit = row.values[gpuFamily];
    }

    // Start from the defaults, then raise individual limits below.
    auto& v1 = limits->v1;
    GetDefaultLimits(&v1);

    v1.maxTextureDimension1D = deviceLimits.max1DTextureSize;
    v1.maxTextureDimension2D = deviceLimits.max2DTextureSize;
    v1.maxTextureDimension3D = deviceLimits.max3DTextureSize;
    v1.maxTextureArrayLayers = deviceLimits.maxTextureArrayLayers;

    // Buffers: one of the per-function buffer slots is reserved to store buffer
    // lengths, the remaining ones are shared by storage, uniform and vertex buffers.
    const uint32_t usableBufferSlots = deviceLimits.maxBufferArgumentEntriesPerFunc - 1;
    const uint32_t defaultBufferSlots = v1.maxStorageBuffersPerShaderStage +
                                        v1.maxUniformBuffersPerShaderStage +
                                        v1.maxVertexBuffers;

    ASSERT(usableBufferSlots >= defaultBufferSlots);
    {
        // Split the spare slots in thirds; vertex buffers receive the remainder.
        const uint32_t spareSlots = usableBufferSlots - defaultBufferSlots;
        const uint32_t oneThird = spareSlots / 3;
        v1.maxStorageBuffersPerShaderStage += oneThird;
        v1.maxUniformBuffersPerShaderStage += oneThird;
        v1.maxVertexBuffers += (spareSlots - 2 * oneThird);
    }

    // Textures: spare slots are split in halves; storage textures receive the
    // remainder.
    const uint32_t defaultTextureSlots =
        v1.maxSampledTexturesPerShaderStage + v1.maxStorageTexturesPerShaderStage;

    ASSERT(deviceLimits.maxTextureArgumentEntriesPerFunc >= defaultTextureSlots);
    {
        const uint32_t spareSlots =
            deviceLimits.maxTextureArgumentEntriesPerFunc - defaultTextureSlots;
        const uint32_t oneHalf = spareSlots / 2;
        v1.maxSampledTexturesPerShaderStage += oneHalf;
        v1.maxStorageTexturesPerShaderStage += (spareSlots - oneHalf);
    }

    v1.maxSamplersPerShaderStage = deviceLimits.maxSamplerStateArgumentEntriesPerFunc;

    // Metal limits are per-function, so the layout limits are the same as the stage
    // limits. Note: this should likely change if the implementation uses Metal argument
    // buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
    // buffers may be set directly.
    //   Mac GPU families with tier 1 argument buffers support 64
    //   buffers, 128 textures, and 16 samplers. Mac GPU families
    //   with tier 2 argument buffers support 500000 buffers and
    //   textures, and 1024 unique samplers
    v1.maxDynamicUniformBuffersPerPipelineLayout = v1.maxUniformBuffersPerShaderStage;
    v1.maxDynamicStorageBuffersPerPipelineLayout = v1.maxStorageBuffersPerShaderStage;

    // The WebGPU limit is the limit across all vertex buffers, combined.
    v1.maxVertexAttributes = v1.maxVertexBuffers * deviceLimits.maxVertexAttribsPerDescriptor;

    v1.maxInterStageShaderComponents = deviceLimits.maxFragmentInputComponents;

    // Compute: the per-threadgroup thread count bounds the total invocation count
    // as well as each individual workgroup dimension.
    v1.maxComputeWorkgroupStorageSize = deviceLimits.maxTotalThreadgroupMemory;
    v1.maxComputeInvocationsPerWorkgroup = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeX = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeY = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeZ = deviceLimits.maxThreadsPerThreadgroup;

    v1.minUniformBufferOffsetAlignment = deviceLimits.minBufferOffsetAlignment;
    v1.minStorageBufferOffsetAlignment = deviceLimits.minBufferOffsetAlignment;

    // Metal has no documented limit on the size of a binding. Use the maximum
    // buffer size.
    // NOTE(review): the queried size is 64-bit; confirm the binding-size limit fields
    // are wide enough that this assignment cannot truncate.
    uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
    v1.maxUniformBufferBindingSize = maxBufferSize;
    v1.maxStorageBufferBindingSize = maxBufferSize;

    // TODO(crbug.com/dawn/685):
    // LIMITS NOT SET:
    //  - maxBindGroups
    //  - maxVertexBufferArrayStride

    return {};
}
|
||||||
|
|
||||||
|
@ -360,7 +564,8 @@ namespace dawn_native { namespace metal {
|
||||||
if (@available(iOS 8.0, *)) {
|
if (@available(iOS 8.0, *)) {
|
||||||
supportedVersion = YES;
|
supportedVersion = YES;
|
||||||
// iOS only has a single device so MTLCopyAllDevices doesn't exist there.
|
// iOS only has a single device so MTLCopyAllDevices doesn't exist there.
|
||||||
std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
|
std::unique_ptr<Adapter> adapter =
|
||||||
|
std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
|
||||||
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
|
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
|
||||||
adapters.push_back(std::move(adapter));
|
adapters.push_back(std::move(adapter));
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,8 @@ namespace dawn_native { namespace metal {
|
||||||
void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
|
void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
|
||||||
const CopyTextureToBufferCmd* copy);
|
const CopyTextureToBufferCmd* copy);
|
||||||
|
|
||||||
|
// Returns the maximum buffer length, in bytes, to assume for |mtlDevice|.
static uint64_t QueryMaxBufferLength(id<MTLDevice> mtlDevice);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using BufferBase::BufferBase;
|
using BufferBase::BufferBase;
|
||||||
MaybeError Initialize(bool mappedAtCreation);
|
MaybeError Initialize(bool mappedAtCreation);
|
||||||
|
|
|
@ -26,10 +26,6 @@ namespace dawn_native { namespace metal {
|
||||||
// largest alignment of supported data types
|
// largest alignment of supported data types
|
||||||
static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;
|
static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;
|
||||||
|
|
||||||
// The maximum buffer size if querying the maximum buffer size or recommended working set size
|
|
||||||
// is not available. This is a somewhat arbitrary limit of 1 GiB.
|
|
||||||
static constexpr uint32_t kMaxBufferSizeFallback = 1024u * 1024u * 1024u;
|
|
||||||
|
|
||||||
// static
|
// static
|
||||||
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
|
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
|
||||||
Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
|
Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
|
||||||
|
@ -37,6 +33,32 @@ namespace dawn_native { namespace metal {
|
||||||
return std::move(buffer);
|
return std::move(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// static
// Returns the maximum length, in bytes, of a buffer on |mtlDevice|.
//
// When -[MTLDevice maxBufferLength] is available its value is returned directly.
// Otherwise a fixed limit taken from the Metal feature set tables is returned.
// Every code path returns a value.
uint64_t Buffer::QueryMaxBufferLength(id<MTLDevice> mtlDevice) {
    if (@available(iOS 12, tvOS 12, macOS 10.14, *)) {
        return [mtlDevice maxBufferLength];
    }

    // Earlier versions of Metal had maximums defined in the Metal feature set tables
    // https://metalbyexample.com/wp-content/uploads/Metal-Feature-Set-Tables-2018.pdf
    constexpr uint64_t kMiB = 1024ull * 1024ull;

#if defined(DAWN_PLATFORM_MACOS)
    // macOS 10.12 and 10.13 have a 1 GiB limit.
    if (@available(macOS 10.12, *)) {
        return 1024ull * kMiB;
    }
#endif

    // 256 MiB limit on macOS 10.11 and on iOS / tvOS versions without
    // -[MTLDevice maxBufferLength]. This is also the conservative fallback, which
    // guarantees the function never falls off its end without returning.
    return 256ull * kMiB;
}
|
||||||
|
|
||||||
MaybeError Buffer::Initialize(bool mappedAtCreation) {
|
MaybeError Buffer::Initialize(bool mappedAtCreation) {
|
||||||
MTLResourceOptions storageMode;
|
MTLResourceOptions storageMode;
|
||||||
if (GetUsage() & kMappableBufferUsages) {
|
if (GetUsage() & kMappableBufferUsages) {
|
||||||
|
@ -80,23 +102,8 @@ namespace dawn_native { namespace metal {
|
||||||
}
|
}
|
||||||
currentSize = Align(currentSize, alignment);
|
currentSize = Align(currentSize, alignment);
|
||||||
|
|
||||||
if (@available(iOS 12, macOS 10.14, *)) {
|
uint64_t maxBufferSize = QueryMaxBufferLength(ToBackend(GetDevice())->GetMTLDevice());
|
||||||
NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
|
if (currentSize > maxBufferSize) {
|
||||||
if (currentSize > maxBufferSize) {
|
|
||||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
|
||||||
}
|
|
||||||
#if defined(DAWN_PLATFORM_MACOS)
|
|
||||||
} else if (@available(macOS 10.12, *)) {
|
|
||||||
// |maxBufferLength| isn't always available on older systems. If available, use
|
|
||||||
// |recommendedMaxWorkingSetSize| instead. We can probably allocate more than this,
|
|
||||||
// but don't have a way to discover a better limit. MoltenVK also uses this heuristic.
|
|
||||||
uint64_t maxWorkingSetSize =
|
|
||||||
[ToBackend(GetDevice())->GetMTLDevice() recommendedMaxWorkingSetSize];
|
|
||||||
if (currentSize > maxWorkingSetSize) {
|
|
||||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
} else if (currentSize > kMaxBufferSizeFallback) {
|
|
||||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue