Populate some Metal limits up from the backend
Bug: dawn:685
Change-Id: Idd36423e0f1f65c46ff835bfe90632b68505050c
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/64983
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Austin Eng <enga@chromium.org>
This commit is contained in:
parent
167d299916
commit
f45478d65e
|
@ -21,6 +21,7 @@
|
|||
#include "common/SystemUtils.h"
|
||||
#include "dawn_native/Instance.h"
|
||||
#include "dawn_native/MetalBackend.h"
|
||||
#include "dawn_native/metal/BufferMTL.h"
|
||||
#include "dawn_native/metal/DeviceMTL.h"
|
||||
|
||||
#if defined(DAWN_PLATFORM_MACOS)
|
||||
|
@ -321,8 +322,211 @@ namespace dawn_native { namespace metal {
|
|||
return {};
|
||||
}
|
||||
|
||||
// GPU families used to look up per-family Metal limits. The enumerators are
// used as indices into per-family value tables, so their order (Apple1..Apple7
// followed by Mac1, Mac2) must match the column order of those tables.
enum class MTLGPUFamily {
    Apple1 = 0,
    Apple2 = 1,
    Apple3 = 2,
    Apple4 = 3,
    Apple5 = 4,
    Apple6 = 5,
    Apple7 = 6,
    Mac1 = 7,
    Mac2 = 8,
};
|
||||
|
||||
// Returns the first GPU family, in the order checked below, that |mDevice|
// reports support for. Mac families are checked before Apple families, and
// higher-numbered families before lower-numbered ones. Returns an internal
// error if the device matches none of the known families.
ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
    // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc

    // -[MTLDevice supportsFamily:] was introduced in macOS 10.15 and iOS 13.0.
    // The iOS version must be spelled 13.0: "10.13" is a macOS-style version
    // number that also evaluates to true on iOS 11 and 12, where the selector
    // does not exist.
    if (@available(macOS 10.15, iOS 13.0, *)) {
        if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
            return MTLGPUFamily::Mac2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
            return MTLGPUFamily::Mac1;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
            return MTLGPUFamily::Apple7;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
            return MTLGPUFamily::Apple6;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
            return MTLGPUFamily::Apple5;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
            return MTLGPUFamily::Apple4;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
            return MTLGPUFamily::Apple3;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
            return MTLGPUFamily::Apple2;
        }
        if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
            return MTLGPUFamily::Apple1;
        }
    }

    // Fall back to the older feature-set queries when supportsFamily: is not
    // available or did not match any family.
#if TARGET_OS_OSX
    if (@available(macOS 10.14, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Mac2;
        }
    }
    if (@available(macOS 10.11, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Mac1;
        }
    }
#elif TARGET_OS_IOS
    // MTLFeatureSet_iOS_GPUFamily4_v1 was introduced in iOS 11.0.
    if (@available(iOS 11.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
            return MTLGPUFamily::Apple4;
        }
    }
    if (@available(iOS 9.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
            return MTLGPUFamily::Apple3;
        }
    }
    if (@available(iOS 8.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
            return MTLGPUFamily::Apple2;
        }
    }
    if (@available(iOS 8.0, *)) {
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Apple1;
        }
    }
#endif
    return DAWN_INTERNAL_ERROR("Unsupported Metal device");
}
|
||||
|
||||
// Fills |limits->v1| for this adapter: starts from the default limits, looks up
// the Metal limits for the device's GPU family in a static table, and then
// raises or overrides individual limits from those values. Propagates the error
// from GetMTLGPUFamily() if the GPU family cannot be determined.
MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
    // Metal-side limits, as listed in the feature set tables.
    struct MTLDeviceLimits {
        uint32_t maxVertexAttribsPerDescriptor;
        uint32_t maxBufferArgumentEntriesPerFunc;
        uint32_t maxTextureArgumentEntriesPerFunc;
        uint32_t maxSamplerStateArgumentEntriesPerFunc;
        uint32_t maxThreadsPerThreadgroup;
        uint32_t maxTotalThreadgroupMemory;
        uint32_t maxFragmentInputComponents;
        uint32_t max1DTextureSize;
        uint32_t max2DTextureSize;
        uint32_t max3DTextureSize;
        uint32_t maxTextureArrayLayers;
        uint32_t minBufferOffsetAlignment;
    };

    // One table row: the MTLDeviceLimits member it populates, and the value of
    // that member for each GPU family (indexed by MTLGPUFamily).
    struct LimitsForFamily {
        uint32_t MTLDeviceLimits::*limit;
        ityp::array<MTLGPUFamily, uint32_t, 9> values;
    };

    // clang-format off
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
    //                                                               Apple                                                      Mac
    //                                                                   1,      2,      3,      4,      5,      6,      7,       1,      2
    constexpr LimitsForFamily kMTLLimits[12] = {
        {&MTLDeviceLimits::maxVertexAttribsPerDescriptor,         {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
        {&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc,       {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
        {&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc,      {    31u,    31u,    31u,    96u,    96u,   128u,   128u,    128u,   128u }},
        {&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, {    16u,    16u,    16u,    16u,    16u,    16u,    16u,     16u,    16u }},
        {&MTLDeviceLimits::maxThreadsPerThreadgroup,              {   512u,   512u,   512u,  1024u,  1024u,  1024u,  1024u,   1024u,  1024u }},
        {&MTLDeviceLimits::maxTotalThreadgroupMemory,             { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u,  32768u, 32768u }},
        {&MTLDeviceLimits::maxFragmentInputComponents,            {    60u,    60u,    60u,   124u,   124u,   124u,   124u,    124u,   124u }},
        {&MTLDeviceLimits::max1DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
        {&MTLDeviceLimits::max2DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
        {&MTLDeviceLimits::max3DTextureSize,                      {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
        {&MTLDeviceLimits::maxTextureArrayLayers,                 {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
        {&MTLDeviceLimits::minBufferOffsetAlignment,              {     4u,     4u,     4u,     4u,     4u,     4u,     4u,    256u,   256u }},
    };
    // clang-format on

    MTLGPUFamily family;
    DAWN_TRY_ASSIGN(family, GetMTLGPUFamily());

    // Every member of |deviceLimits| is written by exactly one table row.
    MTLDeviceLimits deviceLimits;
    for (const LimitsForFamily& row : kMTLLimits) {
        deviceLimits.*row.limit = row.values[family];
    }

    auto& v1 = limits->v1;
    GetDefaultLimits(&v1);

    v1.maxTextureDimension1D = deviceLimits.max1DTextureSize;
    v1.maxTextureDimension2D = deviceLimits.max2DTextureSize;
    v1.maxTextureDimension3D = deviceLimits.max3DTextureSize;
    v1.maxTextureArrayLayers = deviceLimits.maxTextureArrayLayers;

    // One slot is reserved to store buffer lengths.
    const uint32_t usableBufferSlots = deviceLimits.maxBufferArgumentEntriesPerFunc - 1;
    const uint32_t defaultBufferSlots = v1.maxStorageBuffersPerShaderStage +
                                        v1.maxUniformBuffersPerShaderStage +
                                        v1.maxVertexBuffers;

    ASSERT(usableBufferSlots >= defaultBufferSlots);
    {
        // Split the slots left over after the defaults into thirds; the vertex
        // buffers also receive the remainder of the division.
        const uint32_t spare = usableBufferSlots - defaultBufferSlots;
        const uint32_t third = spare / 3;
        v1.maxStorageBuffersPerShaderStage += third;
        v1.maxUniformBuffersPerShaderStage += third;
        v1.maxVertexBuffers += (spare - 2 * third);
    }

    const uint32_t defaultTextureSlots =
        v1.maxSampledTexturesPerShaderStage + v1.maxStorageTexturesPerShaderStage;

    ASSERT(deviceLimits.maxTextureArgumentEntriesPerFunc >= defaultTextureSlots);
    {
        // Split the leftover texture slots in half; storage textures receive
        // the remainder of the division.
        const uint32_t spare =
            deviceLimits.maxTextureArgumentEntriesPerFunc - defaultTextureSlots;
        const uint32_t half = spare / 2;
        v1.maxSampledTexturesPerShaderStage += half;
        v1.maxStorageTexturesPerShaderStage += (spare - half);
    }

    v1.maxSamplersPerShaderStage = deviceLimits.maxSamplerStateArgumentEntriesPerFunc;

    // Metal limits are per-function, so the layout limits are the same as the stage
    // limits. Note: this should likely change if the implementation uses Metal argument
    // buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
    // buffers may be set directly.
    //   Mac GPU families with tier 1 argument buffers support 64
    //   buffers, 128 textures, and 16 samplers. Mac GPU families
    //   with tier 2 argument buffers support 500000 buffers and
    //   textures, and 1024 unique samplers
    v1.maxDynamicUniformBuffersPerPipelineLayout = v1.maxUniformBuffersPerShaderStage;
    v1.maxDynamicStorageBuffersPerPipelineLayout = v1.maxStorageBuffersPerShaderStage;

    // The WebGPU limit is the limit across all vertex buffers, combined.
    // NOTE(review): the Metal value is named "per descriptor"; confirm that
    // multiplying it by the vertex buffer count is the intended limit.
    v1.maxVertexAttributes = v1.maxVertexBuffers * deviceLimits.maxVertexAttribsPerDescriptor;

    v1.maxInterStageShaderComponents = deviceLimits.maxFragmentInputComponents;

    // The per-dimension workgroup sizes all use the total thread count limit.
    v1.maxComputeWorkgroupStorageSize = deviceLimits.maxTotalThreadgroupMemory;
    v1.maxComputeInvocationsPerWorkgroup = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeX = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeY = deviceLimits.maxThreadsPerThreadgroup;
    v1.maxComputeWorkgroupSizeZ = deviceLimits.maxThreadsPerThreadgroup;

    v1.minUniformBufferOffsetAlignment = deviceLimits.minBufferOffsetAlignment;
    v1.minStorageBufferOffsetAlignment = deviceLimits.minBufferOffsetAlignment;

    // Metal has no documented limit on the size of a binding. Use the maximum
    // buffer size.
    // NOTE(review): |maxBufferSize| is 64-bit; confirm the binding-size fields
    // are wide enough that these assignments do not narrow.
    const uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
    v1.maxUniformBufferBindingSize = maxBufferSize;
    v1.maxStorageBufferBindingSize = maxBufferSize;

    // TODO(crbug.com/dawn/685):
    // LIMITS NOT SET:
    //  - maxBindGroups
    //  - maxVertexBufferArrayStride

    return {};
}
|
||||
|
||||
|
@ -360,7 +564,8 @@ namespace dawn_native { namespace metal {
|
|||
if (@available(iOS 8.0, *)) {
|
||||
supportedVersion = YES;
|
||||
// iOS only has a single device so MTLCopyAllDevices doesn't exist there.
|
||||
std::unique_ptr<Adapter> adapter = std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
|
||||
std::unique_ptr<Adapter> adapter =
|
||||
std::make_unique<Adapter>(GetInstance(), MTLCreateSystemDefaultDevice());
|
||||
if (!GetInstance()->ConsumedError(adapter->Initialize())) {
|
||||
adapters.push_back(std::move(adapter));
|
||||
}
|
||||
|
|
|
@ -39,6 +39,8 @@ namespace dawn_native { namespace metal {
|
|||
void EnsureDataInitializedAsDestination(CommandRecordingContext* commandContext,
|
||||
const CopyTextureToBufferCmd* copy);
|
||||
|
||||
static uint64_t QueryMaxBufferLength(id<MTLDevice> mtlDevice);
|
||||
|
||||
private:
|
||||
using BufferBase::BufferBase;
|
||||
MaybeError Initialize(bool mappedAtCreation);
|
||||
|
|
|
@ -26,10 +26,6 @@ namespace dawn_native { namespace metal {
|
|||
// largest alignment of supported data types
|
||||
static constexpr uint32_t kMinUniformOrStorageBufferAlignment = 16u;
|
||||
|
||||
// The maximum buffer size if querying the maximum buffer size or recommended working set size
|
||||
// is not available. This is a somewhat arbitrary limit of 1 GiB.
|
||||
static constexpr uint32_t kMaxBufferSizeFallback = 1024u * 1024u * 1024u;
|
||||
|
||||
// static
|
||||
ResultOrError<Ref<Buffer>> Buffer::Create(Device* device, const BufferDescriptor* descriptor) {
|
||||
Ref<Buffer> buffer = AcquireRef(new Buffer(device, descriptor));
|
||||
|
@ -37,6 +33,32 @@ namespace dawn_native { namespace metal {
|
|||
return std::move(buffer);
|
||||
}
|
||||
|
||||
// static
|
||||
// Returns the maximum buffer length, in bytes, to allow for |mtlDevice|.
// Uses -[MTLDevice maxBufferLength] when the OS provides it, and otherwise
// falls back to fixed per-OS-version limits. Every path returns a value.
uint64_t Buffer::QueryMaxBufferLength(id<MTLDevice> mtlDevice) {
    constexpr uint64_t kMiB = 1024u * 1024u;

    if (@available(iOS 12, tvOS 12, macOS 10.14, *)) {
        return [mtlDevice maxBufferLength];
    }

    // Earlier versions of Metal had maximums defined in the Metal feature set tables
    // https://metalbyexample.com/wp-content/uploads/Metal-Feature-Set-Tables-2018.pdf
#if defined(DAWN_PLATFORM_MACOS)
    // macOS 10.12 and 10.13 have a 1 GiB limit.
    if (@available(macOS 10.12, *)) {
        return 1024 * kMiB;
    }
#endif

    // macOS 10.11, and iOS / tvOS versions without [MTLDevice maxBufferLength],
    // have a 256 MiB limit. This is also the most conservative value, so it
    // serves as the final fallback and guarantees the function never falls off
    // its end without returning.
    return 256 * kMiB;
}
|
||||
|
||||
MaybeError Buffer::Initialize(bool mappedAtCreation) {
|
||||
MTLResourceOptions storageMode;
|
||||
if (GetUsage() & kMappableBufferUsages) {
|
||||
|
@ -80,25 +102,10 @@ namespace dawn_native { namespace metal {
|
|||
}
|
||||
currentSize = Align(currentSize, alignment);
|
||||
|
||||
if (@available(iOS 12, macOS 10.14, *)) {
|
||||
NSUInteger maxBufferSize = [ToBackend(GetDevice())->GetMTLDevice() maxBufferLength];
|
||||
uint64_t maxBufferSize = QueryMaxBufferLength(ToBackend(GetDevice())->GetMTLDevice());
|
||||
if (currentSize > maxBufferSize) {
|
||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
||||
}
|
||||
#if defined(DAWN_PLATFORM_MACOS)
|
||||
} else if (@available(macOS 10.12, *)) {
|
||||
// |maxBufferLength| isn't always available on older systems. If available, use
|
||||
// |recommendedMaxWorkingSetSize| instead. We can probably allocate more than this,
|
||||
// but don't have a way to discover a better limit. MoltenVK also uses this heuristic.
|
||||
uint64_t maxWorkingSetSize =
|
||||
[ToBackend(GetDevice())->GetMTLDevice() recommendedMaxWorkingSetSize];
|
||||
if (currentSize > maxWorkingSetSize) {
|
||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
||||
}
|
||||
#endif
|
||||
} else if (currentSize > kMaxBufferSizeFallback) {
|
||||
return DAWN_OUT_OF_MEMORY_ERROR("Buffer allocation is too large");
|
||||
}
|
||||
|
||||
mAllocatedSize = currentSize;
|
||||
mMtlBuffer.Acquire([ToBackend(GetDevice())->GetMTLDevice()
|
||||
|
|
Loading…
Reference in New Issue