dawn-cmake/src/dawn/native/metal/BackendMTL.mm

// Copyright 2019 The Dawn Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dawn/native/metal/BackendMTL.h"

#include "dawn/common/CoreFoundationRef.h"
#include "dawn/common/GPUInfo.h"
#include "dawn/common/Log.h"
#include "dawn/common/NSRef.h"
#include "dawn/common/Platform.h"
#include "dawn/common/SystemUtils.h"
#include "dawn/native/Instance.h"
#include "dawn/native/MetalBackend.h"
#include "dawn/native/metal/BufferMTL.h"
#include "dawn/native/metal/DeviceMTL.h"
#include "dawn/native/metal/UtilsMetal.h"

#if DAWN_PLATFORM_IS(MACOS)
#import <IOKit/IOKitLib.h>
#include "dawn/common/IOKitRef.h"
#endif

#include <vector>

namespace dawn::native::metal {

namespace {

struct PCIIDs {
    uint32_t vendorId;
    uint32_t deviceId;
};

struct Vendor {
    const char* trademark;
    uint32_t vendorId;
};

#if DAWN_PLATFORM_IS(MACOS)
const Vendor kVendors[] = {
    {"AMD", gpu_info::kVendorID_AMD},        {"Apple", gpu_info::kVendorID_Apple},
    {"Radeon", gpu_info::kVendorID_AMD},     {"Intel", gpu_info::kVendorID_Intel},
    {"Geforce", gpu_info::kVendorID_Nvidia}, {"Quadro", gpu_info::kVendorID_Nvidia}};

// Find vendor ID from MTLDevice name.
MaybeError GetVendorIdFromVendors(id<MTLDevice> device, PCIIDs* ids) {
    uint32_t vendorId = 0;
    const char* deviceName = [device.name UTF8String];
    for (const auto& it : kVendors) {
        if (strstr(deviceName, it.trademark) != nullptr) {
            vendorId = it.vendorId;
            break;
        }
    }

    if (vendorId == 0) {
        return DAWN_INTERNAL_ERROR("Failed to find vendor id with the device");
    }

    // Set vendor id with 0
    *ids = PCIIDs{vendorId, 0};
    return {};
}

// Extracts an integer property from a registry entry.
uint32_t GetEntryProperty(io_registry_entry_t entry, CFStringRef name) {
    uint32_t value = 0;

    // Recursively search registry entry and its parents for property name
    // The data should release with CFRelease
    CFRef<CFDataRef> data = AcquireCFRef(static_cast<CFDataRef>(IORegistryEntrySearchCFProperty(
        entry, kIOServicePlane, name, kCFAllocatorDefault,
        kIORegistryIterateRecursively | kIORegistryIterateParents)));

    if (data == nullptr) {
        return value;
    }

    // CFDataGetBytePtr() is guaranteed to return a read-only pointer
    value = *reinterpret_cast<const uint32_t*>(CFDataGetBytePtr(data.Get()));
    return value;
}

// Queries the IO Registry to find the PCI device and vendor IDs of the MTLDevice.
// The registry entry correponding to [device registryID] doesn't contain the exact PCI ids
// because it corresponds to a driver. However its parent entry corresponds to the device
// itself and has uint32_t "device-id" and "registry-id" keys. For example on a dual-GPU
// MacBook Pro 2017 the IORegistry explorer shows the following tree (simplified here):
//
//  - PCI0@0
//  | - AppleACPIPCI
//  | | - IGPU@2 (type IOPCIDevice)
//  | | | - IntelAccelerator (type IOGraphicsAccelerator2)
//  | | - PEG0@1
//  | | | - IOPP
//  | | | | - GFX0@0 (type IOPCIDevice)
//  | | | | | - AMDRadeonX4000_AMDBaffinGraphicsAccelerator (type IOGraphicsAccelerator2)
//
// [device registryID] is the ID for one of the IOGraphicsAccelerator2 and we can see that
// their parent always is an IOPCIDevice that has properties for the device and vendor IDs.
MaybeError API_AVAILABLE(macos(10.13))
    GetDeviceIORegistryPCIInfo(id<MTLDevice> device, PCIIDs* ids) {
    // Get a matching dictionary for the IOGraphicsAccelerator2
    CFRef<CFMutableDictionaryRef> matchingDict =
        AcquireCFRef(IORegistryEntryIDMatching([device registryID]));
    if (matchingDict == nullptr) {
        return DAWN_INTERNAL_ERROR("Failed to create the matching dict for the device");
    }

    // IOServiceGetMatchingService will consume the reference on the matching dictionary,
    // so we don't need to release the dictionary.
    IORef<io_registry_entry_t> acceleratorEntry =
        AcquireIORef(IOServiceGetMatchingService(kIOMasterPortDefault, matchingDict.Detach()));
    if (acceleratorEntry == IO_OBJECT_NULL) {
        return DAWN_INTERNAL_ERROR("Failed to get the IO registry entry for the accelerator");
    }

    // Get the parent entry that will be the IOPCIDevice
    IORef<io_registry_entry_t> deviceEntry;
    if (IORegistryEntryGetParentEntry(acceleratorEntry.Get(), kIOServicePlane,
                                      deviceEntry.InitializeInto()) != kIOReturnSuccess) {
        return DAWN_INTERNAL_ERROR("Failed to get the IO registry entry for the device");
    }

    ASSERT(deviceEntry != IO_OBJECT_NULL);

    uint32_t vendorId = GetEntryProperty(deviceEntry.Get(), CFSTR("vendor-id"));
    uint32_t deviceId = GetEntryProperty(deviceEntry.Get(), CFSTR("device-id"));

    *ids = PCIIDs{vendorId, deviceId};

    return {};
}

MaybeError GetDevicePCIInfo(id<MTLDevice> device, PCIIDs* ids) {
    // [device registryID] is introduced on macOS 10.13+, otherwise workaround to get vendor
    // id by vendor name on old macOS
    if (@available(macos 10.13, *)) {
        auto result = GetDeviceIORegistryPCIInfo(device, ids);
        if (result.IsError()) {
            dawn::WarningLog() << "GetDeviceIORegistryPCIInfo failed: "
                               << result.AcquireError()->GetFormattedMessage();
        } else if (ids->vendorId != 0) {
            return result;
        }
    }

    return GetVendorIdFromVendors(device, ids);
}

#elif DAWN_PLATFORM_IS(IOS)

MaybeError GetDevicePCIInfo(id<MTLDevice> device, PCIIDs* ids) {
    DAWN_UNUSED(device);
    *ids = PCIIDs{0, 0};
    return {};
}

#else
#error "Unsupported Apple platform."
#endif

// This method has seen hard-to-debug crashes. See crbug.com/dawn/1102.
// For now, it is written defensively, with many potentially unnecessary guards until
// we narrow down the cause of the problem.
DAWN_NOINLINE bool IsGPUCounterSupported(id<MTLDevice> device,
                                         MTLCommonCounterSet counterSetName,
                                         std::vector<MTLCommonCounter> counterNames)
    API_AVAILABLE(macos(10.15), ios(14.0)) {
    NSPRef<id<MTLCounterSet>> counterSet = nil;
    if (![device respondsToSelector:@selector(counterSets)]) {
        dawn::ErrorLog() << "MTLDevice does not respond to selector: counterSets.";
        return false;
    }
    NSArray<id<MTLCounterSet>>* counterSets = device.counterSets;
    if (counterSets == nil) {
        // On some systems, [device counterSets] may be null and not an empty array.
        return false;
    }
    // MTLDevice’s counterSets property declares which counter sets it supports. Check
    // whether it's available on the device before requesting a counter set.
    // Note: Don't do for..in loop to avoid potentially crashy interaction with
    // NSFastEnumeration.
    for (NSUInteger i = 0; i < counterSets.count; ++i) {
        id<MTLCounterSet> set = [counterSets objectAtIndex:i];
        if ([set.name caseInsensitiveCompare:counterSetName] == NSOrderedSame) {
            counterSet = set;
            break;
        }
    }

    // The counter set is not supported.
    if (counterSet == nil) {
        return false;
    }

    if (![*counterSet respondsToSelector:@selector(counters)]) {
        dawn::ErrorLog() << "MTLCounterSet does not respond to selector: counters.";
        return false;
    }
    NSArray<id<MTLCounter>>* countersInSet = (*counterSet).counters;
    if (countersInSet == nil) {
        // On some systems, [MTLCounterSet counters] may be null and not an empty array.
        return false;
    }

    // A GPU might support a counter set, but only support a subset of the counters in that
    // set, check if the counter set supports all specific counters we need. Return false
    // if there is a counter unsupported.
    for (MTLCommonCounter counterName : counterNames) {
        bool found = false;
        // Note: Don't do for..in loop to avoid potentially crashy interaction with
        // NSFastEnumeration.
        for (NSUInteger i = 0; i < countersInSet.count; ++i) {
            id<MTLCounter> counter = [countersInSet objectAtIndex:i];
            if ([counter.name caseInsensitiveCompare:counterName] == NSOrderedSame) {
                found = true;
                break;
            }
        }
        if (!found) {
            return false;
        }
    }

    if (@available(macOS 11.0, iOS 14.0, *)) {
        // Check whether it can read GPU counters at the specified command boundary or stage
        // boundary. Apple family GPUs do not support sampling between different Metal commands,
        // because they defer fragment processing until after the GPU processes all the primitives
        // in the render pass. GPU counters are only available if sampling at least one of the
        // command or stage boundaries is supported.
        if (!SupportCounterSamplingAtCommandBoundary(device) &&
            !SupportCounterSamplingAtStageBoundary(device)) {
            return false;
        }
    }

    return true;
}

}  // anonymous namespace

// The Metal backend's PhysicalDevice.

class PhysicalDevice : public PhysicalDeviceBase {
  public:
    PhysicalDevice(InstanceBase* instance, id<MTLDevice> device)
        : PhysicalDeviceBase(instance, wgpu::BackendType::Metal), mDevice(device) {
        mName = std::string([[*mDevice name] UTF8String]);

        PCIIDs ids;
        if (!instance->ConsumedError(GetDevicePCIInfo(device, &ids))) {
            mVendorId = ids.vendorId;
            mDeviceId = ids.deviceId;
        }

#if DAWN_PLATFORM_IS(IOS)
        mAdapterType = wgpu::AdapterType::IntegratedGPU;
        const char* systemName = "iOS ";
#elif DAWN_PLATFORM_IS(MACOS)
        if ([device isLowPower]) {
            mAdapterType = wgpu::AdapterType::IntegratedGPU;
        } else {
            mAdapterType = wgpu::AdapterType::DiscreteGPU;
        }
        const char* systemName = "macOS ";
#else
#error "Unsupported Apple platform."
#endif

        NSString* osVersion = [[NSProcessInfo processInfo] operatingSystemVersionString];
        mDriverDescription = "Metal driver on " + std::string(systemName) + [osVersion UTF8String];
    }

    // PhysicalDeviceBase Implementation
    bool SupportsExternalImages() const override {
        // Via dawn::native::metal::WrapIOSurface
        return true;
    }

    bool SupportsFeatureLevel(FeatureLevel) const override { return true; }

  private:
    ResultOrError<Ref<DeviceBase>> CreateDeviceImpl(AdapterBase* adapter,
                                                    const DeviceDescriptor* descriptor,
                                                    const TogglesState& deviceToggles) override {
        return Device::Create(adapter, mDevice, descriptor, deviceToggles);
    }

    void SetupBackendDeviceToggles(TogglesState* deviceToggles) const override {
        {
            bool haveStoreAndMSAAResolve = false;
#if DAWN_PLATFORM_IS(MACOS)
            if (@available(macOS 10.12, *)) {
                haveStoreAndMSAAResolve =
                    [*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v2];
            }
#elif DAWN_PLATFORM_IS(IOS)
            haveStoreAndMSAAResolve = [*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v2];
#endif
            // On tvOS, we would need MTLFeatureSet_tvOS_GPUFamily2_v1.
            deviceToggles->Default(Toggle::EmulateStoreAndMSAAResolve, !haveStoreAndMSAAResolve);

            bool haveSamplerCompare = true;
#if DAWN_PLATFORM_IS(IOS)
            haveSamplerCompare = [*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1];
#endif
            // TODO(crbug.com/dawn/342): Investigate emulation -- possibly expensive.
            deviceToggles->Default(Toggle::MetalDisableSamplerCompare, !haveSamplerCompare);

            bool haveBaseVertexBaseInstance = true;
#if DAWN_PLATFORM_IS(IOS)
            haveBaseVertexBaseInstance =
                [*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1];
#endif
            // TODO(crbug.com/dawn/343): Investigate emulation.
            deviceToggles->Default(Toggle::DisableBaseVertex, !haveBaseVertexBaseInstance);
            deviceToggles->Default(Toggle::DisableBaseInstance, !haveBaseVertexBaseInstance);
        }

        // Vertex buffer robustness is implemented by using programmable vertex pulling. Enable
        // that code path if it isn't explicitly disabled.
        if (!deviceToggles->IsEnabled(Toggle::DisableRobustness)) {
            deviceToggles->Default(Toggle::MetalEnableVertexPulling, true);
        }

        // TODO(crbug.com/dawn/846): tighten this workaround when the driver bug is fixed.
        deviceToggles->Default(Toggle::AlwaysResolveIntoZeroLevelAndLayer, true);

        uint32_t deviceId = GetDeviceId();
        uint32_t vendorId = GetVendorId();

        // TODO(crbug.com/dawn/847): Use MTLStorageModeShared instead of MTLStorageModePrivate when
        // creating MTLCounterSampleBuffer in QuerySet on Intel platforms, otherwise it fails to
        // create the buffer. Change to use MTLStorageModePrivate when the bug is fixed.
        if (@available(macOS 10.15, iOS 14.0, *)) {
            bool useSharedMode = gpu_info::IsIntel(vendorId);
            deviceToggles->Default(Toggle::MetalUseSharedModeForCounterSampleBuffer, useSharedMode);
        }

        // Rendering R8Unorm and RG8Unorm to small mip doesn't work properly on Intel.
        // TODO(crbug.com/dawn/1071): Tighten the workaround when this issue is fixed.
        if (gpu_info::IsIntel(vendorId)) {
            deviceToggles->Default(Toggle::MetalRenderR8RG8UnormSmallMipToTempTexture, true);
        }

        // On some Intel GPUs vertex only render pipeline get wrong depth result if no fragment
        // shader provided. Create a placeholder fragment shader module to work around this issue.
        if (gpu_info::IsIntel(vendorId)) {
            bool usePlaceholderFragmentShader = true;
            if (gpu_info::IsSkylake(deviceId)) {
                usePlaceholderFragmentShader = false;
            }
            deviceToggles->Default(Toggle::UsePlaceholderFragmentInVertexOnlyPipeline,
                                   usePlaceholderFragmentShader);
        }

        // On some Intel GPUs using big integer values as clear values in render pass doesn't work
        // correctly. Currently we have to add workaround for this issue by enabling the toggle
        // "apply_clear_big_integer_color_value_with_draw". See https://crbug.com/dawn/1109 and
        // https://crbug.com/dawn/1463 for more details.
        if (gpu_info::IsIntel(vendorId)) {
            deviceToggles->Default(Toggle::ApplyClearBigIntegerColorValueWithDraw, true);
        }

        // TODO(dawn:1473): Metal fails to store GPU counters to sampleBufferAttachments on empty
        // encoders on macOS 11.0+, we need to add mock blit command to blit encoder when encoding
        // writeTimestamp as workaround by enabling the toggle
        // "metal_use_mock_blit_encoder_for_write_timestamp".
        if (@available(macos 11.0, iOS 14.0, *)) {
            deviceToggles->Default(Toggle::MetalUseMockBlitEncoderForWriteTimestamp, true);
        }

#if DAWN_PLATFORM_IS(MACOS)
        if (gpu_info::IsIntel(vendorId)) {
            deviceToggles->Default(
                Toggle::MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats, true);
            deviceToggles->Default(Toggle::UseBlitForBufferToStencilTextureCopy, true);
            deviceToggles->Default(Toggle::UseBlitForBufferToDepthTextureCopy, true);
            deviceToggles->Default(Toggle::UseBlitForDepthTextureToTextureCopyToNonzeroSubresource,
                                   true);

            if ([NSProcessInfo.processInfo
                    isOperatingSystemAtLeastVersion:NSOperatingSystemVersion{12, 0, 0}]) {
                deviceToggles->ForceSet(
                    Toggle::NoWorkaroundSampleMaskBecomesZeroForAllButLastColorTarget, true);
            }
            if (gpu_info::IsIntelGen7(vendorId, deviceId) ||
                gpu_info::IsIntelGen8(vendorId, deviceId)) {
                deviceToggles->ForceSet(Toggle::NoWorkaroundIndirectBaseVertexNotApplied, true);
            }
        }
        if (gpu_info::IsAMD(vendorId) || gpu_info::IsIntel(vendorId)) {
            deviceToggles->Default(Toggle::MetalUseCombinedDepthStencilFormatForStencil8, true);
            deviceToggles->Default(Toggle::MetalKeepMultisubresourceDepthStencilTexturesInitialized,
                                   true);
        }

        if (gpu_info::IsApple(vendorId)) {
            deviceToggles->Default(Toggle::MetalFillEmptyOcclusionQueriesWithZero, true);
        }

        // Local testing shows the workaround is needed on AMD Radeon HD 8870M (gcn-1) MacOS 12.1;
        // not on AMD Radeon Pro 555 (gcn-4) MacOS 13.1.
        // Conservatively enable the workaround on AMD unless the system is MacOS 13.1+
        // with architecture at least AMD gcn-4.
        bool isLessThanAMDGN4OrMac13Dot1 = false;
        if (gpu_info::IsAMDGCN1(vendorId, deviceId) || gpu_info::IsAMDGCN2(vendorId, deviceId) ||
            gpu_info::IsAMDGCN3(vendorId, deviceId)) {
            isLessThanAMDGN4OrMac13Dot1 = true;
        } else if (gpu_info::IsAMD(vendorId)) {
            if (@available(macos 13.1, *)) {
            } else {
                isLessThanAMDGN4OrMac13Dot1 = true;
            }
        }
        if (isLessThanAMDGN4OrMac13Dot1) {
            deviceToggles->Default(
                Toggle::MetalUseBothDepthAndStencilAttachmentsForCombinedDepthStencilFormats, true);
        }
#endif
    }

    MaybeError InitializeImpl() override { return {}; }

    void InitializeSupportedFeaturesImpl() override {
        // Check texture formats with deprecated MTLFeatureSet way.
#if DAWN_PLATFORM_IS(MACOS)
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
            EnableFeature(Feature::TextureCompressionBC);
        }
        if (@available(macOS 10.14, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
                EnableFeature(Feature::Float32Filterable);
            }
        }
#endif
#if DAWN_PLATFORM_IS(IOS)
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
            EnableFeature(Feature::TextureCompressionETC2);
        }
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
            EnableFeature(Feature::TextureCompressionASTC);
        }
#endif

        // Check texture formats with MTLGPUFamily
        if (@available(macOS 10.15, iOS 13.0, *)) {
            if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
                EnableFeature(Feature::TextureCompressionBC);
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
                EnableFeature(Feature::Float32Filterable);
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
                EnableFeature(Feature::TextureCompressionETC2);
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
                EnableFeature(Feature::TextureCompressionASTC);
            }
        }

        if (@available(macOS 10.15, iOS 14.0, *)) {
            auto ShouldLeakCounterSets = [this]() {
                // Intentionally leak counterSets to workaround an issue where the driver
                // over-releases the handle if it is accessed more than once. It becomes a zombie.
                // For more information, see crbug.com/1443658.
                // Appears to occur on Intel prior to MacOS 11, and continuing on Intel Gen 7 after
                // that OS version.
                uint32_t vendorId = GetVendorId();
                uint32_t deviceId = GetDeviceId();
                if (gpu_info::IsIntelGen7(vendorId, deviceId)) {
                    return true;
                }
                if (gpu_info::IsIntelGen11(vendorId, deviceId)) {
                    return true;
                }
                if (gpu_info::IsIntel(vendorId) && !IsMacOSVersionAtLeast(11)) {
                    return true;
                }
                return false;
            };
            if (ShouldLeakCounterSets()) {
                [[*mDevice counterSets] retain];
            }
            if (IsGPUCounterSupported(
                    *mDevice, MTLCommonCounterSetStatistic,
                    {MTLCommonCounterVertexInvocations, MTLCommonCounterClipperInvocations,
                     MTLCommonCounterClipperPrimitivesOut, MTLCommonCounterFragmentInvocations,
                     MTLCommonCounterComputeKernelInvocations})) {
                EnableFeature(Feature::PipelineStatisticsQuery);
            }

            if (IsGPUCounterSupported(*mDevice, MTLCommonCounterSetTimestamp,
                                      {MTLCommonCounterTimestamp})) {
                bool enableTimestampQuery = true;
                bool enableTimestampQueryInsidePasses = true;

                if (@available(macOS 11.0, iOS 14.0, *)) {
                    enableTimestampQueryInsidePasses =
                        SupportCounterSamplingAtCommandBoundary(*mDevice);
                }

#if DAWN_PLATFORM_IS(MACOS)
                // Disable timestamp query on < macOS 11.0 on AMD GPU because WriteTimestamp
                // fails to call without any copy commands on MTLBlitCommandEncoder. This issue
                // has been fixed on macOS 11.0. See crbug.com/dawn/545.
                if (gpu_info::IsAMD(mVendorId) && !IsMacOSVersionAtLeast(11)) {
                    enableTimestampQuery = false;
                    enableTimestampQueryInsidePasses = false;
                }
#endif

                if (enableTimestampQuery) {
                    EnableFeature(Feature::TimestampQuery);
                }

                if (enableTimestampQueryInsidePasses) {
                    EnableFeature(Feature::TimestampQueryInsidePasses);
                }
            }
        }

        if (@available(macOS 10.11, iOS 11.0, *)) {
            EnableFeature(Feature::DepthClipControl);
        }

        if (@available(macOS 10.11, iOS 9.0, *)) {
            EnableFeature(Feature::Depth32FloatStencil8);
        }

        // Uses newTextureWithDescriptor::iosurface::plane which is available
        // on ios 11.0+ and macOS 11.0+
        if (@available(macOS 10.11, iOS 11.0, *)) {
            EnableFeature(Feature::MultiPlanarFormats);
        }

        if (@available(macOS 11.0, iOS 10.0, *)) {
            // Memoryless storage mode for Metal textures is available only
            // from the Apple2 family of GPUs on.
            if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
                EnableFeature(Feature::TransientAttachments);
            }
        }

        EnableFeature(Feature::IndirectFirstInstance);
        EnableFeature(Feature::ShaderF16);
        EnableFeature(Feature::RG11B10UfloatRenderable);
        EnableFeature(Feature::BGRA8UnormStorage);
        EnableFeature(Feature::SurfaceCapabilities);
    }

    void InitializeVendorArchitectureImpl() override {
        if (@available(macOS 10.15, iOS 13.0, *)) {
            // According to Apple's documentation:
            // https://developer.apple.com/documentation/metal/gpu_devices_and_work_submission/detecting_gpu_features_and_metal_software_versions
            // - "Use the Common family to create apps that target a range of GPUs on multiple
            //   platforms.""
            // - "A GPU can be a member of more than one family; in most cases, a GPU supports one
            //   of the Common families and then one or more families specific to the build target."
            // So we'll use the highest supported common family as the reported "architecture" on
            // devices where a deviceID isn't available.
            if (mDeviceId == 0) {
                if ([*mDevice supportsFamily:MTLGPUFamilyCommon3]) {
                    mArchitectureName = "common-3";
                } else if ([*mDevice supportsFamily:MTLGPUFamilyCommon2]) {
                    mArchitectureName = "common-2";
                } else if ([*mDevice supportsFamily:MTLGPUFamilyCommon1]) {
                    mArchitectureName = "common-1";
                }
            }
        }

        mVendorName = gpu_info::GetVendorName(mVendorId);
        if (mDeviceId != 0) {
            mArchitectureName = gpu_info::GetArchitectureName(mVendorId, mDeviceId);
        }
    }

    enum class MTLGPUFamily {
        Apple1,
        Apple2,
        Apple3,
        Apple4,
        Apple5,
        Apple6,
        Apple7,
        Mac1,
        Mac2,
    };

    ResultOrError<MTLGPUFamily> GetMTLGPUFamily() const {
        // https://developer.apple.com/documentation/metal/mtldevice/detecting_gpu_features_and_metal_software_versions?language=objc

        if (@available(macOS 10.15, iOS 10.13, *)) {
            if ([*mDevice supportsFamily:MTLGPUFamilyMac2]) {
                return MTLGPUFamily::Mac2;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyMac1]) {
                return MTLGPUFamily::Mac1;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple7]) {
                return MTLGPUFamily::Apple7;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple6]) {
                return MTLGPUFamily::Apple6;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple5]) {
                return MTLGPUFamily::Apple5;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple4]) {
                return MTLGPUFamily::Apple4;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple3]) {
                return MTLGPUFamily::Apple3;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple2]) {
                return MTLGPUFamily::Apple2;
            }
            if ([*mDevice supportsFamily:MTLGPUFamilyApple1]) {
                return MTLGPUFamily::Apple1;
            }
        }

#if DAWN_PLATFORM_IS(MACOS)
        if (@available(macOS 10.14, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily2_v1]) {
                return MTLGPUFamily::Mac2;
            }
        }
        if ([*mDevice supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v1]) {
            return MTLGPUFamily::Mac1;
        }
#elif DAWN_PLATFORM_IS(IOS)
        if (@available(iOS 10.11, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
                return MTLGPUFamily::Apple4;
            }
        }
        if (@available(iOS 9.0, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
                return MTLGPUFamily::Apple3;
            }
        }
        if (@available(iOS 8.0, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v1]) {
                return MTLGPUFamily::Apple2;
            }
        }
        if (@available(iOS 8.0, *)) {
            if ([*mDevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v1]) {
                return MTLGPUFamily::Apple1;
            }
        }
#endif
        return DAWN_INTERNAL_ERROR("Unsupported Metal device");
    }

    MaybeError InitializeSupportedLimitsImpl(CombinedLimits* limits) override {
        struct MTLDeviceLimits {
            uint32_t maxVertexAttribsPerDescriptor;
            uint32_t maxBufferArgumentEntriesPerFunc;
            uint32_t maxTextureArgumentEntriesPerFunc;
            uint32_t maxSamplerStateArgumentEntriesPerFunc;
            uint32_t maxThreadsPerThreadgroup;
            uint32_t maxTotalThreadgroupMemory;
            uint32_t maxFragmentInputComponents;
            uint32_t max1DTextureSize;
            uint32_t max2DTextureSize;
            uint32_t max3DTextureSize;
            uint32_t maxTextureArrayLayers;
            uint32_t minBufferOffsetAlignment;
            uint32_t maxColorRenderTargets;
        };

        struct LimitsForFamily {
            uint32_t MTLDeviceLimits::*limit;
            ityp::array<MTLGPUFamily, uint32_t, 9> values;
        };

        // clang-format off
            // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
            //                                                               Apple                                                      Mac
            //                                                                   1,      2,      3,      4,      5,      6,      7,       1,      2
            constexpr LimitsForFamily kMTLLimits[13] = {
                {&MTLDeviceLimits::maxVertexAttribsPerDescriptor,         {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
                {&MTLDeviceLimits::maxBufferArgumentEntriesPerFunc,       {    31u,    31u,    31u,    31u,    31u,    31u,    31u,     31u,    31u }},
                {&MTLDeviceLimits::maxTextureArgumentEntriesPerFunc,      {    31u,    31u,    31u,    96u,    96u,   128u,   128u,    128u,   128u }},
                {&MTLDeviceLimits::maxSamplerStateArgumentEntriesPerFunc, {    16u,    16u,    16u,    16u,    16u,    16u,    16u,     16u,    16u }},
                {&MTLDeviceLimits::maxThreadsPerThreadgroup,              {   512u,   512u,   512u,  1024u,  1024u,  1024u,  1024u,   1024u,  1024u }},
                {&MTLDeviceLimits::maxTotalThreadgroupMemory,             { 16352u, 16352u, 16384u, 32768u, 32768u, 32768u, 32768u,  32768u, 32768u }},
                {&MTLDeviceLimits::maxFragmentInputComponents,            {    60u,    60u,    60u,   124u,   124u,   124u,   124u,    124u,   124u }},
                {&MTLDeviceLimits::max1DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
                {&MTLDeviceLimits::max2DTextureSize,                      {  8192u,  8192u, 16384u, 16384u, 16384u, 16384u, 16384u,  16384u, 16384u }},
                {&MTLDeviceLimits::max3DTextureSize,                      {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
                {&MTLDeviceLimits::maxTextureArrayLayers,                 {  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,  2048u,   2048u,  2048u }},
                {&MTLDeviceLimits::minBufferOffsetAlignment,              {     4u,     4u,     4u,     4u,     4u,     4u,     4u,    256u,   256u }},
                {&MTLDeviceLimits::maxColorRenderTargets,                 {     4u,     8u,     8u,     8u,     8u,     8u,     8u,      8u,     8u }},
            };
        // clang-format on

        MTLGPUFamily mtlGPUFamily;
        DAWN_TRY_ASSIGN(mtlGPUFamily, GetMTLGPUFamily());

        MTLDeviceLimits mtlLimits;
        for (const auto& limitsForFamily : kMTLLimits) {
            mtlLimits.*limitsForFamily.limit = limitsForFamily.values[mtlGPUFamily];
        }

        GetDefaultLimits(&limits->v1);

        limits->v1.maxTextureDimension1D = mtlLimits.max1DTextureSize;
        limits->v1.maxTextureDimension2D = mtlLimits.max2DTextureSize;
        limits->v1.maxTextureDimension3D = mtlLimits.max3DTextureSize;
        limits->v1.maxTextureArrayLayers = mtlLimits.maxTextureArrayLayers;
        limits->v1.maxColorAttachments = mtlLimits.maxColorRenderTargets;

        uint32_t maxBuffersPerStage = mtlLimits.maxBufferArgumentEntriesPerFunc;
        maxBuffersPerStage -= 1;  // One slot is reserved to store buffer lengths.

        uint32_t baseMaxBuffersPerStage = limits->v1.maxStorageBuffersPerShaderStage +
                                          limits->v1.maxUniformBuffersPerShaderStage +
                                          limits->v1.maxVertexBuffers;

        ASSERT(maxBuffersPerStage >= baseMaxBuffersPerStage);
        {
            uint32_t additional = maxBuffersPerStage - baseMaxBuffersPerStage;
            limits->v1.maxStorageBuffersPerShaderStage += additional / 3;
            limits->v1.maxUniformBuffersPerShaderStage += additional / 3;
            limits->v1.maxVertexBuffers += (additional - 2 * (additional / 3));
        }

        uint32_t baseMaxTexturesPerStage = limits->v1.maxSampledTexturesPerShaderStage +
                                           limits->v1.maxStorageTexturesPerShaderStage;

        ASSERT(mtlLimits.maxTextureArgumentEntriesPerFunc >= baseMaxTexturesPerStage);
        {
            uint32_t additional =
                mtlLimits.maxTextureArgumentEntriesPerFunc - baseMaxTexturesPerStage;
            limits->v1.maxSampledTexturesPerShaderStage += additional / 2;
            limits->v1.maxStorageTexturesPerShaderStage += (additional - additional / 2);
        }

        limits->v1.maxSamplersPerShaderStage = mtlLimits.maxSamplerStateArgumentEntriesPerFunc;

        // Metal limits are per-function, so the layout limits are the same as the stage
        // limits. Note: this should likely change if the implementation uses Metal argument
        // buffers. Non-dynamic buffers will probably be bound argument buffers, but dynamic
        // buffers may be set directly.
        //   Mac GPU families with tier 1 argument buffers support 64
        //   buffers, 128 textures, and 16 samplers. Mac GPU families
        //   with tier 2 argument buffers support 500000 buffers and
        //   textures, and 1024 unique samplers
        // Without argument buffers, we have slots [0 -> 29], inclusive, which is 30 total.
        // 8 are used by maxVertexBuffers.
        limits->v1.maxDynamicUniformBuffersPerPipelineLayout = 11u;
        limits->v1.maxDynamicStorageBuffersPerPipelineLayout = 11u;

        // The WebGPU limit is the limit across all vertex buffers, combined.
        limits->v1.maxVertexAttributes =
            limits->v1.maxVertexBuffers * mtlLimits.maxVertexAttribsPerDescriptor;

        limits->v1.maxInterStageShaderComponents = mtlLimits.maxFragmentInputComponents;

        limits->v1.maxComputeWorkgroupStorageSize = mtlLimits.maxTotalThreadgroupMemory;
        limits->v1.maxComputeInvocationsPerWorkgroup = mtlLimits.maxThreadsPerThreadgroup;
        limits->v1.maxComputeWorkgroupSizeX = mtlLimits.maxThreadsPerThreadgroup;
        limits->v1.maxComputeWorkgroupSizeY = mtlLimits.maxThreadsPerThreadgroup;
        limits->v1.maxComputeWorkgroupSizeZ = mtlLimits.maxThreadsPerThreadgroup;

        limits->v1.minUniformBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;
        limits->v1.minStorageBufferOffsetAlignment = mtlLimits.minBufferOffsetAlignment;

        uint64_t maxBufferSize = Buffer::QueryMaxBufferLength(*mDevice);
        limits->v1.maxBufferSize = maxBufferSize;

        // Metal has no documented limit on the size of a binding. Use the maximum
        // buffer size.
        limits->v1.maxUniformBufferBindingSize = maxBufferSize;
        limits->v1.maxStorageBufferBindingSize = maxBufferSize;

        // Using base limits for:
        // TODO(crbug.com/dawn/685):
        // - maxBindGroups
        // - maxVertexBufferArrayStride

        // TODO(crbug.com/dawn/1448):
        // - maxInterStageShaderVariables

        return {};
    }

    MaybeError ValidateFeatureSupportedWithTogglesImpl(wgpu::FeatureName feature,
                                                       const TogglesState& toggles) const override {
        return {};
    }

    NSPRef<id<MTLDevice>> mDevice;
};

// Implementation of the Metal backend's BackendConnection

Backend::Backend(InstanceBase* instance) : BackendConnection(instance, wgpu::BackendType::Metal) {
    if (GetInstance()->IsBackendValidationEnabled()) {
        setenv("METAL_DEVICE_WRAPPER_TYPE", "1", 1);
    }
}

Backend::~Backend() = default;

std::vector<Ref<PhysicalDeviceBase>> Backend::DiscoverDefaultPhysicalDevices() {
    PhysicalDeviceDiscoveryOptions options;
    auto result = DiscoverPhysicalDevices(&options);
    if (result.IsError()) {
        GetInstance()->ConsumedError(result.AcquireError());
        return {};
    }
    return result.AcquireSuccess();
}

ResultOrError<std::vector<Ref<PhysicalDeviceBase>>> Backend::DiscoverPhysicalDevices(
    const PhysicalDeviceDiscoveryOptionsBase* optionsBase) {
    ASSERT(optionsBase->backendType == WGPUBackendType_Metal);

    std::vector<Ref<PhysicalDeviceBase>> physicalDevices;
#if DAWN_PLATFORM_IS(MACOS)
    NSRef<NSArray<id<MTLDevice>>> devices = AcquireNSRef(MTLCopyAllDevices());

    for (id<MTLDevice> device in devices.Get()) {
        Ref<PhysicalDevice> physicalDevice = AcquireRef(new PhysicalDevice(GetInstance(), device));
        if (!GetInstance()->ConsumedError(physicalDevice->Initialize())) {
            physicalDevices.push_back(std::move(physicalDevice));
        }
    }
#endif

    // iOS only has a single device so MTLCopyAllDevices doesn't exist there.
#if defined(DAWN_PLATFORM_IOS)
    Ref<PhysicalDevice> physicalDevice =
        AcquireRef(new PhysicalDevice(GetInstance(), MTLCreateSystemDefaultDevice()));
    if (!GetInstance()->ConsumedError(physicalDevice->Initialize())) {
        physicalDevices.push_back(std::move(physicalDevice));
    }
#endif

    return physicalDevices;
}

BackendConnection* Connect(InstanceBase* instance) {
    return new Backend(instance);
}

}  // namespace dawn::native::metal