Tint/E2E: Add f16 uniform/storage buffer E2E tests

This CL adds Tint E2E tests for f16 types in uniform and storage buffers.

Bug: tint:1473, tint:1502
Change-Id: I325524d2df326240cc1b080a90abf5bd076b3da1
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/107543
Reviewed-by: Ben Clayton <bclayton@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
2025-12-16 00:17:03 +00:00 · 2022-11-30 02:47:27 +00:00
parent 205e16de63
commit 776b221ae2
2995 changed files with 113726 additions and 2997 deletions
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl
@@ -0,0 +1,15 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x3<f32>, 4> = *p_a;
+  let l_a_i     : mat4x3<f32>           = *p_a_i;
+  let l_a_i_i   : vec3<f32>             = *p_a_i_i;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,39 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, (64u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((64u * uint(p_a_i_save)) + (16u * uint(p_a_i_i_save)))) / 4;
+  const float3 l_a_i_i = asfloat(a[scalar_offset_4 / 4].xyz);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,27 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4x3 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+void f() {
+  int tint_symbol = i();
+  int p_a_i_save = tint_symbol;
+  int tint_symbol_1 = i();
+  int p_a_i_i_save = tint_symbol_1;
+  mat4x3 l_a[4] = a.inner;
+  mat4x3 l_a_i = a.inner[p_a_i_save];
+  vec3 l_a_i_i = a.inner[p_a_i_save][p_a_i_i_save];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<float4x3, 4> const l_a = *(tint_symbol_3);
+  float4x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  float3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,64 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+       %void = OpTypeVoid
+         %21 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+          %f = OpFunction %void None %21
+         %24 = OpLabel
+         %25 = OpFunctionCall %int %i
+         %26 = OpFunctionCall %int %i
+         %29 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %a %uint_0
+         %30 = OpLoad %_arr_mat4v3float_uint_4 %29
+         %32 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %25
+         %33 = OpLoad %mat4v3float %32
+         %35 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %25 %26
+         %36 = OpLoad %v3float %35
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/dynamic_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,18 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x3<f32>, 4> = *(p_a);
+  let l_a_i : mat4x3<f32> = *(p_a_i);
+  let l_a_i_i : vec3<f32> = *(p_a_i_i);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl
@@ -0,0 +1,12 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x3<f32>, 4> = *p_a;
+  let l_a_i     : mat4x3<f32>           = *p_a_2;
+  let l_a_i_i   : vec3<f32>             = *p_a_2_1;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, 128u);
+  const float3 l_a_i_i = asfloat(a[9].xyz);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,30 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[16];
+};
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float4x3 l_a[4] = tint_symbol(a, 0u);
+  const float4x3 l_a_i = tint_symbol_1(a, 128u);
+  const float3 l_a_i_i = asfloat(a[9].xyz);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform a_block_ubo {
+  mat4x3 inner[4];
+} a;
+
+void f() {
+  mat4x3 l_a[4] = a.inner;
+  mat4x3 l_a_i = a.inner[2];
+  vec3 l_a_i_i = a.inner[2][1];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<float4x3, 4> const l_a = *(tint_symbol);
+  float4x3 const l_a_i = (*(tint_symbol))[2];
+  float3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,49 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 27
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block "a_block"
+               OpMemberName %a_block 0 "inner"
+               OpName %a "a"
+               OpName %f "f"
+               OpDecorate %a_block Block
+               OpMemberDecorate %a_block 0 Offset 0
+               OpMemberDecorate %a_block 0 ColMajor
+               OpMemberDecorate %a_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %a_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_a_block = OpTypePointer Uniform %a_block
+          %a = OpVariable %_ptr_Uniform_a_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %16 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %a %uint_0
+         %17 = OpLoad %_arr_mat4v3float_uint_4 %16
+         %21 = OpAccessChain %_ptr_Uniform_mat4v3float %a %uint_0 %int_2
+         %22 = OpLoad %mat4v3float %21
+         %25 = OpAccessChain %_ptr_Uniform_v3float %a %uint_0 %int_2 %int_1
+         %26 = OpLoad %v3float %25
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/static_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x3<f32>, 4> = *(p_a);
+  let l_a_i : mat4x3<f32> = *(p_a_2);
+  let l_a_i_i : vec3<f32> = *(p_a_2_1);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl
@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.dxc.hlsl
@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.fxc.hlsl
@@ -0,0 +1,19 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+float4x3 tint_symbol(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const float3x4 t = transpose(tint_symbol(u, 128u));
+  const float l = length(asfloat(u[1].xyz).zxy);
+  const float a = abs(asfloat(u[1].xyz).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.glsl
@@ -0,0 +1,17 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+void f() {
+  mat3x4 t = transpose(u.inner[2]);
+  float l = length(u.inner[0][1].zxy);
+  float a = abs(u.inner[0][1].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.msl
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  float3x4 const t = transpose((*(tint_symbol))[2]);
+  float const l = length(float3((*(tint_symbol))[0][1]).zxy);
+  float const a = fabs(float3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.spvasm
@@ -0,0 +1,58 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 36
+; Schema: 0
+               OpCapability Shader
+         %24 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void
+    %v4float = OpTypeVector %float 4
+%mat3v4float = OpTypeMatrix %v4float 3
+     %uint_0 = OpConstant %uint 0
+        %int = OpTypeInt 32 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %25 = OpConstantNull %int
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %f = OpFunction %void None %10
+         %13 = OpLabel
+         %21 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %22 = OpLoad %mat4v3float %21
+         %14 = OpTranspose %mat3v4float %22
+         %28 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %29 = OpLoad %v3float %28
+         %30 = OpVectorShuffle %v3float %29 %29 2 0 1
+         %23 = OpExtInst %float %24 Length %30
+         %32 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %25 %int_1
+         %33 = OpLoad %v3float %32
+         %34 = OpVectorShuffle %v3float %33 %33 2 0 1
+         %35 = OpCompositeExtract %float %34 0
+         %31 = OpExtInst %float %24 FAbs %35
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_builtin.wgsl.expected.wgsl
@@ -0,0 +1,8 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl
@@ -0,0 +1,14 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+fn a(a : array<mat4x3<f32>, 4>) {}
+fn b(m : mat4x3<f32>) {}
+fn c(v : vec3<f32>) {}
+fn d(f : f32) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.dxc.hlsl
@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x3 a_1[4]) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4].xyz).zxy);
+  d(asfloat(u[4].xyz).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.fxc.hlsl
@@ -0,0 +1,43 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+
+void a(float4x3 a_1[4]) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 64u));
+  c(asfloat(u[4].xyz).zxy);
+  d(asfloat(u[4].xyz).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.glsl
@@ -0,0 +1,30 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+void a(mat4x3 a_1[4]) {
+}
+
+void b(mat4x3 m) {
+}
+
+void c(vec3 v) {
+}
+
+void d(float f_1) {
+}
+
+void f() {
+  a(u.inner);
+  b(u.inner[1]);
+  c(u.inner[1][0].zxy);
+  d(u.inner[1][0].zxy.x);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.msl
@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<float4x3, 4> a_1) {
+}
+
+void b(float4x3 m) {
+}
+
+void c(float3 v) {
+}
+
+void d(float f_1) {
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(float3((*(tint_symbol))[1][0]).zxy);
+  d(float3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.spvasm
@@ -0,0 +1,90 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 52
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+       %void = OpTypeVoid
+         %10 = OpTypeFunction %void %_arr_mat4v3float_uint_4
+         %15 = OpTypeFunction %void %mat4v3float
+         %19 = OpTypeFunction %void %v3float
+         %23 = OpTypeFunction %void %float
+         %27 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %42 = OpConstantNull %int
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v3float_uint_4
+         %14 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %15
+          %m = OpFunctionParameter %mat4v3float
+         %18 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %19
+          %v = OpFunctionParameter %v3float
+         %22 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %23
+        %f_1 = OpFunctionParameter %float
+         %26 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %27
+         %29 = OpLabel
+         %33 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %34 = OpLoad %_arr_mat4v3float_uint_4 %33
+         %30 = OpFunctionCall %void %a %34
+         %39 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_1
+         %40 = OpLoad %mat4v3float %39
+         %35 = OpFunctionCall %void %b %40
+         %44 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %45 = OpLoad %v3float %44
+         %46 = OpVectorShuffle %v3float %45 %45 2 0 1
+         %41 = OpFunctionCall %void %c %46
+         %48 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %int_1 %42
+         %49 = OpLoad %v3float %48
+         %50 = OpVectorShuffle %v3float %49 %49 2 0 1
+         %51 = OpCompositeExtract %float %50 0
+         %47 = OpFunctionCall %void %d %51
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_fn.wgsl.expected.wgsl
@@ -0,0 +1,21 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+fn a(a : array<mat4x3<f32>, 4>) {
+}
+
+fn b(m : mat4x3<f32>) {
+}
+
+fn c(v : vec3<f32>) {
+}
+
+fn d(f : f32) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl
@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;  // source: uniform-buffer array of four mat4x3<f32>
+var<private> p : array<mat4x3<f32>, 4>;  // destination: private-address-space array of the same type
+
+@compute @workgroup_size(1)
+fn f() {  // each statement copies a progressively smaller piece of u into p
+    p = u;  // whole array
+    p[1] = u[2];  // one matrix: element 2 of u into element 1 of p
+    p[1][0] = u[0][1].zxy;  // one column: column 1 of u[0], swizzled to (z, x, y), into column 0 of p[1]
+    p[1][0].x = u[0][1].x;  // one scalar: x of that same source column into x of that same destination column
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.dxc.hlsl
@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x3 p[4] = (float4x3[4])0;
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.fxc.hlsl
@@ -0,0 +1,32 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+static float4x3 p[4] = (float4x3[4])0;
+
+float4x3 tint_symbol_1(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 128u);
+  p[1][0] = asfloat(u[1].xyz).zxy;
+  p[1][0].x = asfloat(u[1].x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.glsl
@@ -0,0 +1,19 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+mat4x3 p[4] = mat4x3[4](mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f), mat4x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+void f() {
+  p = u.inner;
+  p[1] = u.inner[2];
+  p[1][0] = u.inner[0][1].zxy;
+  p[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.msl
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<float4x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.spvasm
@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 41
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Private__arr_mat4v3float_uint_4 = OpTypePointer Private %_arr_mat4v3float_uint_4
+         %12 = OpConstantNull %_arr_mat4v3float_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v3float_uint_4 Private %12
+       %void = OpTypeVoid
+         %13 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v3float = OpTypePointer Private %mat4v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %29 = OpConstantNull %int
+%_ptr_Private_v3float = OpTypePointer Private %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Private_float = OpTypePointer Private %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %13
+         %16 = OpLabel
+         %19 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %20 = OpLoad %_arr_mat4v3float_uint_4 %19
+               OpStore %p %20
+         %24 = OpAccessChain %_ptr_Private_mat4v3float %p %int_1
+         %27 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %28 = OpLoad %mat4v3float %27
+               OpStore %24 %28
+         %31 = OpAccessChain %_ptr_Private_v3float %p %int_1 %29
+         %33 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %29 %int_1
+         %34 = OpLoad %v3float %33
+         %35 = OpVectorShuffle %v3float %34 %34 2 0 1
+               OpStore %31 %35
+         %37 = OpAccessChain %_ptr_Private_float %p %int_1 %29 %uint_0
+         %39 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %29 %int_1 %uint_0
+         %40 = OpLoad %float %39
+               OpStore %37 %40
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_private.wgsl.expected.wgsl
@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+var<private> p : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl
@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;  // source: uniform-buffer array of four mat4x3<f32>
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f32>, 4>;  // destination: writable storage-buffer array of the same type
+
+@compute @workgroup_size(1)
+fn f() {  // each statement copies a progressively smaller piece of u into s
+    s = u;  // whole array
+    s[1] = u[2];  // one matrix: element 2 of u into element 1 of s
+    s[1][0] = u[0][1].zxy;  // one column: column 1 of u[0], swizzled to (z, x, y), into column 0 of s[1]
+    s[1][0].x = u[0][1].x;  // one scalar: x of that same source column into x of that same destination column
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.dxc.hlsl
@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value[4]) {
+  float4x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store3(64u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.fxc.hlsl
@@ -0,0 +1,48 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, float4x3 value) {
+  buffer.Store3((offset + 0u), asuint(value[0u]));
+  buffer.Store3((offset + 16u), asuint(value[1u]));
+  buffer.Store3((offset + 32u), asuint(value[2u]));
+  buffer.Store3((offset + 48u), asuint(value[3u]));
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, float4x3 value[4]) {
+  float4x3 array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 64u)), array[i]);
+    }
+  }
+}
+
+float4x3 tint_symbol_4(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 64u, tint_symbol_4(u, 128u));
+  s.Store3(64u, asuint(asfloat(u[1].xyz).zxy));
+  s.Store(64u, asuint(asfloat(u[1].x)));
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.glsl
@@ -0,0 +1,22 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  mat4x3 inner[4];
+} s;
+
+void f() {
+  s.inner = u.inner;
+  s.inner[1] = u.inner[2];
+  s.inner[1][0] = u.inner[0][1].zxy;
+  s.inner[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.msl
@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<float4x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<float4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.spvasm
@@ -0,0 +1,71 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %s "s"
+               OpName %f "f"
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+       %void = OpTypeVoid
+         %12 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v3float_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v3float_uint_4
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v3float = OpTypePointer StorageBuffer %mat4v3float
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %30 = OpConstantNull %int
+%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+          %f = OpFunction %void None %12
+         %15 = OpLabel
+         %18 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v3float_uint_4 %s %uint_0
+         %20 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %21 = OpLoad %_arr_mat4v3float_uint_4 %20
+               OpStore %18 %21
+         %25 = OpAccessChain %_ptr_StorageBuffer_mat4v3float %s %uint_0 %int_1
+         %28 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %29 = OpLoad %mat4v3float %28
+               OpStore %25 %29
+         %32 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %int_1 %30
+         %34 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %30 %int_1
+         %35 = OpLoad %v3float %34
+         %36 = OpVectorShuffle %v3float %35 %35 2 0 1
+               OpStore %32 %36
+         %38 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %int_1 %30 %uint_0
+         %40 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %30 %int_1 %uint_0
+         %41 = OpLoad %float %40
+               OpStore %38 %41
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_storage.wgsl.expected.wgsl
@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl
@@ -0,0 +1,10 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;  // source: uniform-buffer array of four mat4x3<f32>
+var<workgroup> w : array<mat4x3<f32>, 4>;  // destination: workgroup-address-space array of the same type
+
+@compute @workgroup_size(1)
+fn f() {  // each statement copies a progressively smaller piece of u into w
+    w = u;  // whole array
+    w[1] = u[2];  // one matrix: element 2 of u into element 1 of w
+    w[1][0] = u[0][1].zxy;  // one column: column 1 of u[0], swizzled to (z, x, y), into column 0 of w[1]
+    w[1][0].x = u[0][1].x;  // one scalar: x of that same source column into x of that same destination column
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.dxc.hlsl
@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.fxc.hlsl
@@ -0,0 +1,47 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[16];
+};
+groupshared float4x3 w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+float4x3 tint_symbol_3(uint4 buffer[16], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  const uint scalar_offset_1 = ((offset + 16u)) / 4;
+  const uint scalar_offset_2 = ((offset + 32u)) / 4;
+  const uint scalar_offset_3 = ((offset + 48u)) / 4;
+  return float4x3(asfloat(buffer[scalar_offset / 4].xyz), asfloat(buffer[scalar_offset_1 / 4].xyz), asfloat(buffer[scalar_offset_2 / 4].xyz), asfloat(buffer[scalar_offset_3 / 4].xyz));
+}
+
+typedef float4x3 tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[16], uint offset) {
+  float4x3 arr[4] = (float4x3[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 64u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = float4x3((0.0f).xxx, (0.0f).xxx, (0.0f).xxx, (0.0f).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 128u);
+  w[1][0] = asfloat(u[1].xyz).zxy;
+  w[1][0].x = asfloat(u[1].x);
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.glsl
@@ -0,0 +1,26 @@
+#version 310 es
+
+layout(binding = 0, std140) uniform u_block_ubo {
+  mat4x3 inner[4];
+} u;
+
+shared mat4x3 w[4];
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = mat4x3(vec3(0.0f), vec3(0.0f), vec3(0.0f), vec3(0.0f));
+    }
+  }
+  barrier();
+  w = u.inner;
+  w[1] = u.inner[2];
+  w[1][0] = u.inner[0][1].zxy;
+  w[1][0].x = u.inner[0][1].x;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<float4x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<float4x3, 4>* const tint_symbol, const constant tint_array<float4x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = float3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<float4x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<float4x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.spvasm
@@ -0,0 +1,115 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 70
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 16
+               OpDecorate %_arr_mat4v3float_uint_4 ArrayStride 64
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+      %float = OpTypeFloat 32
+    %v3float = OpTypeVector %float 3
+%mat4v3float = OpTypeMatrix %v3float 4
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4v3float_uint_4 = OpTypeArray %mat4v3float %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3float_uint_4
+%_ptr_Uniform_u_block = OpTypePointer Uniform %u_block
+          %u = OpVariable %_ptr_Uniform_u_block Uniform
+%_ptr_Workgroup__arr_mat4v3float_uint_4 = OpTypePointer Workgroup %_arr_mat4v3float_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v3float_uint_4 Workgroup
+       %void = OpTypeVoid
+         %14 = OpTypeFunction %void %uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %21 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Workgroup_mat4v3float = OpTypePointer Workgroup %mat4v3float
+         %35 = OpConstantNull %mat4v3float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4v3float_uint_4 = OpTypePointer Uniform %_arr_mat4v3float_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+      %int_2 = OpConstant %int 2
+%_ptr_Uniform_mat4v3float = OpTypePointer Uniform %mat4v3float
+         %53 = OpConstantNull %int
+%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float
+%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float
+%_ptr_Workgroup_float = OpTypePointer Workgroup %float
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+         %65 = OpTypeFunction %void
+    %f_inner = OpFunction %void None %14
+%local_invocation_index = OpFunctionParameter %uint
+         %18 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %21
+               OpStore %idx %local_invocation_index
+               OpBranch %22
+         %22 = OpLabel
+               OpLoopMerge %23 %24 None
+               OpBranch %25
+         %25 = OpLabel
+         %27 = OpLoad %uint %idx
+         %28 = OpULessThan %bool %27 %uint_4
+         %26 = OpLogicalNot %bool %28
+               OpSelectionMerge %30 None
+               OpBranchConditional %26 %31 %30
+         %31 = OpLabel
+               OpBranch %23
+         %30 = OpLabel
+         %32 = OpLoad %uint %idx
+         %34 = OpAccessChain %_ptr_Workgroup_mat4v3float %w %32
+               OpStore %34 %35
+               OpBranch %24
+         %24 = OpLabel
+         %36 = OpLoad %uint %idx
+         %38 = OpIAdd %uint %36 %uint_1
+               OpStore %idx %38
+               OpBranch %22
+         %23 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %44 = OpAccessChain %_ptr_Uniform__arr_mat4v3float_uint_4 %u %uint_0
+         %45 = OpLoad %_arr_mat4v3float_uint_4 %44
+               OpStore %w %45
+         %48 = OpAccessChain %_ptr_Workgroup_mat4v3float %w %int_1
+         %51 = OpAccessChain %_ptr_Uniform_mat4v3float %u %uint_0 %int_2
+         %52 = OpLoad %mat4v3float %51
+               OpStore %48 %52
+         %55 = OpAccessChain %_ptr_Workgroup_v3float %w %int_1 %53
+         %57 = OpAccessChain %_ptr_Uniform_v3float %u %uint_0 %53 %int_1
+         %58 = OpLoad %v3float %57
+         %59 = OpVectorShuffle %v3float %58 %58 2 0 1
+               OpStore %55 %59
+         %61 = OpAccessChain %_ptr_Workgroup_float %w %int_1 %53 %uint_0
+         %63 = OpAccessChain %_ptr_Uniform_float %u %uint_0 %53 %int_1 %uint_0
+         %64 = OpLoad %float %63
+               OpStore %61 %64
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %65
+         %67 = OpLabel
+         %69 = OpLoad %uint %local_invocation_index_1
+         %68 = OpFunctionCall %void %f_inner %69
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.wgsl
@@ -0,0 +1,11 @@
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f32>, 4>;
+
+var<workgroup> w : array<mat4x3<f32>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}