Tint/E2E: Add f16 uniform/storage buffer E2E tests

This CL add Tint E2E tests for f16 types in uniform and storage buffers. Bug: tint:1473, tint:1502 Change-Id: I325524d2df326240cc1b080a90abf5bd076b3da1 Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/107543 Reviewed-by: Ben Clayton <bclayton@google.com> Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Zhaoming Jiang <zhaoming.jiang@intel.com>
2025-12-15 16:16:08 +00:00 · 2022-11-30 02:47:27 +00:00
parent 205e16de63
commit 776b221ae2
2995 changed files with 113726 additions and 2997 deletions
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl
@@ -0,0 +1,17 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+var<private> counter = 0;
+fn i() -> i32 { counter++; return counter; }
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_i     = &((*p_a)[i()]);
+  let p_a_i_i   = &((*p_a_i)[i()]);
+
+  let l_a       : array<mat4x3<f16>, 4> = *p_a;
+  let l_a_i     : mat4x3<f16>           = *p_a_i;
+  let l_a_i_i   : vec3<f16>             = *p_a_i_i;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,59 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,64 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+static int counter = 0;
+
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_1(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const int p_a_i_save = i();
+  const int p_a_i_i_save = i();
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, (32u * uint(p_a_i_save)));
+  const uint scalar_offset_4 = (((32u * uint(p_a_i_save)) + (8u * uint(p_a_i_i_save)))) / 4;
+  uint4 ubo_load_9 = a[scalar_offset_4 / 4];
+  uint2 ubo_load_8 = ((scalar_offset_4 & 2) ? ubo_load_9.zw : ubo_load_9.xy);
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000180EC89DCE0(11,8-16): error X3000: syntax error: unexpected token 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,75 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} a;
+
+int counter = 0;
+int i() {
+  counter = (counter + 1);
+  return counter;
+}
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+f16vec3 load_a_inner_p0_p1(uint p0, uint p1) {
+  switch(p1) {
+    case 0u: {
+      return a.inner[p0].col0;
+      break;
+    }
+    case 1u: {
+      return a.inner[p0].col1;
+      break;
+    }
+    case 2u: {
+      return a.inner[p0].col2;
+      break;
+    }
+    case 3u: {
+      return a.inner[p0].col3;
+      break;
+    }
+    default: {
+      return f16vec3(0.0hf);
+      break;
+    }
+  }
+}
+
+void f() {
+  f16mat4x3 p_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  int tint_symbol = i();
+  f16mat4x3 p_a_i = conv_mat4x3_f16(a.inner[tint_symbol]);
+  int tint_symbol_1 = i();
+  f16vec3 p_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+  f16mat4x3 l_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 l_a_i = conv_mat4x3_f16(a.inner[tint_symbol]);
+  f16vec3 l_a_i_i = load_a_inner_p0_p1(uint(tint_symbol), uint(tint_symbol_1));
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,33 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+int i() {
+  thread int tint_symbol_2 = 0;
+  tint_symbol_2 = as_type<int>((as_type<uint>(tint_symbol_2) + as_type<uint>(1)));
+  return tint_symbol_2;
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_3 [[buffer(0)]]) {
+  int const tint_symbol = i();
+  int const p_a_i_save = tint_symbol;
+  int const tint_symbol_1 = i();
+  int const p_a_i_i_save = tint_symbol_1;
+  tint_array<half4x3, 4> const l_a = *(tint_symbol_3);
+  half4x3 const l_a_i = (*(tint_symbol_3))[p_a_i_save];
+  half3 const l_a_i_i = (*(tint_symbol_3))[p_a_i_save][p_a_i_i_save];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,181 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 108
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %a "a"
+               OpName %counter "counter"
+               OpName %i "i"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i_0 "i"
+               OpName %var_for_index "var_for_index"
+               OpName %load_a_inner_p0_p1 "load_a_inner_p0_p1"
+               OpName %p0 "p0"
+               OpName %p1 "p1"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+        %int = OpTypeInt 32 1
+         %11 = OpConstantNull %int
+%_ptr_Private_int = OpTypePointer Private %int
+    %counter = OpVariable %_ptr_Private_int Private %11
+         %14 = OpTypeFunction %int
+      %int_1 = OpConstant %int 1
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %21 = OpTypeFunction %mat4v3half %mat4x3_f16
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %31 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %38 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %41 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %54 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+         %67 = OpTypeFunction %v3half %uint %uint
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+     %uint_2 = OpConstant %uint 2
+     %uint_3 = OpConstant %uint 3
+         %90 = OpConstantNull %v3half
+       %void = OpTypeVoid
+         %91 = OpTypeFunction %void
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+          %i = OpFunction %int None %14
+         %16 = OpLabel
+         %17 = OpLoad %int %counter
+         %19 = OpIAdd %int %17 %int_1
+               OpStore %counter %19
+         %20 = OpLoad %int %counter
+               OpReturnValue %20
+               OpFunctionEnd
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %21
+        %val = OpFunctionParameter %mat4x3_f16
+         %25 = OpLabel
+         %26 = OpCompositeExtract %v3half %val 0
+         %27 = OpCompositeExtract %v3half %val 1
+         %28 = OpCompositeExtract %v3half %val 2
+         %29 = OpCompositeExtract %v3half %val 3
+         %30 = OpCompositeConstruct %mat4v3half %26 %27 %28 %29
+               OpReturnValue %30
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %31
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %35 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %38
+        %i_0 = OpVariable %_ptr_Function_uint Function %41
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %54
+               OpBranch %42
+         %42 = OpLabel
+               OpLoopMerge %43 %44 None
+               OpBranch %45
+         %45 = OpLabel
+         %47 = OpLoad %uint %i_0
+         %48 = OpULessThan %bool %47 %uint_4
+         %46 = OpLogicalNot %bool %48
+               OpSelectionMerge %50 None
+               OpBranchConditional %46 %51 %50
+         %51 = OpLabel
+               OpBranch %43
+         %50 = OpLabel
+               OpStore %var_for_index %val_0
+         %55 = OpLoad %uint %i_0
+         %57 = OpAccessChain %_ptr_Function_mat4v3half %arr %55
+         %59 = OpLoad %uint %i_0
+         %61 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %59
+         %62 = OpLoad %mat4x3_f16 %61
+         %58 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %62
+               OpStore %57 %58
+               OpBranch %44
+         %44 = OpLabel
+         %63 = OpLoad %uint %i_0
+         %65 = OpIAdd %uint %63 %uint_1
+               OpStore %i_0 %65
+               OpBranch %42
+         %43 = OpLabel
+         %66 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %66
+               OpFunctionEnd
+%load_a_inner_p0_p1 = OpFunction %v3half None %67
+         %p0 = OpFunctionParameter %uint
+         %p1 = OpFunctionParameter %uint
+         %71 = OpLabel
+               OpSelectionMerge %72 None
+               OpSwitch %p1 %73 0 %74 1 %75 2 %76 3 %77
+         %74 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_0
+         %81 = OpLoad %v3half %80
+               OpReturnValue %81
+         %75 = OpLabel
+         %82 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_1
+         %83 = OpLoad %v3half %82
+               OpReturnValue %83
+         %76 = OpLabel
+         %85 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_2
+         %86 = OpLoad %v3half %85
+               OpReturnValue %86
+         %77 = OpLabel
+         %88 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %p0 %uint_3
+         %89 = OpLoad %v3half %88
+               OpReturnValue %89
+         %73 = OpLabel
+               OpReturnValue %90
+         %72 = OpLabel
+               OpReturnValue %90
+               OpFunctionEnd
+          %f = OpFunction %void None %91
+         %94 = OpLabel
+         %95 = OpFunctionCall %int %i
+         %96 = OpFunctionCall %int %i
+         %99 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %a %uint_0
+        %100 = OpLoad %_arr_mat4x3_f16_uint_4 %99
+         %97 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %100
+        %103 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %a %uint_0 %95
+        %104 = OpLoad %mat4x3_f16 %103
+        %101 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %104
+        %106 = OpBitcast %uint %95
+        %107 = OpBitcast %uint %96
+        %105 = OpFunctionCall %v3half %load_a_inner_p0_p1 %106 %107
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/dynamic_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,20 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+var<private> counter = 0;
+
+fn i() -> i32 {
+  counter++;
+  return counter;
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_i = &((*(p_a))[i()]);
+  let p_a_i_i = &((*(p_a_i))[i()]);
+  let l_a : array<mat4x3<f16>, 4> = *(p_a);
+  let l_a_i : mat4x3<f16> = *(p_a_i);
+  let l_a_i_i : vec3<f16> = *(p_a_i_i);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl
@@ -0,0 +1,14 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a       = &a;
+  let p_a_2     = &((*p_a)[2]);
+  let p_a_2_1   = &((*p_a_2)[1]);
+
+  let l_a       : array<mat4x3<f16>, 4> = *p_a;
+  let l_a_i     : mat4x3<f16>           = *p_a_2;
+  let l_a_i_i   : vec3<f16>             = *p_a_2_1;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.dxc.hlsl
@@ -0,0 +1,49 @@
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.fxc.hlsl
@@ -0,0 +1,54 @@
+SKIP: FAILED
+
+cbuffer cbuffer_a : register(b0, space0) {
+  uint4 a[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 4, 3> l_a[4] = tint_symbol(a, 0u);
+  const matrix<float16_t, 4, 3> l_a_i = tint_symbol_1(a, 64u);
+  uint2 ubo_load_8 = a[4].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const vector<float16_t, 3> l_a_i_i = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021B182DE800(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.glsl
@@ -0,0 +1,42 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform a_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} a;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  f16mat4x3 p_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 p_a_2 = conv_mat4x3_f16(a.inner[2u]);
+  f16vec3 p_a_2_1 = a.inner[2u].col1;
+  f16mat4x3 l_a[4] = conv_arr4_mat4x3_f16(a.inner);
+  f16mat4x3 l_a_i = conv_mat4x3_f16(a.inner[2u]);
+  f16vec3 l_a_i_i = a.inner[2u].col1;
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.msl
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  tint_array<half4x3, 4> const l_a = *(tint_symbol);
+  half4x3 const l_a_i = (*(tint_symbol))[2];
+  half3 const l_a_i_i = (*(tint_symbol))[2][1];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.spvasm
@@ -0,0 +1,129 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 73
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %a_block_std140 "a_block_std140"
+               OpMemberName %a_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %a "a"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %a_block_std140 Block
+               OpMemberDecorate %a_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %a NonWritable
+               OpDecorate %a DescriptorSet 0
+               OpDecorate %a Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%a_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_a_block_std140 = OpTypePointer Uniform %a_block_std140
+          %a = OpVariable %_ptr_Uniform_a_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %10 = OpTypeFunction %mat4v3half %mat4x3_f16
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %20 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %27 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %30 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %43 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %56 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %10
+        %val = OpFunctionParameter %mat4x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeExtract %v3half %val 2
+         %18 = OpCompositeExtract %v3half %val 3
+         %19 = OpCompositeConstruct %mat4v3half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %20
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %24 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %27
+          %i = OpVariable %_ptr_Function_uint Function %30
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %43
+               OpBranch %31
+         %31 = OpLabel
+               OpLoopMerge %32 %33 None
+               OpBranch %34
+         %34 = OpLabel
+         %36 = OpLoad %uint %i
+         %37 = OpULessThan %bool %36 %uint_4
+         %35 = OpLogicalNot %bool %37
+               OpSelectionMerge %39 None
+               OpBranchConditional %35 %40 %39
+         %40 = OpLabel
+               OpBranch %32
+         %39 = OpLabel
+               OpStore %var_for_index %val_0
+         %44 = OpLoad %uint %i
+         %46 = OpAccessChain %_ptr_Function_mat4v3half %arr %44
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %48
+         %51 = OpLoad %mat4x3_f16 %50
+         %47 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %51
+               OpStore %46 %47
+               OpBranch %33
+         %33 = OpLabel
+         %52 = OpLoad %uint %i
+         %54 = OpIAdd %uint %52 %uint_1
+               OpStore %i %54
+               OpBranch %31
+         %32 = OpLabel
+         %55 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %55
+               OpFunctionEnd
+          %f = OpFunction %void None %56
+         %59 = OpLabel
+         %63 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %a %uint_0
+         %64 = OpLoad %_arr_mat4x3_f16_uint_4 %63
+         %60 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %64
+         %68 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %a %uint_0 %uint_2
+         %69 = OpLoad %mat4x3_f16 %68
+         %65 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %69
+         %71 = OpAccessChain %_ptr_Uniform_v3half %a %uint_0 %uint_2 %uint_1
+         %72 = OpLoad %v3half %71
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/static_index_via_ptr.wgsl.expected.wgsl
@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> a : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let p_a = &(a);
+  let p_a_2 = &((*(p_a))[2]);
+  let p_a_2_1 = &((*(p_a_2))[1]);
+  let l_a : array<mat4x3<f16>, 4> = *(p_a);
+  let l_a_i : mat4x3<f16> = *(p_a_2);
+  let l_a_i_i : vec3<f16> = *(p_a_2_1);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl
@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    let t = transpose(u[2]);
+    let l = length(u[0][1].zxy);
+    let a = abs(u[0][1].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.dxc.hlsl
@@ -0,0 +1,41 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.fxc.hlsl
@@ -0,0 +1,46 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+matrix<float16_t, 4, 3> tint_symbol(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  const matrix<float16_t, 3, 4> t = transpose(tint_symbol(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  const float16_t l = length(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[0].zw;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  const float16_t a = abs(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000002C9B1C5A400(5,8-16): error X3000: syntax error: unexpected token 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.glsl
@@ -0,0 +1,29 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+void f() {
+  f16mat3x4 t = transpose(conv_mat4x3_f16(u.inner[2u]));
+  float16_t l = length(u.inner[0u].col1.zxy);
+  float16_t a = abs(u.inner[0u].col1.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.msl
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  half3x4 const t = transpose((*(tint_symbol))[2]);
+  half const l = length(half3((*(tint_symbol))[0][1]).zxy);
+  half const a = fabs(half3((*(tint_symbol))[0][1]).zxy[0]);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.spvasm
@@ -0,0 +1,83 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 46
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+         %34 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+         %10 = OpTypeFunction %mat4v3half %mat4x3_f16
+       %void = OpTypeVoid
+         %20 = OpTypeFunction %void
+     %v4half = OpTypeVector %half 4
+ %mat3v4half = OpTypeMatrix %v4half 3
+     %uint_0 = OpConstant %uint 0
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %35 = OpConstantNull %uint
+     %uint_1 = OpConstant %uint 1
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %10
+        %val = OpFunctionParameter %mat4x3_f16
+         %14 = OpLabel
+         %15 = OpCompositeExtract %v3half %val 0
+         %16 = OpCompositeExtract %v3half %val 1
+         %17 = OpCompositeExtract %v3half %val 2
+         %18 = OpCompositeExtract %v3half %val 3
+         %19 = OpCompositeConstruct %mat4v3half %15 %16 %17 %18
+               OpReturnValue %19
+               OpFunctionEnd
+          %f = OpFunction %void None %20
+         %23 = OpLabel
+         %31 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %32 = OpLoad %mat4x3_f16 %31
+         %27 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %32
+         %24 = OpTranspose %mat3v4half %27
+         %38 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %35 %uint_1
+         %39 = OpLoad %v3half %38
+         %40 = OpVectorShuffle %v3half %39 %39 2 0 1
+         %33 = OpExtInst %half %34 Length %40
+         %42 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %35 %uint_1
+         %43 = OpLoad %v3half %42
+         %44 = OpVectorShuffle %v3half %43 %43 2 0 1
+         %45 = OpCompositeExtract %half %44 0
+         %41 = OpExtInst %half %34 FAbs %45
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_builtin.wgsl.expected.wgsl
@@ -0,0 +1,10 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  let t = transpose(u[2]);
+  let l = length(u[0][1].zxy);
+  let a = abs(u[0][1].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl
@@ -0,0 +1,16 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+fn a(a : array<mat4x3<f16>, 4>) {}
+fn b(m : mat4x3<f16>) {}
+fn c(v : vec3<f16>) {}
+fn d(f : f16) {}
+
+@compute @workgroup_size(1)
+fn f() {
+    a(u);
+    b(u[1]);
+    c(u[1][0].zxy);
+    d(u[1][0].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.dxc.hlsl
@@ -0,0 +1,65 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.fxc.hlsl
@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+
+void a(matrix<float16_t, 4, 3> a_1[4]) {
+}
+
+void b(matrix<float16_t, 4, 3> m) {
+}
+
+void c(vector<float16_t, 3> v) {
+}
+
+void d(float16_t f_1) {
+}
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  a(tint_symbol(u, 0u));
+  b(tint_symbol_1(u, 32u));
+  uint2 ubo_load_8 = u[2].xy;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  c(vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  uint2 ubo_load_9 = u[2].xy;
+  vector<float16_t, 2> ubo_load_9_xz = vector<float16_t, 2>(f16tof32(ubo_load_9 & 0xFFFF));
+  float16_t ubo_load_9_y = f16tof32(ubo_load_9[0] >> 16);
+  d(vector<float16_t, 3>(ubo_load_9_xz[0], ubo_load_9_y, ubo_load_9_xz[1]).zxy.x);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(5,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(8,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(11,15-23): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000296784C0E70(14,8-16): error X3000: unrecognized identifier 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.glsl
@@ -0,0 +1,52 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+void a(f16mat4x3 a_1[4]) {
+}
+
+void b(f16mat4x3 m) {
+}
+
+void c(f16vec3 v) {
+}
+
+void d(float16_t f_1) {
+}
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  a(conv_arr4_mat4x3_f16(u.inner));
+  b(conv_mat4x3_f16(u.inner[1u]));
+  c(u.inner[1u].col0.zxy);
+  d(u.inner[1u].col0.zxy[0u]);
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.msl
@@ -0,0 +1,36 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+void a(tint_array<half4x3, 4> a_1) {
+}
+
+void b(half4x3 m) {
+}
+
+void c(half3 v) {
+}
+
+void d(half f_1) {
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol [[buffer(0)]]) {
+  a(*(tint_symbol));
+  b((*(tint_symbol))[1]);
+  c(half3((*(tint_symbol))[1][0]).zxy);
+  d(half3((*(tint_symbol))[1][0]).zxy[0]);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.spvasm
@@ -0,0 +1,169 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 97
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %a "a"
+               OpName %a_1 "a_1"
+               OpName %b "b"
+               OpName %m "m"
+               OpName %c "c"
+               OpName %v "v"
+               OpName %d "d"
+               OpName %f_1 "f_1"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+       %void = OpTypeVoid
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+         %10 = OpTypeFunction %void %_arr_mat4v3half_uint_4
+         %17 = OpTypeFunction %void %mat4v3half
+         %21 = OpTypeFunction %void %v3half
+         %25 = OpTypeFunction %void %half
+         %29 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %38 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %44 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %47 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %60 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+         %73 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+          %a = OpFunction %void None %10
+        %a_1 = OpFunctionParameter %_arr_mat4v3half_uint_4
+         %16 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %b = OpFunction %void None %17
+          %m = OpFunctionParameter %mat4v3half
+         %20 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %c = OpFunction %void None %21
+          %v = OpFunctionParameter %v3half
+         %24 = OpLabel
+               OpReturn
+               OpFunctionEnd
+          %d = OpFunction %void None %25
+        %f_1 = OpFunctionParameter %half
+         %28 = OpLabel
+               OpReturn
+               OpFunctionEnd
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %29
+        %val = OpFunctionParameter %mat4x3_f16
+         %32 = OpLabel
+         %33 = OpCompositeExtract %v3half %val 0
+         %34 = OpCompositeExtract %v3half %val 1
+         %35 = OpCompositeExtract %v3half %val 2
+         %36 = OpCompositeExtract %v3half %val 3
+         %37 = OpCompositeConstruct %mat4v3half %33 %34 %35 %36
+               OpReturnValue %37
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %38
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %41 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %44
+          %i = OpVariable %_ptr_Function_uint Function %47
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %60
+               OpBranch %48
+         %48 = OpLabel
+               OpLoopMerge %49 %50 None
+               OpBranch %51
+         %51 = OpLabel
+         %53 = OpLoad %uint %i
+         %54 = OpULessThan %bool %53 %uint_4
+         %52 = OpLogicalNot %bool %54
+               OpSelectionMerge %56 None
+               OpBranchConditional %52 %57 %56
+         %57 = OpLabel
+               OpBranch %49
+         %56 = OpLabel
+               OpStore %var_for_index %val_0
+         %61 = OpLoad %uint %i
+         %63 = OpAccessChain %_ptr_Function_mat4v3half %arr %61
+         %65 = OpLoad %uint %i
+         %67 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %65
+         %68 = OpLoad %mat4x3_f16 %67
+         %64 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %68
+               OpStore %63 %64
+               OpBranch %50
+         %50 = OpLabel
+         %69 = OpLoad %uint %i
+         %71 = OpIAdd %uint %69 %uint_1
+               OpStore %i %71
+               OpBranch %48
+         %49 = OpLabel
+         %72 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %72
+               OpFunctionEnd
+          %f = OpFunction %void None %73
+         %75 = OpLabel
+         %80 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %81 = OpLoad %_arr_mat4x3_f16_uint_4 %80
+         %77 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %81
+         %76 = OpFunctionCall %void %a %77
+         %85 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_1
+         %86 = OpLoad %mat4x3_f16 %85
+         %83 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %86
+         %82 = OpFunctionCall %void %b %83
+         %89 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %90 = OpLoad %v3half %89
+         %91 = OpVectorShuffle %v3half %90 %90 2 0 1
+         %87 = OpFunctionCall %void %c %91
+         %93 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %uint_1 %uint_0
+         %94 = OpLoad %v3half %93
+         %95 = OpVectorShuffle %v3half %94 %94 2 0 1
+         %96 = OpCompositeExtract %half %95 0
+         %92 = OpFunctionCall %void %d %96
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_fn.wgsl.expected.wgsl
@@ -0,0 +1,23 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+fn a(a : array<mat4x3<f16>, 4>) {
+}
+
+fn b(m : mat4x3<f16>) {
+}
+
+fn c(v : vec3<f16>) {
+}
+
+fn d(f : f16) {
+}
+
+@compute @workgroup_size(1)
+fn f() {
+  a(u);
+  b(u[1]);
+  c(u[1][0].zxy);
+  d(u[1][0].zxy.x);
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl
@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+var<private> p : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    p = u;
+    p[1] = u[2];
+    p[1][0] = u[0][1].zxy;
+    p[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.dxc.hlsl
@@ -0,0 +1,51 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 3> p[4] = (matrix<float16_t, 4, 3>[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.fxc.hlsl
@@ -0,0 +1,56 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+static matrix<float16_t, 4, 3> p[4] = (matrix<float16_t, 4, 3>[4])0;
+
+matrix<float16_t, 4, 3> tint_symbol_1(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_ret[4];
+tint_symbol_ret tint_symbol(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = tint_symbol_1(buffer, (offset + (i * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  p = tint_symbol(u, 0u);
+  p[1] = tint_symbol_1(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  p[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  p[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x00000222F970C590(4,15-23): error X3000: syntax error: unexpected token 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.glsl
@@ -0,0 +1,41 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+f16mat4x3 p[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  p = conv_arr4_mat4x3_f16(u.inner);
+  p[1] = conv_mat4x3_f16(u.inner[2u]);
+  p[1][0] = u.inner[0u].col1.zxy;
+  p[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.msl
@@ -0,0 +1,25 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  thread tint_array<half4x3, 4> tint_symbol = {};
+  tint_symbol = *(tint_symbol_1);
+  tint_symbol[1] = (*(tint_symbol_1))[2];
+  tint_symbol[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  tint_symbol[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.spvasm
@@ -0,0 +1,149 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 88
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %p "p"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+%_ptr_Private__arr_mat4v3half_uint_4 = OpTypePointer Private %_arr_mat4v3half_uint_4
+         %14 = OpConstantNull %_arr_mat4v3half_uint_4
+          %p = OpVariable %_ptr_Private__arr_mat4v3half_uint_4 Private %14
+         %15 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %24 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %32 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %45 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %58 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Private_mat4v3half = OpTypePointer Private %mat4v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %76 = OpConstantNull %int
+%_ptr_Private_v3half = OpTypePointer Private %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Private_half = OpTypePointer Private %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %15
+        %val = OpFunctionParameter %mat4x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeExtract %v3half %val 2
+         %22 = OpCompositeExtract %v3half %val 3
+         %23 = OpCompositeConstruct %mat4v3half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %14
+          %i = OpVariable %_ptr_Function_uint Function %32
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %45
+               OpBranch %33
+         %33 = OpLabel
+               OpLoopMerge %34 %35 None
+               OpBranch %36
+         %36 = OpLabel
+         %38 = OpLoad %uint %i
+         %39 = OpULessThan %bool %38 %uint_4
+         %37 = OpLogicalNot %bool %39
+               OpSelectionMerge %41 None
+               OpBranchConditional %37 %42 %41
+         %42 = OpLabel
+               OpBranch %34
+         %41 = OpLabel
+               OpStore %var_for_index %val_0
+         %46 = OpLoad %uint %i
+         %48 = OpAccessChain %_ptr_Function_mat4v3half %arr %46
+         %50 = OpLoad %uint %i
+         %52 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %50
+         %53 = OpLoad %mat4x3_f16 %52
+         %49 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %53
+               OpStore %48 %49
+               OpBranch %35
+         %35 = OpLabel
+         %54 = OpLoad %uint %i
+         %56 = OpIAdd %uint %54 %uint_1
+               OpStore %i %56
+               OpBranch %33
+         %34 = OpLabel
+         %57 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %57
+               OpFunctionEnd
+          %f = OpFunction %void None %58
+         %61 = OpLabel
+         %65 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %66 = OpLoad %_arr_mat4x3_f16_uint_4 %65
+         %62 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %66
+               OpStore %p %62
+         %70 = OpAccessChain %_ptr_Private_mat4v3half %p %int_1
+         %74 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %75 = OpLoad %mat4x3_f16 %74
+         %71 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %75
+               OpStore %70 %71
+         %78 = OpAccessChain %_ptr_Private_v3half %p %int_1 %76
+         %80 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %32 %uint_1
+         %81 = OpLoad %v3half %80
+         %82 = OpVectorShuffle %v3half %81 %81 2 0 1
+               OpStore %78 %82
+         %84 = OpAccessChain %_ptr_Private_half %p %int_1 %76 %uint_0
+         %86 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %32 %uint_1 %32
+         %87 = OpLoad %half %86
+               OpStore %84 %87
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_private.wgsl.expected.wgsl
@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+var<private> p : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  p = u;
+  p[1] = u[2];
+  p[1][0] = u[0][1].zxy;
+  p[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl
@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    s = u;
+    s[1] = u[2];
+    s[1][0] = u[0][1].zxy;
+    s[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.dxc.hlsl
@@ -0,0 +1,67 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value[4]) {
+  matrix<float16_t, 4, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(32u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.fxc.hlsl
@@ -0,0 +1,73 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+RWByteAddressBuffer s : register(u1, space0);
+
+void tint_symbol_1(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value) {
+  buffer.Store<vector<float16_t, 3> >((offset + 0u), value[0u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 8u), value[1u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 16u), value[2u]);
+  buffer.Store<vector<float16_t, 3> >((offset + 24u), value[3u]);
+}
+
+void tint_symbol(RWByteAddressBuffer buffer, uint offset, matrix<float16_t, 4, 3> value[4]) {
+  matrix<float16_t, 4, 3> array[4] = value;
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      tint_symbol_1(buffer, (offset + (i * 32u)), array[i]);
+    }
+  }
+}
+
+matrix<float16_t, 4, 3> tint_symbol_4(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_3_ret[4];
+tint_symbol_3_ret tint_symbol_3(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_4(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+[numthreads(1, 1, 1)]
+void f() {
+  tint_symbol(s, 0u, tint_symbol_3(u, 0u));
+  tint_symbol_1(s, 32u, tint_symbol_4(u, 64u));
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  s.Store<vector<float16_t, 3> >(32u, vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy);
+  s.Store<float16_t>(32u, float16_t(f16tof32(((u[0].z) & 0xFFFF))));
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021F13DB33D0(6,68-76): error X3000: syntax error: unexpected token 'float16_t'
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x0000021F13DB33D0(7,3-14): error X3018: invalid subscript 'Store'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.glsl
@@ -0,0 +1,44 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+layout(binding = 1, std430) buffer u_block_ssbo {
+  f16mat4x3 inner[4];
+} s;
+
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f() {
+  s.inner = conv_arr4_mat4x3_f16(u.inner);
+  s.inner[1] = conv_mat4x3_f16(u.inner[2u]);
+  s.inner[1][0] = u.inner[0u].col1.zxy;
+  s.inner[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f();
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.msl
@@ -0,0 +1,24 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+kernel void f(device tint_array<half4x3, 4>* tint_symbol [[buffer(1)]], const constant tint_array<half4x3, 4>* tint_symbol_1 [[buffer(0)]]) {
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.spvasm
@@ -0,0 +1,160 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 91
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f"
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %u_block "u_block"
+               OpMemberName %u_block 0 "inner"
+               OpName %s "s"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f "f"
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %u_block Block
+               OpMemberDecorate %u_block 0 Offset 0
+               OpMemberDecorate %u_block 0 ColMajor
+               OpMemberDecorate %u_block 0 MatrixStride 8
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+               OpDecorate %s DescriptorSet 0
+               OpDecorate %s Binding 1
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+    %u_block = OpTypeStruct %_arr_mat4v3half_uint_4
+%_ptr_StorageBuffer_u_block = OpTypePointer StorageBuffer %u_block
+          %s = OpVariable %_ptr_StorageBuffer_u_block StorageBuffer
+         %15 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %24 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %30 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %33 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %46 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %59 = OpTypeFunction %void
+     %uint_0 = OpConstant %uint 0
+%_ptr_StorageBuffer__arr_mat4v3half_uint_4 = OpTypePointer StorageBuffer %_arr_mat4v3half_uint_4
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_StorageBuffer_mat4v3half = OpTypePointer StorageBuffer %mat4v3half
+     %uint_2 = OpConstant %uint 2
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %79 = OpConstantNull %int
+%_ptr_StorageBuffer_v3half = OpTypePointer StorageBuffer %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_StorageBuffer_half = OpTypePointer StorageBuffer %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %15
+        %val = OpFunctionParameter %mat4x3_f16
+         %18 = OpLabel
+         %19 = OpCompositeExtract %v3half %val 0
+         %20 = OpCompositeExtract %v3half %val 1
+         %21 = OpCompositeExtract %v3half %val 2
+         %22 = OpCompositeExtract %v3half %val 3
+         %23 = OpCompositeConstruct %mat4v3half %19 %20 %21 %22
+               OpReturnValue %23
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %24
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %27 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %30
+          %i = OpVariable %_ptr_Function_uint Function %33
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %46
+               OpBranch %34
+         %34 = OpLabel
+               OpLoopMerge %35 %36 None
+               OpBranch %37
+         %37 = OpLabel
+         %39 = OpLoad %uint %i
+         %40 = OpULessThan %bool %39 %uint_4
+         %38 = OpLogicalNot %bool %40
+               OpSelectionMerge %42 None
+               OpBranchConditional %38 %43 %42
+         %43 = OpLabel
+               OpBranch %35
+         %42 = OpLabel
+               OpStore %var_for_index %val_0
+         %47 = OpLoad %uint %i
+         %49 = OpAccessChain %_ptr_Function_mat4v3half %arr %47
+         %51 = OpLoad %uint %i
+         %53 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %51
+         %54 = OpLoad %mat4x3_f16 %53
+         %50 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %54
+               OpStore %49 %50
+               OpBranch %36
+         %36 = OpLabel
+         %55 = OpLoad %uint %i
+         %57 = OpIAdd %uint %55 %uint_1
+               OpStore %i %57
+               OpBranch %34
+         %35 = OpLabel
+         %58 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %58
+               OpFunctionEnd
+          %f = OpFunction %void None %59
+         %62 = OpLabel
+         %65 = OpAccessChain %_ptr_StorageBuffer__arr_mat4v3half_uint_4 %s %uint_0
+         %68 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %69 = OpLoad %_arr_mat4x3_f16_uint_4 %68
+         %66 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %69
+               OpStore %65 %66
+         %73 = OpAccessChain %_ptr_StorageBuffer_mat4v3half %s %uint_0 %int_1
+         %77 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %78 = OpLoad %mat4x3_f16 %77
+         %74 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %78
+               OpStore %73 %74
+         %81 = OpAccessChain %_ptr_StorageBuffer_v3half %s %uint_0 %int_1 %79
+         %83 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %33 %uint_1
+         %84 = OpLoad %v3half %83
+         %85 = OpVectorShuffle %v3half %84 %84 2 0 1
+               OpStore %81 %85
+         %87 = OpAccessChain %_ptr_StorageBuffer_half %s %uint_0 %int_1 %79 %uint_0
+         %89 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %33 %uint_1 %33
+         %90 = OpLoad %half %89
+               OpStore %87 %90
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_storage.wgsl.expected.wgsl
@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+@group(0) @binding(1) var<storage, read_write> s : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  s = u;
+  s[1] = u[2];
+  s[1][0] = u[0][1].zxy;
+  s[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl
@@ -0,0 +1,12 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+var<workgroup> w : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+    w = u;
+    w[1] = u[2];
+    w[1][0] = u[0][1].zxy;
+    w[1][0].x = u[0][1].x;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.dxc.hlsl
@@ -0,0 +1,66 @@
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.fxc.hlsl
@@ -0,0 +1,71 @@
+SKIP: FAILED
+
+cbuffer cbuffer_u : register(b0, space0) {
+  uint4 u[8];
+};
+groupshared matrix<float16_t, 4, 3> w[4];
+
+struct tint_symbol_1 {
+  uint local_invocation_index : SV_GroupIndex;
+};
+
+matrix<float16_t, 4, 3> tint_symbol_3(uint4 buffer[8], uint offset) {
+  const uint scalar_offset = ((offset + 0u)) / 4;
+  uint4 ubo_load_1 = buffer[scalar_offset / 4];
+  uint2 ubo_load = ((scalar_offset & 2) ? ubo_load_1.zw : ubo_load_1.xy);
+  vector<float16_t, 2> ubo_load_xz = vector<float16_t, 2>(f16tof32(ubo_load & 0xFFFF));
+  float16_t ubo_load_y = f16tof32(ubo_load[0] >> 16);
+  const uint scalar_offset_1 = ((offset + 8u)) / 4;
+  uint4 ubo_load_3 = buffer[scalar_offset_1 / 4];
+  uint2 ubo_load_2 = ((scalar_offset_1 & 2) ? ubo_load_3.zw : ubo_load_3.xy);
+  vector<float16_t, 2> ubo_load_2_xz = vector<float16_t, 2>(f16tof32(ubo_load_2 & 0xFFFF));
+  float16_t ubo_load_2_y = f16tof32(ubo_load_2[0] >> 16);
+  const uint scalar_offset_2 = ((offset + 16u)) / 4;
+  uint4 ubo_load_5 = buffer[scalar_offset_2 / 4];
+  uint2 ubo_load_4 = ((scalar_offset_2 & 2) ? ubo_load_5.zw : ubo_load_5.xy);
+  vector<float16_t, 2> ubo_load_4_xz = vector<float16_t, 2>(f16tof32(ubo_load_4 & 0xFFFF));
+  float16_t ubo_load_4_y = f16tof32(ubo_load_4[0] >> 16);
+  const uint scalar_offset_3 = ((offset + 24u)) / 4;
+  uint4 ubo_load_7 = buffer[scalar_offset_3 / 4];
+  uint2 ubo_load_6 = ((scalar_offset_3 & 2) ? ubo_load_7.zw : ubo_load_7.xy);
+  vector<float16_t, 2> ubo_load_6_xz = vector<float16_t, 2>(f16tof32(ubo_load_6 & 0xFFFF));
+  float16_t ubo_load_6_y = f16tof32(ubo_load_6[0] >> 16);
+  return matrix<float16_t, 4, 3>(vector<float16_t, 3>(ubo_load_xz[0], ubo_load_y, ubo_load_xz[1]), vector<float16_t, 3>(ubo_load_2_xz[0], ubo_load_2_y, ubo_load_2_xz[1]), vector<float16_t, 3>(ubo_load_4_xz[0], ubo_load_4_y, ubo_load_4_xz[1]), vector<float16_t, 3>(ubo_load_6_xz[0], ubo_load_6_y, ubo_load_6_xz[1]));
+}
+
+typedef matrix<float16_t, 4, 3> tint_symbol_2_ret[4];
+tint_symbol_2_ret tint_symbol_2(uint4 buffer[8], uint offset) {
+  matrix<float16_t, 4, 3> arr[4] = (matrix<float16_t, 4, 3>[4])0;
+  {
+    for(uint i_1 = 0u; (i_1 < 4u); i_1 = (i_1 + 1u)) {
+      arr[i_1] = tint_symbol_3(buffer, (offset + (i_1 * 32u)));
+    }
+  }
+  return arr;
+}
+
+void f_inner(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      const uint i = idx;
+      w[i] = matrix<float16_t, 4, 3>((float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx, (float16_t(0.0h)).xxx);
+    }
+  }
+  GroupMemoryBarrierWithGroupSync();
+  w = tint_symbol_2(u, 0u);
+  w[1] = tint_symbol_3(u, 64u);
+  uint2 ubo_load_8 = u[0].zw;
+  vector<float16_t, 2> ubo_load_8_xz = vector<float16_t, 2>(f16tof32(ubo_load_8 & 0xFFFF));
+  float16_t ubo_load_8_y = f16tof32(ubo_load_8[0] >> 16);
+  w[1][0] = vector<float16_t, 3>(ubo_load_8_xz[0], ubo_load_8_y, ubo_load_8_xz[1]).zxy;
+  w[1][0].x = float16_t(f16tof32(((u[0].z) & 0xFFFF)));
+}
+
+[numthreads(1, 1, 1)]
+void f(tint_symbol_1 tint_symbol) {
+  f_inner(tint_symbol.local_invocation_index);
+  return;
+}
+FXC validation failure:
+D:\Projects\RampUp\dawn\test\tint\buffer\Shader@0x000001D6F2EDF360(4,20-28): error X3000: syntax error: unexpected token 'float16_t'
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.glsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.glsl
@@ -0,0 +1,48 @@
+#version 310 es
+#extension GL_AMD_gpu_shader_half_float : require
+
+struct mat4x3_f16 {
+  f16vec3 col0;
+  f16vec3 col1;
+  f16vec3 col2;
+  f16vec3 col3;
+};
+
+layout(binding = 0, std140) uniform u_block_std140_ubo {
+  mat4x3_f16 inner[4];
+} u;
+
+shared f16mat4x3 w[4];
+f16mat4x3 conv_mat4x3_f16(mat4x3_f16 val) {
+  return f16mat4x3(val.col0, val.col1, val.col2, val.col3);
+}
+
+f16mat4x3[4] conv_arr4_mat4x3_f16(mat4x3_f16 val[4]) {
+  f16mat4x3 arr[4] = f16mat4x3[4](f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf), f16mat4x3(0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf, 0.0hf));
+  {
+    for(uint i = 0u; (i < 4u); i = (i + 1u)) {
+      arr[i] = conv_mat4x3_f16(val[i]);
+    }
+  }
+  return arr;
+}
+
+void f(uint local_invocation_index) {
+  {
+    for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+      uint i = idx;
+      w[i] = f16mat4x3(f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf), f16vec3(0.0hf));
+    }
+  }
+  barrier();
+  w = conv_arr4_mat4x3_f16(u.inner);
+  w[1] = conv_mat4x3_f16(u.inner[2u]);
+  w[1][0] = u.inner[0u].col1.zxy;
+  w[1][0].x = u.inner[0u].col1[0u];
+}
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+void main() {
+  f(gl_LocalInvocationIndex);
+  return;
+}
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+template<typename T, size_t N>
+struct tint_array {
+    const constant T& operator[](size_t i) const constant { return elements[i]; }
+    device T& operator[](size_t i) device { return elements[i]; }
+    const device T& operator[](size_t i) const device { return elements[i]; }
+    thread T& operator[](size_t i) thread { return elements[i]; }
+    const thread T& operator[](size_t i) const thread { return elements[i]; }
+    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
+    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
+    T elements[N];
+};
+
+struct tint_symbol_5 {
+  tint_array<half4x3, 4> w;
+};
+
+void f_inner(uint local_invocation_index, threadgroup tint_array<half4x3, 4>* const tint_symbol, const constant tint_array<half4x3, 4>* const tint_symbol_1) {
+  for(uint idx = local_invocation_index; (idx < 4u); idx = (idx + 1u)) {
+    uint const i = idx;
+    (*(tint_symbol))[i] = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
+  }
+  threadgroup_barrier(mem_flags::mem_threadgroup);
+  *(tint_symbol) = *(tint_symbol_1);
+  (*(tint_symbol))[1] = (*(tint_symbol_1))[2];
+  (*(tint_symbol))[1][0] = half3((*(tint_symbol_1))[0][1]).zxy;
+  (*(tint_symbol))[1][0][0] = (*(tint_symbol_1))[0][1][0];
+}
+
+kernel void f(const constant tint_array<half4x3, 4>* tint_symbol_4 [[buffer(0)]], threadgroup tint_symbol_5* tint_symbol_3 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
+  threadgroup tint_array<half4x3, 4>* const tint_symbol_2 = &((*(tint_symbol_3)).w);
+  f_inner(local_invocation_index, tint_symbol_2, tint_symbol_4);
+  return;
+}
+
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.spvasm
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.spvasm
@@ -0,0 +1,192 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Google Tint Compiler; 0
+; Bound: 113
+; Schema: 0
+               OpCapability Shader
+               OpCapability Float16
+               OpCapability UniformAndStorageBuffer16BitAccess
+               OpCapability StorageBuffer16BitAccess
+               OpCapability StorageInputOutput16
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %f "f" %local_invocation_index_1
+               OpExecutionMode %f LocalSize 1 1 1
+               OpName %local_invocation_index_1 "local_invocation_index_1"
+               OpName %u_block_std140 "u_block_std140"
+               OpMemberName %u_block_std140 0 "inner"
+               OpName %mat4x3_f16 "mat4x3_f16"
+               OpMemberName %mat4x3_f16 0 "col0"
+               OpMemberName %mat4x3_f16 1 "col1"
+               OpMemberName %mat4x3_f16 2 "col2"
+               OpMemberName %mat4x3_f16 3 "col3"
+               OpName %u "u"
+               OpName %w "w"
+               OpName %conv_mat4x3_f16 "conv_mat4x3_f16"
+               OpName %val "val"
+               OpName %conv_arr4_mat4x3_f16 "conv_arr4_mat4x3_f16"
+               OpName %val_0 "val"
+               OpName %arr "arr"
+               OpName %i "i"
+               OpName %var_for_index "var_for_index"
+               OpName %f_inner "f_inner"
+               OpName %local_invocation_index "local_invocation_index"
+               OpName %idx "idx"
+               OpName %f "f"
+               OpDecorate %local_invocation_index_1 BuiltIn LocalInvocationIndex
+               OpDecorate %u_block_std140 Block
+               OpMemberDecorate %u_block_std140 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 0 Offset 0
+               OpMemberDecorate %mat4x3_f16 1 Offset 8
+               OpMemberDecorate %mat4x3_f16 2 Offset 16
+               OpMemberDecorate %mat4x3_f16 3 Offset 24
+               OpDecorate %_arr_mat4x3_f16_uint_4 ArrayStride 32
+               OpDecorate %u NonWritable
+               OpDecorate %u DescriptorSet 0
+               OpDecorate %u Binding 0
+               OpDecorate %_arr_mat4v3half_uint_4 ArrayStride 32
+       %uint = OpTypeInt 32 0
+%_ptr_Input_uint = OpTypePointer Input %uint
+%local_invocation_index_1 = OpVariable %_ptr_Input_uint Input
+       %half = OpTypeFloat 16
+     %v3half = OpTypeVector %half 3
+ %mat4x3_f16 = OpTypeStruct %v3half %v3half %v3half %v3half
+     %uint_4 = OpConstant %uint 4
+%_arr_mat4x3_f16_uint_4 = OpTypeArray %mat4x3_f16 %uint_4
+%u_block_std140 = OpTypeStruct %_arr_mat4x3_f16_uint_4
+%_ptr_Uniform_u_block_std140 = OpTypePointer Uniform %u_block_std140
+          %u = OpVariable %_ptr_Uniform_u_block_std140 Uniform
+ %mat4v3half = OpTypeMatrix %v3half 4
+%_arr_mat4v3half_uint_4 = OpTypeArray %mat4v3half %uint_4
+%_ptr_Workgroup__arr_mat4v3half_uint_4 = OpTypePointer Workgroup %_arr_mat4v3half_uint_4
+          %w = OpVariable %_ptr_Workgroup__arr_mat4v3half_uint_4 Workgroup
+         %16 = OpTypeFunction %mat4v3half %mat4x3_f16
+         %25 = OpTypeFunction %_arr_mat4v3half_uint_4 %_arr_mat4x3_f16_uint_4
+%_ptr_Function__arr_mat4v3half_uint_4 = OpTypePointer Function %_arr_mat4v3half_uint_4
+         %31 = OpConstantNull %_arr_mat4v3half_uint_4
+%_ptr_Function_uint = OpTypePointer Function %uint
+         %34 = OpConstantNull %uint
+       %bool = OpTypeBool
+%_ptr_Function__arr_mat4x3_f16_uint_4 = OpTypePointer Function %_arr_mat4x3_f16_uint_4
+         %47 = OpConstantNull %_arr_mat4x3_f16_uint_4
+%_ptr_Function_mat4v3half = OpTypePointer Function %mat4v3half
+%_ptr_Function_mat4x3_f16 = OpTypePointer Function %mat4x3_f16
+     %uint_1 = OpConstant %uint 1
+       %void = OpTypeVoid
+         %60 = OpTypeFunction %void %uint
+%_ptr_Workgroup_mat4v3half = OpTypePointer Workgroup %mat4v3half
+         %78 = OpConstantNull %mat4v3half
+     %uint_2 = OpConstant %uint 2
+   %uint_264 = OpConstant %uint 264
+     %uint_0 = OpConstant %uint 0
+%_ptr_Uniform__arr_mat4x3_f16_uint_4 = OpTypePointer Uniform %_arr_mat4x3_f16_uint_4
+        %int = OpTypeInt 32 1
+      %int_1 = OpConstant %int 1
+%_ptr_Uniform_mat4x3_f16 = OpTypePointer Uniform %mat4x3_f16
+         %96 = OpConstantNull %int
+%_ptr_Workgroup_v3half = OpTypePointer Workgroup %v3half
+%_ptr_Uniform_v3half = OpTypePointer Uniform %v3half
+%_ptr_Workgroup_half = OpTypePointer Workgroup %half
+%_ptr_Uniform_half = OpTypePointer Uniform %half
+        %108 = OpTypeFunction %void
+%conv_mat4x3_f16 = OpFunction %mat4v3half None %16
+        %val = OpFunctionParameter %mat4x3_f16
+         %19 = OpLabel
+         %20 = OpCompositeExtract %v3half %val 0
+         %21 = OpCompositeExtract %v3half %val 1
+         %22 = OpCompositeExtract %v3half %val 2
+         %23 = OpCompositeExtract %v3half %val 3
+         %24 = OpCompositeConstruct %mat4v3half %20 %21 %22 %23
+               OpReturnValue %24
+               OpFunctionEnd
+%conv_arr4_mat4x3_f16 = OpFunction %_arr_mat4v3half_uint_4 None %25
+      %val_0 = OpFunctionParameter %_arr_mat4x3_f16_uint_4
+         %28 = OpLabel
+        %arr = OpVariable %_ptr_Function__arr_mat4v3half_uint_4 Function %31
+          %i = OpVariable %_ptr_Function_uint Function %34
+%var_for_index = OpVariable %_ptr_Function__arr_mat4x3_f16_uint_4 Function %47
+               OpBranch %35
+         %35 = OpLabel
+               OpLoopMerge %36 %37 None
+               OpBranch %38
+         %38 = OpLabel
+         %40 = OpLoad %uint %i
+         %41 = OpULessThan %bool %40 %uint_4
+         %39 = OpLogicalNot %bool %41
+               OpSelectionMerge %43 None
+               OpBranchConditional %39 %44 %43
+         %44 = OpLabel
+               OpBranch %36
+         %43 = OpLabel
+               OpStore %var_for_index %val_0
+         %48 = OpLoad %uint %i
+         %50 = OpAccessChain %_ptr_Function_mat4v3half %arr %48
+         %52 = OpLoad %uint %i
+         %54 = OpAccessChain %_ptr_Function_mat4x3_f16 %var_for_index %52
+         %55 = OpLoad %mat4x3_f16 %54
+         %51 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %55
+               OpStore %50 %51
+               OpBranch %37
+         %37 = OpLabel
+         %56 = OpLoad %uint %i
+         %58 = OpIAdd %uint %56 %uint_1
+               OpStore %i %58
+               OpBranch %35
+         %36 = OpLabel
+         %59 = OpLoad %_arr_mat4v3half_uint_4 %arr
+               OpReturnValue %59
+               OpFunctionEnd
+    %f_inner = OpFunction %void None %60
+%local_invocation_index = OpFunctionParameter %uint
+         %64 = OpLabel
+        %idx = OpVariable %_ptr_Function_uint Function %34
+               OpStore %idx %local_invocation_index
+               OpBranch %66
+         %66 = OpLabel
+               OpLoopMerge %67 %68 None
+               OpBranch %69
+         %69 = OpLabel
+         %71 = OpLoad %uint %idx
+         %72 = OpULessThan %bool %71 %uint_4
+         %70 = OpLogicalNot %bool %72
+               OpSelectionMerge %73 None
+               OpBranchConditional %70 %74 %73
+         %74 = OpLabel
+               OpBranch %67
+         %73 = OpLabel
+         %75 = OpLoad %uint %idx
+         %77 = OpAccessChain %_ptr_Workgroup_mat4v3half %w %75
+               OpStore %77 %78
+               OpBranch %68
+         %68 = OpLabel
+         %79 = OpLoad %uint %idx
+         %80 = OpIAdd %uint %79 %uint_1
+               OpStore %idx %80
+               OpBranch %66
+         %67 = OpLabel
+               OpControlBarrier %uint_2 %uint_2 %uint_264
+         %87 = OpAccessChain %_ptr_Uniform__arr_mat4x3_f16_uint_4 %u %uint_0
+         %88 = OpLoad %_arr_mat4x3_f16_uint_4 %87
+         %84 = OpFunctionCall %_arr_mat4v3half_uint_4 %conv_arr4_mat4x3_f16 %88
+               OpStore %w %84
+         %91 = OpAccessChain %_ptr_Workgroup_mat4v3half %w %int_1
+         %94 = OpAccessChain %_ptr_Uniform_mat4x3_f16 %u %uint_0 %uint_2
+         %95 = OpLoad %mat4x3_f16 %94
+         %92 = OpFunctionCall %mat4v3half %conv_mat4x3_f16 %95
+               OpStore %91 %92
+         %98 = OpAccessChain %_ptr_Workgroup_v3half %w %int_1 %96
+        %100 = OpAccessChain %_ptr_Uniform_v3half %u %uint_0 %34 %uint_1
+        %101 = OpLoad %v3half %100
+        %102 = OpVectorShuffle %v3half %101 %101 2 0 1
+               OpStore %98 %102
+        %104 = OpAccessChain %_ptr_Workgroup_half %w %int_1 %96 %uint_0
+        %106 = OpAccessChain %_ptr_Uniform_half %u %uint_0 %34 %uint_1 %34
+        %107 = OpLoad %half %106
+               OpStore %104 %107
+               OpReturn
+               OpFunctionEnd
+          %f = OpFunction %void None %108
+        %110 = OpLabel
+        %112 = OpLoad %uint %local_invocation_index_1
+        %111 = OpFunctionCall %void %f_inner %112
+               OpReturn
+               OpFunctionEnd
--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.wgsl
+++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.wgsl
@@ -0,0 +1,13 @@
+enable f16;
+
+@group(0) @binding(0) var<uniform> u : array<mat4x3<f16>, 4>;
+
+var<workgroup> w : array<mat4x3<f16>, 4>;
+
+@compute @workgroup_size(1)
+fn f() {
+  w = u;
+  w[1] = u[2];
+  w[1][0] = u[0][1].zxy;
+  w[1][0].x = u[0][1].x;
+}