tint/msl: Preserve trailing vec3 padding

In order to preserve padding properly for MSL, we need to use its
packed_vec type for all vec3 types in storage buffers, not just struct
members. This commit includes a complete rewrite of the PackedVec3
transform to achieve this. The key details are:

* An internal `__packed_vec3<>` type was added, which corresponds to a
  `type::Vector` with an additional flag to indicate that it will be
  emitted as a packed vector.

* The `PackedVec3` transform replaces all vec3 types used in
  host-shareable address spaces with the internal `__packed_vec3`
  type. This includes vec3 types that appear as the store type of a
  pointer.

* When used as an array element, these `__packed_vec3` types are
  wrapped in a struct that contains a single `__packed_vec3`
  member. This allows us to add an `@align()` attribute that ensures
  that `array<vec3<T>>` still has the correct array element stride.

* When the `vec3<T>` appears as a struct member in the input program,
  we apply the `@align()` to that member to ensure that we do not
  change its offset.

* Matrix types with three rows that are used in memory are replaced
  with an array of columns, where each column uses a `__packed_vec3`
  inside an aligned wrapper structure as above.

* Accesses to host-shareable memory that involve any of these types
  invoke a "pack" or "unpack" helper function to convert them to the
  equivalent type that uses `__packed_vec3` or a regular `vec3` as
  required.

* The `chromium_internal_relaxed_uniform_layout` extension is used to
  avoid issues where modifying a type in the uniform address space
  triggers stricter layout validation rules.

Bug: tint:1571
Fixed: tint:1837
Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
This commit is contained in:
James Price
2023-02-27 20:21:03 +00:00
committed by Dawn LUCI CQ
parent 55183e6c3a
commit 4d3af66bbd
389 changed files with 16692 additions and 3527 deletions

View File

@@ -24,8 +24,8 @@ struct S {
kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
half2x4 const t = transpose((*(tint_symbol))[2].m);
half const l = length(half2((*(tint_symbol))[0].m[1]).yx);
half const a = fabs(half2((*(tint_symbol))[0].m[1]).yx[0]);
half const l = length((*(tint_symbol))[0].m[1].yx);
half const a = fabs((*(tint_symbol))[0].m[1].yx[0]);
return;
}

View File

@@ -41,8 +41,8 @@ kernel void f(const constant tint_array<S, 4>* tint_symbol [[buffer(0)]]) {
a(*(tint_symbol));
b((*(tint_symbol))[2]);
c((*(tint_symbol))[2].m);
d(half2((*(tint_symbol))[0].m[1]).yx);
e(half2((*(tint_symbol))[0].m[1]).yx[0]);
d((*(tint_symbol))[0].m[1].yx);
e((*(tint_symbol))[0].m[1].yx[0]);
return;
}

View File

@@ -27,7 +27,7 @@ kernel void f(const constant tint_array<S, 4>* tint_symbol_1 [[buffer(0)]]) {
tint_symbol = *(tint_symbol_1);
tint_symbol[1] = (*(tint_symbol_1))[2];
tint_symbol[3].m = (*(tint_symbol_1))[2].m;
tint_symbol[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
tint_symbol[1].m[0] = (*(tint_symbol_1))[0].m[1].yx;
return;
}

View File

@@ -38,7 +38,7 @@ kernel void f(device tint_array<S, 4>* tint_symbol [[buffer(1)]], const constant
assign_and_preserve_padding(tint_symbol, *(tint_symbol_1));
assign_and_preserve_padding_1(&((*(tint_symbol))[1]), (*(tint_symbol_1))[2]);
(*(tint_symbol))[3].m = (*(tint_symbol_1))[2].m;
(*(tint_symbol))[1].m[0] = half2((*(tint_symbol_1))[0].m[1]).yx;
(*(tint_symbol))[1].m[0] = (*(tint_symbol_1))[0].m[1].yx;
return;
}

View File

@@ -36,7 +36,7 @@ void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const ti
*(tint_symbol_1) = *(tint_symbol_2);
(*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
(*(tint_symbol_1))[3].m = (*(tint_symbol_2))[2].m;
(*(tint_symbol_1))[1].m[0] = half2((*(tint_symbol_2))[0].m[1]).yx;
(*(tint_symbol_1))[1].m[0] = (*(tint_symbol_2))[0].m[1].yx;
}
kernel void f(const constant tint_array<S, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {