James Price 4d3af66bbd tint/msl: Preserve trailing vec3 padding
In order to preserve padding properly for MSL, we need to use its
packed_vec type for all vec3 types in storage buffers, not just struct
members. This commit includes a complete rewrite of the PackedVec3
transform to achieve this. The key details are:

* An internal `__packed_vec3<>` type was added, which corresponds to a
  `type::Vector` with an additional flag to indicate that it will be
  emitted as packed vector.

* The `PackedVec3` transform replaces all vec3 types used in
  host-shareable address spaces with the internal `__packed_vec3`
  type. This includes vec3 types that appear as the store type of a
  pointer.

* When used as an array element, these `__packed_vec3` types are
  wrapped in a struct that contains a single `__packed_vec3`
  member. This allows us to add an `@align()` attribute that ensures
  that `array<vec3<T>>` still has the correct array element stride.

* When the `vec3<T>` appears as a struct member in the input program,
  we apply the `@align()` to that member to ensure that we do not
  change its offset.

* Matrix types with three rows that are used in memory are replaced
  with an array of columns, where each column uses a `__packed_vec3`
  inside an aligned wrapper structure as above.

* Accesses to host-shareable memory that involve any of these types
  invoke a "pack" or "unpack" helper function to convert them to the
  equivalent type that uses `__packed_vec3` or a regular `vec3` as
  required.

* The `chromium_internal_relaxed_uniform_layout` extension is used to
  avoid issues where modifying a type in the uniform address space
  triggers stricter layout validation rules.

Bug: tint:1571
Fixed: tint:1837
Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb
Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: James Price <jrprice@google.com>
Kokoro: Kokoro <noreply+kokoro@google.com>
2023-02-27 20:21:03 +00:00

46 lines
1.5 KiB
Plaintext

#include <metal_stdlib>
using namespace metal;
template<typename T, size_t N>
struct tint_array {
const constant T& operator[](size_t i) const constant { return elements[i]; }
device T& operator[](size_t i) device { return elements[i]; }
const device T& operator[](size_t i) const device { return elements[i]; }
thread T& operator[](size_t i) thread { return elements[i]; }
const thread T& operator[](size_t i) const thread { return elements[i]; }
threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
T elements[N];
};
struct tint_packed_vec3_f16_array_element {
/* 0x0000 */ packed_half3 elements;
/* 0x0006 */ tint_array<int8_t, 2> tint_pad;
};
struct S_tint_packed_vec3 {
/* 0x0000 */ tint_array<tint_packed_vec3_f16_array_element, 4> tint_symbol;
/* 0x0020 */ packed_half3 vector;
/* 0x0026 */ tint_array<int8_t, 2> tint_pad_1;
};
half4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 4> in) {
half4x3 result = half4x3(0.0h);
for(uint i = 0u; (i < 4u); i = (i + 1u)) {
result[i] = half3(in[i].elements);
}
return result;
}
struct S {
half4x3 tint_symbol;
half3 vector;
};
fragment void tint_symbol_1(const constant S_tint_packed_vec3* tint_symbol_2 [[buffer(0)]]) {
half4 const x = (half3((*(tint_symbol_2)).vector) * tint_unpack_vec3_in_composite((*(tint_symbol_2)).tint_symbol));
return;
}