dawn-cmake/test/tint/bug/tint/942.wgsl.expected.msl

75 lines
3.7 KiB
Plaintext
Raw Normal View History

#include <metal_stdlib>
using namespace metal;
template<typename T, size_t N>
struct tint_array {
const constant T& operator[](size_t i) const constant { return elements[i]; }
device T& operator[](size_t i) device { return elements[i]; }
const device T& operator[](size_t i) const device { return elements[i]; }
thread T& operator[](size_t i) thread { return elements[i]; }
const thread T& operator[](size_t i) const thread { return elements[i]; }
threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
T elements[N];
};
struct Params {
/* 0x0000 */ uint filterDim;
/* 0x0004 */ uint blockDim;
};
struct Flip {
/* 0x0000 */ uint value;
};
uint tint_div(uint lhs, uint rhs) {
return (lhs / select(rhs, 1u, (rhs == 0u)));
}
void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index, threadgroup tint_array<tint_array<float3, 256>, 4>* const tint_symbol_1, const constant Params* const tint_symbol_2, texture2d<float, access::sample> tint_symbol_3, const constant Flip* const tint_symbol_4, sampler tint_symbol_5, texture2d<float, access::write> tint_symbol_6) {
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
uint const i_1 = (idx / 256u);
uint const i_2 = (idx % 256u);
(*(tint_symbol_1))[i_1][i_2] = float3(0.0f);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint const filterOffset = tint_div(((*(tint_symbol_2)).filterDim - 1u), 2u);
uint2 const dims = uint2(tint_symbol_3.get_width(0), tint_symbol_3.get_height(0));
tint/msl: Preserve trailing vec3 padding In order to preserve padding properly for MSL, we need to use its packed_vec type for all vec3 types in storage buffers, not just struct members. This commit includes a complete rewrite of the PackedVec3 transform to achieve this. The key details are: * An internal `__packed_vec3<>` type was added, which corresponds to a `type::Vector` with an additional flag to indicate that it will be emitted as packed vector. * The `PackedVec3` transform replaces all vec3 types used in host-shareable address spaces with the internal `__packed_vec3` type. This includes vec3 types that appear as the store type of a pointer. * When used as an array element, these `__packed_vec3` types are wrapped in a struct that contains a single `__packed_vec3` member. This allows us to add an `@align()` attribute that ensures that `array<vec3<T>>` still has the correct array element stride. * When the `vec3<T>` appears as a struct member in the input program, we apply the `@align()` to that member to ensure that we do not change its offset. * Matrix types with three rows that are used in memory are replaced with an array of columns, where each column uses a `__packed_vec3` inside an aligned wrapper structure as above. * Accesses to host-shareable memory that involve any of these types invoke a "pack" or "unpack" helper function to convert them to the equivalent type that uses `__packed_vec3` or a regular `vec3` as required. * The `chromium_internal_relaxed_uniform_layout` extension is used to avoid issues where modifying a type in the uniform address space triggers stricter layout validation rules. Bug: tint:1571 Fixed: tint:1837 Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200 Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: James Price <jrprice@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2023-02-27 20:21:03 +00:00
uint2 const baseIndex = (((WorkGroupID.xy * uint2((*(tint_symbol_2)).blockDim, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u))) - uint2(filterOffset, 0u));
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
uint2 loadIndex = (baseIndex + uint2(c, r));
if (((*(tint_symbol_4)).value != 0u)) {
tint/msl: Preserve trailing vec3 padding In order to preserve padding properly for MSL, we need to use its packed_vec type for all vec3 types in storage buffers, not just struct members. This commit includes a complete rewrite of the PackedVec3 transform to achieve this. The key details are: * An internal `__packed_vec3<>` type was added, which corresponds to a `type::Vector` with an additional flag to indicate that it will be emitted as packed vector. * The `PackedVec3` transform replaces all vec3 types used in host-shareable address spaces with the internal `__packed_vec3` type. This includes vec3 types that appear as the store type of a pointer. * When used as an array element, these `__packed_vec3` types are wrapped in a struct that contains a single `__packed_vec3` member. This allows us to add an `@align()` attribute that ensures that `array<vec3<T>>` still has the correct array element stride. * When the `vec3<T>` appears as a struct member in the input program, we apply the `@align()` to that member to ensure that we do not change its offset. * Matrix types with three rows that are used in memory are replaced with an array of columns, where each column uses a `__packed_vec3` inside an aligned wrapper structure as above. * Accesses to host-shareable memory that involve any of these types invoke a "pack" or "unpack" helper function to convert them to the equivalent type that uses `__packed_vec3` or a regular `vec3` as required. * The `chromium_internal_relaxed_uniform_layout` extension is used to avoid issues where modifying a type in the uniform address space triggers stricter layout validation rules. Bug: tint:1571 Fixed: tint:1837 Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200 Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: James Price <jrprice@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2023-02-27 20:21:03 +00:00
loadIndex = loadIndex.yx;
}
tint/msl: Preserve trailing vec3 padding In order to preserve padding properly for MSL, we need to use its packed_vec type for all vec3 types in storage buffers, not just struct members. This commit includes a complete rewrite of the PackedVec3 transform to achieve this. The key details are: * An internal `__packed_vec3<>` type was added, which corresponds to a `type::Vector` with an additional flag to indicate that it will be emitted as packed vector. * The `PackedVec3` transform replaces all vec3 types used in host-shareable address spaces with the internal `__packed_vec3` type. This includes vec3 types that appear as the store type of a pointer. * When used as an array element, these `__packed_vec3` types are wrapped in a struct that contains a single `__packed_vec3` member. This allows us to add an `@align()` attribute that ensures that `array<vec3<T>>` still has the correct array element stride. * When the `vec3<T>` appears as a struct member in the input program, we apply the `@align()` to that member to ensure that we do not change its offset. * Matrix types with three rows that are used in memory are replaced with an array of columns, where each column uses a `__packed_vec3` inside an aligned wrapper structure as above. * Accesses to host-shareable memory that involve any of these types invoke a "pack" or "unpack" helper function to convert them to the equivalent type that uses `__packed_vec3` or a regular `vec3` as required. * The `chromium_internal_relaxed_uniform_layout` extension is used to avoid issues where modifying a type in the uniform address space triggers stricter layout validation rules. Bug: tint:1571 Fixed: tint:1837 Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200 Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: James Price <jrprice@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2023-02-27 20:21:03 +00:00
(*(tint_symbol_1))[r][((4u * LocalInvocationID[0]) + c)] = tint_symbol_3.sample(tint_symbol_5, ((float2(loadIndex) + float2(0.25f)) / float2(dims)), level(0.0f)).rgb;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
uint2 writeIndex = (baseIndex + uint2(c, r));
if (((*(tint_symbol_4)).value != 0u)) {
tint/msl: Preserve trailing vec3 padding In order to preserve padding properly for MSL, we need to use its packed_vec type for all vec3 types in storage buffers, not just struct members. This commit includes a complete rewrite of the PackedVec3 transform to achieve this. The key details are: * An internal `__packed_vec3<>` type was added, which corresponds to a `type::Vector` with an additional flag to indicate that it will be emitted as packed vector. * The `PackedVec3` transform replaces all vec3 types used in host-shareable address spaces with the internal `__packed_vec3` type. This includes vec3 types that appear as the store type of a pointer. * When used as an array element, these `__packed_vec3` types are wrapped in a struct that contains a single `__packed_vec3` member. This allows us to add an `@align()` attribute that ensures that `array<vec3<T>>` still has the correct array element stride. * When the `vec3<T>` appears as a struct member in the input program, we apply the `@align()` to that member to ensure that we do not change its offset. * Matrix types with three rows that are used in memory are replaced with an array of columns, where each column uses a `__packed_vec3` inside an aligned wrapper structure as above. * Accesses to host-shareable memory that involve any of these types invoke a "pack" or "unpack" helper function to convert them to the equivalent type that uses `__packed_vec3` or a regular `vec3` as required. * The `chromium_internal_relaxed_uniform_layout` extension is used to avoid issues where modifying a type in the uniform address space triggers stricter layout validation rules. Bug: tint:1571 Fixed: tint:1837 Change-Id: Idaf2da2f5bcb2be00c85ec657edfb614186476bb Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/121200 Reviewed-by: Ben Clayton <bclayton@google.com> Commit-Queue: James Price <jrprice@google.com> Kokoro: Kokoro <noreply+kokoro@google.com>
2023-02-27 20:21:03 +00:00
writeIndex = writeIndex.yx;
}
uint const center = ((4u * LocalInvocationID[0]) + c);
if ((((center >= filterOffset) && (center < (256u - filterOffset))) && all((writeIndex < dims)))) {
float3 acc = float3(0.0f);
for(uint f = 0u; (f < (*(tint_symbol_2)).filterDim); f = (f + 1u)) {
uint i = ((center + f) - filterOffset);
acc = (acc + ((1.0f / float((*(tint_symbol_2)).filterDim)) * (*(tint_symbol_1))[r][i]));
}
tint_symbol_6.write(float4(acc, 1.0f), uint2(writeIndex));
}
}
}
}
kernel void tint_symbol(const constant Params* tint_symbol_8 [[buffer(0)]], texture2d<float, access::sample> tint_symbol_9 [[texture(0)]], const constant Flip* tint_symbol_10 [[buffer(1)]], sampler tint_symbol_11 [[sampler(0)]], texture2d<float, access::write> tint_symbol_12 [[texture(1)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
threadgroup tint_array<tint_array<float3, 256>, 4> tint_symbol_7;
tint_symbol_inner(WorkGroupID, LocalInvocationID, local_invocation_index, &(tint_symbol_7), tint_symbol_8, tint_symbol_9, tint_symbol_10, tint_symbol_11, tint_symbol_12);
return;
}