dawn-cmake/test/bug/tint/942.wgsl.expected.hlsl
Antonio Maiorano 11d09f2fe7 HLSL: force FXC to never unroll loops
Emit the "[loop]" attribute on "for" and "while" so that FXC does not
attempt to unroll them. This is to work around an FXC bug where it fails
to unroll loops with gradient operations.

FXC ostensibly unrolls such loops because gradient operations require
uniform control flow, and loops that have varying iterations may
possibly not be uniform. Tint will eventually validate that control flow
is indeed uniform, so forcing FXC to avoid unrolling in these cases
should be fine.

Bug: tint:1112
Change-Id: I10077f8b62fbbb230a0003f3864c75a8fe0e1d18
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69880
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
2021-11-18 13:50:12 +00:00

85 lines
2.8 KiB
HLSL

SamplerState samp : register(s0, space0);
cbuffer cbuffer_params : register(b1, space0) {
uint4 params[1];
};
Texture2D<float4> inputTex : register(t1, space1);
RWTexture2D<float4> outputTex : register(u2, space1);
cbuffer cbuffer_flip : register(b3, space1) {
uint4 flip[1];
};
groupshared float3 tile[4][256];
struct tint_symbol_1 {
uint3 LocalInvocationID : SV_GroupThreadID;
uint local_invocation_index : SV_GroupIndex;
uint3 WorkGroupID : SV_GroupID;
};
void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index) {
{
[loop] for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
const uint i_1 = (idx / 256u);
const uint i_2 = (idx % 256u);
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
}
}
GroupMemoryBarrierWithGroupSync();
const uint filterOffset = ((params[0].x - 1u) / 2u);
int3 tint_tmp;
inputTex.GetDimensions(0, tint_tmp.x, tint_tmp.y, tint_tmp.z);
const int2 dims = tint_tmp.xy;
const int2 baseIndex = (int2(((WorkGroupID.xy * uint2(params[0].y, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0));
{
[loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
{
[loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
int2 loadIndex = (baseIndex + int2(int(c), int(r)));
if ((flip[0].x != 0u)) {
loadIndex = loadIndex.yx;
}
tile[r][((4u * LocalInvocationID.x) + c)] = inputTex.SampleLevel(samp, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), 0.0f).rgb;
}
}
}
}
GroupMemoryBarrierWithGroupSync();
{
[loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
{
[loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
int2 writeIndex = (baseIndex + int2(int(c), int(r)));
if ((flip[0].x != 0u)) {
writeIndex = writeIndex.yx;
}
const uint center = ((4u * LocalInvocationID.x) + c);
bool tint_tmp_2 = (center >= filterOffset);
if (tint_tmp_2) {
tint_tmp_2 = (center < (256u - filterOffset));
}
bool tint_tmp_1 = (tint_tmp_2);
if (tint_tmp_1) {
tint_tmp_1 = all((writeIndex < dims));
}
if ((tint_tmp_1)) {
float3 acc = float3(0.0f, 0.0f, 0.0f);
{
[loop] for(uint f = 0u; (f < params[0].x); f = (f + 1u)) {
uint i = ((center + f) - filterOffset);
acc = (acc + ((1.0f / float(params[0].x)) * tile[r][i]));
}
}
outputTex[writeIndex] = float4(acc, 1.0f);
}
}
}
}
}
}
[numthreads(64, 1, 1)]
void main(tint_symbol_1 tint_symbol) {
main_inner(tint_symbol.WorkGroupID, tint_symbol.LocalInvocationID, tint_symbol.local_invocation_index);
return;
}