mirror of
https://github.com/encounter/dawn-cmake.git
synced 2025-08-05 03:35:56 +00:00
Emit the "[loop]" attribute on "for" and "while" so that FXC does not attempt to unroll them.

This is to work around an FXC bug where it fails to unroll loops with gradient operations. FXC ostensibly unrolls such loops because gradient operations require uniform control flow, and loops that have varying iterations may possibly not be uniform. Tint will eventually validate that control flow is indeed uniform, so forcing FXC to avoid unrolling in these cases should be fine.

Bug: tint:1112
Change-Id: I10077f8b62fbbb230a0003f3864c75a8fe0e1d18
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69880
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
85 lines
2.8 KiB
HLSL
85 lines
2.8 KiB
HLSL
// Sampler used for every SampleLevel read from inputTex.
SamplerState samp : register(s0, space0);

// Filter parameters, packed into one uint4:
//   params[0].x - number of taps accumulated per output texel
//   params[0].y - x-stride, in texels, between consecutive workgroups
cbuffer cbuffer_params : register(b1, space0) {
  uint4 params[1];
};

// Image that is sampled while filling the shared tile.
Texture2D<float4> inputTex : register(t1, space1);

// Image that receives the filtered texels.
RWTexture2D<float4> outputTex : register(u2, space1);

// When flip[0].x is non-zero, the x and y components of the load and store
// coordinates are swapped.
cbuffer cbuffer_flip : register(b3, space1) {
  uint4 flip[1];
};

// Workgroup-shared staging buffer: 4 rows of 256 RGB texels.
groupshared float3 tile[4][256];
|
|
|
|
// Inputs of the compute entry point `main`, each bound to an HLSL
// system-value semantic.
struct tint_symbol_1 {
  // 3D index of the thread inside its thread group.
  uint3 LocalInvocationID : SV_GroupThreadID;
  // Flattened index of the thread inside its thread group.
  uint local_invocation_index : SV_GroupIndex;
  // 3D index of the thread group inside the dispatch.
  uint3 WorkGroupID : SV_GroupID;
};
|
|
|
|
// Body of the compute entry point.
//
// Phase 1: clear the shared `tile`.
// Phase 2: every thread samples a 4x4 patch of `inputTex` into `tile`
//          (4 rows, 4 consecutive columns starting at 4 * LocalInvocationID.x).
// Phase 3: every thread writes up to 4x4 texels of `outputTex`, each the
//          equal-weight average of params[0].x consecutive entries of one
//          tile row.
//
// When flip[0].x is non-zero the x/y components of both the load and the
// store coordinates are swapped.
//
// Parameters:
//   WorkGroupID            - index of this thread group in the dispatch
//   LocalInvocationID      - index of this thread inside the group
//   local_invocation_index - flattened index of this thread inside the group
//
// Each loop stays inside its own brace scope, as in the original, so the
// repeated counter names (`r`, `c`) can never clash with one another.
void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index) {
  // Phase 1: cooperatively zero all 4 * 256 = 1024 tile entries. The stride of
  // 64 matches the [numthreads(64, 1, 1)] declared on `main`.
  {
    [loop] for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
      const uint tileRow = (idx / 256u);
      const uint tileCol = (idx % 256u);
      tile[tileRow][tileCol] = float3(0.0f, 0.0f, 0.0f);
    }
  }
  GroupMemoryBarrierWithGroupSync();

  const uint filterDim = params[0].x;
  // Number of taps that lie before the centre texel (rounded down).
  const uint filterOffset = ((filterDim - 1u) / 2u);

  int3 tint_tmp;
  inputTex.GetDimensions(0, tint_tmp.x, tint_tmp.y, tint_tmp.z);
  const int2 dims = tint_tmp.xy;

  // Texel coordinate that tile[0][4 * LocalInvocationID.x] corresponds to.
  // It is shifted left by filterOffset so the tile also holds the leading taps.
  const uint2 groupOrigin = (WorkGroupID.xy * uint2(params[0].y, 4u));
  const uint2 threadOrigin = (LocalInvocationID.xy * uint2(4u, 1u));
  const int2 baseIndex = (int2((groupOrigin + threadOrigin)) - int2(int(filterOffset), 0));

  // Phase 2: fill this thread's 4x4 patch of the tile.
  {
    [loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
      {
        [loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
          int2 loadIndex = (baseIndex + int2(int(c), int(r)));
          if ((flip[0].x != 0u)) {
            loadIndex = loadIndex.yx;
          }
          tile[r][((4u * LocalInvocationID.x) + c)] = inputTex.SampleLevel(samp, ((float2(loadIndex) + float2(0.25f, 0.25f)) / float2(dims)), 0.0f).rgb;
        }
      }
    }
  }

  // All loads must be visible before any thread reads its neighbours' columns.
  GroupMemoryBarrierWithGroupSync();

  // Phase 3: filter along each tile row and store the result.
  {
    [loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
      {
        [loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
          int2 writeIndex = (baseIndex + int2(int(c), int(r)));
          if ((flip[0].x != 0u)) {
            writeIndex = writeIndex.yx;
          }
          const uint center = ((4u * LocalInvocationID.x) + c);

          // The checks are chained with `if` so that a later check is only
          // evaluated once the earlier ones have passed.
          bool canWrite = (center >= filterOffset);
          if (canWrite) {
            // The taps read tile columns
            //   [center - filterOffset, center - filterOffset + filterDim - 1],
            // so the whole window fits in a 256-wide row only when
            //   (center - filterOffset) + filterDim <= 256.
            // For odd filterDim this equals the previous test
            // `center < 256u - filterOffset`; for even filterDim the previous
            // test let the last tap read tile[r][256], one past the end.
            canWrite = (((center - filterOffset) + filterDim) <= 256u);
          }
          if (canWrite) {
            canWrite = all((writeIndex < dims));
          }

          if (canWrite) {
            // Every tap has the same weight; compute it once per texel.
            const float weight = (1.0f / float(filterDim));
            float3 acc = float3(0.0f, 0.0f, 0.0f);
            {
              [loop] for(uint f = 0u; (f < filterDim); f = (f + 1u)) {
                const uint i = ((center + f) - filterOffset);
                acc = (acc + (weight * tile[r][i]));
              }
            }
            outputTex[writeIndex] = float4(acc, 1.0f);
          }
        }
      }
    }
  }
}
|
|
|
|
// Compute entry point. Runs 64 threads per group along x, unpacks the
// system-value inputs and forwards them to main_inner.
[numthreads(64, 1, 1)]
void main(tint_symbol_1 tint_symbol) {
  const uint3 groupId = tint_symbol.WorkGroupID;
  const uint3 threadId = tint_symbol.LocalInvocationID;
  const uint flatThreadIndex = tint_symbol.local_invocation_index;
  main_inner(groupId, threadId, flatThreadIndex);
}
|