HLSL: force FXC to never unroll loops

Emit the "[loop]" attribute on "for" and "while" loops so that FXC does
not attempt to unroll them. This works around an FXC bug where it fails
to unroll loops that contain gradient operations.

FXC ostensibly unrolls such loops because gradient operations require
uniform control flow, and loops with a varying number of iterations may
not be uniform. Tint will eventually validate that control flow is
indeed uniform, so forcing FXC to avoid unrolling in these cases should
be fine.

Bug: tint:1112
Change-Id: I10077f8b62fbbb230a0003f3864c75a8fe0e1d18
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/69880
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: Ben Clayton <bclayton@google.com>
Commit-Queue: Antonio Maiorano <amaiorano@google.com>
This commit is contained in:
Antonio Maiorano
2021-11-18 13:50:12 +00:00
committed by Tint LUCI CQ
parent 1704fe53f5
commit 11d09f2fe7
49 changed files with 117 additions and 110 deletions

View File

@@ -15,7 +15,7 @@ struct tint_symbol_2 {
void f_inner(uint local_invocation_index) {
{
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
[loop] for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
const uint i = idx;
s.data[i] = 0;
}

View File

@@ -15,7 +15,7 @@ struct tint_symbol_2 {
void f_inner(uint local_invocation_index) {
{
for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
[loop] for(uint idx = local_invocation_index; (idx < 64u); idx = (idx + 1u)) {
const uint i = idx;
s.data[i] = 0;
}

View File

@@ -21,7 +21,7 @@ static bool2 v2b = bool2(false, false);
void foo() {
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
set_float2(v2f, i, 1.0f);
set_int3(v3i, i, 1);
set_uint4(v4u, i, 1u);
@@ -33,7 +33,7 @@ void foo() {
[numthreads(1, 1, 1)]
void main() {
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
foo();
}
}

View File

@@ -30,7 +30,7 @@ void foo() {
[numthreads(1, 1, 1)]
void main() {
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
foo();
}
}

View File

@@ -61,7 +61,7 @@ void main() {
bool3 v3b = bool3(false, false, false);
bool4 v4b = bool4(false, false, false, false);
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
set_float2(v2f, i, 1.0f);
set_float3(v3f, i, 1.0f);
set_float4(v4f, i, 1.0f);

View File

@@ -25,7 +25,7 @@ void main() {
bool2 v2b = bool2(false, false);
bool2 v2b_2 = bool2(false, false);
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
set_float2(v2f, i, 1.0f);
set_int3(v3i, i, 1);
set_uint4(v4u, i, 1u);

View File

@@ -61,7 +61,7 @@ void main() {
bool3 v3b = bool3(false, false, false);
bool4 v4b = bool4(false, false, false, false);
{
for(int i = 0; (i < 2); i = (i + 1)) {
[loop] for(int i = 0; (i < 2); i = (i + 1)) {
set_float2(v2f, i, 1.0f);
set_int2(v2i, i, 1);
set_uint2(v2u, i, 1u);

View File

@@ -1,5 +1,5 @@
void main() {
while (true) {
[loop] while (true) {
if (false) {
} else {
break;

View File

@@ -22,7 +22,7 @@ struct tint_symbol_2 {
int main_inner(int3 x) {
int y = x.x;
while (true) {
[loop] while (true) {
const int r = f(y);
if ((r == 0)) {
break;

View File

@@ -52,9 +52,9 @@ void main_inner(uint3 GlobalInvocationID) {
const int TILE_SIZE = 16;
const int TILE_COUNT_X = 2;
{
for(int y_1 = 0; (y_1 < 2); y_1 = (y_1 + 1)) {
[loop] for(int y_1 = 0; (y_1 < 2); y_1 = (y_1 + 1)) {
{
for(int x_1 = 0; (x_1 < TILE_COUNT_X); x_1 = (x_1 + 1)) {
[loop] for(int x_1 = 0; (x_1 < TILE_COUNT_X); x_1 = (x_1 + 1)) {
int2 tilePixel0Idx = int2((x_1 * TILE_SIZE), (y_1 * TILE_SIZE));
float2 floorCoord = (((2.0f * float2(tilePixel0Idx)) / asfloat(uniforms[10]).xy) - float2((1.0f).xx));
float2 ceilCoord = (((2.0f * float2((tilePixel0Idx + int2((TILE_SIZE).xx)))) / asfloat(uniforms[10]).xy) - float2((1.0f).xx));
@@ -66,7 +66,7 @@ void main_inner(uint3 GlobalInvocationID) {
frustumPlanes[3] = float4(0.0f, -1.0f, (viewCeilCoord.y / viewNear), 0.0f);
float dp = 0.0f;
{
for(uint i = 0u; (i < 6u); i = (i + 1u)) {
[loop] for(uint i = 0u; (i < 6u); i = (i + 1u)) {
float4 p = float4(0.0f, 0.0f, 0.0f, 0.0f);
if ((frustumPlanes[i].x > 0.0f)) {
p.x = boxMax.x;

View File

@@ -3,7 +3,7 @@ RWByteAddressBuffer b : register(u0, space0);
[numthreads(1, 1, 1)]
void main() {
uint i = 0u;
while (true) {
[loop] while (true) {
if ((i >= b.Load(0u))) {
break;
}

View File

@@ -32,7 +32,7 @@ void main_inner(uint3 GlobalInvocationID) {
uint4 srcColorBits = uint4(0u, 0u, 0u, 0u);
uint4 dstColorBits = uint4(dstColor);
{
for(uint i = 0u; (i < uniforms[0].w); i = (i + 1u)) {
[loop] for(uint i = 0u; (i < uniforms[0].w); i = (i + 1u)) {
set_uint4(srcColorBits, i, ConvertToFp16FloatValue(srcColor[i]));
bool tint_tmp_1 = success;
if (tint_tmp_1) {

View File

@@ -15,7 +15,7 @@ void main_inner(uint3 global_id) {
const uint dimOutter = uniforms[1].y;
uint result = 0u;
{
for(uint i = 0u; (i < dimInner); i = (i + 1u)) {
[loop] for(uint i = 0u; (i < dimInner); i = (i + 1u)) {
const uint a = (i + (resultCell.x * dimInner));
const uint b = (resultCell.y + (i * dimOutter));
result = (result + (firstMatrix.Load((4u * a)) * secondMatrix.Load((4u * b))));

View File

@@ -153,7 +153,7 @@ int performPartition_i1_i1_(inout int l, inout int h) {
const QuicksortObject tint_symbol_11 = {tint_symbol_10};
obj = tint_symbol_11;
obj = x_960;
while (true) {
[loop] while (true) {
const int x_961 = pivot;
pivot = 0;
pivot = x_961;
@@ -453,7 +453,7 @@ void quicksort_() {
p = 0;
p = x_1027;
stack[x_100_save] = x_99;
while (true) {
[loop] while (true) {
const float3 x_566 = float3(x_563.x, x_563.x, x_563.x);
const int x_1028 = h_1;
h_1 = 0;

View File

@@ -14,7 +14,7 @@ void main_inner(uint3 GlobalInvocationID) {
flatIndex = (flatIndex * 1u);
float4 texel = myTexture.Load(int4(int3(int2(GlobalInvocationID.xy), 0), 0));
{
for(uint i = 0u; (i < 1u); i = (i + 1u)) {
[loop] for(uint i = 0u; (i < 1u); i = (i + 1u)) {
result.Store((4u * (flatIndex + i)), asuint(texel.r));
}
}

View File

@@ -4,7 +4,7 @@ typedef int tint_symbol_ret[6];
tint_symbol_ret tint_symbol(ByteAddressBuffer buffer, uint offset) {
int arr[6] = (int[6])0;
{
for(uint i = 0u; (i < 6u); i = (i + 1u)) {
[loop] for(uint i = 0u; (i < 6u); i = (i + 1u)) {
arr[i] = asint(buffer.Load((offset + (i * 4u))));
}
}

View File

@@ -56,7 +56,7 @@ struct tint_symbol_1 {
void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
{
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
[loop] for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 256u)) {
const uint i = (idx / 64u);
const uint i_1 = (idx % 64u);
mm_Asub[i][i_1] = 0.0f;
@@ -73,7 +73,7 @@ void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
float ACached = 0.0f;
float BCached[4] = (float[4])0;
{
for(uint index = 0u; (index < (RowPerThread * ColPerThread)); index = (index + 1u)) {
[loop] for(uint index = 0u; (index < (RowPerThread * ColPerThread)); index = (index + 1u)) {
acc[index] = 0.0f;
}
}
@@ -82,11 +82,11 @@ void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
const uint RowPerThreadB = (TileInner / 16u);
const uint tileRowB = (local_id.y * RowPerThreadB);
{
for(uint t = 0u; (t < numTiles); t = (t + 1u)) {
[loop] for(uint t = 0u; (t < numTiles); t = (t + 1u)) {
{
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
[loop] for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
{
for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) {
[loop] for(uint innerCol = 0u; (innerCol < ColPerThreadA); innerCol = (innerCol + 1u)) {
const uint inputRow = (tileRow + innerRow);
const uint inputCol = (tileColA + innerCol);
mm_Asub[inputRow][inputCol] = mm_readA((globalRow + innerRow), ((t * TileInner) + inputCol));
@@ -95,9 +95,9 @@ void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
}
}
{
for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) {
[loop] for(uint innerRow = 0u; (innerRow < RowPerThreadB); innerRow = (innerRow + 1u)) {
{
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
[loop] for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
const uint inputRow = (tileRowB + innerRow);
const uint inputCol = (tileCol + innerCol);
mm_Bsub[innerCol][inputCol] = mm_readB(((t * TileInner) + inputRow), (globalCol + innerCol));
@@ -107,17 +107,17 @@ void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
}
GroupMemoryBarrierWithGroupSync();
{
for(uint k = 0u; (k < TileInner); k = (k + 1u)) {
[loop] for(uint k = 0u; (k < TileInner); k = (k + 1u)) {
{
for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) {
[loop] for(uint inner = 0u; (inner < ColPerThread); inner = (inner + 1u)) {
BCached[inner] = mm_Bsub[k][(tileCol + inner)];
}
}
{
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
[loop] for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
ACached = mm_Asub[(tileRow + innerRow)][k];
{
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
[loop] for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
const uint index = ((innerRow * ColPerThread) + innerCol);
acc[index] = (acc[index] + (ACached * BCached[innerCol]));
}
@@ -130,9 +130,9 @@ void main_inner(uint3 local_id, uint3 global_id, uint local_invocation_index) {
}
}
{
for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
[loop] for(uint innerRow = 0u; (innerRow < RowPerThread); innerRow = (innerRow + 1u)) {
{
for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
[loop] for(uint innerCol = 0u; (innerCol < ColPerThread); innerCol = (innerCol + 1u)) {
const uint index = ((innerRow * ColPerThread) + innerCol);
mm_write((globalRow + innerRow), (globalCol + innerCol), acc[index]);
}

View File

@@ -18,7 +18,7 @@ struct tint_symbol_1 {
void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index) {
{
for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
[loop] for(uint idx = local_invocation_index; (idx < 1024u); idx = (idx + 64u)) {
const uint i_1 = (idx / 256u);
const uint i_2 = (idx % 256u);
tile[i_1][i_2] = float3(0.0f, 0.0f, 0.0f);
@@ -31,9 +31,9 @@ void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocatio
const int2 dims = tint_tmp.xy;
const int2 baseIndex = (int2(((WorkGroupID.xy * uint2(params[0].y, 4u)) + (LocalInvocationID.xy * uint2(4u, 1u)))) - int2(int(filterOffset), 0));
{
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
[loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
{
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
[loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
int2 loadIndex = (baseIndex + int2(int(c), int(r)));
if ((flip[0].x != 0u)) {
loadIndex = loadIndex.yx;
@@ -45,9 +45,9 @@ void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocatio
}
GroupMemoryBarrierWithGroupSync();
{
for(uint r = 0u; (r < 4u); r = (r + 1u)) {
[loop] for(uint r = 0u; (r < 4u); r = (r + 1u)) {
{
for(uint c = 0u; (c < 4u); c = (c + 1u)) {
[loop] for(uint c = 0u; (c < 4u); c = (c + 1u)) {
int2 writeIndex = (baseIndex + int2(int(c), int(r)));
if ((flip[0].x != 0u)) {
writeIndex = writeIndex.yx;
@@ -64,7 +64,7 @@ void main_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocatio
if ((tint_tmp_1)) {
float3 acc = float3(0.0f, 0.0f, 0.0f);
{
for(uint f = 0u; (f < params[0].x); f = (f + 1u)) {
[loop] for(uint f = 0u; (f < params[0].x); f = (f + 1u)) {
uint i = ((center + f) - filterOffset);
acc = (acc + ((1.0f / float(params[0].x)) * tile[r][i]));
}

View File

@@ -186,10 +186,10 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
numTiles = (((x_152 - 1) / 64) + 1);
innerRow = 0;
{
for(; (innerRow < 1); innerRow = (innerRow + 1)) {
[loop] for(; (innerRow < 1); innerRow = (innerRow + 1)) {
innerCol = 0;
{
for(; (innerCol < 1); innerCol = (innerCol + 1)) {
[loop] for(; (innerCol < 1); innerCol = (innerCol + 1)) {
acc[innerRow][innerCol] = 0.0f;
}
}
@@ -201,13 +201,13 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
tileRowB = (asint(x_192) * 1);
t = 0;
{
for(; (t < numTiles); t = (t + 1)) {
[loop] for(; (t < numTiles); t = (t + 1)) {
innerRow_1 = 0;
{
for(; (innerRow_1 < 1); innerRow_1 = (innerRow_1 + 1)) {
[loop] for(; (innerRow_1 < 1); innerRow_1 = (innerRow_1 + 1)) {
innerCol_1 = 0;
{
for(; (innerCol_1 < 64); innerCol_1 = (innerCol_1 + 1)) {
[loop] for(; (innerCol_1 < 64); innerCol_1 = (innerCol_1 + 1)) {
inputRow = (tileRow + innerRow_1);
inputCol = (tileColA + innerCol_1);
const int x_233 = inputRow;
@@ -224,10 +224,10 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
}
innerRow_2 = 0;
{
for(; (innerRow_2 < 1); innerRow_2 = (innerRow_2 + 1)) {
[loop] for(; (innerRow_2 < 1); innerRow_2 = (innerRow_2 + 1)) {
innerCol_2 = 0;
{
for(; (innerCol_2 < 1); innerCol_2 = (innerCol_2 + 1)) {
[loop] for(; (innerCol_2 < 1); innerCol_2 = (innerCol_2 + 1)) {
inputRow_1 = (tileRowB + innerRow_2);
inputCol_1 = (tileCol + innerCol_2);
const int x_278 = inputRow_1;
@@ -245,10 +245,10 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
GroupMemoryBarrierWithGroupSync();
k = 0;
{
for(; (k < 64); k = (k + 1)) {
[loop] for(; (k < 64); k = (k + 1)) {
inner = 0;
{
for(; (inner < 1); inner = (inner + 1)) {
[loop] for(; (inner < 1); inner = (inner + 1)) {
const int x_314 = inner;
const float x_320 = mm_Bsub[k][(tileCol + inner)];
BCached[x_314] = x_320;
@@ -256,12 +256,12 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
}
innerRow_3 = 0;
{
for(; (innerRow_3 < 1); innerRow_3 = (innerRow_3 + 1)) {
[loop] for(; (innerRow_3 < 1); innerRow_3 = (innerRow_3 + 1)) {
const float x_338 = mm_Asub[(tileRow + innerRow_3)][k];
ACached = x_338;
innerCol_3 = 0;
{
for(; (innerCol_3 < 1); innerCol_3 = (innerCol_3 + 1)) {
[loop] for(; (innerCol_3 < 1); innerCol_3 = (innerCol_3 + 1)) {
const int x_347 = innerRow_3;
const int x_348 = innerCol_3;
const float x_349 = ACached;
@@ -279,9 +279,9 @@ void mm_matMul_i1_i1_i1_(inout int dimAOuter, inout int dimInner, inout int dimB
}
innerRow_4 = 0;
{
for(; (innerRow_4 < 1); innerRow_4 = (innerRow_4 + 1)) {
[loop] for(; (innerRow_4 < 1); innerRow_4 = (innerRow_4 + 1)) {
innerCol_4 = 0;
while (true) {
[loop] while (true) {
bool x_393 = false;
bool x_394_phi = false;
if ((innerCol_4 < 1)) {
@@ -352,7 +352,7 @@ void main_inner(uint3 gl_LocalInvocationID_param, uint3 gl_GlobalInvocationID_pa
mm_Bsub[i_1][i_2] = 0.0f;
}
{
for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
[loop] for(uint idx = local_invocation_index; (idx < 4096u); idx = (idx + 64u)) {
const uint i = (idx / 64u);
const uint i_1 = (idx % 64u);
mm_Asub[i][i_1] = 0.0f;

View File

@@ -62,7 +62,7 @@ void main_1() {
stageUnits = (float2(1.0f, 1.0f) / x_111);
i = 0;
{
for(; (i < 2); i = (i + 1)) {
[loop] for(; (i < 2); i = (i + 1)) {
switch(i) {
case 1: {
const float2 x_150 = tileID;
@@ -93,7 +93,7 @@ void main_1() {
mt = ((x_181 * x_184) % 1.0f);
f = 0.0f;
{
for(; (f < 8.0f); f = (f + 1.0f)) {
[loop] for(; (f < 8.0f); f = (f + 1.0f)) {
const float x_197 = animationData.y;
if ((x_197 > mt)) {
const float x_203 = animationData.x;

View File

@@ -244,7 +244,7 @@ void main_1() {
currSampledHeight = 1.0f;
i = 0;
{
for(; (i < 15); i = (i + 1)) {
[loop] for(; (i < 15); i = (i + 1)) {
const float4 x_397 = TextureSamplerTexture.Sample(TextureSamplerSampler, (v_uv + vCurrOffset));
currSampledHeight = x_397.w;
if ((currSampledHeight > currRayHeight)) {

View File

@@ -6,7 +6,7 @@ void unused_entry_point() {
void f() {
int i = 0;
{
for(; ; ) {
[loop] for(; ; ) {
}
}
}