writer/hlsl: Simplify UBO accesses for static indexing

Use the new semantic constant value information to significantly reduce the complex indexing logic emitted for UBO accesses.
This will dramatically reduce the number of `for` loops that are decayed to `while` loops.

Change-Id: I1b0adb5edde2b4ed39c6beafc2e28106b86e0edd
Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/57701
Kokoro: Kokoro <noreply+kokoro@google.com>
Reviewed-by: James Price <jrprice@google.com>
This commit is contained in:
Ben Clayton
2021-07-13 12:18:13 +00:00
parent 71f619b6f1
commit 51cfe26bb7
46 changed files with 1712 additions and 191 deletions

View File

@@ -0,0 +1,31 @@
struct Inner {
a : vec3<i32>;
b : i32;
c : vec3<u32>;
d : u32;
e : vec3<f32>;
f : f32;
g : mat2x3<f32>;
h : mat3x2<f32>;
i : [[stride(16)]] array<vec4<i32>, 4>;
};
[[block]]
struct S {
arr : array<Inner>;
};
[[binding(0), group(0)]] var<storage, read> s : S;
[[stage(compute), workgroup_size(1)]]
fn main([[builtin(local_invocation_index)]] idx : u32) {
let a = s.arr[idx].a;
let b = s.arr[idx].b;
let c = s.arr[idx].c;
let d = s.arr[idx].d;
let e = s.arr[idx].e;
let f = s.arr[idx].f;
let g = s.arr[idx].g;
let h = s.arr[idx].h;
let i = s.arr[idx].i;
}

View File

@@ -0,0 +1,34 @@
float2x3 tint_symbol_8(ByteAddressBuffer buffer, uint offset) {
return float2x3(asfloat(buffer.Load3((offset + 0u))), asfloat(buffer.Load3((offset + 16u))));
}
float3x2 tint_symbol_10(ByteAddressBuffer buffer, uint offset) {
return float3x2(asfloat(buffer.Load2((offset + 0u))), asfloat(buffer.Load2((offset + 8u))), asfloat(buffer.Load2((offset + 16u))));
}
typedef int4 tint_symbol_12_ret[4];
tint_symbol_12_ret tint_symbol_12(ByteAddressBuffer buffer, uint offset) {
const int4 tint_symbol_13[4] = {asint(buffer.Load4((offset + 0u))), asint(buffer.Load4((offset + 16u))), asint(buffer.Load4((offset + 32u))), asint(buffer.Load4((offset + 48u)))};
return tint_symbol_13;
}
ByteAddressBuffer s : register(t0, space0);
struct tint_symbol_1 {
uint idx : SV_GroupIndex;
};
[numthreads(1, 1, 1)]
void main(tint_symbol_1 tint_symbol) {
const uint idx = tint_symbol.idx;
const int3 a = asint(s.Load3((176u * idx)));
const int b = asint(s.Load(((176u * idx) + 12u)));
const uint3 c = s.Load3(((176u * idx) + 16u));
const uint d = s.Load(((176u * idx) + 28u));
const float3 e = asfloat(s.Load3(((176u * idx) + 32u)));
const float f = asfloat(s.Load(((176u * idx) + 44u)));
const float2x3 g = tint_symbol_8(s, ((176u * idx) + 48u));
const float3x2 h = tint_symbol_10(s, ((176u * idx) + 80u));
const int4 i[4] = tint_symbol_12(s, ((176u * idx) + 112u));
return;
}

View File

@@ -0,0 +1,35 @@
#include <metal_stdlib>
using namespace metal;
struct tint_array_wrapper {
/* 0x0000 */ int4 arr[4];
};
struct Inner {
/* 0x0000 */ packed_int3 a;
/* 0x000c */ int b;
/* 0x0010 */ packed_uint3 c;
/* 0x001c */ uint d;
/* 0x0020 */ packed_float3 e;
/* 0x002c */ float f;
/* 0x0030 */ float2x3 g;
/* 0x0050 */ float3x2 h;
/* 0x0068 */ int8_t tint_pad[8];
/* 0x0070 */ tint_array_wrapper i;
};
struct S {
/* 0x0000 */ Inner arr[1];
};
kernel void tint_symbol(uint idx [[thread_index_in_threadgroup]], const device S& s [[buffer(0)]]) {
int3 const a = s.arr[idx].a;
int const b = s.arr[idx].b;
uint3 const c = s.arr[idx].c;
uint const d = s.arr[idx].d;
float3 const e = s.arr[idx].e;
float const f = s.arr[idx].f;
float2x3 const g = s.arr[idx].g;
float3x2 const h = s.arr[idx].h;
tint_array_wrapper const i = s.arr[idx].i;
return;
}

View File

@@ -0,0 +1,114 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 68
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %tint_symbol
OpExecutionMode %main LocalSize 1 1 1
OpName %S "S"
OpMemberName %S 0 "arr"
OpName %Inner "Inner"
OpMemberName %Inner 0 "a"
OpMemberName %Inner 1 "b"
OpMemberName %Inner 2 "c"
OpMemberName %Inner 3 "d"
OpMemberName %Inner 4 "e"
OpMemberName %Inner 5 "f"
OpMemberName %Inner 6 "g"
OpMemberName %Inner 7 "h"
OpMemberName %Inner 8 "i"
OpName %s "s"
OpName %tint_symbol "tint_symbol"
OpName %main "main"
OpDecorate %S Block
OpMemberDecorate %S 0 Offset 0
OpMemberDecorate %Inner 0 Offset 0
OpMemberDecorate %Inner 1 Offset 12
OpMemberDecorate %Inner 2 Offset 16
OpMemberDecorate %Inner 3 Offset 28
OpMemberDecorate %Inner 4 Offset 32
OpMemberDecorate %Inner 5 Offset 44
OpMemberDecorate %Inner 6 Offset 48
OpMemberDecorate %Inner 6 ColMajor
OpMemberDecorate %Inner 6 MatrixStride 16
OpMemberDecorate %Inner 7 Offset 80
OpMemberDecorate %Inner 7 ColMajor
OpMemberDecorate %Inner 7 MatrixStride 8
OpMemberDecorate %Inner 8 Offset 112
OpDecorate %_arr_v4int_uint_4 ArrayStride 16
OpDecorate %_runtimearr_Inner ArrayStride 176
OpDecorate %s NonWritable
OpDecorate %s Binding 0
OpDecorate %s DescriptorSet 0
OpDecorate %tint_symbol BuiltIn LocalInvocationIndex
%int = OpTypeInt 32 1
%v3int = OpTypeVector %int 3
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%mat2v3float = OpTypeMatrix %v3float 2
%v2float = OpTypeVector %float 2
%mat3v2float = OpTypeMatrix %v2float 3
%v4int = OpTypeVector %int 4
%uint_4 = OpConstant %uint 4
%_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
%Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %_arr_v4int_uint_4
%_runtimearr_Inner = OpTypeRuntimeArray %Inner
%S = OpTypeStruct %_runtimearr_Inner
%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
%s = OpVariable %_ptr_StorageBuffer_S StorageBuffer
%_ptr_Input_uint = OpTypePointer Input %uint
%tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid
%20 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0
%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
%uint_1 = OpConstant %uint 1
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%uint_2 = OpConstant %uint 2
%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
%uint_3 = OpConstant %uint 3
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
%uint_5 = OpConstant %uint 5
%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
%uint_6 = OpConstant %uint 6
%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
%uint_7 = OpConstant %uint 7
%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
%uint_8 = OpConstant %uint 8
%_ptr_StorageBuffer__arr_v4int_uint_4 = OpTypePointer StorageBuffer %_arr_v4int_uint_4
%main = OpFunction %void None %20
%23 = OpLabel
%25 = OpLoad %uint %tint_symbol
%27 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %25 %uint_0
%28 = OpLoad %v3int %27
%29 = OpLoad %uint %tint_symbol
%32 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %29 %uint_1
%33 = OpLoad %int %32
%34 = OpLoad %uint %tint_symbol
%37 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %34 %uint_2
%38 = OpLoad %v3uint %37
%39 = OpLoad %uint %tint_symbol
%42 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %39 %uint_3
%43 = OpLoad %uint %42
%44 = OpLoad %uint %tint_symbol
%46 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %44 %uint_4
%47 = OpLoad %v3float %46
%48 = OpLoad %uint %tint_symbol
%51 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %48 %uint_5
%52 = OpLoad %float %51
%53 = OpLoad %uint %tint_symbol
%56 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %53 %uint_6
%57 = OpLoad %mat2v3float %56
%58 = OpLoad %uint %tint_symbol
%61 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %58 %uint_7
%62 = OpLoad %mat3v2float %61
%63 = OpLoad %uint %tint_symbol
%66 = OpAccessChain %_ptr_StorageBuffer__arr_v4int_uint_4 %s %uint_0 %63 %uint_8
%67 = OpLoad %_arr_v4int_uint_4 %66
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,31 @@
struct Inner {
a : vec3<i32>;
b : i32;
c : vec3<u32>;
d : u32;
e : vec3<f32>;
f : f32;
g : mat2x3<f32>;
h : mat3x2<f32>;
i : [[stride(16)]] array<vec4<i32>, 4>;
};
[[block]]
struct S {
arr : array<Inner>;
};
[[binding(0), group(0)]] var<storage, read> s : S;
[[stage(compute), workgroup_size(1)]]
fn main([[builtin(local_invocation_index)]] idx : u32) {
let a = s.arr[idx].a;
let b = s.arr[idx].b;
let c = s.arr[idx].c;
let d = s.arr[idx].d;
let e = s.arr[idx].e;
let f = s.arr[idx].f;
let g = s.arr[idx].g;
let h = s.arr[idx].h;
let i = s.arr[idx].i;
}

View File

@@ -0,0 +1,31 @@
struct Inner {
a : vec3<i32>;
b : i32;
c : vec3<u32>;
d : u32;
e : vec3<f32>;
f : f32;
g : mat2x3<f32>;
h : mat3x2<f32>;
i : [[stride(16)]] array<vec4<i32>, 4>;
};
[[block]]
struct S {
arr : array<Inner>;
};
[[binding(0), group(0)]] var<storage, read_write> s : S;
[[stage(compute), workgroup_size(1)]]
fn main([[builtin(local_invocation_index)]] idx : u32) {
s.arr[idx].a = vec3<i32>();
s.arr[idx].b = i32();
s.arr[idx].c = vec3<u32>();
s.arr[idx].d = u32();
s.arr[idx].e = vec3<f32>();
s.arr[idx].f = f32();
s.arr[idx].g = mat2x3<f32>();
s.arr[idx].h = mat3x2<f32>();
s.arr[idx].i = [[stride(16)]] array<vec4<i32>, 4>();
}

View File

@@ -0,0 +1,39 @@
void tint_symbol_8(RWByteAddressBuffer buffer, uint offset, float2x3 value) {
buffer.Store3((offset + 0u), asuint(value[0u]));
buffer.Store3((offset + 16u), asuint(value[1u]));
}
void tint_symbol_10(RWByteAddressBuffer buffer, uint offset, float3x2 value) {
buffer.Store2((offset + 0u), asuint(value[0u]));
buffer.Store2((offset + 8u), asuint(value[1u]));
buffer.Store2((offset + 16u), asuint(value[2u]));
}
void tint_symbol_12(RWByteAddressBuffer buffer, uint offset, int4 value[4]) {
buffer.Store4((offset + 0u), asuint(value[0u]));
buffer.Store4((offset + 16u), asuint(value[1u]));
buffer.Store4((offset + 32u), asuint(value[2u]));
buffer.Store4((offset + 48u), asuint(value[3u]));
}
RWByteAddressBuffer s : register(u0, space0);
struct tint_symbol_1 {
uint idx : SV_GroupIndex;
};
[numthreads(1, 1, 1)]
void main(tint_symbol_1 tint_symbol) {
const uint idx = tint_symbol.idx;
s.Store3((176u * idx), asuint(int3(0, 0, 0)));
s.Store(((176u * idx) + 12u), asuint(0));
s.Store3(((176u * idx) + 16u), asuint(uint3(0u, 0u, 0u)));
s.Store(((176u * idx) + 28u), asuint(0u));
s.Store3(((176u * idx) + 32u), asuint(float3(0.0f, 0.0f, 0.0f)));
s.Store(((176u * idx) + 44u), asuint(0.0f));
tint_symbol_8(s, ((176u * idx) + 48u), float2x3(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
tint_symbol_10(s, ((176u * idx) + 80u), float3x2(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
const int4 tint_symbol_13[4] = (int4[4])0;
tint_symbol_12(s, ((176u * idx) + 112u), tint_symbol_13);
return;
}

View File

@@ -0,0 +1,36 @@
#include <metal_stdlib>
using namespace metal;
struct tint_array_wrapper {
/* 0x0000 */ int4 arr[4];
};
struct Inner {
/* 0x0000 */ packed_int3 a;
/* 0x000c */ int b;
/* 0x0010 */ packed_uint3 c;
/* 0x001c */ uint d;
/* 0x0020 */ packed_float3 e;
/* 0x002c */ float f;
/* 0x0030 */ float2x3 g;
/* 0x0050 */ float3x2 h;
/* 0x0068 */ int8_t tint_pad[8];
/* 0x0070 */ tint_array_wrapper i;
};
struct S {
/* 0x0000 */ Inner arr[1];
};
kernel void tint_symbol(uint idx [[thread_index_in_threadgroup]], device S& s [[buffer(0)]]) {
s.arr[idx].a = int3();
s.arr[idx].b = int();
s.arr[idx].c = uint3();
s.arr[idx].d = uint();
s.arr[idx].e = float3();
s.arr[idx].f = float();
s.arr[idx].g = float2x3();
s.arr[idx].h = float3x2();
tint_array_wrapper const tint_symbol_2 = {.arr={}};
s.arr[idx].i = tint_symbol_2;
return;
}

View File

@@ -0,0 +1,122 @@
; SPIR-V
; Version: 1.3
; Generator: Google Tint Compiler; 0
; Bound: 68
; Schema: 0
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %tint_symbol
OpExecutionMode %main LocalSize 1 1 1
OpName %S "S"
OpMemberName %S 0 "arr"
OpName %Inner "Inner"
OpMemberName %Inner 0 "a"
OpMemberName %Inner 1 "b"
OpMemberName %Inner 2 "c"
OpMemberName %Inner 3 "d"
OpMemberName %Inner 4 "e"
OpMemberName %Inner 5 "f"
OpMemberName %Inner 6 "g"
OpMemberName %Inner 7 "h"
OpMemberName %Inner 8 "i"
OpName %s "s"
OpName %tint_symbol "tint_symbol"
OpName %main "main"
OpDecorate %S Block
OpMemberDecorate %S 0 Offset 0
OpMemberDecorate %Inner 0 Offset 0
OpMemberDecorate %Inner 1 Offset 12
OpMemberDecorate %Inner 2 Offset 16
OpMemberDecorate %Inner 3 Offset 28
OpMemberDecorate %Inner 4 Offset 32
OpMemberDecorate %Inner 5 Offset 44
OpMemberDecorate %Inner 6 Offset 48
OpMemberDecorate %Inner 6 ColMajor
OpMemberDecorate %Inner 6 MatrixStride 16
OpMemberDecorate %Inner 7 Offset 80
OpMemberDecorate %Inner 7 ColMajor
OpMemberDecorate %Inner 7 MatrixStride 8
OpMemberDecorate %Inner 8 Offset 112
OpDecorate %_arr_v4int_uint_4 ArrayStride 16
OpDecorate %_runtimearr_Inner ArrayStride 176
OpDecorate %s Binding 0
OpDecorate %s DescriptorSet 0
OpDecorate %tint_symbol BuiltIn LocalInvocationIndex
%int = OpTypeInt 32 1
%v3int = OpTypeVector %int 3
%uint = OpTypeInt 32 0
%v3uint = OpTypeVector %uint 3
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%mat2v3float = OpTypeMatrix %v3float 2
%v2float = OpTypeVector %float 2
%mat3v2float = OpTypeMatrix %v2float 3
%v4int = OpTypeVector %int 4
%uint_4 = OpConstant %uint 4
%_arr_v4int_uint_4 = OpTypeArray %v4int %uint_4
%Inner = OpTypeStruct %v3int %int %v3uint %uint %v3float %float %mat2v3float %mat3v2float %_arr_v4int_uint_4
%_runtimearr_Inner = OpTypeRuntimeArray %Inner
%S = OpTypeStruct %_runtimearr_Inner
%_ptr_StorageBuffer_S = OpTypePointer StorageBuffer %S
%s = OpVariable %_ptr_StorageBuffer_S StorageBuffer
%_ptr_Input_uint = OpTypePointer Input %uint
%tint_symbol = OpVariable %_ptr_Input_uint Input
%void = OpTypeVoid
%20 = OpTypeFunction %void
%uint_0 = OpConstant %uint 0
%_ptr_StorageBuffer_v3int = OpTypePointer StorageBuffer %v3int
%28 = OpConstantNull %v3int
%uint_1 = OpConstant %uint 1
%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int
%33 = OpConstantNull %int
%uint_2 = OpConstant %uint 2
%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint
%38 = OpConstantNull %v3uint
%uint_3 = OpConstant %uint 3
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
%43 = OpConstantNull %uint
%_ptr_StorageBuffer_v3float = OpTypePointer StorageBuffer %v3float
%47 = OpConstantNull %v3float
%uint_5 = OpConstant %uint 5
%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float
%52 = OpConstantNull %float
%uint_6 = OpConstant %uint 6
%_ptr_StorageBuffer_mat2v3float = OpTypePointer StorageBuffer %mat2v3float
%57 = OpConstantNull %mat2v3float
%uint_7 = OpConstant %uint 7
%_ptr_StorageBuffer_mat3v2float = OpTypePointer StorageBuffer %mat3v2float
%62 = OpConstantNull %mat3v2float
%uint_8 = OpConstant %uint 8
%_ptr_StorageBuffer__arr_v4int_uint_4 = OpTypePointer StorageBuffer %_arr_v4int_uint_4
%67 = OpConstantNull %_arr_v4int_uint_4
%main = OpFunction %void None %20
%23 = OpLabel
%25 = OpLoad %uint %tint_symbol
%27 = OpAccessChain %_ptr_StorageBuffer_v3int %s %uint_0 %25 %uint_0
OpStore %27 %28
%29 = OpLoad %uint %tint_symbol
%32 = OpAccessChain %_ptr_StorageBuffer_int %s %uint_0 %29 %uint_1
OpStore %32 %33
%34 = OpLoad %uint %tint_symbol
%37 = OpAccessChain %_ptr_StorageBuffer_v3uint %s %uint_0 %34 %uint_2
OpStore %37 %38
%39 = OpLoad %uint %tint_symbol
%42 = OpAccessChain %_ptr_StorageBuffer_uint %s %uint_0 %39 %uint_3
OpStore %42 %43
%44 = OpLoad %uint %tint_symbol
%46 = OpAccessChain %_ptr_StorageBuffer_v3float %s %uint_0 %44 %uint_4
OpStore %46 %47
%48 = OpLoad %uint %tint_symbol
%51 = OpAccessChain %_ptr_StorageBuffer_float %s %uint_0 %48 %uint_5
OpStore %51 %52
%53 = OpLoad %uint %tint_symbol
%56 = OpAccessChain %_ptr_StorageBuffer_mat2v3float %s %uint_0 %53 %uint_6
OpStore %56 %57
%58 = OpLoad %uint %tint_symbol
%61 = OpAccessChain %_ptr_StorageBuffer_mat3v2float %s %uint_0 %58 %uint_7
OpStore %61 %62
%63 = OpLoad %uint %tint_symbol
%66 = OpAccessChain %_ptr_StorageBuffer__arr_v4int_uint_4 %s %uint_0 %63 %uint_8
OpStore %66 %67
OpReturn
OpFunctionEnd

View File

@@ -0,0 +1,31 @@
struct Inner {
a : vec3<i32>;
b : i32;
c : vec3<u32>;
d : u32;
e : vec3<f32>;
f : f32;
g : mat2x3<f32>;
h : mat3x2<f32>;
i : [[stride(16)]] array<vec4<i32>, 4>;
};
[[block]]
struct S {
arr : array<Inner>;
};
[[binding(0), group(0)]] var<storage, read_write> s : S;
[[stage(compute), workgroup_size(1)]]
fn main([[builtin(local_invocation_index)]] idx : u32) {
s.arr[idx].a = vec3<i32>();
s.arr[idx].b = i32();
s.arr[idx].c = vec3<u32>();
s.arr[idx].d = u32();
s.arr[idx].e = vec3<f32>();
s.arr[idx].f = f32();
s.arr[idx].g = mat2x3<f32>();
s.arr[idx].h = mat3x2<f32>();
s.arr[idx].i = [[stride(16)]] array<vec4<i32>, 4>();
}