// Listing header (path was truncated by the listing): dawn-cmake/test/benchmark/cluster-lights.wgsl.expecte...
// 139 lines, 5.3 KiB, Plaintext

#include <metal_stdlib>
using namespace metal;
// Matrix-times-vector overload for packed vectors: the packed operand is
// first converted to a regular vec so the built-in matrix * vec product
// can be used.
template<typename T, int N, int M>
inline vec<T, M> operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
  vec<T, N> const unpacked = vec<T, N>(rhs);
  return lhs * unpacked;
}
// Vector-times-matrix overload for packed vectors: the packed operand is
// first converted to a regular vec so the built-in vec * matrix product
// can be used.
template<typename T, int N, int M>
inline vec<T, N> operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
  vec<T, M> const unpacked = vec<T, M>(lhs);
  return unpacked * rhs;
}
// Camera parameters, read through a `constant` pointer by linearDepth,
// getTile and computeMain_inner. The leading comment on each member is its
// byte offset within the struct.
//  - view:       applied to light positions in computeMain_inner.
//  - outputSize: divided by the tile counts in getTile to get the tile size.
//  - zNear/zFar: used by linearDepth and by the depth slicing in getTile.
// projection, inverseProjection, position and time are not read by the code
// visible in this file.
struct Camera {
/* 0x0000 */ float4x4 projection;
/* 0x0040 */ float4x4 inverseProjection;
/* 0x0080 */ float4x4 view;
/* 0x00c0 */ packed_float3 position;
/* 0x00cc */ float time;
/* 0x00d0 */ float2 outputSize;
/* 0x00d8 */ float zNear;
/* 0x00dc */ float zFar;
};
// Axis-aligned bounding box of one cluster; computeMain_inner passes both
// corners to sqDistPointAABB. The explicit 4-byte tint_pad members place
// each packed_float3 at a 16-byte offset (0x00 and 0x10), for a 32-byte struct.
struct ClusterBounds {
/* 0x0000 */ packed_float3 minAABB;
/* 0x000c */ int8_t tint_pad[4];
/* 0x0010 */ packed_float3 maxAABB;
/* 0x001c */ int8_t tint_pad_1[4];
};
// Struct wrapper around a fixed-size array of cluster bounds.
// 27648 equals the product of the tileCount components (32 * 18 * 48).
struct tint_array_wrapper {
/* 0x0000 */ ClusterBounds arr[27648];
};
// Buffer of per-cluster bounds; computeMain receives it at [[buffer(3)]]
// and computeMain_inner indexes bounds.arr by the flattened tile index.
struct Clusters {
/* 0x0000 */ tint_array_wrapper bounds;
};
// Per-cluster record written by computeMain_inner: `offset` is the first
// slot of this cluster's range in ClusterLightGroup::indices and `count`
// is the number of light indices stored in that range.
struct ClusterLights {
/* 0x0000 */ uint offset;
/* 0x0004 */ uint count;
};
// Struct wrapper around a fixed-size array with one ClusterLights record
// per cluster (27648 = 32 * 18 * 48, the product of tileCount).
struct tint_array_wrapper_1 {
/* 0x0000 */ ClusterLights arr[27648];
};
// Struct wrapper around the shared pool of light indices.
// 1769472 = 27648 * 64; computeMain_inner compares reserved offsets
// against this same capacity.
struct tint_array_wrapper_2 {
/* 0x0000 */ uint arr[1769472];
};
// Output buffer of the light-clustering kernel (bound at [[buffer(1)]]).
//  - offset:  atomic counter; computeMain_inner adds each cluster's light
//             count to it to reserve a range in `indices`.
//  - lights:  per-cluster (offset, count) records.
//  - indices: shared pool that receives the light indices; it starts at
//             byte 0x36004 = 4 + 27648 * 8.
struct ClusterLightGroup {
/* 0x0000 */ atomic_uint offset;
/* 0x0004 */ tint_array_wrapper_1 lights;
/* 0x36004 */ tint_array_wrapper_2 indices;
};
// One light, 32 bytes. computeMain_inner reads `position` (transformed by
// the camera view matrix) and `range` (a value <= 0 makes the light count
// for every cluster); `color` and `intensity` are not read by the code
// visible in this file.
struct Light {
/* 0x0000 */ packed_float3 position;
/* 0x000c */ float range;
/* 0x0010 */ packed_float3 color;
/* 0x001c */ float intensity;
};
// Light buffer bound at [[buffer(2)]]. computeMain_inner reads only
// `lightCount` (loop bound) and `lights`; the ambient and directional
// members are not read by the code visible in this file.
// NOTE(review): `lights` is declared with length 1 but is indexed up to
// lightCount - 1, so it presumably stands in for a runtime-sized array at
// the end of the buffer — confirm against the WGSL source.
struct GlobalLights {
/* 0x0000 */ packed_float3 ambient;
/* 0x000c */ int8_t tint_pad_2[4];
/* 0x0010 */ packed_float3 dirColor;
/* 0x001c */ float dirIntensity;
/* 0x0020 */ packed_float3 dirDirection;
/* 0x002c */ uint lightCount;
/* 0x0030 */ Light lights[1];
};
// Struct wrapper around a 256-entry uint array; computeMain_inner uses a
// local instance to collect the light indices of one cluster (it stops
// collecting once 256 entries are stored).
struct tint_array_wrapper_3 {
uint arr[256];
};
// Number of cluster tiles along x, y and z. The product 32 * 18 * 48 = 27648
// matches the length of the per-cluster arrays declared above.
constant uint3 tileCount = uint3(32u, 18u, 48u);
// Maps a depth sample to a depth value using the camera's near/far planes:
//   zFar * zNear / (depthSample * (zNear - zFar) + zFar)
// tint_symbol points to the camera uniform data.
float linearDepth(float depthSample, const constant Camera* const tint_symbol) {
  float const nearPlane = tint_symbol->zNear;
  float const farPlane = tint_symbol->zFar;
  float const numerator = farPlane * nearPlane;
  float const denominator = fma(depthSample, (nearPlane - farPlane), farPlane);
  return numerator / denominator;
}
// Returns the (x, y, z) tile that contains fragCoord.
// x and y come from dividing the fragment position by the tile size in
// pixels (outputSize / tileCount); z comes from a log2 mapping of the
// linearized depth, clamped below at zero.
// tint_symbol_1 points to the camera uniform data.
uint3 getTile(float4 fragCoord, const constant Camera* const tint_symbol_1) {
  float const depthSlices = float(tileCount.z);
  float const logDepthRatio = log2(tint_symbol_1->zFar / tint_symbol_1->zNear);

  float const sliceScale = depthSlices / logDepthRatio;
  float const sliceBias = -((depthSlices * log2(tint_symbol_1->zNear)) / logDepthRatio);

  float const depth = linearDepth(fragCoord.z, tint_symbol_1);
  uint const zTile = uint(fmax(((log2(depth) * sliceScale) + sliceBias), 0.0f));

  float const tileWidth = tint_symbol_1->outputSize.x / float(tileCount.x);
  float const tileHeight = tint_symbol_1->outputSize.y / float(tileCount.y);
  uint const xTile = uint(fragCoord.x / tileWidth);
  uint const yTile = uint(fragCoord.y / tileHeight);

  return uint3(xTile, yTile, zTile);
}
// Flattens the tile containing fragCoord into a linear cluster index:
//   x + y * tileCount.x + z * tileCount.x * tileCount.y
// tint_symbol_2 points to the camera uniform data.
uint getClusterIndex(float4 fragCoord, const constant Camera* const tint_symbol_2) {
  uint3 const cell = getTile(fragCoord, tint_symbol_2);
  uint const rowOffset = cell.y * tileCount.x;
  uint const sliceOffset = (cell.z * tileCount.x) * tileCount.y;
  return (cell.x + rowOffset) + sliceOffset;
}
// Squared distance from `point` to the axis-aligned box [minAABB, maxAABB].
// For each axis, the amount by which the point lies outside the box on that
// axis is squared and accumulated; the result is 0 when the point is inside.
float sqDistPointAABB(float3 point, float3 minAABB, float3 maxAABB) {
  float total = 0.0f;
  for (int axis = 0; axis < 3; ++axis) {
    float const coord = point[axis];
    float const lower = minAABB[axis];
    float const upper = maxAABB[axis];
    if (coord < lower) {
      total = total + ((lower - coord) * (lower - coord));
    }
    if (coord > upper) {
      total = total + ((coord - upper) * (coord - upper));
    }
  }
  return total;
}
// Builds the light list for one cluster and publishes it to the shared
// output buffer.
//
//   global_id      3D cluster coordinate of this thread.
//   tint_symbol_3  global light list (read-only).
//   tint_symbol_4  camera uniforms; only the view matrix is used here.
//   tint_symbol_5  per-cluster bounding boxes (read-only).
//   tint_symbol_6  output: atomic allocation counter, per-cluster
//                  (offset, count) records and the shared index pool.
//
// Steps:
//   1. Collect up to 256 light indices whose range sphere touches the
//      cluster's AABB (lights with range <= 0 are always collected).
//   2. Reserve a contiguous range in the index pool with an atomic add.
//   3. Copy the collected indices into the range and record (offset, count).
//
// If the reserved range does not fit entirely inside the index pool, the
// function returns without writing anything for this cluster. The counter
// has already been advanced at that point.
//
// NOTE(review): tileIndex is not range-checked; this assumes the dispatch
// grid matches tileCount — confirm against the caller.
void computeMain_inner(uint3 global_id, const device GlobalLights* const tint_symbol_3, const constant Camera* const tint_symbol_4, const device Clusters* const tint_symbol_5, device ClusterLightGroup* const tint_symbol_6) {
  // Capacity of ClusterLightGroup::indices (tint_array_wrapper_2::arr).
  uint const indexCapacity = 1769472u;

  // Flatten the 3D cluster coordinate: x + y * countX + z * countX * countY.
  uint const tileIndex = ((global_id[0] + (global_id[1] * tileCount[0])) + ((global_id[2] * tileCount[0]) * tileCount[1]));

  uint clusterLightCount = 0u;
  tint_array_wrapper_3 clusterLightIndices = {};
  for(uint i = 0u; (i < (*(tint_symbol_3)).lightCount); i = (i + 1u)) {
    float const range = (*(tint_symbol_3)).lights[i].range;
    // A non-positive range skips the distance test entirely.
    bool lightInCluster = (range <= 0.0f);
    if (!(lightInCluster)) {
      // Compare in view space: transform the light position by the view
      // matrix, then test its squared distance to the cluster AABB against
      // the squared range.
      float4 const lightViewPos = ((*(tint_symbol_4)).view * float4((*(tint_symbol_3)).lights[i].position, 1.0f));
      float const sqDist = sqDistPointAABB(float4(lightViewPos).xyz, (*(tint_symbol_5)).bounds.arr[tileIndex].minAABB, (*(tint_symbol_5)).bounds.arr[tileIndex].maxAABB);
      lightInCluster = (sqDist <= (range * range));
    }
    if (lightInCluster) {
      clusterLightIndices.arr[clusterLightCount] = i;
      clusterLightCount = (clusterLightCount + 1u);
    }
    // The local scratch array holds 256 entries; stop once it is full.
    if ((clusterLightCount == 256u)) {
      break;
    }
  }

  uint const lightCount = clusterLightCount;
  uint offset = atomic_fetch_add_explicit(&((*(tint_symbol_6)).offset), lightCount, memory_order_relaxed);

  // Reject the reservation unless the WHOLE range [offset, offset + lightCount)
  // fits in the index pool. Checking only `offset` would let a range that
  // starts in bounds run past the end of the array. The subtraction form
  // avoids unsigned wrap-around in `offset + lightCount`.
  if ((offset >= indexCapacity) || (lightCount > (indexCapacity - offset))) {
    return;
  }

  for(uint i = 0u; (i < lightCount); i = (i + 1u)) {
    (*(tint_symbol_6)).indices.arr[(offset + i)] = clusterLightIndices.arr[i];
  }
  (*(tint_symbol_6)).lights.arr[tileIndex].offset = offset;
  (*(tint_symbol_6)).lights.arr[tileIndex].count = lightCount;
}
// Compute entry point: receives the bound buffers and the thread's grid
// position, and forwards them to computeMain_inner.
//   buffer(0): camera uniforms      buffer(1): cluster light output
//   buffer(2): global light list    buffer(3): cluster bounds
kernel void computeMain(
    const device GlobalLights* tint_symbol_7 [[buffer(2)]],
    const constant Camera* tint_symbol_8 [[buffer(0)]],
    const device Clusters* tint_symbol_9 [[buffer(3)]],
    device ClusterLightGroup* tint_symbol_10 [[buffer(1)]],
    uint3 global_id [[thread_position_in_grid]]) {
  computeMain_inner(global_id, tint_symbol_7, tint_symbol_8, tint_symbol_9, tint_symbol_10);
}