139 lines
5.3 KiB
Plaintext
139 lines
5.3 KiB
Plaintext
#include <metal_stdlib>
|
|
|
|
using namespace metal;
|
|
|
|
// Matrix * packed-vector product.
// The packed right-hand operand is first converted to the regular vec<T, N>
// type, after which the ordinary matrix * vector multiplication is applied.
template<typename T, int N, int M>
inline vec<T, M> operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
  vec<T, N> const unpacked = vec<T, N>(rhs);
  return lhs * unpacked;
}
|
|
|
|
// Packed-vector * matrix product.
// The packed left-hand operand is first converted to the regular vec<T, M>
// type, after which the ordinary vector * matrix multiplication is applied.
template<typename T, int N, int M>
inline vec<T, N> operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
  vec<T, M> const unpacked = vec<T, M>(lhs);
  return unpacked * rhs;
}
|
|
|
|
// Camera parameters passed to the shader through a `constant` buffer.
// The trailing comment on each member is its byte offset within the struct.
struct Camera {
  float4x4 projection;         // +0x0000
  float4x4 inverseProjection;  // +0x0040
  float4x4 view;               // +0x0080  applied to light positions in computeMain_inner
  packed_float3 position;      // +0x00c0
  float time;                  // +0x00cc
  float2 outputSize;           // +0x00d0  divided by the tile counts in getTile
  float zNear;                 // +0x00d8  depth-range bound used by linearDepth / getTile
  float zFar;                  // +0x00dc  depth-range bound used by linearDepth / getTile
};
|
|
// Axis-aligned bounding box of one cluster, stored as two corners.
// computeMain_inner tests view-transformed light positions against this box.
// Each packed 12-byte corner is followed by 4 bytes of explicit padding so
// that every corner starts on a 16-byte boundary.
struct ClusterBounds {
  packed_float3 minAABB;   // +0x0000
  int8_t tint_pad[4];      // +0x000c  padding
  packed_float3 maxAABB;   // +0x0010
  int8_t tint_pad_1[4];    // +0x001c  padding
};
|
|
// Fixed-size array of cluster bounds wrapped in a struct.
// 27648 equals 32 * 18 * 48, the product of the tileCount components.
struct tint_array_wrapper {
  ClusterBounds arr[27648];  // +0x0000
};
|
|
// Buffer layout holding the bounding box of every cluster; read-only in
// computeMain_inner, where it is indexed by the flattened tile index.
struct Clusters {
  tint_array_wrapper bounds;  // +0x0000
};
|
|
// Per-cluster record written by computeMain_inner: where the cluster's light
// indices start inside ClusterLightGroup::indices and how many there are.
struct ClusterLights {
  uint offset;  // +0x0000  first slot in the shared index array
  uint count;   // +0x0004  number of consecutive slots
};
|
|
// Fixed-size array of per-cluster light records wrapped in a struct.
// 27648 equals 32 * 18 * 48, the product of the tileCount components.
struct tint_array_wrapper_1 {
  ClusterLights arr[27648];  // +0x0000
};
|
|
// Fixed-size array of light indices wrapped in a struct.
// 1769472 equals 27648 * 64; computeMain_inner treats this length as the
// capacity of the shared index list.
struct tint_array_wrapper_2 {
  uint arr[1769472];  // +0x0000
};
|
|
// Output buffer of the light-assignment pass.
// `offset` is advanced with an atomic add in computeMain_inner to reserve a
// range of `indices`; `lights` stores the reserved range for each cluster.
struct ClusterLightGroup {
  atomic_uint offset;            // +0x0000   next free slot in `indices`
  tint_array_wrapper_1 lights;   // +0x0004   per-cluster (offset, count)
  tint_array_wrapper_2 indices;  // +0x36004  light indices for all clusters
};
|
|
// A single light entry of GlobalLights::lights.
// computeMain_inner reads `position` and `range`; a range <= 0 makes the light
// count as inside every cluster.
struct Light {
  packed_float3 position;  // +0x0000
  float range;             // +0x000c
  packed_float3 color;     // +0x0010
  float intensity;         // +0x001c
};
|
|
// Scene-wide lighting buffer.
// `lights` is declared with one element but computeMain_inner indexes it with
// every i < lightCount, so the bound buffer presumably holds lightCount
// entries (runtime-sized array) -- confirm against the host code.
struct GlobalLights {
  packed_float3 ambient;       // +0x0000
  int8_t tint_pad_2[4];        // +0x000c  padding
  packed_float3 dirColor;      // +0x0010
  float dirIntensity;          // +0x001c
  packed_float3 dirDirection;  // +0x0020
  uint lightCount;             // +0x002c  loop bound for `lights`
  Light lights[1];             // +0x0030
};
|
|
// 256-entry array of uints wrapped in a struct; computeMain_inner uses one as
// local scratch space for the light indices collected for a cluster.
struct tint_array_wrapper_3 {
  uint arr[256];
};
|
|
|
|
// Number of clusters along x, y and z. 32 * 18 * 48 = 27648, which is the
// length of the per-cluster arrays declared above.
constant uint3 tileCount = uint3(32u, 18u, 48u);
|
|
// Maps a depth sample to a linear depth value using the camera's zNear/zFar:
//   (zFar * zNear) / (depthSample * (zNear - zFar) + zFar)
// A sample of 0 yields zNear and a sample of 1 yields zFar.
float linearDepth(float depthSample, const constant Camera* const tint_symbol) {
  float const nearZ = tint_symbol->zNear;
  float const farZ = tint_symbol->zFar;
  float const numerator = farZ * nearZ;
  float const denominator = fma(depthSample, nearZ - farZ, farZ);
  return numerator / denominator;
}
|
|
|
|
// Returns the (x, y, z) cluster coordinate containing a fragment.
//
// fragCoord     - fragment coordinate; x/y are divided by the tile size in
//                 the same units as Camera::outputSize, z is a depth sample
//                 fed to linearDepth.
// tint_symbol_1 - camera parameters (outputSize, zNear, zFar).
//
// x and y come from dividing the fragment position by the size of one tile.
// z is a logarithmic slice: log2(linearDepth) * sliceScale + sliceBias.
//
// The result is clamped to the last valid tile on every axis. Without the
// clamp a depth sample at the far plane gives linearDepth == zFar and therefore
// slice == tileCount.z, which is one past the end of the grid and produces an
// out-of-range cluster index in getClusterIndex.
uint3 getTile(float4 fragCoord, const constant Camera* const tint_symbol_1) {
  float const nearZ = tint_symbol_1->zNear;
  float const farZ = tint_symbol_1->zFar;
  float const sliceCount = float(tileCount[2]);

  // log2(zFar / zNear) is shared by the scale and the bias; compute it once.
  float const logDepthRange = log2(farZ / nearZ);
  float const sliceScale = sliceCount / logDepthRange;
  float const sliceBias = -((sliceCount * log2(nearZ)) / logDepthRange);

  float const slice = (log2(linearDepth(fragCoord[2], tint_symbol_1)) * sliceScale) + sliceBias;
  // fmax keeps fragments in front of the near plane in slice 0.
  uint const zTile = uint(fmax(slice, 0.0f));

  float const tileWidth = tint_symbol_1->outputSize[0] / float(tileCount[0]);
  float const tileHeight = tint_symbol_1->outputSize[1] / float(tileCount[1]);
  uint const xTile = uint(fragCoord[0] / tileWidth);
  uint const yTile = uint(fragCoord[1] / tileHeight);

  // Clamp to the grid so callers never index past the cluster arrays.
  return min(uint3(xTile, yTile, zTile), tileCount - uint3(1u));
}
|
|
|
|
// Flattens the fragment's (x, y, z) tile coordinate from getTile into a
// single index: x + y * tileCount.x + z * tileCount.x * tileCount.y.
uint getClusterIndex(float4 fragCoord, const constant Camera* const tint_symbol_2) {
  uint3 const cell = getTile(fragCoord, tint_symbol_2);
  uint const rowStride = tileCount[0];
  uint const inPlane = cell[0] + (cell[1] * rowStride);
  uint const sliceBase = (cell[2] * rowStride) * tileCount[1];
  return inPlane + sliceBase;
}
|
|
|
|
// Squared distance from `point` to the axis-aligned box [minAABB, maxAABB].
// For each axis, the squared amount by which the point lies below the minimum
// or above the maximum is accumulated; a point inside the box yields 0.
float sqDistPointAABB(float3 point, float3 minAABB, float3 maxAABB) {
  float total = 0.0f;
  for (int axis = 0; axis < 3; ++axis) {
    float const coord = point[axis];
    float const lo = minAABB[axis];
    float const hi = maxAABB[axis];
    if (coord < lo) {
      float const gap = lo - coord;
      total = total + (gap * gap);
    }
    if (coord > hi) {
      float const gap = coord - hi;
      total = total + (gap * gap);
    }
  }
  return total;
}
|
|
|
|
// Builds the light list for one cluster.
//
// global_id     - (x, y, z) coordinate of the cluster handled by this thread.
// tint_symbol_3 - scene lights (read-only).
// tint_symbol_4 - camera; its view matrix is applied to light positions.
// tint_symbol_5 - per-cluster bounding boxes (read-only).
// tint_symbol_6 - output: shared index list plus per-cluster (offset, count).
//
// Steps:
//   1. Collect up to 256 lights whose sphere (position, range) touches the
//      cluster's AABB. Lights with range <= 0 are always accepted.
//   2. Reserve a range of the shared index list with an atomic add.
//   3. Copy the collected indices into that range and record (offset, count).
//
// Changes from the previous version:
//   * Threads whose global_id lies outside the tile grid return immediately
//     instead of computing an aliased or out-of-range tileIndex.
//   * The copy is limited to the space left in the index list. Previously only
//     `offset >= capacity` was checked, so a reservation that started below
//     the capacity but extended past it wrote beyond the end of the array.
void computeMain_inner(uint3 global_id, const device GlobalLights* const tint_symbol_3, const constant Camera* const tint_symbol_4, const device Clusters* const tint_symbol_5, device ClusterLightGroup* const tint_symbol_6) {
  uint const kMaxLightsPerCluster = 256u;   // length of tint_array_wrapper_3::arr
  uint const kMaxClusteredLights = 1769472u;  // length of ClusterLightGroup::indices.arr

  // Ignore threads that do not correspond to a cluster.
  if (any(global_id >= tileCount)) {
    return;
  }

  uint const tileIndex = ((global_id[0] + (global_id[1] * tileCount[0])) + ((global_id[2] * tileCount[0]) * tileCount[1]));

  // The cluster bounds do not change while iterating over lights; load once.
  float3 const clusterMin = float3(tint_symbol_5->bounds.arr[tileIndex].minAABB);
  float3 const clusterMax = float3(tint_symbol_5->bounds.arr[tileIndex].maxAABB);
  uint const totalLights = tint_symbol_3->lightCount;

  uint clusterLightCount = 0u;
  tint_array_wrapper_3 clusterLightIndices = {};

  // Stop as soon as the local scratch array is full.
  for (uint i = 0u; (i < totalLights) && (clusterLightCount < kMaxLightsPerCluster); i = (i + 1u)) {
    float const range = tint_symbol_3->lights[i].range;

    // A non-positive range marks a light that affects every cluster.
    bool lightInCluster = (range <= 0.0f);
    if (!lightInCluster) {
      float4 const lightViewPos = (tint_symbol_4->view * float4(tint_symbol_3->lights[i].position, 1.0f));
      float const sqDist = sqDistPointAABB(lightViewPos.xyz, clusterMin, clusterMax);
      lightInCluster = (sqDist <= (range * range));
    }

    if (lightInCluster) {
      clusterLightIndices.arr[clusterLightCount] = i;
      clusterLightCount = (clusterLightCount + 1u);
    }
  }

  // Reserve space for this cluster's indices in the shared list.
  uint const offset = atomic_fetch_add_explicit(&(tint_symbol_6->offset), clusterLightCount, memory_order_relaxed);

  // The shared list is already full: nothing can be stored for this cluster.
  // As before, its (offset, count) record is left untouched in that case.
  if (offset >= kMaxClusteredLights) {
    return;
  }

  // Never write past the end of the shared list, even when the reservation
  // straddles its capacity.
  uint const storedCount = min(clusterLightCount, kMaxClusteredLights - offset);

  for (uint i = 0u; i < storedCount; i = (i + 1u)) {
    tint_symbol_6->indices.arr[(offset + i)] = clusterLightIndices.arr[i];
  }

  tint_symbol_6->lights.arr[tileIndex].offset = offset;
  tint_symbol_6->lights.arr[tileIndex].count = storedCount;
}
|
|
|
|
// Compute entry point: one thread per grid position. Forwards the bound
// buffers and the thread's grid position to computeMain_inner.
//   buffer(0) - Camera
//   buffer(1) - ClusterLightGroup (written)
//   buffer(2) - GlobalLights
//   buffer(3) - Clusters
kernel void computeMain(
    const device GlobalLights* tint_symbol_7 [[buffer(2)]],
    const constant Camera* tint_symbol_8 [[buffer(0)]],
    const device Clusters* tint_symbol_9 [[buffer(3)]],
    device ClusterLightGroup* tint_symbol_10 [[buffer(1)]],
    uint3 global_id [[thread_position_in_grid]]) {
  computeMain_inner(global_id, tint_symbol_7, tint_symbol_8, tint_symbol_9, tint_symbol_10);
}
|
|
|