#include <metal_stdlib>

using namespace metal;

// Helpers that let a matrix be multiplied with a packed vector by first
// widening the packed vector to a regular vec.
// NOTE(review): the template parameter lists were missing from the file and
// have been reconstructed from how the operands are used — confirm against
// the generator output.
template<typename T, int N, int M>
inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
  return lhs * vec<T, N>(rhs);
}

template<typename T, int N, int M>
inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
  return vec<T, M>(lhs) * rhs;
}

// Constant parameters shared by all kernels. The offset comments give the
// byte offset of each member; the tint_pad members are explicit padding.
struct Uniforms {
  /* 0x0000 */ uint numTriangles;
  /* 0x0004 */ uint gridSize;
  /* 0x0008 */ uint pad1;
  /* 0x000c */ uint pad2;
  /* 0x0010 */ packed_float3 bbMin;
  /* 0x001c */ int8_t tint_pad[4];
  /* 0x0020 */ packed_float3 bbMax;
  /* 0x002c */ int8_t tint_pad_1[4];
};

// Read/write scratch buffer. offsetCounter is the running total used by
// main_create_lut to hand out ranges; the value* members receive debug output
// from main_count.
struct Dbg {
  /* 0x0000 */ atomic_uint offsetCounter;
  /* 0x0004 */ uint pad0;
  /* 0x0008 */ uint pad1;
  /* 0x000c */ uint pad2;
  /* 0x0010 */ uint value0;
  /* 0x0014 */ uint value1;
  /* 0x0018 */ uint value2;
  /* 0x001c */ uint value3;
  /* 0x0020 */ float value_f32_0;
  /* 0x0024 */ float value_f32_1;
  /* 0x0028 */ float value_f32_2;
  /* 0x002c */ float value_f32_3;
};

// Buffer wrappers. Each is declared with a one-element array and indexed
// beyond element 0 by the kernels, i.e. the array stands in for a
// runtime-sized buffer.
struct F32s {
  /* 0x0000 */ float values[1];
};

struct U32s {
  /* 0x0000 */ uint values[1];
};

struct I32s {
  /* 0x0000 */ int values[1];
};

struct AU32s {
  /* 0x0000 */ atomic_uint values[1];
};

struct AI32s {
  /* 0x0000 */ atomic_int values[1];
};

// Maps `position` into grid coordinates: the offset from uniforms.bbMin is
// scaled by gridSize / (largest extent of the bounding box), so the same
// scale is applied on all three axes.
// No guard exists for a zero-extent bounding box (division by zero).
float3 toVoxelPos(constant Uniforms& uniforms, float3 position) {
  const float3 bbMin = float3(uniforms.bbMin[0], uniforms.bbMin[1], uniforms.bbMin[2]);
  const float3 bbMax = float3(uniforms.bbMax[0], uniforms.bbMax[1], uniforms.bbMax[2]);
  const float3 bbSize = bbMax - bbMin;
  const float cubeSize = fmax(fmax(bbSize[0], bbSize[1]), bbSize[2]);
  const float gridSize = float(uniforms.gridSize);
  return (gridSize * (position - bbMin)) / cubeSize;
}

// Flattens a grid-space position to a linear voxel index
// (x + gridSize * y + gridSize^2 * z).
//
// The position is clamped to [0, gridSize - 1] per axis before the
// float -> uint conversion. Without the clamp a coordinate equal to gridSize
// (a point on the max face of the bounding cube) or a negative coordinate
// yields an index outside the grid, which the callers then use for atomic
// writes. In-range positions produce the same index as before.
uint toIndex1D(uint gridSize, float3 voxelPos) {
  // fmax keeps the clamp range valid (min <= max) even for gridSize == 0.
  const float maxCoord = fmax(float(gridSize) - 1.0f, 0.0f);
  const uint3 icoord = uint3(clamp(voxelPos, float3(0.0f), float3(maxCoord)));
  return icoord[0] + (gridSize * icoord[1]) + ((gridSize * gridSize) * icoord[2]);
}

// Inverse of the flattening used by toIndex1D: splits a linear voxel index
// back into (x, y, z) grid coordinates.
uint3 toIndex3D(uint gridSize, uint index) {
  const uint layerSize = gridSize * gridSize;
  const uint z = index / layerSize;
  const uint y = (index - (layerSize * z)) / gridSize;
  const uint x = index % gridSize;
  return uint3(x, y, z);
}

// Reads the three consecutive floats of vertex `vertexIndex` from the
// tightly packed position buffer.
float3 loadPosition(device F32s& positions, uint vertexIndex) {
  const uint base = 3u * vertexIndex;
  return float3(positions.values[base + 0u],
                positions.values[base + 1u],
                positions.values[base + 2u]);
}

// Reads one value from every bound resource and discards it. It has no
// effect on any buffer.
// NOTE(review): presumably a leftover from the source shader that keeps all
// bindings referenced — confirm before removing.
void doIgnore(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT) {
  (void)uniforms.numTriangles;
  (void)dbg.value1;
  (void)atomic_load_explicit(&(counters.values[0]), memory_order_relaxed);
  (void)indices.values[0];
  (void)positions.values[0];
  (void)atomic_load_explicit(&(LUT.values[0]), memory_order_relaxed);
}

// Returns the centroid of triangle `triangleIndex`: the average of the three
// vertex positions referenced by its three consecutive entries in `indices`.
float3 triangleCenter(device U32s& indices, device F32s& positions, uint triangleIndex) {
  const uint base = 3u * triangleIndex;
  const float3 p0 = loadPosition(positions, indices.values[base + 0u]);
  const float3 p1 = loadPosition(positions, indices.values[base + 1u]);
  const float3 p2 = loadPosition(positions, indices.values[base + 2u]);
  return ((p0 + p1) + p2) / 3.0f;
}

// Counting pass, one invocation per triangle: increments the counter of the
// voxel that contains the triangle's centroid. Invocation 0 additionally
// writes the grid size and its centroid into `dbg`.
void main_count_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint triangleIndex = GlobalInvocationID[0];
  // Surplus invocations beyond the triangle count do nothing.
  if (triangleIndex >= uniforms.numTriangles) {
    return;
  }
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

  const float3 center = triangleCenter(indices, positions, triangleIndex);
  const float3 voxelPos = toVoxelPos(uniforms, center);
  const uint voxelIndex = toIndex1D(uniforms.gridSize, voxelPos);

  // Only the increment matters; the previous count is not needed.
  atomic_fetch_add_explicit(&(counters.values[voxelIndex]), 1u, memory_order_relaxed);

  if (triangleIndex == 0u) {
    dbg.value0 = uniforms.gridSize;
    dbg.value_f32_0 = center[0];
    dbg.value_f32_1 = center[1];
    dbg.value_f32_2 = center[2];
  }
}

// Kernel entry point for the counting pass; forwards to main_count_inner.
kernel void main_count(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) {
  main_count_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
  return;
}

// LUT pass, one invocation per voxel: a voxel with a non-zero counter
// reserves `count` consecutive slots by adding its count to
// dbg.offsetCounter and stores the start of that range in LUT; a voxel with
// a zero counter stores -1.
void main_create_lut_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint voxelIndex = GlobalInvocationID[0];
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

  const uint maxVoxels = (uniforms.gridSize * uniforms.gridSize) * uniforms.gridSize;
  // Surplus invocations beyond the voxel count do nothing.
  if (voxelIndex >= maxVoxels) {
    return;
  }

  const uint numTriangles = atomic_load_explicit(&(counters.values[voxelIndex]), memory_order_relaxed);

  int offset = -1;
  if (numTriangles > 0u) {
    // fetch_add returns the counter value before the addition, i.e. the
    // first slot of the range reserved for this voxel.
    offset = int(atomic_fetch_add_explicit(&(dbg.offsetCounter), numTriangles, memory_order_relaxed));
  }
  atomic_store_explicit(&(LUT.values[voxelIndex]), offset, memory_order_relaxed);
}

// Kernel entry point for the LUT pass; forwards to main_create_lut_inner.
kernel void main_create_lut(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) {
  main_create_lut_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
  return;
}

// Sorting pass, one invocation per triangle: increments the LUT entry of the
// voxel containing the triangle's centroid. The value the entry held before
// the increment is returned by the atomic but is not used for anything here.
void main_sort_triangles_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint triangleIndex = GlobalInvocationID[0];
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);
  // Surplus invocations beyond the triangle count do nothing.
  if (triangleIndex >= uniforms.numTriangles) {
    return;
  }

  const float3 center = triangleCenter(indices, positions, triangleIndex);
  const float3 voxelPos = toVoxelPos(uniforms, center);
  const uint voxelIndex = toIndex1D(uniforms.gridSize, voxelPos);

  atomic_fetch_add_explicit(&(LUT.values[voxelIndex]), 1, memory_order_relaxed);
}

// Kernel entry point for the sorting pass; forwards to
// main_sort_triangles_inner.
kernel void main_sort_triangles(uint3 GlobalInvocationID [[thread_position_in_grid]], constant Uniforms& uniforms [[buffer(0)]], device Dbg& dbg [[buffer(1)]], device AU32s& counters [[buffer(2)]], device U32s& indices [[buffer(3)]], device F32s& positions [[buffer(4)]], device AI32s& LUT [[buffer(5)]]) {
  main_sort_triangles_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
  return;
}