dawn-cmake/test/bug/tint/1113.wgsl.expected.msl

#include <metal_stdlib>

using namespace metal;

// matrix * packed vector.
// The packed operand is converted to a regular vec<T, N> first, and the
// product is then computed with the matrix * vec operator.
template<typename T, int N, int M>
inline auto operator*(matrix<T, N, M> lhs, packed_vec<T, N> rhs) {
  const vec<T, N> unpacked = vec<T, N>(rhs);
  return lhs * unpacked;
}

// packed vector * matrix.
// The packed operand is converted to a regular vec<T, M> first, and the
// product is then computed with the vec * matrix operator.
template<typename T, int N, int M>
inline auto operator*(packed_vec<T, M> lhs, matrix<T, N, M> rhs) {
  const vec<T, M> unpacked = vec<T, M>(lhs);
  return unpacked * rhs;
}

// Read-only parameters shared by every kernel in this file (bound as a
// `constant` reference at buffer(0) by the kernel entry points).
// The leading comment on each member is its byte offset within the struct.
struct Uniforms {
  // Number of triangles; threads whose index is >= this value exit early.
  /* 0x0000 */ uint numTriangles;
  // Number of voxels along one edge of the grid (used for index math).
  /* 0x0004 */ uint gridSize;
  // pad1 / pad2 are not read by any function in this file.
  /* 0x0008 */ uint pad1;
  /* 0x000c */ uint pad2;
  // Bounding-box minimum corner, read component-wise by toVoxelPos().
  /* 0x0010 */ packed_float3 bbMin;
  // 4 filler bytes so that bbMax starts at offset 0x20.
  /* 0x001c */ int8_t tint_pad[4];
  // Bounding-box maximum corner, read component-wise by toVoxelPos().
  /* 0x0020 */ packed_float3 bbMax;
  // 4 trailing filler bytes (struct ends at offset 0x30).
  /* 0x002c */ int8_t tint_pad_1[4];
};
// Read/write debug buffer (bound as a `device` reference at buffer(1) by the
// kernel entry points). The leading comment on each member is its byte offset.
struct Dbg {
  // Running total advanced atomically by main_create_lut_inner(); the value
  // before each add is stored as the voxel's LUT entry.
  /* 0x0000 */ atomic_uint offsetCounter;
  // pad0..pad2 are not accessed by any function in this file.
  /* 0x0004 */ uint pad0;
  /* 0x0008 */ uint pad1;
  /* 0x000c */ uint pad2;
  // value0 receives uniforms.gridSize from the thread handling triangle 0 in
  // main_count_inner(); value1 is only read (by doIgnore()); value2 / value3
  // are not accessed in this file.
  /* 0x0010 */ uint value0;
  /* 0x0014 */ uint value1;
  /* 0x0018 */ uint value2;
  /* 0x001c */ uint value3;
  // value_f32_0..2 receive the x/y/z of triangle 0's centroid in
  // main_count_inner(); value_f32_3 is not accessed in this file.
  /* 0x0020 */ float value_f32_0;
  /* 0x0024 */ float value_f32_1;
  /* 0x0028 */ float value_f32_2;
  /* 0x002c */ float value_f32_3;
};
// Buffer of floats (bound at buffer(4) as `positions` by the kernels).
// Declared with a single element but indexed with runtime-computed indices
// (see loadPosition()), so it presumably stands in for a runtime-sized array
// whose real length is set by the bound buffer -- confirm against the WGSL.
struct F32s {
  /* 0x0000 */ float values[1];
};
// Buffer of unsigned ints (bound at buffer(3) as `indices` by the kernels).
// Declared with a single element but indexed with runtime-computed indices
// (3 * triangleIndex + k), so it presumably stands in for a runtime-sized
// array -- confirm against the WGSL.
struct U32s {
  /* 0x0000 */ uint values[1];
};
// Buffer-style wrapper around an int array. No function or kernel in this
// file takes or uses this type.
struct I32s {
  int values[1];
};
// Buffer of atomic unsigned ints (bound at buffer(2) as `counters`).
// main_count_inner() atomically increments one entry per triangle and
// main_create_lut_inner() atomically loads them. Declared with a single
// element but indexed by voxel index, so it presumably stands in for a
// runtime-sized array -- confirm against the WGSL.
struct AU32s {
  /* 0x0000 */ atomic_uint values[1];
};
// Buffer of atomic signed ints (bound at buffer(5) as `LUT`).
// main_create_lut_inner() stores one offset (or -1) per voxel and
// main_sort_triangles_inner() atomically increments entries. Declared with a
// single element but indexed by voxel index, so it presumably stands in for
// a runtime-sized array -- confirm against the WGSL.
struct AI32s {
  /* 0x0000 */ atomic_int values[1];
};

// Maps `position` into voxel-grid coordinates.
// Each component is offset by the bounding-box minimum, scaled by gridSize
// and divided by the longest edge of the bounding box, so all three axes use
// the same scale factor.
float3 toVoxelPos(constant Uniforms& uniforms, float3 position) {
  // Unpack the packed bounds into regular float3 values.
  const float3 lo = float3(uniforms.bbMin[0], uniforms.bbMin[1], uniforms.bbMin[2]);
  const float3 hi = float3(uniforms.bbMax[0], uniforms.bbMax[1], uniforms.bbMax[2]);

  // Longest edge of the bounding box.
  const float3 extent = (hi - lo);
  const float longestEdge = fmax(fmax(extent[0], extent[1]), extent[2]);

  const float cells = float(uniforms.gridSize);
  const float vx = ((cells * (position[0] - lo[0])) / longestEdge);
  const float vy = ((cells * (position[1] - lo[1])) / longestEdge);
  const float vz = ((cells * (position[2] - lo[2])) / longestEdge);
  return float3(vx, vy, vz);
}

// Flattens a voxel position into a linear index:
//   x + gridSize * y + gridSize^2 * z
// where x/y/z are the components of voxelPos converted to uint.
uint toIndex1D(uint gridSize, float3 voxelPos) {
  const uint3 cell = uint3(voxelPos);
  const uint sliceStride = (gridSize * gridSize);
  const uint withinSlice = (cell[0] + (gridSize * cell[1]));
  return (withinSlice + (sliceStride * cell[2]));
}

// Splits a linear voxel index back into (x, y, z) grid coordinates for a
// cubic grid with `gridSize` voxels per edge.
uint3 toIndex3D(uint gridSize, uint index) {
  const uint sliceSize = (gridSize * gridSize);
  const uint slice = (index / sliceSize);
  // Remainder inside the slice, divided by the row length.
  const uint row = ((index - (sliceSize * slice)) / gridSize);
  const uint column = (index % gridSize);
  return uint3(column, row, slice);
}

// Reads the three consecutive floats starting at 3 * vertexIndex from
// `positions` and returns them as a float3.
float3 loadPosition(device F32s& positions, uint vertexIndex) {
  const uint base = (3u * vertexIndex);
  const float px = positions.values[(base + 0u)];
  const float py = positions.values[(base + 1u)];
  const float pz = positions.values[(base + 2u)];
  return float3(px, py, pz);
}

// Reads one value from each of the six bound resources and discards it.
// None of the loaded values is used afterwards.
void doIgnore(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT) {
  uint unusedNumTriangles = uniforms.numTriangles;
  uint unusedDbgValue = dbg.value1;
  uint unusedCounter = atomic_load_explicit(&(counters.values[0]), memory_order_relaxed);
  uint unusedIndex = indices.values[0];
  float unusedPosition = positions.values[0];
  int unusedLutEntry = atomic_load_explicit(&(LUT.values[0]), memory_order_relaxed);
}

// Counting pass: one thread per triangle.
// Computes the triangle's centroid, finds the voxel containing it and
// atomically increments that voxel's counter. The thread handling triangle 0
// additionally records gridSize and the centroid in the debug buffer.
void main_count_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint tri = GlobalInvocationID[0];
  // Threads past the last triangle have nothing to do.
  if ((tri >= uniforms.numTriangles)) {
    return;
  }
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

  // Fetch the triangle's three vertex indices, then its vertex positions.
  const uint firstIndex = (3u * tri);
  const uint ia = indices.values[(firstIndex + 0u)];
  const uint ib = indices.values[(firstIndex + 1u)];
  const uint ic = indices.values[(firstIndex + 2u)];
  const float3 a = loadPosition(positions, ia);
  const float3 b = loadPosition(positions, ib);
  const float3 c = loadPosition(positions, ic);

  const float3 centroid = (((a + b) + c) / 3.0f);
  const float3 gridPos = toVoxelPos(uniforms, centroid);
  const uint voxel = toIndex1D(uniforms.gridSize, gridPos);

  // The previous counter value is returned but not used.
  uint previousCount = atomic_fetch_add_explicit(&(counters.values[voxel]), 1u, memory_order_relaxed);

  // Only the thread for triangle 0 writes debug output.
  if ((tri != 0u)) {
    return;
  }
  dbg.value0 = uniforms.gridSize;
  dbg.value_f32_0 = centroid[0];
  dbg.value_f32_1 = centroid[1];
  dbg.value_f32_2 = centroid[2];
}

// Kernel entry point for the counting pass; forwards the thread position and
// the six bound buffers to main_count_inner().
kernel void main_count(
    uint3 GlobalInvocationID [[thread_position_in_grid]],
    constant Uniforms& uniforms [[buffer(0)]],
    device Dbg& dbg [[buffer(1)]],
    device AU32s& counters [[buffer(2)]],
    device U32s& indices [[buffer(3)]],
    device F32s& positions [[buffer(4)]],
    device AI32s& LUT [[buffer(5)]]) {
  main_count_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
}

// LUT-building pass: one thread per voxel.
// For a voxel whose counter is non-zero, atomically adds that count to
// dbg.offsetCounter and stores the counter's previous value as the voxel's
// LUT entry. Voxels with a zero count get -1.
void main_create_lut_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint voxel = GlobalInvocationID[0];
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);

  // Total voxel count of the cubic grid: gridSize^3.
  const uint voxelCount = ((uniforms.gridSize * uniforms.gridSize) * uniforms.gridSize);
  if ((voxel >= voxelCount)) {
    return;
  }

  const uint trianglesInVoxel = atomic_load_explicit(&(counters.values[voxel]), memory_order_relaxed);
  // The fetch-add only executes when the voxel holds at least one triangle.
  const int lutEntry = (trianglesInVoxel > 0u)
      ? int(atomic_fetch_add_explicit(&(dbg.offsetCounter), trianglesInVoxel, memory_order_relaxed))
      : -1;
  atomic_store_explicit(&(LUT.values[voxel]), lutEntry, memory_order_relaxed);
}

// Kernel entry point for the LUT-building pass; forwards the thread position
// and the six bound buffers to main_create_lut_inner().
kernel void main_create_lut(
    uint3 GlobalInvocationID [[thread_position_in_grid]],
    constant Uniforms& uniforms [[buffer(0)]],
    device Dbg& dbg [[buffer(1)]],
    device AU32s& counters [[buffer(2)]],
    device U32s& indices [[buffer(3)]],
    device F32s& positions [[buffer(4)]],
    device AI32s& LUT [[buffer(5)]]) {
  main_create_lut_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
}

// Sorting pass: one thread per triangle.
// Recomputes the voxel containing the triangle's centroid and atomically
// increments that voxel's LUT entry. The value returned by the increment is
// not used further.
void main_sort_triangles_inner(constant Uniforms& uniforms, device Dbg& dbg, device AU32s& counters, device U32s& indices, device F32s& positions, device AI32s& LUT, uint3 GlobalInvocationID) {
  const uint tri = GlobalInvocationID[0];
  doIgnore(uniforms, dbg, counters, indices, positions, LUT);
  // Threads past the last triangle have nothing to do.
  if ((tri >= uniforms.numTriangles)) {
    return;
  }

  // Fetch the triangle's three vertex indices, then its vertex positions.
  const uint firstIndex = (3u * tri);
  const uint ia = indices.values[(firstIndex + 0u)];
  const uint ib = indices.values[(firstIndex + 1u)];
  const uint ic = indices.values[(firstIndex + 2u)];
  const float3 a = loadPosition(positions, ia);
  const float3 b = loadPosition(positions, ib);
  const float3 c = loadPosition(positions, ic);

  const float3 centroid = (((a + b) + c) / 3.0f);
  const float3 gridPos = toVoxelPos(uniforms, centroid);
  const uint voxel = toIndex1D(uniforms.gridSize, gridPos);

  // Previous LUT value for this voxel; currently unused.
  int slot = atomic_fetch_add_explicit(&(LUT.values[voxel]), 1, memory_order_relaxed);
}

// Kernel entry point for the sorting pass; forwards the thread position and
// the six bound buffers to main_sort_triangles_inner().
kernel void main_sort_triangles(
    uint3 GlobalInvocationID [[thread_position_in_grid]],
    constant Uniforms& uniforms [[buffer(0)]],
    device Dbg& dbg [[buffer(1)]],
    device AU32s& counters [[buffer(2)]],
    device U32s& indices [[buffer(3)]],
    device F32s& positions [[buffer(4)]],
    device AI32s& LUT [[buffer(5)]]) {
  main_sort_triangles_inner(uniforms, dbg, counters, indices, positions, LUT, GlobalInvocationID);
}