Some hacks for TEV stage handling

2025-12-14 14:06:09 +00:00 · 2022-03-08 00:28:31 -05:00
parent 44a4d667b7
commit 14a82a678a
14 changed files with 330 additions and 99 deletions
--- a/Runtime/Graphics/CCubeMaterial.cpp
+++ b/Runtime/Graphics/CCubeMaterial.cpp
@@ -311,6 +311,8 @@ u32 CCubeMaterial::HandleColorChannels(u32 chanCount, u32 firstChan) {
  }
  // TODO
  aurora::gfx::set_chan_mat_src(GX::COLOR0A0, GX::SRC_REG);
  aurora::gfx::set_chan_mat_src(GX::COLOR1A1, GX::SRC_REG);
  return chanCount;
 }
--- a/Runtime/Graphics/CTevCombiners.cpp
+++ b/Runtime/Graphics/CTevCombiners.cpp
@@ -5,7 +5,12 @@ u32 CTevPass::sNextUniquePass = 0;
 void CTevPass::Execute(ERglTevStage stage) const {
  if (*this == skPassThru) {
-    aurora::gfx::disable_tev_stage(stage);
+    // TODO proper handling of # tev stages
    if (stage > ERglTevStage::Stage0) {
      aurora::gfx::disable_tev_stage(stage);
    } else {
      aurora::gfx::disable_tev_stage(ERglTevStage::Stage1);
    }
  } else {
    aurora::gfx::update_tev_stage(stage, x4_colorPass, x14_alphaPass, x24_colorOp, x38_alphaOp);
    aurora::gfx::set_tev_order(static_cast<GX::TevStageID>(stage), GX::TEXCOORD_NULL, static_cast<GX::TexMapID>(stage),
--- a/Runtime/GuiSys/CGuiPane.cpp
+++ b/Runtime/GuiSys/CGuiPane.cpp
@@ -16,13 +16,8 @@ void CGuiPane::Draw(const CGuiWidgetDrawParms& parms) {
    auto col = xa8_color2;
    col.a() = parms.x0_alphaMod * xa8_color2.a();
 #if 0
    CGraphics::SetTevOp(ERglTevStage::Stage0, CTevCombiners::skPassThru);
    CGraphics::DrawPrimitive(GX::Primitive::TRIANGLESTRIP, xc0_verts.data(), skDefaultNormal, col, xc0_verts.size());
 #else
    aurora::gfx::queue_colored_quad_verts(aurora::gfx::CameraFilterType::Blend, aurora::gfx::ZComp::Always, false, col,
                                          xc0_verts);
 #endif
  }
  CGuiWidget::Draw(parms);
 }
--- a/aurora/include/aurora/common.hpp
+++ b/aurora/include/aurora/common.hpp
@@ -2,8 +2,8 @@
 #include <array>
 #include <compare>
 #include <vector>
 #include <type_traits>
 #include <vector>
 #include "Runtime/RetroTypes.hpp"
--- a/aurora/lib/gfx/common.cpp
+++ b/aurora/lib/gfx/common.cpp
@@ -5,6 +5,7 @@
 #include "movie_player/shader.hpp"
 #include "stream/shader.hpp"
 #include "textured_quad/shader.hpp"
 #include "model/shader.hpp"
 #include <condition_variable>
 #include <deque>
@@ -23,6 +24,7 @@ struct ShaderState {
  colored_quad::State coloredQuad;
  textured_quad::State texturedQuad;
  stream::State stream;
  model::State model;
 };
 struct ShaderDrawCommand {
  ShaderType type;
@@ -31,6 +33,7 @@ struct ShaderDrawCommand {
    colored_quad::DrawData coloredQuad;
    textured_quad::DrawData texturedQuad;
    stream::DrawData stream;
    model::DrawData model;
  };
 };
 struct PipelineCreateCommand {
@@ -40,6 +43,7 @@ struct PipelineCreateCommand {
    colored_quad::PipelineConfig coloredQuad;
    textured_quad::PipelineConfig texturedQuad;
    stream::PipelineConfig stream;
    model::PipelineConfig model;
  };
 };
 enum class CommandType {
@@ -80,9 +84,11 @@ std::atomic_uint32_t createdPipelines;
 static ByteBuffer g_verts;
 static ByteBuffer g_uniforms;
 static ByteBuffer g_indices;
 static ByteBuffer g_storage;
 wgpu::Buffer g_vertexBuffer;
 wgpu::Buffer g_uniformBuffer;
 wgpu::Buffer g_indexBuffer;
 wgpu::Buffer g_storageBuffer;
 static ShaderState g_state;
 static PipelineRef g_currentPipeline;
@@ -207,6 +213,16 @@ PipelineRef pipeline_ref(stream::PipelineConfig config) {
                       [=]() { return create_pipeline(g_state.stream, config); });
 }
 // Explicit specialization for model draws: wraps `data` in a command tagged
 // ShaderType::Model (payload stored in the `.model` member) and hands it to
 // the push_draw_command overload that accepts that aggregate.
 template <>
 void push_draw_command(model::DrawData data) {
  push_draw_command({.type = ShaderType::Model, .model = data});
 }
 // Explicit specialization for model pipelines: passes find_pipeline a
 // creation request tagged ShaderType::Model plus a factory lambda. The lambda
 // captures `config` by copy and, when invoked, builds the pipeline from
 // g_state.model via create_pipeline.
 template <>
 PipelineRef pipeline_ref(model::PipelineConfig config) {
  return find_pipeline({.type = ShaderType::Model, .model = config},
                       [=]() { return create_pipeline(g_state.model, config); });
 }
 static void pipeline_worker() {
  bool hasMore = false;
  while (true) {
@@ -242,34 +258,43 @@ void initialize() {
  g_pipelineThread = std::thread(pipeline_worker);
  {
-    const auto uniformDescriptor = wgpu::BufferDescriptor{
+    const wgpu::BufferDescriptor descriptor{
        .label = "Shared Uniform Buffer",
        .usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst,
        .size = 134217728, // 128mb
    };
-    g_uniformBuffer = g_device.CreateBuffer(&uniformDescriptor);
+    g_uniformBuffer = g_device.CreateBuffer(&descriptor);
  }
  {
-    const auto vertexDescriptor = wgpu::BufferDescriptor{
+    const wgpu::BufferDescriptor descriptor{
        .label = "Shared Vertex Buffer",
        .usage = wgpu::BufferUsage::Vertex | wgpu::BufferUsage::CopyDst,
        .size = 16777216, // 16mb
    };
-    g_vertexBuffer = g_device.CreateBuffer(&vertexDescriptor);
+    g_vertexBuffer = g_device.CreateBuffer(&descriptor);
  }
  {
-    const auto vertexDescriptor = wgpu::BufferDescriptor{
+    const wgpu::BufferDescriptor descriptor{
        .label = "Shared Index Buffer",
-        .usage = wgpu::BufferUsage::Vertex | wgpu::BufferUsage::CopyDst,
+        .usage = wgpu::BufferUsage::Index | wgpu::BufferUsage::CopyDst,
        .size = 4194304, // 4mb
    };
-    g_indexBuffer = g_device.CreateBuffer(&vertexDescriptor);
+    g_indexBuffer = g_device.CreateBuffer(&descriptor);
  }
  {
    const wgpu::BufferDescriptor descriptor{
        .label = "Shared Storage Buffer",
        .usage = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst,
        .size = 134217728, // 128mb
    };
    g_storageBuffer = g_device.CreateBuffer(&descriptor);
  }
  g_state.moviePlayer = movie_player::construct_state();
  g_state.coloredQuad = colored_quad::construct_state();
  g_state.texturedQuad = textured_quad::construct_state();
  g_state.stream = stream::construct_state();
  g_state.model = model::construct_state();
 }
 void shutdown() {
@@ -285,6 +310,7 @@ void shutdown() {
  g_vertexBuffer = {};
  g_uniformBuffer = {};
  g_indexBuffer = {};
  g_storageBuffer = {};
  g_state = {};
 }
@@ -303,6 +329,10 @@ void render(const wgpu::RenderPassEncoder& pass) {
      g_queue.WriteBuffer(g_indexBuffer, 0, g_indices.data(), g_indices.size());
      g_indices.clear();
    }
    if (g_storage.size() > 0) {
      g_queue.WriteBuffer(g_storageBuffer, 0, g_storage.data(), g_storage.size());
      g_storage.clear();
    }
  }
  g_currentPipeline = UINT64_MAX;
@@ -336,6 +366,9 @@ void render(const wgpu::RenderPassEncoder& pass) {
      case ShaderType::Stream:
        stream::render(g_state.stream, draw.stream, pass);
        break;
      case ShaderType::Model:
        model::render(g_state.model, draw.model, pass);
        break;
      }
    } break;
    }
@@ -363,9 +396,15 @@ static inline Range push(ByteBuffer& target, const uint8_t* data, size_t length,
    padding = alignment - length % alignment;
  }
  auto begin = target.size();
-  target.append(data, length);
+  if (length == 0) {
-  if (padding > 0) {
+    // TODO shared zero buf?
-    target.append_zeroes(padding);
+    length = alignment;
    target.append_zeroes(alignment);
  } else {
    target.append(data, length);
    if (padding > 0) {
      target.append_zeroes(padding);
    }
  }
  return {begin, begin + length};
 }
@@ -376,6 +415,11 @@ Range push_uniform(const uint8_t* data, size_t length) {
  g_device.GetLimits(&limits);
  return push(g_uniforms, data, length, limits.limits.minUniformBufferOffsetAlignment);
 }
 // Appends `length` bytes from `data` to the pending storage-buffer contents
 // (g_storage), passing the device's minStorageBufferOffsetAlignment limit as
 // the alignment, and returns the range reported by push().
 Range push_storage(const uint8_t* data, size_t length) {
  wgpu::SupportedLimits deviceLimits;
  g_device.GetLimits(&deviceLimits);
  const auto storageAlignment = deviceLimits.limits.minStorageBufferOffsetAlignment;
  return push(g_storage, data, length, storageAlignment);
 }
 BindGroupRef bind_group_ref(const wgpu::BindGroupDescriptor& descriptor) {
  const auto id =
--- a/aurora/lib/gfx/common.hpp
+++ b/aurora/lib/gfx/common.hpp
@@ -147,6 +147,7 @@ namespace aurora::gfx {
 extern wgpu::Buffer g_vertexBuffer;
 extern wgpu::Buffer g_uniformBuffer;
 extern wgpu::Buffer g_indexBuffer;
 extern wgpu::Buffer g_storageBuffer;
 struct TextureRef {
  wgpu::Texture texture;
@@ -178,6 +179,7 @@ enum class ShaderType {
  TexturedQuad,
  MoviePlayer,
  Stream,
  Model,
 };
 void initialize();
@@ -200,6 +202,11 @@ template <typename T>
 static inline Range push_uniform(const T& data) {
  return push_uniform(reinterpret_cast<const uint8_t*>(&data), sizeof(T));
 }
 Range push_storage(const uint8_t* data, size_t length);
 // Convenience overload: pushes the object representation of `data`
 // (sizeof(T) bytes) through the pointer/length overload of push_storage.
 template <typename T>
 static inline Range push_storage(const T& data) {
  const auto* bytes = reinterpret_cast<const uint8_t*>(&data);
  constexpr size_t byteCount = sizeof(T);
  return push_storage(bytes, byteCount);
 }
 template <typename State>
 const State& get_state();
--- a/aurora/lib/gfx/gx.cpp
+++ b/aurora/lib/gfx/gx.cpp
@@ -295,7 +295,7 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
      .targetCount = colorTargets.size(),
      .targets = colorTargets.data(),
  };
-  const auto layouts = build_bind_group_layouts(info);
+  const auto layouts = build_bind_group_layouts(info, config.shaderConfig);
  const std::array bindGroupLayouts{
      layouts.uniformLayout,
      layouts.samplerLayout,
@@ -328,7 +328,7 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
  return g_device.CreateRenderPipeline(&descriptor);
 }
-ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive) noexcept {
+ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive, const BindGroupRanges& ranges) noexcept {
  for (size_t idx = 0; const auto& item : g_tevStages) {
    // Copy until disabled TEV stage (indicating end)
    if (!item) {
@@ -357,7 +357,7 @@ ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primit
  {
    std::lock_guard lk{g_pipelineMutex};
    auto [_, info] = build_shader(config.shaderConfig);
-    info.bindGroups = build_bind_groups(info); // TODO this is hack
+    info.bindGroups = build_bind_groups(info, config.shaderConfig, ranges); // TODO this is hack
    return info;
  }
 }
@@ -398,15 +398,42 @@ Range build_uniform(const ShaderInfo& info) noexcept {
 static std::unordered_map<u32, wgpu::BindGroupLayout> sUniformBindGroupLayouts;
 static std::unordered_map<u32, std::pair<wgpu::BindGroupLayout, wgpu::BindGroupLayout>> sTextureBindGroupLayouts;
-GXBindGroups build_bind_groups(const ShaderInfo& info) noexcept {
+GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
-  const auto layouts = build_bind_group_layouts(info);
+                               const BindGroupRanges& ranges) noexcept {
  const auto layouts = build_bind_group_layouts(info, config);
  u32 textureCount = info.sampledTextures.count();
-  const std::array uniformEntries{wgpu::BindGroupEntry{
+  const std::array uniformEntries{
-      .binding = 0,
+      wgpu::BindGroupEntry{
-      .buffer = g_uniformBuffer,
+          .binding = 0,
-      .size = info.uniformSize,
+          .buffer = g_uniformBuffer,
-  }};
+          .size = info.uniformSize,
      },
      // Vertices
      wgpu::BindGroupEntry{
          .binding = 1,
          .buffer = g_storageBuffer,
          .size = ranges.vtxDataRange.second - ranges.vtxDataRange.first,
      },
      // Normals
      wgpu::BindGroupEntry{
          .binding = 2,
          .buffer = g_storageBuffer,
          .size = ranges.nrmDataRange.second - ranges.nrmDataRange.first,
      },
      // UVs
      wgpu::BindGroupEntry{
          .binding = 3,
          .buffer = g_storageBuffer,
          .size = ranges.tcDataRange.second - ranges.tcDataRange.first,
      },
      // Packed UVs
      wgpu::BindGroupEntry{
          .binding = 4,
          .buffer = g_storageBuffer,
          .size = ranges.packedTcDataRange.second - ranges.packedTcDataRange.first,
      },
  };
  std::array<wgpu::BindGroupEntry, maxTextures> samplerEntries;
  std::array<wgpu::BindGroupEntry, maxTextures> textureEntries;
  for (u32 texIdx = 0, i = 0; texIdx < info.sampledTextures.size(); ++texIdx) {
@@ -432,7 +459,7 @@ GXBindGroups build_bind_groups(const ShaderInfo& info) noexcept {
      .uniformBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
          .label = "GX Uniform Bind Group",
          .layout = layouts.uniformLayout,
-          .entryCount = uniformEntries.size(),
+          .entryCount = static_cast<uint32_t>(config.denormalizedVertexAttributes ? 1 : uniformEntries.size()),
          .entries = uniformEntries.data(),
      }),
      .samplerBindGroup = bind_group_ref(wgpu::BindGroupDescriptor{
@@ -450,28 +477,67 @@ GXBindGroups build_bind_groups(const ShaderInfo& info) noexcept {
  };
 }
-GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info) noexcept {
+GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept {
  GXBindGroupLayouts out;
-  if (sUniformBindGroupLayouts.contains(info.uniformSize)) {
+  u32 uniformSizeKey = info.uniformSize + (config.denormalizedVertexAttributes ? 0 : 1);
-    out.uniformLayout = sUniformBindGroupLayouts[info.uniformSize];
+  if (sUniformBindGroupLayouts.contains(uniformSizeKey)) {
    out.uniformLayout = sUniformBindGroupLayouts[uniformSizeKey];
  } else {
-    const std::array uniformLayoutEntries{wgpu::BindGroupLayoutEntry{
+    const std::array uniformLayoutEntries{
-        .binding = 0,
+        wgpu::BindGroupLayoutEntry{
-        .visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment,
+            .binding = 0,
-        .buffer =
+            .visibility = wgpu::ShaderStage::Vertex | wgpu::ShaderStage::Fragment,
-            wgpu::BufferBindingLayout{
+            .buffer =
-                .type = wgpu::BufferBindingType::Uniform,
+                wgpu::BufferBindingLayout{
-                .hasDynamicOffset = true,
+                    .type = wgpu::BufferBindingType::Uniform,
-                .minBindingSize = info.uniformSize,
+                    .hasDynamicOffset = true,
-            },
+                    .minBindingSize = info.uniformSize,
-    }};
+                },
        },
        wgpu::BindGroupLayoutEntry{
            .binding = 1,
            .visibility = wgpu::ShaderStage::Vertex,
            .buffer =
                {
                    .type = wgpu::BufferBindingType::ReadOnlyStorage,
                    .hasDynamicOffset = true,
                },
        },
        wgpu::BindGroupLayoutEntry{
            .binding = 2,
            .visibility = wgpu::ShaderStage::Vertex,
            .buffer =
                {
                    .type = wgpu::BufferBindingType::ReadOnlyStorage,
                    .hasDynamicOffset = true,
                },
        },
        wgpu::BindGroupLayoutEntry{
            .binding = 3,
            .visibility = wgpu::ShaderStage::Vertex,
            .buffer =
                {
                    .type = wgpu::BufferBindingType::ReadOnlyStorage,
                    .hasDynamicOffset = true,
                },
        },
        wgpu::BindGroupLayoutEntry{
            .binding = 4,
            .visibility = wgpu::ShaderStage::Vertex,
            .buffer =
                {
                    .type = wgpu::BufferBindingType::ReadOnlyStorage,
                    .hasDynamicOffset = true,
                },
        },
    };
    const auto uniformLayoutDescriptor = wgpu::BindGroupLayoutDescriptor{
        .label = "GX Uniform Bind Group Layout",
-        .entryCount = uniformLayoutEntries.size(),
+        .entryCount = static_cast<uint32_t>(config.denormalizedVertexAttributes ? 1 : uniformLayoutEntries.size()),
        .entries = uniformLayoutEntries.data(),
    };
    out.uniformLayout = g_device.CreateBindGroupLayout(&uniformLayoutDescriptor);
-    sUniformBindGroupLayouts.try_emplace(info.uniformSize, out.uniformLayout);
+    sUniformBindGroupLayouts.try_emplace(uniformSizeKey, out.uniformLayout);
  }
  u32 textureCount = info.sampledTextures.count();
--- a/aurora/lib/gfx/gx.hpp
+++ b/aurora/lib/gfx/gx.hpp
@@ -77,8 +77,9 @@ const TextureBind& get_texture(GX::TexMapID id) noexcept;
 struct ShaderConfig {
  std::array<std::optional<STevStage>, maxTevStages> tevStages;
  std::array<GX::ColorSrc, 2> channelMatSrcs;
-  bool alphaDiscard;
+  bool alphaDiscard = false;
-  bool denormalizedVertexAttributes;
+  bool denormalizedVertexAttributes = false;
  bool denormalizedHasNrm = false; // TODO this is a hack
 };
 struct PipelineConfig {
  ShaderConfig shaderConfig;
@@ -111,15 +112,33 @@ struct ShaderInfo {
  bool usesVtxColor : 1 = false;
  bool usesNormal : 1 = false;
 };
-ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive) noexcept;
+struct BindGroupRanges {
  // Ranges handed to build_bind_groups, which uses (second - first) of each
  // one as the size of the corresponding g_storageBuffer binding.
  Range vtxDataRange;      // binding 1 ("Vertices")
  Range nrmDataRange;      // binding 2 ("Normals")
  Range tcDataRange;       // binding 3 ("UVs")
  Range packedTcDataRange; // binding 4 ("Packed UVs")
 };
 ShaderInfo populate_pipeline_config(PipelineConfig& config, GX::Primitive primitive,
                                    const BindGroupRanges& ranges) noexcept;
 wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
                                    ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
                                    zstring_view label) noexcept;
 std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& config) noexcept;
 // Range build_vertex_buffer(const GXShaderInfo& info) noexcept;
 Range build_uniform(const ShaderInfo& info) noexcept;
-GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info) noexcept;
+GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept;
-GXBindGroups build_bind_groups(const ShaderInfo& info) noexcept;
+GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
                               const BindGroupRanges& ranges) noexcept;
 // Per-vertex record of ten 16-bit values. model::create_pipeline uses
 // sizeof(DlVert) as the vertex buffer stride and exposes the record to the
 // shader as Sint16x2 + Sint16x4 + Sint16x4 attributes.
 struct DlVert {
  s16 pos;  // presumably an index into the position array — confirm against readVert
  s16 norm; // presumably an index into the normal array — confirm against readVert
  // colors ignored
  std::array<s16, 7> uvs;
  // pn_mtx_idx ignored
  // tex_mtx_idxs ignored
  s16 _pad; // brings the record to ten s16 values, matching the 2 + 4 + 4 attribute layout
 };
 } // namespace aurora::gfx::gx
 namespace aurora {
@@ -149,6 +168,7 @@ inline XXH64_hash_t xxh3_hash(const gfx::gx::ShaderConfig& input, XXH64_hash_t s
  XXH3_64bits_update(&state, input.channelMatSrcs.data(), input.channelMatSrcs.size() * sizeof(GX::ColorSrc));
  XXH3_64bits_update(&state, &input.alphaDiscard, sizeof(bool));
  XXH3_64bits_update(&state, &input.denormalizedVertexAttributes, sizeof(bool));
  XXH3_64bits_update(&state, &input.denormalizedHasNrm, sizeof(bool));
  return XXH3_64bits_digest(&state);
 }
-} // namespace aurora
+} // namespace aurora
--- a/aurora/lib/gfx/gx_shader.cpp
+++ b/aurora/lib/gfx/gx_shader.cpp
@@ -315,6 +315,10 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
  }
  Log.report(logvisor::Info, FMT_STRING("Shader config (hash {:x}):"), hash);
  ShaderInfo info{
      .uniformSize = 64, // MVP MTX
  };
  {
    for (int i = 0; i < config.tevStages.size(); ++i) {
      const auto& stage = config.tevStages[i];
@@ -354,22 +358,49 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
  }
  std::string uniBufAttrs;
  std::string uniformBindings;
  std::string sampBindings;
  std::string texBindings;
  std::string vtxOutAttrs;
  std::string vtxInAttrs;
  std::string vtxXfrAttrsPre;
  std::string vtxXfrAttrs;
  size_t locIdx = 0;
-  // TODO
+  if (config.denormalizedVertexAttributes) {
-  //  if (config.denormalizedNorm) {
+    vtxInAttrs += "\n    @location(0) in_pos: vec3<f32>";
-  //    vtxOutAttrs += fmt::format(FMT_STRING("\n    @location({}) nrm: vec3<f32>;"), locIdx);
+    vtxOutAttrs += "\n    @builtin(position) pos: vec4<f32>;";
-  //    vtxInAttrs += fmt::format(FMT_STRING("\n    , @location({}) in_nrm: vec3<f32>"), ++locIdx);
+    vtxXfrAttrsPre += "\n    out.pos = ubuf.xf * vec4<f32>(in_pos, 1.0);";
-  //    vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.nrm = in_nrm;"));
+    if (config.denormalizedHasNrm) {
-  //  }
+      vtxOutAttrs += fmt::format(FMT_STRING("\n    @location({}) nrm: vec3<f32>;"), locIdx);
      vtxInAttrs += fmt::format(FMT_STRING("\n    , @location({}) in_nrm: vec3<f32>"), ++locIdx);
      vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.nrm = in_nrm;"));
      info.usesNormal = true;
    }
  } else {
    uniformBindings += R"""(
 struct Vec3Block {
    data: array<vec3<f32>>;
 };
 struct Vec2Block {
    data: array<vec2<f32>>;
 };
@group(0) @binding(1)
 var<storage, read> v_verts: Vec3Block;
@group(0) @binding(2)
 var<storage, read> v_norms: Vec3Block;
@group(0) @binding(3)
 var<storage, read> v_uvs: Vec2Block;
@group(0) @binding(4)
 var<storage, read> v_packed_uvs: Vec2Block;
 )""";
    vtxInAttrs +=
        "\n    @location(0) in_pos_nrm_idx: vec2<i32>"
        "\n    , @location(1) in_uv_0_4_idx: vec4<i32>"
        "\n    , @location(2) in_uv_5_7_idx: vec4<i32>";
    vtxOutAttrs += "\n    @builtin(position) pos: vec4<f32>;";
    vtxXfrAttrsPre += "\n    out.pos = ubuf.xf * vec4<f32>(v_verts.data[in_pos_nrm_idx[0]], 1.0);";
  }
  ShaderInfo info{
      .uniformSize = 64, // MVP MTX
  };
  std::string fragmentFnPre;
  std::string fragmentFn;
  for (size_t idx = 0; const auto& stage : config.tevStages) {
@@ -432,7 +463,7 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
        }
        fragmentFnPre += fmt::format(FMT_STRING("\n    var rast{} = in.clr; // TODO lighting"), i);
      } else {
-        Log.report(logvisor::Fatal, FMT_STRING("Don't know how to do this yet")); // TODO
+        Log.report(logvisor::Fatal, FMT_STRING("SRC_VTX unsupported with normalized vertex attributes"));
      }
      info.usesVtxColor = true;
    } else {
@@ -465,7 +496,16 @@ std::pair<wgpu::ShaderModule, ShaderInfo> build_shader(const ShaderConfig& confi
      vtxInAttrs += fmt::format(FMT_STRING("\n    , @location({}) in_tex{}_uv: vec2<f32>"), locIdx + 1, i);
      vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.tex{0}_uv = in_tex{0}_uv;"), i);
    } else {
-      Log.report(logvisor::Fatal, FMT_STRING("Don't know how to do this yet")); // TODO
+      vtxOutAttrs += fmt::format(FMT_STRING("\n    @location({}) tex{}_uv: vec2<f32>;"), locIdx, i);
      if (i < 4) {
        if (i == 0) {
          vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.tex{}_uv = v_packed_uvs.data[in_uv_0_4[{}]];"), i, i);
        } else {
          vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.tex{}_uv = v_uvs.data[in_uv_0_4[{}]];"), i, i);
        }
      } else {
        vtxXfrAttrs += fmt::format(FMT_STRING("\n    out.tex{}_uv = v_uvs.data[in_uv_5_7[{}]];"), i, i - 4);
      }
    }
    fragmentFnPre += fmt::format(
        FMT_STRING("\n    var sampled{0} = textureSampleBias(tex{0}, tex{0}_samp, in.tex{0}_uv, ubuf.tex{0}_lod);"), i);
@@ -478,18 +518,15 @@ struct Uniform {{
    xf: mat4x4<f32>;{uniBufAttrs}
 }};
@group(0) @binding(0)
-var<uniform> ubuf: Uniform;{sampBindings}{texBindings}
+var<uniform> ubuf: Uniform;{uniformBindings}{sampBindings}{texBindings}
-struct VertexOutput {{
+struct VertexOutput {{{vtxOutAttrs}
    @builtin(position) pos: vec4<f32>;{vtxOutAttrs}
 }};
@stage(vertex)
-fn vs_main(
+fn vs_main({vtxInAttrs}
    @location(0) in_pos: vec3<f32>{vtxInAttrs}
 ) -> VertexOutput {{
-    var out: VertexOutput;
+    var out: VertexOutput;{vtxXfrAttrsPre}{vtxXfrAttrs}
    out.pos = ubuf.xf * vec4<f32>(in_pos, 1.0);{vtxXfrAttrs}
    return out;
 }}
@@ -501,7 +538,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {{
 )"""),
                  "uniBufAttrs"_a = uniBufAttrs, "sampBindings"_a = sampBindings, "texBindings"_a = texBindings,
                  "vtxOutAttrs"_a = vtxOutAttrs, "vtxInAttrs"_a = vtxInAttrs, "vtxXfrAttrs"_a = vtxXfrAttrs,
-                  "fragmentFn"_a = fragmentFn, "fragmentFnPre"_a = fragmentFnPre);
+                  "fragmentFn"_a = fragmentFn, "fragmentFnPre"_a = fragmentFnPre, "vtxXfrAttrsPre"_a = vtxXfrAttrsPre,
                  "uniformBindings"_a = uniformBindings);
  Log.report(logvisor::Info, FMT_STRING("Generated shader: {}"), shaderSource);
  wgpu::ShaderModuleWGSLDescriptor wgslDescriptor{};
--- a/aurora/lib/gfx/model/shader.cpp
+++ b/aurora/lib/gfx/model/shader.cpp
@@ -59,16 +59,6 @@ void set_normal_buffer(const std::vector<zeus::CVector3f>& norm) noexcept { nrmD
 void set_tex0_tc_buffer(const std::vector<zeus::CVector2f>& tcs) noexcept { tex0TcData = &tcs; }
 void set_tc_buffer(const std::vector<zeus::CVector2f>& tcs) noexcept { tcData = &tcs; }
 struct DlVert {
  s16 pos;
  s16 norm;
  // colors ignored
  std::array<s16, 7> uvs;
  // pn_mtx_idx ignored
  // tex_mtx_idxs ignored
  s16 _pad;
 };
 enum class VertexFormat : u8 {
  F32F32,
  S16F32,
@@ -77,8 +67,8 @@ enum class VertexFormat : u8 {
 static VtxDescFlags sVtxDescFlags;
 void set_vtx_desc_compressed(u32 vtxDesc) noexcept { sVtxDescFlags = vtxDesc; }
-static inline std::pair<DlVert, size_t> readVert(const u8* data) noexcept {
+static inline std::pair<gx::DlVert, size_t> readVert(const u8* data) noexcept {
-  DlVert out{};
+  gx::DlVert out{};
  size_t offset = 0;
  const auto read8 = [data, &offset](VtxDescAttrType type) -> s8 {
    if (type == VtxDescAttrType::Direct) {
@@ -122,7 +112,7 @@ static inline std::pair<DlVert, size_t> readVert(const u8* data) noexcept {
 void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
  //  Log.report(logvisor::Info, FMT_STRING("DL size {}"), dlSize);
-  std::vector<DlVert> verts;
+  std::vector<gx::DlVert> verts;
  std::vector<u32> indices;
  size_t offset = 0;
@@ -176,5 +166,70 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
  }
  //  Log.report(logvisor::Info, FMT_STRING("Read {} verts, {} indices"), verts.size(), indices.size());
  const auto vertRange = push_verts(ArrayRef{verts});
  const auto idxRange = push_indices(ArrayRef{indices});
  const auto sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16);
  const auto sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16);
  const auto sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 16);
  Range sPackedTcRange;
  if (tcData == tex0TcData) {
    sPackedTcRange = sTcRange;
  } else {
    sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 16);
  }
  model::PipelineConfig config{};
  const gx::BindGroupRanges ranges{
      .vtxDataRange = sVtxRange,
      .nrmDataRange = sNrmRange,
      .tcDataRange = sTcRange,
      .packedTcDataRange = sPackedTcRange,
  };
  const auto info = populate_pipeline_config(config, GX::TRIANGLES, ranges);
  const auto pipeline = pipeline_ref(config);
  push_draw_command(model::DrawData{
      .pipeline = pipeline,
      .vertRange = vertRange,
      .idxRange = idxRange,
      .sVtxRange = sVtxRange,
      .sNrmRange = sNrmRange,
      .sTcRange = sTcRange,
      .sPackedTcRange = sPackedTcRange,
      .uniformRange = build_uniform(info),
      .indexCount = static_cast<uint32_t>(indices.size()),
      .bindGroups = info.bindGroups,
  });
 }
 // Returns a value-initialized State; nothing is created up front.
 State construct_state() {
  return State{};
 }
 // Builds the render pipeline used for model draws.
 //
 // `state` is not consulted (hence [[maybe_unused]] on it rather than on
 // `config`, which is used throughout). The shader module and its ShaderInfo
 // come from build_shader(config.shaderConfig). The single vertex buffer holds
 // one gx::DlVert per vertex, exposed as Sint16x2 + Sint16x4 + Sint16x4.
 wgpu::RenderPipeline create_pipeline([[maybe_unused]] const State& state, PipelineConfig config) {
  const auto [shader, info] = build_shader(config.shaderConfig);
  const auto attributes = gpu::utils::make_vertex_attributes(
      std::array{wgpu::VertexFormat::Sint16x2, wgpu::VertexFormat::Sint16x4, wgpu::VertexFormat::Sint16x4});
  const std::array vertexBuffers{gpu::utils::make_vertex_buffer_layout(sizeof(gx::DlVert), attributes)};
  return build_pipeline(config, info, vertexBuffers, shader, "Model Pipeline");
 }
 // Records one model draw into `pass`.
 //
 // Returns without recording anything when bind_pipeline() reports failure.
 // Bind group 0 is bound with five dynamic offsets: the uniform range followed
 // by the vertex, normal, UV and packed-UV storage ranges. Bind groups 1 and 2
 // (samplers / textures) are bound only when both references are set.
 void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
  if (!bind_pipeline(data.pipeline, pass)) {
    return;
  }
  const std::array offsets{
      data.uniformRange.first, data.sVtxRange.first,      data.sNrmRange.first,
      data.sTcRange.first,     data.sPackedTcRange.first,
  };
  pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
  if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
    pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
    pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
  }
  // SetVertexBuffer/SetIndexBuffer take (offset, size), whereas push() hands
  // back {begin, end}; convert instead of passing the end offset as a size.
  // NOTE(review): assumes push_verts/push_indices return the same {begin, end}
  // ranges as push() — confirm.
  const auto vertSize = data.vertRange.second - data.vertRange.first;
  const auto idxSize = data.idxRange.second - data.idxRange.first;
  pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.first, vertSize);
  pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint32, data.idxRange.first, idxSize);
  pass.DrawIndexed(data.indexCount);
 }
 } // namespace aurora::gfx::model
--- a/aurora/lib/gfx/model/shader.hpp
+++ b/aurora/lib/gfx/model/shader.hpp
@@ -9,16 +9,18 @@ namespace aurora::gfx::model {
 struct DrawData {
  PipelineRef pipeline;
  Range vertRange;
  Range idxRange;
  Range sVtxRange;
  Range sNrmRange;
  Range sTcRange;
  Range sPackedTcRange;
  Range uniformRange;
-  uint32_t vertexCount;
+  uint32_t indexCount;
-  uint32_t uniformSize;
+  gx::GXBindGroups bindGroups;
  BindGroupRef samplerBindGroup;
  BindGroupRef textureBindGroup;
 };
 struct PipelineConfig : gx::PipelineConfig {
-  ShaderRef shader;
+
  uint32_t uniformSize;
 };
 struct CachedBindGroup {
@@ -28,10 +30,6 @@ struct CachedBindGroup {
  : layout(std::move(layout)), bindGroup(std::move(group)) {}
 };
 struct State {
  wgpu::BindGroupLayout samplerLayout;
  wgpu::BindGroupLayout textureLayout;
  mutable std::unordered_map<uint32_t, CachedBindGroup> uniform;
  mutable std::unordered_map<uint64_t, wgpu::Sampler> sampler;
 };
 State construct_state();
--- a/aurora/lib/gfx/stream.cpp
+++ b/aurora/lib/gfx/stream.cpp
@@ -59,7 +59,8 @@ void stream_end() noexcept {
  stream::PipelineConfig config{};
  config.shaderConfig.denormalizedVertexAttributes = true;
-  const auto info = populate_pipeline_config(config, sStreamState->primitive);
+  config.shaderConfig.denormalizedHasNrm = sStreamState->flags.IsSet(metaforce::EStreamFlagBits::fHasNormal);
  const auto info = populate_pipeline_config(config, sStreamState->primitive, {});
  const auto pipeline = pipeline_ref(config);
  push_draw_command(stream::DrawData{
--- a/extern/xxhash/xxhash_impl.c
+++ b/extern/xxhash/xxhash_impl.c
@@ -1,4 +1,4 @@
-#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
+//#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
-#include "xxh_x86dispatch.c"
+//#include "xxh_x86dispatch.c"
-#endif
+//#endif
 #include "xxhash.c"
--- a/extern/xxhash/xxhash_impl.h
+++ b/extern/xxhash/xxhash_impl.h
@@ -1,5 +1,5 @@
-#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
+//#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
-#include "xxh_x86dispatch.h"
+//#include "xxh_x86dispatch.h"
-#else
+//#else
 #include "xxhash.h"
-#endif
+//#endif