Split out display_list/shader_info

Luke Street 2025-04-18 21:52:38 -06:00
parent 357ecba0ae
commit c4d91f18a1
16 changed files with 716 additions and 673 deletions

View File

@@ -4,6 +4,8 @@ add_library(aurora_gx STATIC
lib/gfx/gx.cpp
lib/gfx/gx_shader.cpp
lib/gfx/texture_convert.cpp
lib/gfx/display_list.cpp
lib/gfx/shader_info.cpp
lib/gfx/model/shader.cpp
lib/dolphin/gx/GXBump.cpp
lib/dolphin/gx/GXCull.cpp

View File

@@ -751,6 +751,13 @@ typedef enum {
GX_MAX_TLUTFMT,
} GXTlutFmt;
typedef enum _GXTexCacheSize {
GX_TEXCACHE_32K,
GX_TEXCACHE_128K,
GX_TEXCACHE_512K,
GX_TEXCACHE_NONE
} GXTexCacheSize;
#ifdef __cplusplus
}
#endif

View File

@@ -93,6 +93,14 @@ typedef struct {
s16 a;
} GXColorS10;
typedef struct _GXTexRegion {
u32 dummy[4];
} GXTexRegion;
typedef struct _GXTlutRegion {
u32 dummy[4];
} GXTlutRegion;
#ifdef __cplusplus
}
#endif

View File

@@ -8,6 +8,8 @@
extern "C" {
#endif
typedef GXTexRegion* (*GXTexRegionCallback)(const GXTexObj* obj, GXTexMapID id);
void GXInitTexObj(GXTexObj* obj, const void* data, u16 width, u16 height, u32 format, GXTexWrapMode wrapS,
GXTexWrapMode wrapT, GXBool mipmap);
void GXInitTexObjCI(GXTexObj* obj, const void* data, u16 width, u16 height, GXCITexFmt format, GXTexWrapMode wrapS,
@@ -21,6 +23,11 @@ void GXInvalidateTexAll();
void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t);
void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries);
void GXLoadTlut(const GXTlutObj* obj, GXTlut idx);
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts);
void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even,
u32 tmem_odd, GXTexCacheSize size_odd);
GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback);
void GXInvalidateTexRegion(const GXTexRegion* region);
#ifdef __cplusplus
}
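For reference, a minimal sketch of how client code typically drives this region API, modeled on common GX SDK usage; the region count and TMEM addresses below are illustrative, not taken from this commit:

static GXTexRegion sTexRegions[8];

// Hand back one fixed cache region per texmap (illustrative policy).
static GXTexRegion* RegionCallback(const GXTexObj* obj, GXTexMapID id) {
  return &sTexRegions[id % 8];
}

static void SetupTexRegions() {
  for (u32 i = 0; i < 8; ++i) {
    // Even/odd TMEM banks with a 32K cache each; addresses are illustrative.
    GXInitTexCacheRegion(&sTexRegions[i], GX_FALSE, 0x00000 + i * 0x8000, GX_TEXCACHE_32K,
                         0x80000 + i * 0x8000, GX_TEXCACHE_32K);
  }
  GXSetTexRegionCallback(RegionCallback);
}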

View File

@@ -77,7 +77,9 @@ void GXSetTevOrder(GXTevStageID id, GXTexCoordID tcid, GXTexMapID tmid, GXChanne
update_gx_state(stage.channelId, cid);
}
// TODO GXSetZTexture
void GXSetZTexture(GXZTexOp op, GXTexFmt fmt, u32 bias) {
// TODO
}
void GXSetNumTevStages(u8 num) { update_gx_state(g_gxState.numTevStages, num); }

View File

@@ -233,7 +233,9 @@ void GXInvalidateTexAll() {
// TODO GXSetTexRegionCallback
// TODO GXSetTlutRegionCallback
// TODO GXLoadTexObjPreLoaded
// TODO GXSetTexCoordScaleManually
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) {
// TODO
}
// TODO GXSetTexCoordCylWrap
// TODO GXSetTexCoordBias
}

View File

@@ -3,6 +3,7 @@
#include "aurora/math.hpp"
#include "../../gfx/model/shader.hpp"
#include "../../gfx/gx_fmt.hpp"
#include "../../gfx/shader_info.hpp"
#include <cstring>
#include <optional>

lib/gfx/display_list.cpp (new file, 288 lines)
View File

@@ -0,0 +1,288 @@
#include "display_list.hpp"
#include "gx.hpp"
#include "gx_fmt.hpp"
namespace aurora::gfx::gx {
static Module Log("aurora::gfx::model");
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), fmt(fmt) {}
};
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount) {
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
} attrArrays[GX_VA_MAX_ATTR] = {};
u32 vtxSize = 0;
u32 outVtxSize = 0;
// Calculate attribute offsets and vertex size
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_F32;
vtxSize += 12;
outVtxSize += 12;
break;
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_S16;
vtxSize += 6;
outVtxSize += 12;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_F32;
vtxSize += 8;
outVtxSize += 8;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_S16;
vtxSize += 4;
outVtxSize += 8;
break;
case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8):
case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8):
attrArrays[attr].count = 4;
attrArrays[attr].type = GX_RGBA8;
vtxSize += 4;
outVtxSize += 16;
break;
}
#undef COMBINE
break;
case GX_INDEX8:
++vtxSize;
outVtxSize += 2;
break;
case GX_INDEX16:
vtxSize += 2;
outVtxSize += 2;
break;
}
}
// Align to 4
int rem = outVtxSize % 4;
int padding = 0;
if (rem != 0) {
padding = 4 - rem;
outVtxSize += padding;
}
// Build vertex buffer
buf.reserve_extra(vtxCount * outVtxSize);
std::array<f32, 4> out{};
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
continue;
}
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
u8 count = attrArrays[attr].count;
switch (attrArrays[attr].type) {
case GX_U8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const u8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_S8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const s8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(u16);
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(s16);
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
break;
case GX_RGBA8:
out[0] = static_cast<f32>(ptr[0]) / 255.f;
out[1] = static_cast<f32>(ptr[1]) / 255.f;
out[2] = static_cast<f32>(ptr[2]) / 255.f;
out[3] = static_cast<f32>(ptr[3]) / 255.f;
buf.append(out.data(), sizeof(f32) * 4);
ptr += sizeof(u32);
break;
}
}
if (padding > 0) {
buf.append_zeroes(padding);
}
}
return vtxSize;
}
static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) {
u16 numIndices = 0;
if (prim == GX_TRIANGLES) {
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
} else
UNLIKELY FATAL("unsupported primitive type {}", static_cast<u32>(prim));
return numIndices;
}
auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult {
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
u32 numIndices = 0;
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
numIndices = cache.idxBuf.size() / 2;
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
u16 vtxStart = 0;
while (pos < dlSize) {
u8 cmd = data[pos++];
u8 opcode = cmd & GX_OPCODE_MASK;
switch (opcode) {
DEFAULT_FATAL("unimplemented opcode: {}", opcode);
case GX_NOP:
continue;
case GX_LOAD_BP_REG:
// TODO?
pos += 4;
break;
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
vtxStart += vtxCount;
break;
}
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
case GX_DRAW_POINTS:
FATAL("unimplemented prim type: {}", opcode);
break;
}
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), fmt);
}
return {
.vertRange = vertRange,
.idxRange = idxRange,
.numIndices = numIndices,
.fmt = fmt,
};
}
} // namespace aurora::gfx::gx
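To make the triangulation in prepare_idx_buffer concrete, here is a standalone sketch of the fan path (the strip path is analogous, flipping winding on odd vertices); illustrative code, not part of the commit:

#include <cstdint>
#include <vector>

// Every vertex after the first three emits one triangle anchored at the fan
// center, matching the GX_TRIANGLEFAN branch above.
std::vector<uint16_t> expand_fan(uint16_t vtxStart, uint16_t vtxCount) {
  std::vector<uint16_t> out;
  for (uint16_t v = 0; v < vtxCount; ++v) {
    const uint16_t idx = vtxStart + v;
    if (v < 3) {
      out.push_back(idx); // first triangle passes through unchanged
    } else {
      out.insert(out.end(), {vtxStart, static_cast<uint16_t>(idx - 1), idx});
    }
  }
  return out; // vtxStart=0, vtxCount=5 -> 0 1 2, 0 2 3, 0 3 4
}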

lib/gfx/display_list.hpp (new file, 14 lines)
View File

@@ -0,0 +1,14 @@
#pragma once
#include "gx.hpp"
namespace aurora::gfx::gx {
struct DisplayListResult {
Range vertRange;
Range idxRange;
u32 numIndices;
GXVtxFmt fmt;
};
auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult;
} // namespace aurora::gfx::gx
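A minimal usage sketch, mirroring the queue_surface call site further below; dlStart, dlSize, and the surrounding pipeline setup are assumed from that context:

// Convert a raw GX display list into GPU-ready vertex/index ranges.
const auto result = aurora::gfx::gx::process_display_list(dlStart, dlSize);
// result.fmt selects the vertex layout; result.numIndices drives the draw call.
populate_pipeline_config(config, GX_TRIANGLES, result.fmt);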

View File

@@ -316,104 +316,6 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
};
}
Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize);
{
buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(g_gxState.proj);
}
for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (!info.loadsTevReg.test(i)) {
continue;
}
buf.append(g_gxState.colorRegs[i]);
}
if (info.lightingEnabled) {
// Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(g_gxState.lights);
// Light state for all channels
for (int i = 0; i < 4; ++i) {
buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
}
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (!info.sampledColorChannels.test(i)) {
continue;
}
const auto& ccc = g_gxState.colorChannelConfig[i];
const auto& ccs = g_gxState.colorChannelState[i];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(ccs.ambColor);
}
if (ccc.matSrc == GX_SRC_REG) {
buf.append(ccs.matColor);
}
const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0];
const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(ccsa.ambColor);
}
if (ccca.matSrc == GX_SRC_REG) {
buf.append(ccsa.matColor);
}
}
for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (!info.sampledKColors.test(i)) {
continue;
}
buf.append(g_gxState.kcolors[i]);
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) {
continue;
}
switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4:
if (std::holds_alternative<Mat2x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat2x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break;
case GX_TG_MTX3x4:
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break;
}
}
for (int i = 0; i < info.usesPTTexMtx.size(); ++i) {
if (!info.usesPTTexMtx.test(i)) {
continue;
}
buf.append(g_gxState.ptTexMtxs[i]);
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) {
continue;
}
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i);
buf.append(tex.texObj.lodBias);
}
g_gxState.stateDirty = false;
return range;
}
static absl::flat_hash_map<u32, wgpu::BindGroupLayout> sUniformBindGroupLayouts;
static absl::flat_hash_map<u32, std::pair<wgpu::BindGroupLayout, wgpu::BindGroupLayout>> sTextureBindGroupLayouts;

View File

@@ -429,10 +429,7 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
const char* label) noexcept;
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept;
wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& info) noexcept;
// Range build_vertex_buffer(const GXShaderInfo& info) noexcept;
Range build_uniform(const ShaderInfo& info) noexcept;
GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept;
GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
const BindGroupRanges& ranges) noexcept;

View File

@@ -38,102 +38,6 @@ static inline std::string_view chan_comp(GXTevColorChan chan) noexcept {
}
}
static void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CC_CPREV:
case GX_CC_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CC_C0:
case GX_CC_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CC_C1:
case GX_CC_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CC_C2:
case GX_CC_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CC_TEXC:
case GX_CC_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CC_RASC:
case GX_CC_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CC_KONST:
switch (stage.kcSel) {
case GX_TEV_KCSEL_K0:
case GX_TEV_KCSEL_K0_R:
case GX_TEV_KCSEL_K0_G:
case GX_TEV_KCSEL_K0_B:
case GX_TEV_KCSEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KCSEL_K1:
case GX_TEV_KCSEL_K1_R:
case GX_TEV_KCSEL_K1_G:
case GX_TEV_KCSEL_K1_B:
case GX_TEV_KCSEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KCSEL_K2:
case GX_TEV_KCSEL_K2_R:
case GX_TEV_KCSEL_K2_G:
case GX_TEV_KCSEL_K2_B:
case GX_TEV_KCSEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KCSEL_K3:
case GX_TEV_KCSEL_K3_R:
case GX_TEV_KCSEL_K3_G:
case GX_TEV_KCSEL_K3_B:
case GX_TEV_KCSEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
static bool formatHasAlpha(u32 format) {
switch (format) {
case GX_TF_IA4:
case GX_TF_IA8:
case GX_TF_RGB5A3:
case GX_TF_RGBA8:
case GX_TF_CMPR:
case GX_CTF_RA4:
case GX_CTF_RA8:
case GX_CTF_YUVA8:
case GX_CTF_A8:
case GX_TF_RGBA8_PC:
return true;
default:
return false;
}
}
static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const ShaderConfig& config,
const TevStage& stage) {
switch (arg) {
@@ -260,74 +164,6 @@ static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const Shade
}
}
static void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CA_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CA_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CA_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CA_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CA_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CA_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CA_KONST:
switch (stage.kaSel) {
case GX_TEV_KASEL_K0_R:
case GX_TEV_KASEL_K0_G:
case GX_TEV_KASEL_K0_B:
case GX_TEV_KASEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KASEL_K1_R:
case GX_TEV_KASEL_K1_G:
case GX_TEV_KASEL_K1_B:
case GX_TEV_KASEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KASEL_K2_R:
case GX_TEV_KASEL_K2_G:
case GX_TEV_KASEL_K2_B:
case GX_TEV_KASEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KASEL_K3_R:
case GX_TEV_KASEL_K3_G:
case GX_TEV_KASEL_K3_B:
case GX_TEV_KASEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const ShaderConfig& config,
const TevStage& stage) {
switch (arg) {
@@ -549,109 +385,6 @@ constexpr std::array<std::string_view, MaxVtxAttr> VtxAttributeNames{
"pos_mtx_array", "nrm_mtx_array", "tex_mtx_array", "light_array", "nbt",
};
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept {
// const auto hash = xxh3_hash(config);
// const auto it = g_gxCachedShaders.find(hash);
// if (it != g_gxCachedShaders.end()) {
// return it->second.second;
// }
ShaderInfo info{
.uniformSize = sizeof(PnMtx) + sizeof(Mat4x4<float>), // pos_mtx, nrm_mtx, proj
};
for (int i = 0; i < config.tevStageCount; ++i) {
const auto& stage = config.tevStages[i];
// Color pass
color_arg_reg_info(stage.colorPass.a, stage, info);
color_arg_reg_info(stage.colorPass.b, stage, info);
color_arg_reg_info(stage.colorPass.c, stage, info);
color_arg_reg_info(stage.colorPass.d, stage, info);
info.writesTevReg.set(stage.colorOp.outReg);
// Alpha pass
alpha_arg_reg_info(stage.alphaPass.a, stage, info);
alpha_arg_reg_info(stage.alphaPass.b, stage, info);
alpha_arg_reg_info(stage.alphaPass.c, stage, info);
alpha_arg_reg_info(stage.alphaPass.d, stage, info);
if (!info.writesTevReg.test(stage.alphaOp.outReg)) {
// If we're writing alpha to a register that's not been
// written to in the shader, load from uniform buffer
info.loadsTevReg.set(stage.alphaOp.outReg);
info.writesTevReg.set(stage.alphaOp.outReg);
}
}
info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cc.lightingEnabled || cca.lightingEnabled) {
info.lightingEnabled = true;
}
}
}
if (info.lightingEnabled) {
// Lights + light state for all channels
info.uniformSize += 16 + sizeof(Light) * GX::MaxLights;
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cc.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cca.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
}
}
info.uniformSize += info.sampledKColors.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
if (!info.sampledTexCoords.test(i)) {
continue;
}
const auto& tcg = config.tcgs[i];
if (tcg.mtx != GX_IDENTITY) {
u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3;
info.usesTexMtx.set(texMtxIdx);
info.texMtxTypes[texMtxIdx] = tcg.type;
}
if (tcg.postMtx != GX_PTIDENTITY) {
u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (info.usesTexMtx.test(i)) {
switch (info.texMtxTypes[i]) {
case GX_TG_MTX2x4:
info.uniformSize += sizeof(Mat2x4<float>);
break;
case GX_TG_MTX3x4:
info.uniformSize += sizeof(Mat3x4<float>);
break;
default:
break;
}
}
}
info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4<float>);
if (config.fogType != GX_FOG_NONE) {
info.usesFog = true;
info.uniformSize += sizeof(Fog);
}
info.uniformSize += info.sampledTextures.count() * sizeof(u32);
info.uniformSize = align_uniform(info.uniformSize);
return info;
}
struct StorageLoadResult {
std::string attrLoad;
std::string_view arrType;
@@ -947,6 +680,8 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in
vtxInAttrs += fmt::format("@location({}) in_clr{}: vec4f", locIdx++, attr - GX_VA_CLR0);
} else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
vtxInAttrs += fmt::format("@location({}) in_tex{}_uv: vec2f", locIdx++, attr - GX_VA_TEX0);
} else {
FATAL("unhandled vtx attr {}", underlying(attr));
}
}
vtxXfrAttrsPre += fmt::format(
@@ -1416,7 +1151,7 @@ fn fetch_i16_3(p: ptr<storage, array<i32>>, idx: u32, frac: u32) -> vec3<f32> {{
var o0 = select(extractBits(v0, 0, 16), extractBits(v0, 16, 16), r);
var o1 = select(extractBits(v0, 16, 16), extractBits(v1, 0, 16), r);
var o2 = select(extractBits(v1, 0, 16), extractBits(v1, 16, 16), r);
return vec3<f32>(f32(o0), f32(o1), f32(o2)) / f32(1 << frac);
return vec3<f32>(f32(o0), f32(o1), f32(o2)) / f32(1u << frac);
}}
{10}
struct Uniform {{

View File

@@ -2,298 +2,20 @@
#include "../../webgpu/gpu.hpp"
#include "../gx_fmt.hpp"
#include "../display_list.hpp"
#include "../shader_info.hpp"
#include <absl/container/flat_hash_map.h>
namespace aurora::gfx::model {
static Module Log("aurora::gfx::model");
using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>;
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
IndexedAttrs indexedAttrs;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {}
};
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount,
IndexedAttrs& indexedAttrs) {
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
} attrArrays[GX_VA_MAX_ATTR] = {};
u32 vtxSize = 0;
u32 outVtxSize = 0;
// Calculate attribute offsets and vertex size
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_F32;
vtxSize += 12;
outVtxSize += 12;
break;
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_S16;
vtxSize += 6;
outVtxSize += 12;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_F32;
vtxSize += 8;
outVtxSize += 8;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_S16;
vtxSize += 4;
outVtxSize += 8;
break;
case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8):
case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8):
attrArrays[attr].count = 4;
attrArrays[attr].type = GX_RGBA8;
vtxSize += 4;
outVtxSize += 16;
break;
}
#undef COMBINE
break;
case GX_INDEX8:
++vtxSize;
outVtxSize += 2;
indexedAttrs[attr] = true;
break;
case GX_INDEX16:
vtxSize += 2;
outVtxSize += 2;
indexedAttrs[attr] = true;
break;
}
}
// Align to 4
int rem = outVtxSize % 4;
int padding = 0;
if (rem != 0) {
padding = 4 - rem;
outVtxSize += padding;
}
// Build vertex buffer
buf.reserve_extra(vtxCount * outVtxSize);
std::array<f32, 4> out{};
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
continue;
}
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
u8 count = attrArrays[attr].count;
switch (attrArrays[attr].type) {
case GX_U8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const u8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_S8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const s8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(u16);
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(s16);
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
break;
case GX_RGBA8:
out[0] = static_cast<f32>(ptr[0]) / 255.f;
out[1] = static_cast<f32>(ptr[1]) / 255.f;
out[2] = static_cast<f32>(ptr[2]) / 255.f;
out[3] = static_cast<f32>(ptr[3]) / 255.f;
buf.append(out.data(), sizeof(f32) * 4);
ptr += sizeof(u32);
break;
}
}
if (padding > 0) {
buf.append_zeroes(padding);
}
}
return vtxSize;
}
static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) {
u16 numIndices = 0;
if (prim == GX_TRIANGLES) {
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
} else
UNLIKELY FATAL("unsupported primitive type {}", static_cast<u32>(prim));
return numIndices;
}
void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
u32 numIndices = 0;
IndexedAttrs indexedAttrs{};
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
numIndices = cache.idxBuf.size() / 2;
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
indexedAttrs = cache.indexedAttrs;
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
u16 vtxStart = 0;
while (pos < dlSize) {
u8 cmd = data[pos++];
u8 opcode = cmd & GX_OPCODE_MASK;
switch (opcode) {
DEFAULT_FATAL("unimplemented opcode: {}", opcode);
case GX_NOP:
continue;
case GX_LOAD_BP_REG:
// TODO?
pos += 4;
break;
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
vtxStart += vtxCount;
break;
}
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
case GX_DRAW_POINTS:
FATAL("unimplemented prim type: {}", opcode);
break;
}
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt);
}
const auto result = aurora::gfx::gx::process_display_list(dlStart, dlSize);
gx::BindGroupRanges ranges{};
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (!indexedAttrs[i]) {
if (gx::g_gxState.vtxDesc[i] != GX_INDEX8 && gx::g_gxState.vtxDesc[i] != GX_INDEX16) {
continue;
}
auto& array = gx::g_gxState.arrays[i];
@@ -309,18 +31,18 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
}
model::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES, fmt);
populate_pipeline_config(config, GX_TRIANGLES, result.fmt);
const auto info = gx::build_shader_info(config.shaderConfig);
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config);
push_draw_command(model::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.idxRange = idxRange,
.vertRange = result.vertRange,
.idxRange = result.idxRange,
.dataRanges = ranges,
.uniformRange = build_uniform(info),
.indexCount = numIndices,
.indexCount = result.numIndices,
.bindGroups = bindGroups,
.dstAlpha = gx::g_gxState.dstAlpha,
});

lib/gfx/shader_info.cpp (new file, 345 lines)
View File

@@ -0,0 +1,345 @@
#include "shader_info.hpp"
namespace aurora::gfx::gx {
namespace {
Module Log("aurora::gfx::gx");
void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CC_CPREV:
case GX_CC_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CC_C0:
case GX_CC_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CC_C1:
case GX_CC_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CC_C2:
case GX_CC_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CC_TEXC:
case GX_CC_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CC_RASC:
case GX_CC_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CC_KONST:
switch (stage.kcSel) {
case GX_TEV_KCSEL_K0:
case GX_TEV_KCSEL_K0_R:
case GX_TEV_KCSEL_K0_G:
case GX_TEV_KCSEL_K0_B:
case GX_TEV_KCSEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KCSEL_K1:
case GX_TEV_KCSEL_K1_R:
case GX_TEV_KCSEL_K1_G:
case GX_TEV_KCSEL_K1_B:
case GX_TEV_KCSEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KCSEL_K2:
case GX_TEV_KCSEL_K2_R:
case GX_TEV_KCSEL_K2_G:
case GX_TEV_KCSEL_K2_B:
case GX_TEV_KCSEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KCSEL_K3:
case GX_TEV_KCSEL_K3_R:
case GX_TEV_KCSEL_K3_G:
case GX_TEV_KCSEL_K3_B:
case GX_TEV_KCSEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CA_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CA_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CA_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CA_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CA_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CA_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CA_KONST:
switch (stage.kaSel) {
case GX_TEV_KASEL_K0_R:
case GX_TEV_KASEL_K0_G:
case GX_TEV_KASEL_K0_B:
case GX_TEV_KASEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KASEL_K1_R:
case GX_TEV_KASEL_K1_G:
case GX_TEV_KASEL_K1_B:
case GX_TEV_KASEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KASEL_K2_R:
case GX_TEV_KASEL_K2_G:
case GX_TEV_KASEL_K2_B:
case GX_TEV_KASEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KASEL_K3_R:
case GX_TEV_KASEL_K3_G:
case GX_TEV_KASEL_K3_B:
case GX_TEV_KASEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
} // namespace
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept {
ShaderInfo info{
.uniformSize = sizeof(PnMtx) + sizeof(Mat4x4<float>), // pos_mtx, nrm_mtx, proj
};
for (int i = 0; i < config.tevStageCount; ++i) {
const auto& stage = config.tevStages[i];
// Color pass
color_arg_reg_info(stage.colorPass.a, stage, info);
color_arg_reg_info(stage.colorPass.b, stage, info);
color_arg_reg_info(stage.colorPass.c, stage, info);
color_arg_reg_info(stage.colorPass.d, stage, info);
info.writesTevReg.set(stage.colorOp.outReg);
// Alpha pass
alpha_arg_reg_info(stage.alphaPass.a, stage, info);
alpha_arg_reg_info(stage.alphaPass.b, stage, info);
alpha_arg_reg_info(stage.alphaPass.c, stage, info);
alpha_arg_reg_info(stage.alphaPass.d, stage, info);
if (!info.writesTevReg.test(stage.alphaOp.outReg)) {
// If we're writing alpha to a register that's not been
// written to in the shader, load from uniform buffer
info.loadsTevReg.set(stage.alphaOp.outReg);
info.writesTevReg.set(stage.alphaOp.outReg);
}
}
info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cc.lightingEnabled || cca.lightingEnabled) {
info.lightingEnabled = true;
}
}
}
if (info.lightingEnabled) {
// Lights + light state for all channels
info.uniformSize += 16 + sizeof(Light) * GX::MaxLights;
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cc.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cca.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
}
}
info.uniformSize += info.sampledKColors.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
if (!info.sampledTexCoords.test(i)) {
continue;
}
const auto& tcg = config.tcgs[i];
if (tcg.mtx != GX_IDENTITY) {
u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3;
info.usesTexMtx.set(texMtxIdx);
info.texMtxTypes[texMtxIdx] = tcg.type;
}
if (tcg.postMtx != GX_PTIDENTITY) {
u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (info.usesTexMtx.test(i)) {
switch (info.texMtxTypes[i]) {
case GX_TG_MTX2x4:
info.uniformSize += sizeof(Mat2x4<float>);
break;
case GX_TG_MTX3x4:
info.uniformSize += sizeof(Mat3x4<float>);
break;
default:
break;
}
}
}
info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4<float>);
if (config.fogType != GX_FOG_NONE) {
info.usesFog = true;
info.uniformSize += sizeof(Fog);
}
info.uniformSize += info.sampledTextures.count() * sizeof(u32);
info.uniformSize = align_uniform(info.uniformSize);
return info;
}
Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize);
{
buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(g_gxState.proj);
}
for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (info.loadsTevReg.test(i)) {
buf.append(g_gxState.colorRegs[i]);
}
}
if (info.lightingEnabled) {
// Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(g_gxState.lights);
// Light state for all channels
for (int i = 0; i < 4; ++i) {
buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
}
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (!info.sampledColorChannels.test(i)) {
continue;
}
const auto& ccc = g_gxState.colorChannelConfig[i];
const auto& ccs = g_gxState.colorChannelState[i];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(ccs.ambColor);
}
if (ccc.matSrc == GX_SRC_REG) {
buf.append(ccs.matColor);
}
const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0];
const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(ccsa.ambColor);
}
if (ccca.matSrc == GX_SRC_REG) {
buf.append(ccsa.matColor);
}
}
for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (info.sampledKColors.test(i)) {
buf.append(g_gxState.kcolors[i]);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) {
continue;
}
switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4:
if (std::holds_alternative<Mat2x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat2x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break;
case GX_TG_MTX3x4:
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break;
}
}
for (int i = 0; i < info.usesPTTexMtx.size(); ++i) {
if (info.usesPTTexMtx.test(i)) {
buf.append(g_gxState.ptTexMtxs[i]);
}
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) {
continue;
}
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i);
buf.append(tex.texObj.lodBias);
}
g_gxState.stateDirty = false;
return range;
}
} // namespace aurora::gfx::gx
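For a concrete feel for the fog coefficients computed above, take illustrative values nearZ = 1, farZ = 100, startZ = 10, endZ = 90: then depthRange = 99 and fogRange = 80, giving a = (100 * 1) / (99 * 80) ≈ 0.0126, b = 100 / 99 ≈ 1.0101, and c = 10 / 80 = 0.125.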

lib/gfx/shader_info.hpp (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
#pragma once
#include "gx.hpp"
namespace aurora::gfx::gx {
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept;
Range build_uniform(const ShaderInfo& info) noexcept;
} // namespace aurora::gfx::gx
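These two entry points are used as a pair, as in queue_surface: build_shader_info sizes the uniform block from a ShaderConfig, and build_uniform must then append the current GX state in exactly that layout. A minimal sketch, assuming config is a populated PipelineConfig:

const auto info = gx::build_shader_info(config.shaderConfig);
// Writes pos/nrm matrices, projection, TEV registers, lighting, fog, etc.
// in the same order build_shader_info accounted for them.
const Range uniformRange = gx::build_uniform(info);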

View File

@@ -384,7 +384,7 @@ bool initialize(AuroraBackend auroraBackend) {
wgpu::Limits supportedLimits{};
g_adapter.GetLimits(&supportedLimits);
const wgpu::Limits requiredLimits{
// Use "best" supported alignments
// Use "best" supported limits
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED
@@ -393,18 +393,12 @@ bool initialize(AuroraBackend auroraBackend) {
: supportedLimits.maxTextureDimension3D,
.maxTextureArrayLayers = supportedLimits.maxTextureArrayLayers == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureArrayLayers,
.maxBindGroupsPlusVertexBuffers = supportedLimits.maxBindGroupsPlusVertexBuffers == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxBindGroupsPlusVertexBuffers,
.maxBindingsPerBindGroup = supportedLimits.maxBindGroupsPlusVertexBuffers == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxBindGroupsPlusVertexBuffers,
.maxDynamicUniformBuffersPerPipelineLayout = supportedLimits.maxDynamicUniformBuffersPerPipelineLayout == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxDynamicUniformBuffersPerPipelineLayout,
.maxDynamicStorageBuffersPerPipelineLayout = supportedLimits.maxDynamicStorageBuffersPerPipelineLayout == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxDynamicStorageBuffersPerPipelineLayout,
.maxStorageBuffersPerShaderStage = supportedLimits.maxStorageBuffersPerShaderStage == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxStorageBuffersPerShaderStage,
.minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minUniformBufferOffsetAlignment,
@@ -413,11 +407,19 @@ bool initialize(AuroraBackend auroraBackend) {
: supportedLimits.minStorageBufferOffsetAlignment,
};
Log.info(
"Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n "
"minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}",
"Using limits:"
"\n maxTextureDimension1D: {}"
"\n maxTextureDimension2D: {}"
"\n maxTextureDimension3D: {}"
"\n maxTextureArrayLayers: {}"
"\n maxDynamicStorageBuffersPerPipelineLayout: {}"
"\n maxStorageBuffersPerShaderStage: {}"
"\n minUniformBufferOffsetAlignment: {}"
"\n minStorageBufferOffsetAlignment: {}",
requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D,
requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment,
requiredLimits.minStorageBufferOffsetAlignment);
requiredLimits.maxTextureDimension3D, requiredLimits.maxTextureArrayLayers,
requiredLimits.maxDynamicStorageBuffersPerPipelineLayout, requiredLimits.maxStorageBuffersPerShaderStage,
requiredLimits.minUniformBufferOffsetAlignment, requiredLimits.minStorageBufferOffsetAlignment);
std::vector<wgpu::FeatureName> requiredFeatures;
wgpu::SupportedFeatures supportedFeatures;
g_adapter.GetFeatures(&supportedFeatures);
@@ -432,6 +434,9 @@ bool initialize(AuroraBackend auroraBackend) {
/* clang-format off */
#if _WIN32
"use_dxc",
#ifdef NDEBUG
"emit_hlsl_debug_symbols",
#endif
#endif
#ifdef NDEBUG
"skip_validation",
@@ -453,9 +458,7 @@ bool initialize(AuroraBackend auroraBackend) {
#endif
.requiredFeatureCount = requiredFeatures.size(),
.requiredFeatures = requiredFeatures.data(),
#ifdef WEBGPU_DAWN
.requiredLimits = &requiredLimits,
#endif
});
deviceDescriptor.SetUncapturedErrorCallback(
[](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {