Split out display_list/shader_info

Luke Street 2025-04-18 21:52:38 -06:00
parent 357ecba0ae
commit c4d91f18a1
16 changed files with 716 additions and 673 deletions

View File

@@ -4,6 +4,8 @@ add_library(aurora_gx STATIC
lib/gfx/gx.cpp
lib/gfx/gx_shader.cpp
lib/gfx/texture_convert.cpp
lib/gfx/display_list.cpp
lib/gfx/shader_info.cpp
lib/gfx/model/shader.cpp
lib/dolphin/gx/GXBump.cpp
lib/dolphin/gx/GXCull.cpp

View File

@@ -751,6 +751,13 @@ typedef enum {
GX_MAX_TLUTFMT,
} GXTlutFmt;
typedef enum _GXTexCacheSize {
GX_TEXCACHE_32K,
GX_TEXCACHE_128K,
GX_TEXCACHE_512K,
GX_TEXCACHE_NONE
} GXTexCacheSize;
#ifdef __cplusplus
}
#endif

View File

@@ -93,6 +93,14 @@ typedef struct {
s16 a;
} GXColorS10;
typedef struct _GXTexRegion {
u32 dummy[4];
} GXTexRegion;
typedef struct _GXTlutRegion {
u32 dummy[4];
} GXTlutRegion;
#ifdef __cplusplus
}
#endif

View File

@@ -8,6 +8,8 @@
extern "C" {
#endif
typedef GXTexRegion* (*GXTexRegionCallback)(const GXTexObj* obj, GXTexMapID id);
void GXInitTexObj(GXTexObj* obj, const void* data, u16 width, u16 height, u32 format, GXTexWrapMode wrapS,
GXTexWrapMode wrapT, GXBool mipmap);
void GXInitTexObjCI(GXTexObj* obj, const void* data, u16 width, u16 height, GXCITexFmt format, GXTexWrapMode wrapS,
@@ -21,6 +23,11 @@ void GXInvalidateTexAll();
void GXInitTexObjWrapMode(GXTexObj* obj, GXTexWrapMode s, GXTexWrapMode t);
void GXInitTlutObj(GXTlutObj* obj, const void* data, GXTlutFmt format, u16 entries);
void GXLoadTlut(const GXTlutObj* obj, GXTlut idx);
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts);
void GXInitTexCacheRegion(GXTexRegion* region, GXBool is_32b_mipmap, u32 tmem_even, GXTexCacheSize size_even,
u32 tmem_odd, GXTexCacheSize size_odd);
GXTexRegionCallback GXSetTexRegionCallback(GXTexRegionCallback callback);
void GXInvalidateTexRegion(const GXTexRegion* region);
#ifdef __cplusplus
}
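For reference, a minimal sketch of how client code typically drives this region API, modeled on common GX SDK usage; the region count and TMEM addresses below are illustrative, not taken from this commit:

static GXTexRegion sTexRegions[8];

// Hand back one fixed cache region per texmap (illustrative policy).
static GXTexRegion* RegionCallback(const GXTexObj* obj, GXTexMapID id) {
  return &sTexRegions[id % 8];
}

static void SetupTexRegions() {
  for (u32 i = 0; i < 8; ++i) {
    // Even/odd TMEM banks with a 32K cache each; addresses are illustrative.
    GXInitTexCacheRegion(&sTexRegions[i], GX_FALSE, 0x00000 + i * 0x8000, GX_TEXCACHE_32K,
                         0x80000 + i * 0x8000, GX_TEXCACHE_32K);
  }
  GXSetTexRegionCallback(RegionCallback);
}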

View File

@@ -77,7 +77,9 @@ void GXSetTevOrder(GXTevStageID id, GXTexCoordID tcid, GXTexMapID tmid, GXChanne
update_gx_state(stage.channelId, cid);
}
// TODO GXSetZTexture
void GXSetZTexture(GXZTexOp op, GXTexFmt fmt, u32 bias) {
// TODO
}
void GXSetNumTevStages(u8 num) { update_gx_state(g_gxState.numTevStages, num); }

View File

@@ -233,7 +233,9 @@ void GXInvalidateTexAll() {
// TODO GXSetTexRegionCallback
// TODO GXSetTlutRegionCallback
// TODO GXLoadTexObjPreLoaded
// TODO GXSetTexCoordScaleManually
void GXSetTexCoordScaleManually(GXTexCoordID coord, GXBool enable, u16 ss, u16 ts) {
// TODO
}
// TODO GXSetTexCoordCylWrap
// TODO GXSetTexCoordBias
}

View File

@@ -3,6 +3,7 @@
#include "aurora/math.hpp"
#include "../../gfx/model/shader.hpp"
#include "../../gfx/gx_fmt.hpp"
#include "../../gfx/shader_info.hpp"
#include <cstring>
#include <optional>

lib/gfx/display_list.cpp (new file, 288 lines)
View File

@@ -0,0 +1,288 @@
#include "display_list.hpp"
#include "gx.hpp"
#include "gx_fmt.hpp"
namespace aurora::gfx::gx {
static Module Log("aurora::gfx::model");
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), fmt(fmt) {}
};
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount) {
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
} attrArrays[GX_VA_MAX_ATTR] = {};
u32 vtxSize = 0;
u32 outVtxSize = 0;
// Calculate attribute offsets and vertex size
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_F32;
vtxSize += 12;
outVtxSize += 12;
break;
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_S16;
vtxSize += 6;
outVtxSize += 12;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_F32;
vtxSize += 8;
outVtxSize += 8;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_S16;
vtxSize += 4;
outVtxSize += 8;
break;
case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8):
case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8):
attrArrays[attr].count = 4;
attrArrays[attr].type = GX_RGBA8;
vtxSize += 4;
outVtxSize += 16;
break;
}
#undef COMBINE
break;
case GX_INDEX8:
++vtxSize;
outVtxSize += 2;
break;
case GX_INDEX16:
vtxSize += 2;
outVtxSize += 2;
break;
}
}
// Align to 4
int rem = outVtxSize % 4;
int padding = 0;
if (rem != 0) {
padding = 4 - rem;
outVtxSize += padding;
}
// Build vertex buffer
buf.reserve_extra(vtxCount * outVtxSize);
std::array<f32, 4> out{};
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
continue;
}
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
u8 count = attrArrays[attr].count;
switch (attrArrays[attr].type) {
case GX_U8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const u8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_S8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const s8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(u16);
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(s16);
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
break;
case GX_RGBA8:
out[0] = static_cast<f32>(ptr[0]) / 255.f;
out[1] = static_cast<f32>(ptr[1]) / 255.f;
out[2] = static_cast<f32>(ptr[2]) / 255.f;
out[3] = static_cast<f32>(ptr[3]) / 255.f;
buf.append(out.data(), sizeof(f32) * 4);
ptr += sizeof(u32);
break;
}
}
if (padding > 0) {
buf.append_zeroes(padding);
}
}
return vtxSize;
}
static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) {
u16 numIndices = 0;
if (prim == GX_TRIANGLES) {
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
} else
UNLIKELY FATAL("unsupported primitive type {}", static_cast<u32>(prim));
return numIndices;
}
auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult {
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
u32 numIndices = 0;
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
numIndices = cache.idxBuf.size() / 2;
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
u16 vtxStart = 0;
while (pos < dlSize) {
u8 cmd = data[pos++];
u8 opcode = cmd & GX_OPCODE_MASK;
switch (opcode) {
DEFAULT_FATAL("unimplemented opcode: {}", opcode);
case GX_NOP:
continue;
case GX_LOAD_BP_REG:
// TODO?
pos += 4;
break;
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
vtxStart += vtxCount;
break;
}
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
case GX_DRAW_POINTS:
FATAL("unimplemented prim type: {}", opcode);
break;
}
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), fmt);
}
return {
.vertRange = vertRange,
.idxRange = idxRange,
.numIndices = numIndices,
.fmt = fmt,
};
}
} // namespace aurora::gfx::gx
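To make the triangulation in prepare_idx_buffer concrete, here is a standalone sketch of the fan path (the strip path is analogous, flipping winding on odd vertices); illustrative code, not part of the commit:

#include <cstdint>
#include <vector>

// Every vertex after the first three emits one triangle anchored at the fan
// center, matching the GX_TRIANGLEFAN branch above.
std::vector<uint16_t> expand_fan(uint16_t vtxStart, uint16_t vtxCount) {
  std::vector<uint16_t> out;
  for (uint16_t v = 0; v < vtxCount; ++v) {
    const uint16_t idx = vtxStart + v;
    if (v < 3) {
      out.push_back(idx); // first triangle passes through unchanged
    } else {
      out.insert(out.end(), {vtxStart, static_cast<uint16_t>(idx - 1), idx});
    }
  }
  return out; // vtxStart=0, vtxCount=5 -> 0 1 2, 0 2 3, 0 3 4
}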

lib/gfx/display_list.hpp (new file, 14 lines)
View File

@@ -0,0 +1,14 @@
#pragma once
#include "gx.hpp"
namespace aurora::gfx::gx {
struct DisplayListResult {
Range vertRange;
Range idxRange;
u32 numIndices;
GXVtxFmt fmt;
};
auto process_display_list(const u8* dlStart, u32 dlSize) -> DisplayListResult;
} // namespace aurora::gfx::gx
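A minimal usage sketch, mirroring the queue_surface call site further below; dlStart, dlSize, and the surrounding pipeline setup are assumed from that context:

// Convert a raw GX display list into GPU-ready vertex/index ranges.
const auto result = aurora::gfx::gx::process_display_list(dlStart, dlSize);
// result.fmt selects the vertex layout; result.numIndices drives the draw call.
populate_pipeline_config(config, GX_TRIANGLES, result.fmt);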

View File

@@ -316,104 +316,6 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
};
}
Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize);
{
buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(g_gxState.proj);
}
for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (!info.loadsTevReg.test(i)) {
continue;
}
buf.append(g_gxState.colorRegs[i]);
}
if (info.lightingEnabled) {
// Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(g_gxState.lights);
// Light state for all channels
for (int i = 0; i < 4; ++i) {
buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
}
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (!info.sampledColorChannels.test(i)) {
continue;
}
const auto& ccc = g_gxState.colorChannelConfig[i];
const auto& ccs = g_gxState.colorChannelState[i];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(ccs.ambColor);
}
if (ccc.matSrc == GX_SRC_REG) {
buf.append(ccs.matColor);
}
const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0];
const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(ccsa.ambColor);
}
if (ccca.matSrc == GX_SRC_REG) {
buf.append(ccsa.matColor);
}
}
for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (!info.sampledKColors.test(i)) {
continue;
}
buf.append(g_gxState.kcolors[i]);
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) {
continue;
}
switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4:
if (std::holds_alternative<Mat2x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat2x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break;
case GX_TG_MTX3x4:
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break;
}
}
for (int i = 0; i < info.usesPTTexMtx.size(); ++i) {
if (!info.usesPTTexMtx.test(i)) {
continue;
}
buf.append(g_gxState.ptTexMtxs[i]);
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) {
continue;
}
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i);
buf.append(tex.texObj.lodBias);
}
g_gxState.stateDirty = false;
return range;
}
static absl::flat_hash_map<u32, wgpu::BindGroupLayout> sUniformBindGroupLayouts;
static absl::flat_hash_map<u32, std::pair<wgpu::BindGroupLayout, wgpu::BindGroupLayout>> sTextureBindGroupLayouts;

View File

@@ -429,10 +429,7 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXV
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
const char* label) noexcept;
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept;
wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& info) noexcept;
// Range build_vertex_buffer(const GXShaderInfo& info) noexcept;
Range build_uniform(const ShaderInfo& info) noexcept;
GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const ShaderConfig& config) noexcept;
GXBindGroups build_bind_groups(const ShaderInfo& info, const ShaderConfig& config,
const BindGroupRanges& ranges) noexcept;

View File

@@ -38,102 +38,6 @@ static inline std::string_view chan_comp(GXTevColorChan chan) noexcept {
}
}
static void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CC_CPREV:
case GX_CC_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CC_C0:
case GX_CC_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CC_C1:
case GX_CC_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CC_C2:
case GX_CC_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CC_TEXC:
case GX_CC_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CC_RASC:
case GX_CC_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CC_KONST:
switch (stage.kcSel) {
case GX_TEV_KCSEL_K0:
case GX_TEV_KCSEL_K0_R:
case GX_TEV_KCSEL_K0_G:
case GX_TEV_KCSEL_K0_B:
case GX_TEV_KCSEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KCSEL_K1:
case GX_TEV_KCSEL_K1_R:
case GX_TEV_KCSEL_K1_G:
case GX_TEV_KCSEL_K1_B:
case GX_TEV_KCSEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KCSEL_K2:
case GX_TEV_KCSEL_K2_R:
case GX_TEV_KCSEL_K2_G:
case GX_TEV_KCSEL_K2_B:
case GX_TEV_KCSEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KCSEL_K3:
case GX_TEV_KCSEL_K3_R:
case GX_TEV_KCSEL_K3_G:
case GX_TEV_KCSEL_K3_B:
case GX_TEV_KCSEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
static bool formatHasAlpha(u32 format) {
switch (format) {
case GX_TF_IA4:
case GX_TF_IA8:
case GX_TF_RGB5A3:
case GX_TF_RGBA8:
case GX_TF_CMPR:
case GX_CTF_RA4:
case GX_CTF_RA8:
case GX_CTF_YUVA8:
case GX_CTF_A8:
case GX_TF_RGBA8_PC:
return true;
default:
return false;
}
}
static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const ShaderConfig& config,
const TevStage& stage) {
switch (arg) {
@@ -260,74 +164,6 @@ static std::string color_arg_reg(GXTevColorArg arg, size_t stageIdx, const Shade
}
}
static void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CA_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CA_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CA_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CA_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CA_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CA_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CA_KONST:
switch (stage.kaSel) {
case GX_TEV_KASEL_K0_R:
case GX_TEV_KASEL_K0_G:
case GX_TEV_KASEL_K0_B:
case GX_TEV_KASEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KASEL_K1_R:
case GX_TEV_KASEL_K1_G:
case GX_TEV_KASEL_K1_B:
case GX_TEV_KASEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KASEL_K2_R:
case GX_TEV_KASEL_K2_G:
case GX_TEV_KASEL_K2_B:
case GX_TEV_KASEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KASEL_K3_R:
case GX_TEV_KASEL_K3_G:
case GX_TEV_KASEL_K3_B:
case GX_TEV_KASEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
static std::string alpha_arg_reg(GXTevAlphaArg arg, size_t stageIdx, const ShaderConfig& config,
const TevStage& stage) {
switch (arg) {
@@ -549,109 +385,6 @@ constexpr std::array<std::string_view, MaxVtxAttr> VtxAttributeNames{
"pos_mtx_array", "nrm_mtx_array", "tex_mtx_array", "light_array", "nbt",
};
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept {
// const auto hash = xxh3_hash(config);
// const auto it = g_gxCachedShaders.find(hash);
// if (it != g_gxCachedShaders.end()) {
// return it->second.second;
// }
ShaderInfo info{
.uniformSize = sizeof(PnMtx) + sizeof(Mat4x4<float>), // pos_mtx, nrm_mtx, proj
};
for (int i = 0; i < config.tevStageCount; ++i) {
const auto& stage = config.tevStages[i];
// Color pass
color_arg_reg_info(stage.colorPass.a, stage, info);
color_arg_reg_info(stage.colorPass.b, stage, info);
color_arg_reg_info(stage.colorPass.c, stage, info);
color_arg_reg_info(stage.colorPass.d, stage, info);
info.writesTevReg.set(stage.colorOp.outReg);
// Alpha pass
alpha_arg_reg_info(stage.alphaPass.a, stage, info);
alpha_arg_reg_info(stage.alphaPass.b, stage, info);
alpha_arg_reg_info(stage.alphaPass.c, stage, info);
alpha_arg_reg_info(stage.alphaPass.d, stage, info);
if (!info.writesTevReg.test(stage.alphaOp.outReg)) {
// If we're writing alpha to a register that's not been
// written to in the shader, load from uniform buffer
info.loadsTevReg.set(stage.alphaOp.outReg);
info.writesTevReg.set(stage.alphaOp.outReg);
}
}
info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cc.lightingEnabled || cca.lightingEnabled) {
info.lightingEnabled = true;
}
}
}
if (info.lightingEnabled) {
// Lights + light state for all channels
info.uniformSize += 16 + sizeof(Light) * GX::MaxLights;
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cc.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cca.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
}
}
info.uniformSize += info.sampledKColors.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
if (!info.sampledTexCoords.test(i)) {
continue;
}
const auto& tcg = config.tcgs[i];
if (tcg.mtx != GX_IDENTITY) {
u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3;
info.usesTexMtx.set(texMtxIdx);
info.texMtxTypes[texMtxIdx] = tcg.type;
}
if (tcg.postMtx != GX_PTIDENTITY) {
u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (info.usesTexMtx.test(i)) {
switch (info.texMtxTypes[i]) {
case GX_TG_MTX2x4:
info.uniformSize += sizeof(Mat2x4<float>);
break;
case GX_TG_MTX3x4:
info.uniformSize += sizeof(Mat3x4<float>);
break;
default:
break;
}
}
}
info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4<float>);
if (config.fogType != GX_FOG_NONE) {
info.usesFog = true;
info.uniformSize += sizeof(Fog);
}
info.uniformSize += info.sampledTextures.count() * sizeof(u32);
info.uniformSize = align_uniform(info.uniformSize);
return info;
}
struct StorageLoadResult {
std::string attrLoad;
std::string_view arrType;
@@ -947,6 +680,8 @@ wgpu::ShaderModule build_shader(const ShaderConfig& config, const ShaderInfo& in
vtxInAttrs += fmt::format("@location({}) in_clr{}: vec4f", locIdx++, attr - GX_VA_CLR0);
} else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
vtxInAttrs += fmt::format("@location({}) in_tex{}_uv: vec2f", locIdx++, attr - GX_VA_TEX0);
} else {
FATAL("unhandled vtx attr {}", underlying(attr));
}
}
vtxXfrAttrsPre += fmt::format(
@@ -1416,7 +1151,7 @@ fn fetch_i16_3(p: ptr<storage, array<i32>>, idx: u32, frac: u32) -> vec3<f32> {{
var o0 = select(extractBits(v0, 0, 16), extractBits(v0, 16, 16), r);
var o1 = select(extractBits(v0, 16, 16), extractBits(v1, 0, 16), r);
var o2 = select(extractBits(v1, 0, 16), extractBits(v1, 16, 16), r);
return vec3<f32>(f32(o0), f32(o1), f32(o2)) / f32(1 << frac);
return vec3<f32>(f32(o0), f32(o1), f32(o2)) / f32(1u << frac);
}}
{10}
struct Uniform {{

View File

@@ -2,298 +2,20 @@
#include "../../webgpu/gpu.hpp"
#include "../gx_fmt.hpp"
#include "../display_list.hpp"
#include "../shader_info.hpp"
#include <absl/container/flat_hash_map.h>
namespace aurora::gfx::model {
static Module Log("aurora::gfx::model");
using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>;
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
IndexedAttrs indexedAttrs;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {}
};
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount,
IndexedAttrs& indexedAttrs) {
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
} attrArrays[GX_VA_MAX_ATTR] = {};
u32 vtxSize = 0;
u32 outVtxSize = 0;
// Calculate attribute offsets and vertex size
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_F32;
vtxSize += 12;
outVtxSize += 12;
break;
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_S16):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_S16):
attrArrays[attr].count = 3;
attrArrays[attr].type = GX_S16;
vtxSize += 6;
outVtxSize += 12;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_F32):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_F32):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_F32;
vtxSize += 8;
outVtxSize += 8;
break;
case COMBINE(GX_VA_TEX0, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX1, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX2, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX3, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX4, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX5, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX6, GX_TEX_ST, GX_S16):
case COMBINE(GX_VA_TEX7, GX_TEX_ST, GX_S16):
attrArrays[attr].count = 2;
attrArrays[attr].type = GX_S16;
vtxSize += 4;
outVtxSize += 8;
break;
case COMBINE(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8):
case COMBINE(GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8):
attrArrays[attr].count = 4;
attrArrays[attr].type = GX_RGBA8;
vtxSize += 4;
outVtxSize += 16;
break;
}
#undef COMBINE
break;
case GX_INDEX8:
++vtxSize;
outVtxSize += 2;
indexedAttrs[attr] = true;
break;
case GX_INDEX16:
vtxSize += 2;
outVtxSize += 2;
indexedAttrs[attr] = true;
break;
}
}
// Align to 4
int rem = outVtxSize % 4;
int padding = 0;
if (rem != 0) {
padding = 4 - rem;
outVtxSize += padding;
}
// Build vertex buffer
buf.reserve_extra(vtxCount * outVtxSize);
std::array<f32, 4> out{};
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
continue;
}
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
u8 count = attrArrays[attr].count;
switch (attrArrays[attr].type) {
case GX_U8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const u8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_S8:
for (int i = 0; i < count; ++i) {
const auto value = reinterpret_cast<const s8*>(ptr)[i];
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count;
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(u16);
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(s16);
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
break;
case GX_RGBA8:
out[0] = static_cast<f32>(ptr[0]) / 255.f;
out[1] = static_cast<f32>(ptr[1]) / 255.f;
out[2] = static_cast<f32>(ptr[2]) / 255.f;
out[3] = static_cast<f32>(ptr[3]) / 255.f;
buf.append(out.data(), sizeof(f32) * 4);
ptr += sizeof(u32);
break;
}
}
if (padding > 0) {
buf.append_zeroes(padding);
}
}
return vtxSize;
}
static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u16 vtxCount) {
u16 numIndices = 0;
if (prim == GX_TRIANGLES) {
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
} else
UNLIKELY FATAL("unsupported primitive type {}", static_cast<u32>(prim));
return numIndices;
}
void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
u32 numIndices = 0;
IndexedAttrs indexedAttrs{};
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
numIndices = cache.idxBuf.size() / 2;
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
indexedAttrs = cache.indexedAttrs;
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
u16 vtxStart = 0;
while (pos < dlSize) {
u8 cmd = data[pos++];
u8 opcode = cmd & GX_OPCODE_MASK;
switch (opcode) {
DEFAULT_FATAL("unimplemented opcode: {}", opcode);
case GX_NOP:
continue;
case GX_LOAD_BP_REG:
// TODO?
pos += 4;
break;
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
vtxStart += vtxCount;
break;
}
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
case GX_DRAW_POINTS:
FATAL("unimplemented prim type: {}", opcode);
break;
}
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt);
}
const auto result = aurora::gfx::gx::process_display_list(dlStart, dlSize);
gx::BindGroupRanges ranges{};
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (!indexedAttrs[i]) {
if (gx::g_gxState.vtxDesc[i] != GX_INDEX8 && gx::g_gxState.vtxDesc[i] != GX_INDEX16) {
continue;
}
auto& array = gx::g_gxState.arrays[i];
@@ -309,18 +31,18 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
}
model::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES, fmt);
populate_pipeline_config(config, GX_TRIANGLES, result.fmt);
const auto info = gx::build_shader_info(config.shaderConfig);
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config);
push_draw_command(model::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.idxRange = idxRange,
.vertRange = result.vertRange,
.idxRange = result.idxRange,
.dataRanges = ranges,
.uniformRange = build_uniform(info),
.indexCount = numIndices,
.indexCount = result.numIndices,
.bindGroups = bindGroups,
.dstAlpha = gx::g_gxState.dstAlpha,
});

lib/gfx/shader_info.cpp (new file, 345 lines)
View File

@@ -0,0 +1,345 @@
#include "shader_info.hpp"
namespace aurora::gfx::gx {
namespace {
Module Log("aurora::gfx::gx");
void color_arg_reg_info(GXTevColorArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CC_CPREV:
case GX_CC_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CC_C0:
case GX_CC_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CC_C1:
case GX_CC_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CC_C2:
case GX_CC_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CC_TEXC:
case GX_CC_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CC_RASC:
case GX_CC_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CC_KONST:
switch (stage.kcSel) {
case GX_TEV_KCSEL_K0:
case GX_TEV_KCSEL_K0_R:
case GX_TEV_KCSEL_K0_G:
case GX_TEV_KCSEL_K0_B:
case GX_TEV_KCSEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KCSEL_K1:
case GX_TEV_KCSEL_K1_R:
case GX_TEV_KCSEL_K1_G:
case GX_TEV_KCSEL_K1_B:
case GX_TEV_KCSEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KCSEL_K2:
case GX_TEV_KCSEL_K2_R:
case GX_TEV_KCSEL_K2_G:
case GX_TEV_KCSEL_K2_B:
case GX_TEV_KCSEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KCSEL_K3:
case GX_TEV_KCSEL_K3_R:
case GX_TEV_KCSEL_K3_G:
case GX_TEV_KCSEL_K3_B:
case GX_TEV_KCSEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
void alpha_arg_reg_info(GXTevAlphaArg arg, const TevStage& stage, ShaderInfo& info) {
switch (arg) {
case GX_CA_APREV:
if (!info.writesTevReg.test(GX_TEVPREV)) {
info.loadsTevReg.set(GX_TEVPREV);
}
break;
case GX_CA_A0:
if (!info.writesTevReg.test(GX_TEVREG0)) {
info.loadsTevReg.set(GX_TEVREG0);
}
break;
case GX_CA_A1:
if (!info.writesTevReg.test(GX_TEVREG1)) {
info.loadsTevReg.set(GX_TEVREG1);
}
break;
case GX_CA_A2:
if (!info.writesTevReg.test(GX_TEVREG2)) {
info.loadsTevReg.set(GX_TEVREG2);
}
break;
case GX_CA_TEXA:
CHECK(stage.texCoordId != GX_TEXCOORD_NULL, "tex coord not bound");
CHECK(stage.texMapId != GX_TEXMAP_NULL, "tex map not bound");
info.sampledTexCoords.set(stage.texCoordId);
info.sampledTextures.set(stage.texMapId);
break;
case GX_CA_RASA:
if (stage.channelId >= GX_COLOR0A0 && stage.channelId <= GX_COLOR1A1) {
info.sampledColorChannels.set(stage.channelId - GX_COLOR0A0);
}
break;
case GX_CA_KONST:
switch (stage.kaSel) {
case GX_TEV_KASEL_K0_R:
case GX_TEV_KASEL_K0_G:
case GX_TEV_KASEL_K0_B:
case GX_TEV_KASEL_K0_A:
info.sampledKColors.set(0);
break;
case GX_TEV_KASEL_K1_R:
case GX_TEV_KASEL_K1_G:
case GX_TEV_KASEL_K1_B:
case GX_TEV_KASEL_K1_A:
info.sampledKColors.set(1);
break;
case GX_TEV_KASEL_K2_R:
case GX_TEV_KASEL_K2_G:
case GX_TEV_KASEL_K2_B:
case GX_TEV_KASEL_K2_A:
info.sampledKColors.set(2);
break;
case GX_TEV_KASEL_K3_R:
case GX_TEV_KASEL_K3_G:
case GX_TEV_KASEL_K3_B:
case GX_TEV_KASEL_K3_A:
info.sampledKColors.set(3);
break;
default:
break;
}
break;
default:
break;
}
}
} // namespace
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept {
ShaderInfo info{
.uniformSize = sizeof(PnMtx) + sizeof(Mat4x4<float>), // pos_mtx, nrm_mtx, proj
};
for (int i = 0; i < config.tevStageCount; ++i) {
const auto& stage = config.tevStages[i];
// Color pass
color_arg_reg_info(stage.colorPass.a, stage, info);
color_arg_reg_info(stage.colorPass.b, stage, info);
color_arg_reg_info(stage.colorPass.c, stage, info);
color_arg_reg_info(stage.colorPass.d, stage, info);
info.writesTevReg.set(stage.colorOp.outReg);
// Alpha pass
alpha_arg_reg_info(stage.alphaPass.a, stage, info);
alpha_arg_reg_info(stage.alphaPass.b, stage, info);
alpha_arg_reg_info(stage.alphaPass.c, stage, info);
alpha_arg_reg_info(stage.alphaPass.d, stage, info);
if (!info.writesTevReg.test(stage.alphaOp.outReg)) {
// If we're writing alpha to a register that's not been
// written to in the shader, load from uniform buffer
info.loadsTevReg.set(stage.alphaOp.outReg);
info.writesTevReg.set(stage.alphaOp.outReg);
}
}
info.uniformSize += info.loadsTevReg.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cc.lightingEnabled || cca.lightingEnabled) {
info.lightingEnabled = true;
}
}
}
if (info.lightingEnabled) {
// Lights + light state for all channels
info.uniformSize += 16 + sizeof(Light) * GX::MaxLights;
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (info.sampledColorChannels.test(i)) {
const auto& cc = config.colorChannels[i];
if (cc.lightingEnabled && cc.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cc.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
const auto& cca = config.colorChannels[i + GX_ALPHA0];
if (cca.lightingEnabled && cca.ambSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
if (cca.matSrc == GX_SRC_REG) {
info.uniformSize += sizeof(Vec4<float>);
}
}
}
info.uniformSize += info.sampledKColors.count() * sizeof(Vec4<float>);
for (int i = 0; i < info.sampledTexCoords.size(); ++i) {
if (!info.sampledTexCoords.test(i)) {
continue;
}
const auto& tcg = config.tcgs[i];
if (tcg.mtx != GX_IDENTITY) {
u32 texMtxIdx = (tcg.mtx - GX_TEXMTX0) / 3;
info.usesTexMtx.set(texMtxIdx);
info.texMtxTypes[texMtxIdx] = tcg.type;
}
if (tcg.postMtx != GX_PTIDENTITY) {
u32 postMtxIdx = (tcg.postMtx - GX_PTTEXMTX0) / 3;
info.usesPTTexMtx.set(postMtxIdx);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (info.usesTexMtx.test(i)) {
switch (info.texMtxTypes[i]) {
case GX_TG_MTX2x4:
info.uniformSize += sizeof(Mat2x4<float>);
break;
case GX_TG_MTX3x4:
info.uniformSize += sizeof(Mat3x4<float>);
break;
default:
break;
}
}
}
info.uniformSize += info.usesPTTexMtx.count() * sizeof(Mat3x4<float>);
if (config.fogType != GX_FOG_NONE) {
info.usesFog = true;
info.uniformSize += sizeof(Fog);
}
info.uniformSize += info.sampledTextures.count() * sizeof(u32);
info.uniformSize = align_uniform(info.uniformSize);
return info;
}
Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize);
{
buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(g_gxState.proj);
}
for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (info.loadsTevReg.test(i)) {
buf.append(g_gxState.colorRegs[i]);
}
}
if (info.lightingEnabled) {
// Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(g_gxState.lights);
// Light state for all channels
for (int i = 0; i < 4; ++i) {
buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
}
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
if (!info.sampledColorChannels.test(i)) {
continue;
}
const auto& ccc = g_gxState.colorChannelConfig[i];
const auto& ccs = g_gxState.colorChannelState[i];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(ccs.ambColor);
}
if (ccc.matSrc == GX_SRC_REG) {
buf.append(ccs.matColor);
}
const auto& ccca = g_gxState.colorChannelConfig[i + GX_ALPHA0];
const auto& ccsa = g_gxState.colorChannelState[i + GX_ALPHA0];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(ccsa.ambColor);
}
if (ccca.matSrc == GX_SRC_REG) {
buf.append(ccsa.matColor);
}
}
for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (info.sampledKColors.test(i)) {
buf.append(g_gxState.kcolors[i]);
}
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) {
continue;
}
switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4:
if (std::holds_alternative<Mat2x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat2x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break;
case GX_TG_MTX3x4:
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break;
}
}
for (int i = 0; i < info.usesPTTexMtx.size(); ++i) {
if (info.usesPTTexMtx.test(i)) {
buf.append(g_gxState.ptTexMtxs[i]);
}
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
fog.a = (state.farZ * state.nearZ) / (depthRange * fogRange);
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) {
continue;
}
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i);
buf.append(tex.texObj.lodBias);
}
g_gxState.stateDirty = false;
return range;
}
} // namespace aurora::gfx::gx
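For a concrete feel for the fog coefficients computed above, take illustrative values nearZ = 1, farZ = 100, startZ = 10, endZ = 90: then depthRange = 99 and fogRange = 80, giving a = (100 * 1) / (99 * 80) ≈ 0.0126, b = 100 / 99 ≈ 1.0101, and c = 10 / 80 = 0.125.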

lib/gfx/shader_info.hpp (new file, 8 lines)
View File

@@ -0,0 +1,8 @@
#pragma once
#include "gx.hpp"
namespace aurora::gfx::gx {
ShaderInfo build_shader_info(const ShaderConfig& config) noexcept;
Range build_uniform(const ShaderInfo& info) noexcept;
} // namespace aurora::gfx::gx
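These two entry points are used as a pair, as in queue_surface: build_shader_info sizes the uniform block from a ShaderConfig, and build_uniform must then append the current GX state in exactly that layout. A minimal sketch, assuming config is a populated PipelineConfig:

const auto info = gx::build_shader_info(config.shaderConfig);
// Writes pos/nrm matrices, projection, TEV registers, lighting, fog, etc.
// in the same order build_shader_info accounted for them.
const Range uniformRange = gx::build_uniform(info);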

View File

@@ -384,7 +384,7 @@ bool initialize(AuroraBackend auroraBackend) {
wgpu::Limits supportedLimits{};
g_adapter.GetLimits(&supportedLimits);
const wgpu::Limits requiredLimits{
// Use "best" supported alignments
// Use "best" supported limits
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED
@@ -393,18 +393,12 @@ bool initialize(AuroraBackend auroraBackend) {
: supportedLimits.maxTextureDimension3D,
.maxTextureArrayLayers = supportedLimits.maxTextureArrayLayers == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureArrayLayers,
.maxBindGroupsPlusVertexBuffers = supportedLimits.maxBindGroupsPlusVertexBuffers == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxBindGroupsPlusVertexBuffers,
.maxBindingsPerBindGroup = supportedLimits.maxBindGroupsPlusVertexBuffers == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxBindGroupsPlusVertexBuffers,
.maxDynamicUniformBuffersPerPipelineLayout = supportedLimits.maxDynamicUniformBuffersPerPipelineLayout == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxDynamicUniformBuffersPerPipelineLayout,
.maxDynamicStorageBuffersPerPipelineLayout = supportedLimits.maxDynamicStorageBuffersPerPipelineLayout == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxDynamicStorageBuffersPerPipelineLayout,
.maxStorageBuffersPerShaderStage = supportedLimits.maxStorageBuffersPerShaderStage == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxStorageBuffersPerShaderStage,
.minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minUniformBufferOffsetAlignment,
@@ -413,11 +407,19 @@ bool initialize(AuroraBackend auroraBackend) {
: supportedLimits.minStorageBufferOffsetAlignment,
};
Log.info(
"Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n "
"minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}",
"Using limits:"
"\n maxTextureDimension1D: {}"
"\n maxTextureDimension2D: {}"
"\n maxTextureDimension3D: {}"
"\n maxTextureArrayLayers: {}"
"\n maxDynamicStorageBuffersPerPipelineLayout: {}"
"\n maxStorageBuffersPerShaderStage: {}"
"\n minUniformBufferOffsetAlignment: {}"
"\n minStorageBufferOffsetAlignment: {}",
requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D,
requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment,
requiredLimits.minStorageBufferOffsetAlignment);
requiredLimits.maxTextureDimension3D, requiredLimits.maxTextureArrayLayers,
requiredLimits.maxDynamicStorageBuffersPerPipelineLayout, requiredLimits.maxStorageBuffersPerShaderStage,
requiredLimits.minUniformBufferOffsetAlignment, requiredLimits.minStorageBufferOffsetAlignment);
std::vector<wgpu::FeatureName> requiredFeatures;
wgpu::SupportedFeatures supportedFeatures;
g_adapter.GetFeatures(&supportedFeatures);
@@ -432,6 +434,9 @@ bool initialize(AuroraBackend auroraBackend) {
/* clang-format off */
#if _WIN32
"use_dxc",
#ifdef NDEBUG
"emit_hlsl_debug_symbols",
#endif
#endif
#ifdef NDEBUG
"skip_validation",
@@ -453,9 +458,7 @@ bool initialize(AuroraBackend auroraBackend) {
#endif
.requiredFeatureCount = requiredFeatures.size(),
.requiredFeatures = requiredFeatures.data(),
#ifdef WEBGPU_DAWN
.requiredLimits = &requiredLimits,
#endif
});
deviceDescriptor.SetUncapturedErrorCallback(
[](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {