Rewrite attribute buffer, matrix & stream handling

Array attributes (GXSetArray) are now fetched
correctly based on the vertex format. Buffers are
still assumed to be byte-swapped to little-endian.

Stream handling has been completely redone, and
many issues have been resolved.

Eliminates matrix transposes; AURORA_NATIVE_MATRIX
is no longer necessary and has been removed.
Luke Street 2025-04-14 17:16:13 -06:00
parent 3316ad9a7f
commit a600b0b84c
21 changed files with 1215 additions and 901 deletions
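
As context for the GXVert changes below, a minimal sketch (illustrative helper, not part of this commit) of what fetching "based on the vertex format" means for fixed-point components: a component with frac fractional bits decodes as raw / 2^frac, which is exactly the (1 << frac) division the new direct-attribute handlers use.

#include <cstdint>

// Illustrative only: decode one signed 16-bit fixed-point component
// using the vertex format's fractional-bit count. The buffer holding
// raw values is assumed already byte-swapped to little-endian.
static inline float decode_s16(int16_t raw, uint8_t frac) {
  return static_cast<float>(raw) / static_cast<float>(1 << frac);
}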

View File

@ -3,8 +3,6 @@ project(aurora LANGUAGES C CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 20)
option(AURORA_NATIVE_MATRIX "Assume OpenGL-layout matrices, disables transposing" OFF)
add_subdirectory(extern)
include(cmake/aurora_core.cmake)

View File

@ -4,7 +4,6 @@ add_library(aurora_gx STATIC
lib/gfx/gx.cpp
lib/gfx/gx_shader.cpp
lib/gfx/texture_convert.cpp
lib/gfx/stream/shader.cpp
lib/gfx/model/shader.cpp
lib/dolphin/gx/GXBump.cpp
lib/dolphin/gx/GXCull.cpp
@ -28,9 +27,6 @@ add_library(aurora::gx ALIAS aurora_gx)
target_link_libraries(aurora_gx PUBLIC aurora::core xxhash)
target_link_libraries(aurora_gx PRIVATE absl::btree absl::flat_hash_map)
if (AURORA_NATIVE_MATRIX)
target_compile_definitions(aurora_gx PRIVATE AURORA_NATIVE_MATRIX)
endif ()
if (EMSCRIPTEN)
target_link_options(aurora_gx PUBLIC -sUSE_WEBGPU=1 -sASYNCIFY -sEXIT_RUNTIME)
target_compile_definitions(aurora_gx PRIVATE ENABLE_BACKEND_WEBGPU)

View File

@ -35,9 +35,6 @@ struct Vec2 {
constexpr Vec2() = default;
constexpr Vec2(T x, T y) : x(x), y(y) {}
AURORA_VEC2_EXTRA
#ifdef METAFORCE
constexpr Vec2(const zeus::CVector2f& vec) : x(vec.x()), y(vec.y()) {}
#endif
bool operator==(const Vec2& rhs) const { return x == rhs.x && y == rhs.y; }
bool operator!=(const Vec2& rhs) const { return !(*this == rhs); }
@ -51,10 +48,6 @@ struct Vec3 {
constexpr Vec3() = default;
constexpr Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
AURORA_VEC3_EXTRA
#ifdef METAFORCE
constexpr Vec3(const zeus::CVector3f& vec) : x(vec.x()), y(vec.y()), z(vec.z()) {}
operator zeus::CVector3f() const { return {x, y, z}; }
#endif
bool operator==(const Vec3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
bool operator!=(const Vec3& rhs) const { return !(*this == rhs); }
@ -77,10 +70,6 @@ struct Vec4 {
// For Vec3 -> Vec4
constexpr Vec4(Vec3<T> v, T w) : m{v.x, v.y, v.z, w} {}
AURORA_VEC4_EXTRA
#ifdef METAFORCE
constexpr Vec4(const zeus::CVector4f& vec) : x(vec.x()), y(vec.y()), z(vec.z()), w(vec.w()) {}
constexpr Vec4(const zeus::CColor& color) : x(color.r()), y(color.g()), z(color.b()), w(color.a()) {}
#endif
inline Vec4& operator=(const Vec4& other) {
memcpy(&m, &other.m, sizeof(Vt));
@ -119,7 +108,7 @@ struct Vec4 {
bool operator!=(const Vec4& rhs) const { return !(*this == rhs); }
};
template <typename T>
[[nodiscard]] inline Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
[[nodiscard]] Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
#ifdef USE_GCC_VECTOR_EXTENSIONS
return a.m + b.m;
#else
@ -127,7 +116,7 @@ template <typename T>
#endif
}
template <typename T>
[[nodiscard]] inline Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
[[nodiscard]] Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
#ifdef USE_GCC_VECTOR_EXTENSIONS
return a.m * b.m;
#else
@ -170,6 +159,18 @@ struct Mat4x2 {
bool operator!=(const Mat4x2& rhs) const { return !(*this == rhs); }
};
template <typename T>
struct Mat2x4 {
Vec4<T> m0{};
Vec4<T> m1{};
constexpr Mat2x4() = default;
constexpr Mat2x4(const Vec4<T>& m0, const Vec4<T>& m1) : m0(m0), m1(m1) {}
bool operator==(const Mat2x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1; }
bool operator!=(const Mat2x4& rhs) const { return !(*this == rhs); }
};
static_assert(sizeof(Mat2x4<float>) == 32);
template <typename T>
struct Mat4x4;
template <typename T>
struct Mat3x4 {
@ -180,10 +181,13 @@ struct Mat3x4 {
constexpr Mat3x4() = default;
constexpr Mat3x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2) : m0(m0), m1(m1), m2(m2) {}
inline Mat4x4<T> to4x4() const;
inline Mat4x4<T> toTransposed4x4() const;
[[nodiscard]] Mat4x4<T> to4x4() const;
[[nodiscard]] Mat4x4<T> toTransposed4x4() const;
bool operator==(const Mat3x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2; }
bool operator!=(const Mat3x4& rhs) const { return !(*this == rhs); }
};
static_assert(sizeof(Mat3x4<float>) == sizeof(float[3][4]));
static_assert(sizeof(Mat3x4<float>) == 48);
template <typename T>
struct Mat4x4 {
Vec4<T> m0{};
@ -195,10 +199,6 @@ struct Mat4x4 {
constexpr Mat4x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2, const Vec4<T>& m3)
: m0(m0), m1(m1), m2(m2), m3(m3) {}
AURORA_MAT4X4_EXTRA
#ifdef METAFORCE
constexpr Mat4x4(const zeus::CMatrix4f& m) : m0(m[0]), m1(m[1]), m2(m[2]), m3(m[3]) {}
constexpr Mat4x4(const zeus::CTransform& m) : Mat4x4(m.toMatrix4f()) {}
#endif
[[nodiscard]] Mat4x4 transpose() const {
return {
@ -208,23 +208,17 @@ struct Mat4x4 {
{m0[3], m1[3], m2[3], m3[3]},
};
}
inline Mat4x4& operator=(const Mat4x4& other) {
m0 = other.m0;
m1 = other.m1;
m2 = other.m2;
m3 = other.m3;
return *this;
}
Mat4x4& operator=(const Mat4x4& other) = default;
inline Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
inline const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
bool operator==(const Mat4x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2 && m3 == rhs.m3; }
bool operator!=(const Mat4x4& rhs) const { return !(*this == rhs); }
};
static_assert(sizeof(Mat4x4<float>) == sizeof(float[4][4]));
static_assert(sizeof(Mat4x4<float>) == 64);
template <typename T>
[[nodiscard]] inline Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
[[nodiscard]] Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
Mat4x4<T> out;
for (size_t i = 0; i < 4; ++i) {
*(&out.m0 + i) = a.m0 * b[i].template shuffle<0, 0, 0, 0>() + a.m1 * b[i].template shuffle<1, 1, 1, 1>() +
@ -233,28 +227,27 @@ template <typename T>
return out;
}
template <typename T>
[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::to4x4() const {
[[nodiscard]] Mat4x4<T> Mat3x4<T>::to4x4() const {
return {
{m0.m[0], m0.m[1], m0.m[2], 0.f},
{m1.m[0], m1.m[1], m1.m[2], 0.f},
{m2.m[0], m2.m[1], m2.m[2], 0.f},
{m0.m[3], m1.m[3], m2.m[3], 1.f},
{m0[0], m0[1], m0[2], 0.f},
{m1[0], m1[1], m1[2], 0.f},
{m2[0], m2[1], m2[2], 0.f},
{m0[3], m1[3], m2[3], 1.f},
};
}
template <typename T>
[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
[[nodiscard]] Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
return Mat4x4<T>{
m0,
m1,
m2,
{0.f, 0.f, 0.f, 1.f},
}
.transpose();
{m0[0], m1[0], m2[0], 0.f},
{m0[1], m1[1], m2[1], 0.f},
{m0[2], m1[2], m2[2], 0.f},
{m0[3], m1[3], m2[3], 1.f},
};
}
constexpr Mat4x4<float> Mat4x4_Identity{
Vec4<float>{1.f, 0.f, 0.f, 0.f},
Vec4<float>{0.f, 1.f, 0.f, 0.f},
Vec4<float>{0.f, 0.f, 1.f, 0.f},
Vec4<float>{0.f, 0.f, 0.f, 1.f},
constexpr Mat4x4 Mat4x4_Identity{
Vec4{1.f, 0.f, 0.f, 0.f},
Vec4{0.f, 1.f, 0.f, 0.f},
Vec4{0.f, 0.f, 1.f, 0.f},
Vec4{0.f, 0.f, 0.f, 1.f},
};
} // namespace aurora
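
A short usage sketch of the two conversions above (assuming Vec4's component-wise constructor): GX matrices are row-major 3x4 with translation in the last column, and toTransposed4x4() now builds the column-major 4x4 directly instead of constructing a row-major 4x4 and transposing it.

// Row-major 3x4 translation by (5, 6, 7):
aurora::Mat3x4<float> m{
    {1.f, 0.f, 0.f, 5.f},
    {0.f, 1.f, 0.f, 6.f},
    {0.f, 0.f, 1.f, 7.f},
};
// Column-major result: the translation lands in the fourth column vector.
aurora::Mat4x4<float> t = m.toTransposed4x4();
// t[3] == aurora::Vec4{5.f, 6.f, 7.f, 1.f}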

View File

@ -68,11 +68,11 @@ void GXTexCoord2s16(s16 s, s16 t);
void GXTexCoord2u8(u8 s, u8 t);
void GXTexCoord2s8(s8 s, s8 t);
void GXTexCoord1f32(f32 s, f32 t);
void GXTexCoord1u16(u16 s, u16 t);
void GXTexCoord1s16(s16 s, s16 t);
void GXTexCoord1u8(u8 s, u8 t);
void GXTexCoord1s8(s8 s, s8 t);
void GXTexCoord1f32(f32 s);
void GXTexCoord1u16(u16 s);
void GXTexCoord1s16(s16 s);
void GXTexCoord1u8(u8 s);
void GXTexCoord1s8(s8 s);
void GXTexCoord1x16(u16 index);
void GXTexCoord1x8(u8 index);

View File

@ -7,7 +7,6 @@ extern "C" {
void GXSetVtxDesc(GXAttr attr, GXAttrType type) { update_gx_state(g_gxState.vtxDesc[attr], type); }
void GXSetVtxDescv(GXVtxDescList* list) {
g_gxState.vtxDesc.fill({});
while (list->attr != GX_VA_NULL) {
update_gx_state(g_gxState.vtxDesc[list->attr], list->type);
++list;
@ -17,8 +16,8 @@ void GXSetVtxDescv(GXVtxDescList* list) {
void GXClearVtxDesc() { g_gxState.vtxDesc.fill({}); }
void GXSetVtxAttrFmt(GXVtxFmt vtxfmt, GXAttr attr, GXCompCnt cnt, GXCompType type, u8 frac) {
CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", static_cast<int>(vtxfmt));
CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", static_cast<int>(attr));
CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", underlying(vtxfmt));
CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", underlying(attr));
auto& fmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
update_gx_state(fmt.cnt, cnt);
update_gx_state(fmt.type, type);
@ -38,7 +37,7 @@ void GXSetArray(GXAttr attr, const void* data, u32 size, u8 stride) {
// TODO move GXBegin, GXEnd here
void GXSetTexCoordGen2(GXTexCoordID dst, GXTexGenType type, GXTexGenSrc src, u32 mtx, GXBool normalize, u32 postMtx) {
CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", static_cast<int>(dst));
CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", underlying(dst));
update_gx_state(g_gxState.tcgs[dst],
{type, src, static_cast<GXTexMtx>(mtx), static_cast<GXPTTexMtx>(postMtx), normalize});
}

View File

@ -20,7 +20,7 @@ void GXGetVtxAttrFmt(GXVtxFmt idx, GXAttr attr, GXCompCnt* compCnt, GXCompType*
// TODO GXGetViewportv
void GXGetProjectionv(f32* p) {
const auto& mtx = g_gxState.origProj;
const auto& mtx = g_gxState.proj;
p[0] = static_cast<float>(g_gxState.projType);
p[1] = mtx.m0[0];
p[3] = mtx.m1[1];

View File

@ -4,15 +4,8 @@ extern "C" {
void GXSetProjection(const void* mtx_, GXProjectionType type) {
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
g_gxState.origProj = mtx;
g_gxState.projType = type;
update_gx_state(g_gxState.proj,
#ifdef AURORA_NATIVE_MATRIX
mtx
#else
mtx.transpose()
#endif
);
update_gx_state(g_gxState.proj, mtx);
}
// TODO GXSetProjectionv
@ -20,13 +13,8 @@ void GXSetProjection(const void* mtx_, GXProjectionType type) {
void GXLoadPosMtxImm(const void* mtx_, u32 id) {
CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
auto& state = g_gxState.pnMtx[id / 3];
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state(state.pos, mtx);
#else
const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state(state.pos, mtx->toTransposed4x4());
#endif
}
// TODO GXLoadPosMtxIndx
@ -34,56 +22,37 @@ void GXLoadPosMtxImm(const void* mtx_, u32 id) {
void GXLoadNrmMtxImm(const void* mtx_, u32 id) {
CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
auto& state = g_gxState.pnMtx[id / 3];
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state(state.nrm, mtx);
#else
const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state(state.nrm, mtx->toTransposed4x4());
#endif
}
// TODO GXLoadNrmMtxImm3x3
// TODO GXLoadNrmMtxIndx3x3
void GXSetCurrentMtx(u32 id) {
CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", id);
update_gx_state(g_gxState.currentPnMtx, id / 3);
}
void GXLoadTexMtxImm(const void* mtx_, u32 id, GXTexMtxType type) {
CHECK((id >= GX_TEXMTX0 && id <= GX_IDENTITY) || (id >= GX_PTTEXMTX0 && id <= GX_PTIDENTITY), "invalid tex mtx {}",
static_cast<int>(id));
id);
if (id >= GX_PTTEXMTX0) {
CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", static_cast<int>(type));
CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", underlying(type));
const auto idx = (id - GX_PTTEXMTX0) / 3;
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx);
#else
const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx.toTransposed4x4());
#endif
update_gx_state(g_gxState.ptTexMtxs[idx], mtx);
} else {
const auto idx = (id - GX_TEXMTX0) / 3;
switch (type) {
case GX_MTX3x4: {
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
#else
const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.toTransposed4x4());
#endif
update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
break;
}
case GX_MTX2x4: {
const auto& mtx = *reinterpret_cast<const aurora::Mat4x2<float>*>(mtx_);
#ifdef AURORA_NATIVE_MATRIX
const auto& mtx = *reinterpret_cast<const aurora::Mat2x4<float>*>(mtx_);
update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
#else
update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.transpose());
#endif
break;
}
}
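
Usage sketch for the loaders above (standard GX calling convention, not new to this commit): matrices arrive as row-major 3x4 float arrays and are now reinterpreted as Mat3x4 directly, with no transpose on either path.

float mtx[3][4] = {
    {1.f, 0.f, 0.f, 10.f}, // rows carry translation in the last column
    {0.f, 1.f, 0.f, 0.f},
    {0.f, 0.f, 1.f, 0.f},
};
GXLoadPosMtxImm(mtx, GX_PNMTX0);
GXLoadNrmMtxImm(mtx, GX_PNMTX0);
GXSetCurrentMtx(GX_PNMTX0);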

View File

@ -1,47 +1,113 @@
#include "gx.hpp"
#include "../../gfx/stream/shader.hpp"
#include "aurora/math.hpp"
#include "../../gfx/model/shader.hpp"
#include "../../gfx/gx_fmt.hpp"
#include <algorithm>
#include <cstring>
#include <optional>
#ifndef NDEBUG
static inline GXAttr next_attr(size_t begin) {
auto iter = std::find_if(g_gxState.vtxDesc.begin() + begin, g_gxState.vtxDesc.end(),
[](const auto type) { return type != GX_NONE; });
if (begin > 0 && iter == g_gxState.vtxDesc.end()) {
// wrap around
iter = std::find_if(g_gxState.vtxDesc.begin(), g_gxState.vtxDesc.end(),
[](const auto type) { return type != GX_NONE; });
}
return GXAttr(iter - g_gxState.vtxDesc.begin());
}
#endif
struct Attribute {
uint32_t offset;
GXAttr attr;
GXAttrType type;
aurora::gfx::gx::VtxAttrFmt fmt;
};
struct SStreamState {
GXPrimitive primitive;
GXVtxFmt vtxFmt;
std::vector<Attribute> attrs;
u16 curAttr = 0;
u16 vertexCount = 0;
u16 vertexStart = 0;
u16 vertexStart;
u16 vertexSize;
aurora::ByteBuffer vertexBuffer;
uint8_t* vertexData = nullptr;
std::vector<u16> indices;
#ifndef NDEBUG
GXAttr nextAttr;
#endif
explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept
: primitive(primitive), vtxFmt(vtxFmt), vertexStart(vertexStart) {
vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize);
explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, std::vector<Attribute> attrs, u16 numVerts,
u16 vertexSize, u16 vertexStart) noexcept
: primitive(primitive), vtxFmt(vtxFmt), attrs(std::move(attrs)), vertexStart(vertexStart), vertexSize(vertexSize) {
vertexBuffer.reserve_extra(static_cast<size_t>(numVerts) * vertexSize);
if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) {
indices.reserve((u32(numVerts) - 3) * 3 + 3);
indices.reserve(((static_cast<u32>(numVerts) - 3) * 3) + 3);
} else if (numVerts > 4 && primitive == GX_QUADS) {
indices.reserve(u32(numVerts) / 4 * 6);
indices.reserve(static_cast<u32>(numVerts) / 4 * 6);
} else {
indices.reserve(numVerts);
}
#ifndef NDEBUG
nextAttr = next_attr(0);
#endif
}
[[maybe_unused]] u8 check_direct(GXAttr attr, GXCompCnt cnt, GXCompType type) noexcept {
const auto& curAttr = attrs[this->curAttr];
ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
ASSERT(curAttr.type == GX_DIRECT, "bad attribute type: GX_DIRECT, expected {}", curAttr.type);
ASSERT(curAttr.fmt.cnt == cnt, "bad attribute count: {}, expected {}", cnt, curAttr.fmt.cnt);
ASSERT(curAttr.fmt.type == type, "bad attribute type: {}, expected {}", type, curAttr.fmt.type);
return curAttr.fmt.frac;
}
void check_indexed(GXAttr attr, GXAttrType type) noexcept {
const auto& curAttr = attrs[this->curAttr];
ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
ASSERT(curAttr.type == type, "bad attribute type: {}, expected {}", type, curAttr.type);
}
template <typename T>
void append(const T& value) noexcept {
append_data(&value, sizeof(value), attrs[curAttr].offset);
next_attribute();
}
private:
void append_data(const void* ptr, size_t size, uint32_t offset) {
if (vertexData == nullptr) {
const auto vertexStart = vertexBuffer.size();
vertexBuffer.append_zeroes(vertexSize);
vertexData = vertexBuffer.data() + vertexStart;
inc_vertex_count();
}
ASSERT(offset + size <= vertexSize, "bad attribute end: {}, expected {}", offset + size, vertexSize);
memcpy(vertexData + offset, ptr, size);
}
void next_attribute() noexcept {
curAttr = curAttr + 1;
if (curAttr >= attrs.size()) {
curAttr = 0;
vertexData = nullptr;
}
}
void inc_vertex_count() noexcept {
auto curVertex = vertexStart + vertexCount;
if (primitive == GX_LINES || primitive == GX_LINESTRIP || primitive == GX_POINTS) {
// Currently unsupported, skip
return;
}
if (primitive == GX_TRIANGLES || primitive == GX_TRIANGLESTRIP || vertexCount < 3) {
// pass
} else if (primitive == GX_TRIANGLEFAN) {
indices.push_back(vertexStart);
indices.push_back(curVertex - 1);
} /*else if (primitive == GX_TRIANGLESTRIP) {
if ((vertexCount & 1) == 0) {
indices.push_back(curVertex - 2);
indices.push_back(curVertex - 1);
} else {
indices.push_back(curVertex - 1);
indices.push_back(curVertex - 2);
}
}*/
else if (primitive == GX_QUADS) {
if ((vertexCount & 3) == 3) {
indices.push_back(curVertex - 3);
indices.push_back(curVertex - 1);
}
}
indices.push_back(curVertex);
++vertexCount;
}
};
@ -51,228 +117,319 @@ static u16 lastVertexStart = 0;
extern "C" {
void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
CHECK(!sStreamState, "Stream began twice!");
uint16_t vertexSize = 0;
uint16_t numDirectAttrs = 0;
uint16_t numIndexedAttrs = 0;
for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
if (type == GX_DIRECT) {
++numDirectAttrs;
if (attr == GX_VA_POS || attr == GX_VA_NRM) {
vertexSize += 12;
} else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
vertexSize += 16;
} else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
vertexSize += 8;
} else UNLIKELY {
FATAL("dont know how to handle attr {}", static_cast<int>(attr));
}
} else
UNLIKELY { FATAL("don't know how to handle attr {}", attr); }
} else if (type == GX_INDEX8 || type == GX_INDEX16) {
vertexSize += 2;
++numIndexedAttrs;
}
attr = GXAttr(attr + 1);
attr = static_cast<GXAttr>(attr + 1);
}
// Pack indexed attributes (one u16 each) at the front of the vertex,
// grouped into 8-byte (4x u16) and 4-byte (2x u16) aligned chunks.
auto [num4xAttr, rem] = std::div(numIndexedAttrs, 4);
u32 num2xAttr = 0;
if (rem > 2) {
++num4xAttr;
} else if (rem > 0) {
++num2xAttr;
}
u32 directStart = num4xAttr * 8 + num2xAttr * 4;
vertexSize += directStart;
u32 indexOffset = 0;
u32 directOffset = directStart;
std::vector<Attribute> attrs;
attrs.reserve(numDirectAttrs + numIndexedAttrs);
const auto& curVtxFmt = g_gxState.vtxFmts[vtxFmt];
for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
if (type == GX_DIRECT) {
u32 attrSize;
if (attr == GX_VA_POS || attr == GX_VA_NRM) {
attrSize = 12;
} else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
attrSize = 16;
} else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
attrSize = 8;
} else
UNLIKELY { FATAL("don't know how to handle attr {}", attr); }
const auto& attrFmt = curVtxFmt.attrs[attr];
attrs.emplace_back(directOffset, attr, type, attrFmt);
directOffset += attrSize;
} else if (type == GX_INDEX8 || type == GX_INDEX16) {
attrs.emplace_back(indexOffset, attr, type);
indexOffset += 2;
}
attr = static_cast<GXAttr>(attr + 1);
}
CHECK(vertexSize > 0, "no vtx attributes enabled?");
sStreamState.emplace(primitive, vtxFmt, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart);
sStreamState.emplace(primitive, vtxFmt, std::move(attrs), nVerts, vertexSize,
/*g_gxState.stateDirty ? 0 : lastVertexStart*/ 0);
}
static inline void check_attr_order(GXAttr attr) noexcept {
#ifndef NDEBUG
CHECK(sStreamState, "Stream not started!");
CHECK(sStreamState->nextAttr == attr, "bad attribute order: {}, expected {}", static_cast<int>(attr),
static_cast<int>(sStreamState->nextAttr));
sStreamState->nextAttr = next_attr(attr + 1);
#endif
}
void GXPosition3f32(float x, float y, float z) {
check_attr_order(GX_VA_POS);
auto& state = *sStreamState;
state.vertexBuffer.append(&x, sizeof(float));
state.vertexBuffer.append(&y, sizeof(float));
state.vertexBuffer.append(&z, sizeof(float));
auto curVertex = state.vertexStart + state.vertexCount;
if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) {
// pass
} else if (state.primitive == GX_TRIANGLEFAN) {
state.indices.push_back(state.vertexStart);
state.indices.push_back(curVertex - 1);
} else if (state.primitive == GX_TRIANGLESTRIP) {
if ((state.vertexCount & 1) == 0) {
state.indices.push_back(curVertex - 2);
state.indices.push_back(curVertex - 1);
} else {
state.indices.push_back(curVertex - 1);
state.indices.push_back(curVertex - 2);
}
} else if (state.primitive == GX_QUADS) {
if ((state.vertexCount & 3) == 3) {
state.indices.push_back(curVertex - 3);
state.indices.push_back(curVertex - 1);
}
}
state.indices.push_back(curVertex);
++state.vertexCount;
void GXPosition3f32(f32 x, f32 y, f32 z) {
sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_F32);
sStreamState->append(aurora::Vec3{x, y, z});
}
void GXPosition3u16(u16 x, u16 y, u16 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
GXPosition3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U16);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXPosition3s16(s16 x, s16 y, s16 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
GXPosition3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S16);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXPosition3u8(u8 x, u8 y, u8 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
GXPosition3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U8);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXPosition3s8(s8 x, s8 y, s8 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
GXPosition3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S8);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXPosition2f32(float x, float y) {
GXPosition3f32(x, y, 0.f);
void GXPosition2f32(f32 x, f32 y) {
sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_F32);
sStreamState->append(aurora::Vec3{x, y, 0.f});
}
void GXPosition2u16(u16 x, u16 y) {
GXPosition3u16(x, y, 0);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U16);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXPosition2s16(s16 x, s16 y) {
GXPosition3s16(x, y, 0);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S16);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXPosition2u8(u8 x, u8 y) {
GXPosition3u8(x, y, 0);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U8);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXPosition2s8(s8 x, s8 y) {
GXPosition3s8(x, y, 0);
const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S8);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXPosition1x16(u16 idx) {
check_attr_order(GX_VA_POS);
// keep aligned
if (sStreamState->vertexBuffer.size() % 4 != 0) {
sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
}
sStreamState->vertexBuffer.append(&idx, 2);
sStreamState->check_indexed(GX_VA_POS, GX_INDEX16);
sStreamState->append<u16>(idx);
}
void GXPosition1x8(u8 idx) {
GXPosition1x16(idx);
sStreamState->check_indexed(GX_VA_POS, GX_INDEX8);
sStreamState->append<u16>(idx);
}
void GXNormal3f32(float x, float y, float z) {
check_attr_order(GX_VA_NRM);
sStreamState->vertexBuffer.append(&x, 4);
sStreamState->vertexBuffer.append(&y, 4);
sStreamState->vertexBuffer.append(&z, 4);
void GXNormal3f32(f32 x, f32 y, f32 z) {
sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_F32);
sStreamState->append(aurora::Vec3{x, y, z});
}
void GXNormal3s16(s16 x, s16 y, s16 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
GXNormal3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S16);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXNormal3s8(s8 x, s8 y, s8 z) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
GXNormal3f32(
static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S8);
sStreamState->append(aurora::Vec3{
static_cast<f32>(x) / static_cast<f32>(1 << frac),
static_cast<f32>(y) / static_cast<f32>(1 << frac),
static_cast<f32>(z) / static_cast<f32>(1 << frac),
});
}
void GXNormal1x16(u16 idx) {
check_attr_order(GX_VA_NRM);
// keep aligned
if (sStreamState->vertexBuffer.size() % 4 != 0) {
sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
}
sStreamState->vertexBuffer.append(&idx, 2);
void GXNormal1x16(u16 index) {
sStreamState->check_indexed(GX_VA_NRM, GX_INDEX16);
sStreamState->append<u16>(index);
}
void GXNormal1x8(u8 idx) {
GXNormal1x16(idx);
void GXNormal1x8(u8 index) {
sStreamState->check_indexed(GX_VA_NRM, GX_INDEX8);
sStreamState->append<u16>(index);
}
void GXColor4f32(float r, float g, float b, float a) {
check_attr_order(GX_VA_CLR0);
sStreamState->vertexBuffer.append(&r, 4);
sStreamState->vertexBuffer.append(&g, 4);
sStreamState->vertexBuffer.append(&b, 4);
sStreamState->vertexBuffer.append(&a, 4);
void GXColor4f32(f32 r, f32 g, f32 b, f32 a) {
sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
sStreamState->append(aurora::Vec4{r, g, b, a});
}
void GXColor4u8(u8 r, u8 g, u8 b, u8 a) {
GXColor4f32(static_cast<float>(r) / 255.f, static_cast<float>(g) / 255.f, static_cast<float>(b) / 255.f,
static_cast<float>(a) / 255.f);
sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
sStreamState->append(aurora::Vec4{
static_cast<f32>(r) / 255.f,
static_cast<f32>(g) / 255.f,
static_cast<f32>(b) / 255.f,
static_cast<f32>(a) / 255.f,
});
}
void GXColor3u8(u8 r, u8 g, u8 b) {
GXColor4u8(r, g, b, 255);
sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB8);
sStreamState->append(aurora::Vec4{
static_cast<f32>(r) / 255.f,
static_cast<f32>(g) / 255.f,
static_cast<f32>(b) / 255.f,
1.f,
});
}
void GXColor1x16(u16 idx) {
check_attr_order(GX_VA_CLR0);
// keep aligned
if (sStreamState->vertexBuffer.size() % 4 != 0) {
sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
}
sStreamState->vertexBuffer.append(&idx, 2);
void GXColor1u32(u32 clr) {
sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
sStreamState->append(aurora::Vec4{
static_cast<f32>((clr >> 24) & 0xff) / 255.f,
static_cast<f32>((clr >> 16) & 0xff) / 255.f,
static_cast<f32>((clr >> 8) & 0xff) / 255.f,
static_cast<f32>(clr & 0xff) / 255.f,
});
}
void GXColor1x8(u8 idx) {
GXColor1x16(idx);
void GXColor1u16(u16 clr) {
sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB565);
sStreamState->append(aurora::Vec4{
static_cast<f32>((clr >> 11) & 0x1f) / 31.f,
static_cast<f32>((clr >> 5) & 0x3f) / 63.f,
static_cast<f32>(clr & 0x1f) / 31.f,
1.f,
});
}
void GXTexCoord2f32(float u, float v) {
check_attr_order(GX_VA_TEX0);
sStreamState->vertexBuffer.append(&u, 4);
sStreamState->vertexBuffer.append(&v, 4);
void GXTexCoord2f32(f32 s, f32 t) {
sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_F32);
sStreamState->append(aurora::Vec2{s, t});
}
void GXTexCoord2u16(u16 s, u16 t) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U16);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
static_cast<f32>(t) / static_cast<f32>(1 << frac),
});
}
void GXTexCoord2s16(s16 s, s16 t) {
const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_TEX0];
GXTexCoord2f32(
static_cast<float>(s) / static_cast<f32>(1 << attrFmt.frac),
static_cast<float>(t) / static_cast<f32>(1 << attrFmt.frac)
);
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S16);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
static_cast<f32>(t) / static_cast<f32>(1 << frac),
});
}
void GXTexCoord1x16(u16 idx) {
check_attr_order(GX_VA_TEX0);
// keep aligned
if (sStreamState->vertexBuffer.size() % 4 != 0) {
sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
}
sStreamState->vertexBuffer.append(&idx, 2);
void GXTexCoord2u8(u8 s, u8 t) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U8);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
static_cast<f32>(t) / static_cast<f32>(1 << frac),
});
}
void GXTexCoord1x8(u8 idx) {
GXTexCoord1x16(idx);
void GXTexCoord2s8(s8 s, s8 t) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S8);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
static_cast<f32>(t) / static_cast<f32>(1 << frac),
});
}
void GXTexCoord1f32(f32 s) {
sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_F32);
sStreamState->append(aurora::Vec2{s, 0.f});
}
void GXTexCoord1u16(u16 s) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U16);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXTexCoord1s16(s16 s) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S16);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXTexCoord1u8(u8 s) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U8);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXTexCoord1s8(s8 s) {
const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S8);
sStreamState->append(aurora::Vec2{
static_cast<f32>(s) / static_cast<f32>(1 << frac),
0.f,
});
}
void GXTexCoord1x16(u16 index) {
sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX16);
sStreamState->append(index);
}
void GXTexCoord1x8(u8 index) {
sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX8);
sStreamState->append(static_cast<u16>(index));
}
void GXEnd() {
@ -282,27 +439,55 @@ void GXEnd() {
}
const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices});
if (g_gxState.stateDirty) {
aurora::gfx::stream::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES);
const auto info = build_shader_info(config.shaderConfig);
const auto pipeline = aurora::gfx::pipeline_ref(config);
aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.uniformRange = build_uniform(info),
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
.bindGroups = build_bind_groups(info, config.shaderConfig, {}),
.dstAlpha = g_gxState.dstAlpha,
});
} else {
aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{
.vertRange = vertRange,
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
});
aurora::gfx::gx::BindGroupRanges ranges{};
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (g_gxState.vtxDesc[i] != GX_INDEX8 && g_gxState.vtxDesc[i] != GX_INDEX16) {
continue;
}
auto& array = g_gxState.arrays[i];
if (array.cachedRange.size > 0) {
// Use the currently cached range
ranges.vaRanges[i] = array.cachedRange;
} else {
// Push array data to storage and cache range
const auto range = aurora::gfx::push_storage(static_cast<const uint8_t*>(array.data), array.size);
ranges.vaRanges[i] = range;
array.cachedRange = range;
}
}
// if (g_gxState.stateDirty) {
aurora::gfx::model::PipelineConfig config{};
GXPrimitive primitive = GX_TRIANGLES;
switch (sStreamState->primitive) {
case GX_TRIANGLESTRIP:
primitive = GX_TRIANGLESTRIP;
break;
default:
break;
}
populate_pipeline_config(config, primitive, sStreamState->vtxFmt);
const auto info = build_shader_info(config.shaderConfig);
const auto bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = aurora::gfx::pipeline_ref(config);
aurora::gfx::push_draw_command(aurora::gfx::model::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.idxRange = indexRange,
.dataRanges = ranges,
.uniformRange = build_uniform(info),
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
.bindGroups = bindGroups,
.dstAlpha = g_gxState.dstAlpha,
});
// } else {
// aurora::gfx::merge_draw_command(aurora::gfx::model::DrawData{
// .vertRange = vertRange,
// .idxRange = indexRange,
// .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
// });
// }
lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount;
sStreamState.reset();
}
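
A minimal end-to-end usage sketch of the rewritten stream path (standard GX immediate-mode API; descriptor and format setup from GXGeometry above). GXBegin computes per-attribute offsets once, each per-vertex call is validated against the declared format via check_direct, and GXEnd submits through the model pipeline:

GXClearVtxDesc();
GXSetVtxDesc(GX_VA_POS, GX_DIRECT);
GXSetVtxDesc(GX_VA_CLR0, GX_DIRECT);
GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_POS, GX_POS_XYZ, GX_F32, 0);
GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0);
GXBegin(GX_TRIANGLES, GX_VTXFMT0, 3);
GXPosition3f32(0.f, 0.f, 0.f);
GXColor4u8(255, 0, 0, 255);
GXPosition3f32(1.f, 0.f, 0.f);
GXColor4u8(0, 255, 0, 255);
GXPosition3f32(0.f, 1.f, 0.f);
GXColor4u8(0, 0, 255, 255);
GXEnd();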

View File

@ -3,7 +3,6 @@
#include "../internal.hpp"
#include "../webgpu/gpu.hpp"
#include "model/shader.hpp"
#include "stream/shader.hpp"
#include "texture.hpp"
#include <condition_variable>
@ -11,7 +10,6 @@
#include <fstream>
#include <mutex>
#include <thread>
#include <variant>
#include <absl/container/flat_hash_map.h>
#include <magic_enum.hpp>
@ -37,13 +35,11 @@ constexpr uint64_t StagingBufferSize =
UniformBufferSize + VertexBufferSize + IndexBufferSize + StorageBufferSize + TextureUploadSize;
struct ShaderState {
stream::State stream;
model::State model;
};
struct ShaderDrawCommand {
ShaderType type;
union {
stream::DrawData stream;
model::DrawData model;
};
};
@ -168,10 +164,9 @@ static u32 g_serializedPipelineCount = 0;
template <typename PipelineConfig>
static void serialize_pipeline_config(ShaderType type, const PipelineConfig& config) {
static_assert(std::has_unique_object_representations_v<PipelineConfig>);
g_serializedPipelines.append(&type, sizeof(type));
const u32 configSize = sizeof(config);
g_serializedPipelines.append(&configSize, sizeof(configSize));
g_serializedPipelines.append(&config, configSize);
g_serializedPipelines.append(type);
g_serializedPipelines.append<u32>(sizeof(config));
g_serializedPipelines.append(config);
++g_serializedPipelineCount;
}
@ -278,33 +273,19 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4<float>
++g_currentRenderPass;
}
template <>
const stream::State& get_state() {
return g_state.stream;
}
template <>
void push_draw_command(stream::DrawData data) {
push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
}
template <>
void merge_draw_command(stream::DrawData data) {
auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> {}",
last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
last.vertRange.size += data.vertRange.size;
last.indexRange.size += data.indexRange.size;
last.indexCount += data.indexCount;
++g_mergedDrawCallCount;
}
template <>
PipelineRef pipeline_ref(stream::PipelineConfig config) {
return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
}
// template <>
// void merge_draw_command(stream::DrawData data) {
// auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
// CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
// last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
// CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} ->
// {}",
// last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
// last.vertRange.size += data.vertRange.size;
// last.indexRange.size += data.indexRange.size;
// last.indexCount += data.indexCount;
// ++g_mergedDrawCallCount;
// }
template <>
void push_draw_command(model::DrawData data) {
@ -378,16 +359,6 @@ void load_pipeline_cache() {
u32 size = *reinterpret_cast<const u32*>(pipelineCache.data() + offset);
offset += sizeof(u32);
switch (type) {
case ShaderType::Stream: {
if (size != sizeof(stream::PipelineConfig)) {
break;
}
const auto config = *reinterpret_cast<const stream::PipelineConfig*>(pipelineCache.data() + offset);
if (config.version != gx::GXPipelineConfigVersion) {
break;
}
find_pipeline(type, config, [=]() { return stream::create_pipeline(g_state.stream, config); }, true);
} break;
case ShaderType::Model: {
if (size != sizeof(model::PipelineConfig)) {
break;
@ -397,9 +368,10 @@ void load_pipeline_cache() {
break;
}
find_pipeline(type, config, [=]() { return model::create_pipeline(g_state.model, config); }, true);
} break;
break;
}
default:
Log.warn("Unknown pipeline type {}", static_cast<int>(type));
Log.warn("Unknown pipeline type {}", underlying(type));
break;
}
offset += size;
@ -459,7 +431,6 @@ void initialize() {
}
map_staging_buffer();
g_state.stream = stream::construct_state();
g_state.model = model::construct_state();
load_pipeline_cache();
@ -581,6 +552,9 @@ void end_frame(const wgpu::CommandEncoder& cmd) {
currentStagingBuffer = (currentStagingBuffer + 1) % g_stagingBuffers.size();
map_staging_buffer();
g_currentRenderPass = UINT32_MAX;
for (auto& array : gx::g_gxState.arrays) {
array.cachedRange = {};
}
if (!g_hasPipelineThread) {
pipeline_worker();
@ -612,7 +586,7 @@ void render(wgpu::CommandEncoder& cmd) {
.view = webgpu::g_depthBuffer.view,
.depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load,
.depthStoreOp = wgpu::StoreOp::Store,
.depthClearValue = 1.f,
.depthClearValue = gx::UseReversedZ ? 0.f : 1.f,
};
const auto label = fmt::format("Render pass {}", i);
const wgpu::RenderPassDescriptor renderPassDescriptor{
@ -680,7 +654,9 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
switch (cmd.type) {
case CommandType::SetViewport: {
const auto& vp = cmd.data.setViewport;
pass.SetViewport(vp.left, vp.top, vp.width, vp.height, vp.znear, vp.zfar);
const float minDepth = gx::UseReversedZ ? 1.f - vp.zfar : vp.znear;
const float maxDepth = gx::UseReversedZ ? 1.f - vp.znear : vp.zfar;
pass.SetViewport(vp.left, vp.top, vp.width, vp.height, minDepth, maxDepth);
} break;
case CommandType::SetScissor: {
const auto& sc = cmd.data.setScissor;
@ -694,9 +670,6 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
case CommandType::Draw: {
const auto& draw = cmd.data.draw;
switch (draw.type) {
case ShaderType::Stream:
stream::render(g_state.stream, draw.stream, pass);
break;
case ShaderType::Model:
model::render(g_state.model, draw.model, pass);
break;
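
A worked example of the reversed-Z viewport remap above: with UseReversedZ, depth d is stored as 1 - d, so a conventional viewport range [znear, zfar] maps to [1 - zfar, 1 - znear], and the compare functions in gx.cpp flip direction to match.

// E.g. znear = 0.5, zfar = 1.0:
const float znear = 0.5f, zfar = 1.0f;
const float minDepth = 1.f - zfar;  // 0.0
const float maxDepth = 1.f - znear; // 0.5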

View File

@ -56,8 +56,7 @@ public:
ByteBuffer() noexcept = default;
explicit ByteBuffer(size_t size) noexcept
: m_data(static_cast<uint8_t*>(calloc(1, size))), m_length(size), m_capacity(size) {}
explicit ByteBuffer(uint8_t* data, size_t size) noexcept
: m_data(data), m_capacity(size), m_owned(false) {}
explicit ByteBuffer(uint8_t* data, size_t size) noexcept : m_data(data), m_capacity(size), m_owned(false) {}
~ByteBuffer() noexcept {
if (m_data != nullptr && m_owned) {
free(m_data);
@ -98,6 +97,11 @@ public:
m_length += size;
}
template <typename T>
void append(const T& obj) {
append(&obj, sizeof(T));
}
void append_zeroes(size_t size) {
resize(m_length + size, true);
m_length += size;
@ -179,8 +183,7 @@ struct TextureRef;
using TextureHandle = std::shared_ptr<TextureRef>;
enum class ShaderType : uint8_t {
Stream,
Model,
Model = 1,
};
void initialize();
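
Usage note for the templated append added above: it forwards to the existing pointer/size overload, which is what lets serialize_pipeline_config and build_uniform pass objects directly. A sketch:

aurora::ByteBuffer buf;
const u32 value = 42;
buf.append(value);  // equivalent to buf.append(&value, sizeof(u32))
buf.append<u16>(7); // explicit element type, as GXVert uses for indices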

View File

@ -7,7 +7,6 @@
#include <absl/container/flat_hash_map.h>
#include <cfloat>
#include <cmath>
using aurora::gfx::gx::g_gxState;
static aurora::Module Log("aurora::gx");
@ -25,7 +24,7 @@ const TextureBind& get_texture(GXTexMapID id) noexcept { return g_gxState.textur
static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) {
switch (fac) {
DEFAULT_FATAL("invalid blend factor {}", static_cast<int>(fac));
DEFAULT_FATAL("invalid blend factor {}", underlying(fac));
case GX_BL_ZERO:
return wgpu::BlendFactor::Zero;
case GX_BL_ONE:
@ -55,21 +54,21 @@ static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) {
static inline wgpu::CompareFunction to_compare_function(GXCompare func) {
switch (func) {
DEFAULT_FATAL("invalid depth fn {}", static_cast<int>(func));
DEFAULT_FATAL("invalid depth fn {}", underlying(func));
case GX_NEVER:
return wgpu::CompareFunction::Never;
case GX_LESS:
return wgpu::CompareFunction::Less;
return UseReversedZ ? wgpu::CompareFunction::Greater : wgpu::CompareFunction::Less;
case GX_EQUAL:
return wgpu::CompareFunction::Equal;
case GX_LEQUAL:
return wgpu::CompareFunction::LessEqual;
return UseReversedZ ? wgpu::CompareFunction::GreaterEqual : wgpu::CompareFunction::LessEqual;
case GX_GREATER:
return wgpu::CompareFunction::Greater;
return UseReversedZ ? wgpu::CompareFunction::Less : wgpu::CompareFunction::Greater;
case GX_NEQUAL:
return wgpu::CompareFunction::NotEqual;
case GX_GEQUAL:
return wgpu::CompareFunction::GreaterEqual;
return UseReversedZ ? wgpu::CompareFunction::LessEqual : wgpu::CompareFunction::GreaterEqual;
case GX_ALWAYS:
return wgpu::CompareFunction::Always;
}
@ -79,7 +78,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
GXLogicOp op, u32 dstAlpha) {
wgpu::BlendComponent colorBlendComponent;
switch (mode) {
DEFAULT_FATAL("unsupported blend mode {}", static_cast<int>(mode));
DEFAULT_FATAL("unsupported blend mode {}", underlying(mode));
case GX_BM_NONE:
colorBlendComponent = {
.operation = wgpu::BlendOperation::Add,
@ -103,7 +102,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
break;
case GX_BM_LOGIC:
switch (op) {
DEFAULT_FATAL("unsupported logic op {}", static_cast<int>(op));
DEFAULT_FATAL("unsupported logic op {}", underlying(op));
case GX_LO_CLEAR:
colorBlendComponent = {
.operation = wgpu::BlendOperation::Add,
@ -160,7 +159,7 @@ static inline wgpu::ColorWriteMask to_write_mask(bool colorUpdate, bool alphaUpd
static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCullMode gx_cullMode) {
wgpu::PrimitiveTopology primitive = wgpu::PrimitiveTopology::TriangleList;
switch (gx_prim) {
DEFAULT_FATAL("unsupported primitive type {}", static_cast<int>(gx_prim));
DEFAULT_FATAL("unsupported primitive type {}", underlying(gx_prim));
case GX_TRIANGLES:
break;
case GX_TRIANGLESTRIP:
@ -169,7 +168,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCul
}
wgpu::CullMode cullMode = wgpu::CullMode::None;
switch (gx_cullMode) {
DEFAULT_FATAL("unsupported cull mode {}", static_cast<int>(gx_cullMode));
DEFAULT_FATAL("unsupported cull mode {}", underlying(gx_cullMode));
case GX_CULL_FRONT:
cullMode = wgpu::CullMode::Front;
break;
@ -193,14 +192,6 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
.format = g_graphicsConfig.depthFormat,
.depthWriteEnabled = config.depthUpdate,
.depthCompare = to_compare_function(config.depthFunc),
.stencilFront =
wgpu::StencilFaceState{
.compare = wgpu::CompareFunction::Always,
},
.stencilBack =
wgpu::StencilFaceState{
.compare = wgpu::CompareFunction::Always,
},
};
const auto blendState =
to_blend_state(config.blendMode, config.blendFacSrc, config.blendFacDst, config.blendOp, config.dstAlpha);
@ -249,25 +240,23 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
return g_device.CreateRenderPipeline(&descriptor);
}
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept {
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept {
const auto& vtxFmt = g_gxState.vtxFmts[fmt];
config.shaderConfig.fogType = g_gxState.fog.type;
config.shaderConfig.vtxAttrs = g_gxState.vtxDesc;
int lastIndexedAttr = -1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
const auto type = g_gxState.vtxDesc[i];
if (type != GX_INDEX8 && type != GX_INDEX16) {
config.shaderConfig.attrMapping[i] = GX_VA_NULL;
config.shaderConfig.attrMapping[i] = {};
continue;
}
const auto& array = g_gxState.arrays[i];
if (lastIndexedAttr >= 0 && array == g_gxState.arrays[lastIndexedAttr]) {
// Map attribute to previous attribute
config.shaderConfig.attrMapping[i] = config.shaderConfig.attrMapping[lastIndexedAttr];
} else {
// Map attribute to its own storage
config.shaderConfig.attrMapping[i] = static_cast<GXAttr>(i);
}
lastIndexedAttr = i;
// Map attribute to its own storage
config.shaderConfig.attrMapping[i] = StorageConfig {
.attr = static_cast<GXAttr>(i),
.cnt = vtxFmt.attrs[i].cnt,
.compType = vtxFmt.attrs[i].type,
.frac = vtxFmt.attrs[i].frac,
};
}
config.shaderConfig.tevSwapTable = g_gxState.tevSwapTable;
for (u8 i = 0; i < g_gxState.numTevStages; ++i) {
@ -328,14 +317,14 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noe
Range build_uniform(const ShaderInfo& info) noexcept {
auto [buf, range] = map_uniform(info.uniformSize);
{
buf.append(&g_gxState.pnMtx[g_gxState.currentPnMtx], 128);
buf.append(&g_gxState.proj, 64);
buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
buf.append(g_gxState.proj);
}
for (int i = 0; i < info.loadsTevReg.size(); ++i) {
if (!info.loadsTevReg.test(i)) {
continue;
}
buf.append(&g_gxState.colorRegs[i], 16);
buf.append(g_gxState.colorRegs[i]);
}
bool lightingEnabled = false;
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@ -352,11 +341,10 @@ Range build_uniform(const ShaderInfo& info) noexcept {
if (lightingEnabled) {
// Lights
static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
buf.append(&g_gxState.lights, 80 * GX::MaxLights);
buf.append(g_gxState.lights);
// Light state for all channels
for (int i = 0; i < 4; ++i) {
u32 lightState = g_gxState.colorChannelState[i].lightMask.to_ulong();
buf.append(&lightState, 4);
buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
}
}
for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@ -366,25 +354,25 @@ Range build_uniform(const ShaderInfo& info) noexcept {
const auto& ccc = g_gxState.colorChannelConfig[i * 2];
const auto& ccs = g_gxState.colorChannelState[i * 2];
if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
buf.append(&ccs.ambColor, 16);
buf.append(ccs.ambColor);
}
if (ccc.matSrc == GX_SRC_REG) {
buf.append(&ccs.matColor, 16);
buf.append(ccs.matColor);
}
const auto& ccca = g_gxState.colorChannelConfig[i * 2 + 1];
const auto& ccsa = g_gxState.colorChannelState[i * 2 + 1];
if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
buf.append(&ccsa.ambColor, 16);
buf.append(ccsa.ambColor);
}
if (ccca.matSrc == GX_SRC_REG) {
buf.append(&ccsa.matColor, 16);
buf.append(ccsa.matColor);
}
}
for (int i = 0; i < info.sampledKColors.size(); ++i) {
if (!info.sampledKColors.test(i)) {
continue;
}
buf.append(&g_gxState.kcolors[i], 16);
buf.append(g_gxState.kcolors[i]);
}
for (int i = 0; i < info.usesTexMtx.size(); ++i) {
if (!info.usesTexMtx.test(i)) {
@ -392,26 +380,16 @@ Range build_uniform(const ShaderInfo& info) noexcept {
}
const auto& state = g_gxState;
switch (info.texMtxTypes[i]) {
DEFAULT_FATAL("unhandled tex mtx type {}", static_cast<int>(info.texMtxTypes[i]));
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
case GX_TG_MTX2x4:
if (std::holds_alternative<Mat4x2<float>>(state.texMtxs[i])) {
buf.append(&std::get<Mat4x2<float>>(state.texMtxs[i]), 32);
} else if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) {
// TODO: SMB hits this?
Mat4x2<float> mtx{
{1.f, 0.f},
{0.f, 1.f},
{0.f, 0.f},
{0.f, 0.f},
};
buf.append(&mtx, 32);
if (std::holds_alternative<Mat2x4<float>>(state.texMtxs[i])) {
buf.append(std::get<Mat2x4<float>>(state.texMtxs[i]));
} else
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
break;
case GX_TG_MTX3x4:
if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) {
const auto& mat = std::get<Mat4x4<float>>(g_gxState.texMtxs[i]);
buf.append(&mat, 64);
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
} else
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
break;
@ -421,18 +399,11 @@ Range build_uniform(const ShaderInfo& info) noexcept {
if (!info.usesPTTexMtx.test(i)) {
continue;
}
buf.append(&g_gxState.ptTexMtxs[i], 64);
buf.append(g_gxState.ptTexMtxs[i]);
}
if (info.usesFog) {
const auto& state = g_gxState.fog;
struct Fog {
Vec4<float> color = state.color;
float a = 0.f;
float b = 0.5f;
float c = 0.f;
float pad = FLT_MAX;
} fog{};
static_assert(sizeof(Fog) == 32);
Fog fog{.color = state.color};
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
const float depthRange = state.farZ - state.nearZ;
const float fogRange = state.endZ - state.startZ;
@ -440,7 +411,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
fog.b = state.farZ / depthRange;
fog.c = state.startZ / fogRange;
}
buf.append(&fog, 32);
buf.append(fog);
}
for (int i = 0; i < info.sampledTextures.size(); ++i) {
if (!info.sampledTextures.test(i)) {
@ -448,7 +419,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
}
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
CHECK(tex, "unbound texture {}", i);
buf.append(&tex.texObj.lodBias, 4);
buf.append(tex.texObj.lodBias);
}
g_gxState.stateDirty = false;
return range;
@ -564,7 +535,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
};
u32 bindIdx = 1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (config.attrMapping[i] == static_cast<GXAttr>(i)) {
if (config.attrMapping[i].attr == static_cast<GXAttr>(i)) {
uniformLayoutEntries[bindIdx] = wgpu::BindGroupLayoutEntry{
.binding = bindIdx,
.visibility = wgpu::ShaderStage::Vertex,
@ -688,7 +659,7 @@ void shutdown() noexcept {
static wgpu::AddressMode wgpu_address_mode(GXTexWrapMode mode) {
switch (mode) {
DEFAULT_FATAL("invalid wrap mode {}", static_cast<int>(mode));
DEFAULT_FATAL("invalid wrap mode {}", underlying(mode));
case GX_CLAMP:
return wgpu::AddressMode::ClampToEdge;
case GX_REPEAT:
@ -735,8 +706,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
.magFilter = wgpu::FilterMode::Nearest,
.minFilter = wgpu::FilterMode::Nearest,
.mipmapFilter = wgpu::MipmapFilterMode::Nearest,
.lodMinClamp = 0.f,
.lodMaxClamp = 1000.f,
.maxAnisotropy = 1,
};
}
@ -750,8 +719,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
.magFilter = magFilter,
.minFilter = minFilter,
.mipmapFilter = mipFilter,
.lodMinClamp = 0.f,
.lodMaxClamp = 1000.f,
.maxAnisotropy = wgpu_aniso(texObj.maxAniso),
};
}

View File

@ -46,6 +46,11 @@ constexpr float GX_LARGE_NUMBER = -1048576.0f;
#endif
namespace aurora::gfx::gx {
constexpr bool EnableNormalVisualization = false;
constexpr bool EnableDebugPrints = false;
constexpr bool UsePerPixelLighting = true;
constexpr bool UseReversedZ = true;
constexpr u32 MaxTextures = GX_MAX_TEXMAP;
constexpr u32 MaxTluts = 20;
constexpr u32 MaxTevStages = GX_MAX_TEVSTAGE;
@ -144,8 +149,7 @@ struct ColorChannelState {
Vec4<float> ambColor;
GX::LightMask lightMask;
};
// Mat4x4 used instead of Mat4x3 for padding purposes
using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>;
using TexMtxVariant = std::variant<std::monostate, Mat2x4<float>, Mat3x4<float>>;
struct TcgConfig {
GXTexGenType type = GX_TG_MTX2x4;
GXTexGenSrc src = GX_MAX_TEXGENSRC;
@ -213,10 +217,10 @@ struct VtxFmt {
std::array<VtxAttrFmt, MaxVtxAttr> attrs;
};
struct PnMtx {
Mat4x4<float> pos;
Mat4x4<float> nrm;
Mat3x4<float> pos;
Mat3x4<float> nrm;
};
static_assert(sizeof(PnMtx) == sizeof(Mat4x4<float>) * 2);
static_assert(sizeof(PnMtx) == sizeof(Mat3x4<float>) * 2);
struct Light {
Vec4<float> pos{0.f, 0.f, 0.f};
Vec4<float> dir{0.f, 0.f, 0.f};
@ -230,6 +234,14 @@ struct Light {
bool operator!=(const Light& rhs) const { return !(*this == rhs); }
};
static_assert(sizeof(Light) == 80);
struct Fog {
Vec4<float> color;
float a = 0.f;
float b = 0.5f;
float c = 0.f;
float pad = FLT_MAX;
};
static_assert(sizeof(Fog) == 32);
struct AttrArray {
const void* data;
u32 size;
@ -245,7 +257,6 @@ struct GXState {
std::array<PnMtx, MaxPnMtx> pnMtx;
u32 currentPnMtx;
Mat4x4<float> proj;
Mat4x4<float> origProj; // for GXGetProjectionv
GXProjectionType projType; // for GXGetProjectionv
FogState fog;
GXCullMode cullMode = GX_CULL_BACK;
@ -266,7 +277,7 @@ struct GXState {
std::array<TextureBind, MaxTextures> textures;
std::array<GXTlutObj_, MaxTluts> tluts;
std::array<TexMtxVariant, MaxTexMtx> texMtxs;
std::array<Mat4x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<Mat3x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<TcgConfig, MaxTexCoord> tcgs;
std::array<GXAttrType, MaxVtxAttr> vtxDesc;
std::array<VtxFmt, MaxVtxFmt> vtxFmts;
@ -345,11 +356,18 @@ struct TextureConfig {
bool operator==(const TextureConfig& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; }
};
static_assert(std::has_unique_object_representations_v<TextureConfig>);
struct StorageConfig {
GXAttr attr = GX_VA_NULL;
GXCompCnt cnt = static_cast<GXCompCnt>(0xFF);
GXCompType compType = static_cast<GXCompType>(0xFF);
u8 frac = 0;
std::array<u8, 3> pad{};
};
struct ShaderConfig {
GXFogType fogType;
std::array<GXAttrType, MaxVtxAttr> vtxAttrs;
// Mapping for indexed attributes -> storage buffer
std::array<GXAttr, MaxVtxAttr> attrMapping;
std::array<StorageConfig, MaxVtxAttr> attrMapping;
std::array<TevSwap, MaxTevSwap> tevSwapTable;
std::array<TevStage, MaxTevStages> tevStages;
u32 tevStageCount = 0;
@ -363,7 +381,7 @@ struct ShaderConfig {
};
static_assert(std::has_unique_object_representations_v<ShaderConfig>);
constexpr u32 GXPipelineConfigVersion = 4;
constexpr u32 GXPipelineConfigVersion = 5;
struct PipelineConfig {
u32 version = GXPipelineConfigVersion;
ShaderConfig shaderConfig;
@ -405,7 +423,7 @@ struct ShaderInfo {
struct BindGroupRanges {
std::array<Range, GX_VA_MAX_ATTR> vaRanges{};
};
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept;
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept;
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
const char* label) noexcept;
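
attrMapping now carries the full component layout per indexed attribute (see StorageConfig above), so the shader cache key changes whenever the vertex format does; hence the GXPipelineConfigVersion bump from 4 to 5. A sketch of one entry, with illustrative field values:

aurora::gfx::gx::StorageConfig cfg{
    .attr = GX_VA_POS,
    .cnt = GX_POS_XYZ,
    .compType = GX_S16,
    .frac = 8, // 8 fractional bits -> divide by 256
};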

View File

@ -1,3 +1,7 @@
#pragma once
#include "../internal.hpp"
#include <dolphin/gx/GXEnum.h>
#include <fmt/format.h>
#include <string>
@ -25,7 +29,7 @@ inline std::string format_as(const GXTevOp& op) {
case GX_TEV_COMP_RGB8_EQ:
return "GX_TEV_COMP_RGB8_EQ";
default:
return fmt::format("GXTevOp({})", static_cast<int>(op));
return fmt::format("GXTevOp({})", underlying(op));
}
}
@ -64,7 +68,7 @@ inline std::string format_as(const GXTevColorArg& arg) {
case GX_CC_ZERO:
return "GX_CC_ZERO";
default:
return fmt::format("GXTevColorArg({})", static_cast<int>(arg));
return fmt::format("GXTevColorArg({})", underlying(arg));
}
}
@ -87,7 +91,7 @@ inline std::string format_as(const GXTevAlphaArg& arg) {
case GX_CA_ZERO:
return "GX_CA_ZERO";
default:
return fmt::format("GXTevAlphaArg({})", static_cast<int>(arg));
return fmt::format("GXTevAlphaArg({})", underlying(arg));
}
}
@ -118,7 +122,7 @@ inline std::string format_as(const GXTexGenSrc& src) {
case GX_TG_TEX7:
return "GX_TG_TEX7";
default:
return fmt::format("GXTexGenSrc({})", static_cast<int>(src));
return fmt::format("GXTexGenSrc({})", underlying(src));
}
}
@ -133,7 +137,7 @@ inline std::string format_as(const GXTexGenType& type) {
case GX_TG_BUMP1:
return "GX_TG_BUMP1";
default:
return fmt::format("GXTexGenType({})", static_cast<int>(type));
return fmt::format("GXTexGenType({})", underlying(type));
}
}
@ -146,7 +150,7 @@ inline std::string format_as(const GXTevBias& bias) {
case GX_TB_SUBHALF:
return "GX_TB_SUBHALF";
default:
return fmt::format("GXTevBias({})", static_cast<int>(bias));
return fmt::format("GXTevBias({})", underlying(bias));
}
}
@ -161,7 +165,7 @@ inline std::string format_as(const GXTevScale& scale) {
case GX_CS_DIVIDE_2:
return "GX_CS_DIVIDE_2";
default:
return fmt::format("GXTevScale({})", static_cast<int>(scale));
return fmt::format("GXTevScale({})", underlying(scale));
}
}
@ -176,7 +180,7 @@ inline std::string format_as(const GXTevRegID& reg) {
case GX_TEVREG2:
return "GX_TEVREG2";
default:
return fmt::format("GXTevRegID({})", static_cast<int>(reg));
return fmt::format("GXTevRegID({})", underlying(reg));
}
}
@ -231,7 +235,7 @@ inline std::string format_as(const GXTevKColorSel& sel) {
case GX_TEV_KCSEL_K3_A:
return "GX_TEV_KCSEL_K3_A";
default:
return fmt::format("GXTevKColorSel({})", static_cast<int>(sel));
return fmt::format("GXTevKColorSel({})", underlying(sel));
}
}
@ -286,7 +290,7 @@ inline std::string format_as(const GXTevKAlphaSel& sel) {
case GX_TEV_KASEL_K3_A:
return "GX_TEV_KASEL_K3_A";
default:
return fmt::format("GXTevKAlphaSel({})", static_cast<int>(sel));
return fmt::format("GXTevKAlphaSel({})", underlying(sel));
}
}
@ -313,7 +317,7 @@ inline std::string format_as(const GXTexMapID& id) {
case GX_TEX_DISABLE:
return "GX_TEX_DISABLE";
default:
return fmt::format("GXTexMapID({})", static_cast<int>(id));
return fmt::format("GXTexMapID({})", underlying(id));
}
}
@ -340,7 +344,7 @@ inline std::string format_as(const GXChannelID& id) {
case GX_COLOR_NULL:
return "GX_COLOR_NULL";
default:
return fmt::format("GXChannelID({})", static_cast<int>(id));
return fmt::format("GXChannelID({})", underlying(id));
}
}
@ -351,7 +355,7 @@ inline std::string format_as(const GXColorSrc& src) {
case GX_SRC_VTX:
return "GX_SRC_VTX";
default:
return fmt::format("GXColorSrc({})", static_cast<int>(src));
return fmt::format("GXColorSrc({})", underlying(src));
}
}
@ -380,7 +384,7 @@ inline std::string format_as(const GXTexMtx& mtx) {
case GX_IDENTITY:
return "GX_IDENTITY";
default:
return fmt::format("GXTexMtx({})", static_cast<int>(mtx));
return fmt::format("GXTexMtx({})", underlying(mtx));
}
}
@ -429,7 +433,7 @@ inline std::string format_as(const GXPTTexMtx& mtx) {
case GX_PTIDENTITY:
return "GX_PTIDENTITY";
default:
return fmt::format("GXPTTexMtx({})", static_cast<int>(mtx));
return fmt::format("GXPTTexMtx({})", underlying(mtx));
}
}
@ -452,7 +456,7 @@ inline std::string format_as(const GXCompare& comp) {
case GX_ALWAYS:
return "GX_ALWAYS";
default:
return fmt::format("GXCompare({})", static_cast<int>(comp));
return fmt::format("GXCompare({})", underlying(comp));
}
}
@ -467,7 +471,7 @@ inline std::string format_as(const GXAlphaOp& op) {
case GX_AOP_XNOR:
return "GX_AOP_XNOR";
default:
return fmt::format("GXAlphaOp({})", static_cast<int>(op));
return fmt::format("GXAlphaOp({})", underlying(op));
}
}
@ -496,7 +500,7 @@ inline std::string format_as(const GXFogType& type) {
case GX_FOG_ORTHO_REVEXP2:
return "GX_FOG_ORTHO_REVEXP2";
default:
return fmt::format("GXFogType({})", static_cast<int>(type));
return fmt::format("GXFogType({})", underlying(type));
}
}
@ -521,6 +525,158 @@ inline std::string format_as(const GXTexCoordID& id) {
case GX_TEXCOORD_NULL:
return "GX_TEXCOORD_NULL";
default:
return fmt::format("GXTexCoordID({})", static_cast<int>(id));
return fmt::format("GXTexCoordID({})", underlying(id));
}
}
inline std::string format_as(const GXPrimitive& prim) {
switch (prim) {
case GX_QUADS:
return "GX_QUADS";
case GX_TRIANGLES:
return "GX_TRIANGLES";
case GX_TRIANGLESTRIP:
return "GX_TRIANGLESTRIP";
case GX_TRIANGLEFAN:
return "GX_TRIANGLEFAN";
case GX_LINES:
return "GX_LINES";
case GX_LINESTRIP:
return "GX_LINESTRIP";
case GX_POINTS:
return "GX_POINTS";
default:
return fmt::format("GXPrimitive({})", underlying(prim));
}
}
inline std::string format_as(const GXAttr& attr) {
switch (attr) {
case GX_VA_PNMTXIDX:
return "GX_VA_PNMTXIDX";
case GX_VA_TEX0MTXIDX:
return "GX_VA_TEX0MTXIDX";
case GX_VA_TEX1MTXIDX:
return "GX_VA_TEX1MTXIDX";
case GX_VA_TEX2MTXIDX:
return "GX_VA_TEX2MTXIDX";
case GX_VA_TEX3MTXIDX:
return "GX_VA_TEX3MTXIDX";
case GX_VA_TEX4MTXIDX:
return "GX_VA_TEX4MTXIDX";
case GX_VA_TEX5MTXIDX:
return "GX_VA_TEX5MTXIDX";
case GX_VA_TEX6MTXIDX:
return "GX_VA_TEX6MTXIDX";
case GX_VA_TEX7MTXIDX:
return "GX_VA_TEX7MTXIDX";
case GX_VA_POS:
return "GX_VA_POS";
case GX_VA_NRM:
return "GX_VA_NRM";
case GX_VA_CLR0:
return "GX_VA_CLR0";
case GX_VA_CLR1:
return "GX_VA_CLR1";
case GX_VA_TEX0:
return "GX_VA_TEX0";
case GX_VA_TEX1:
return "GX_VA_TEX1";
case GX_VA_TEX2:
return "GX_VA_TEX2";
case GX_VA_TEX3:
return "GX_VA_TEX3";
case GX_VA_TEX4:
return "GX_VA_TEX4";
case GX_VA_TEX5:
return "GX_VA_TEX5";
case GX_VA_TEX6:
return "GX_VA_TEX6";
case GX_VA_TEX7:
return "GX_VA_TEX7";
case GX_POS_MTX_ARRAY:
return "GX_POS_MTX_ARRAY";
case GX_NRM_MTX_ARRAY:
return "GX_NRM_MTX_ARRAY";
case GX_TEX_MTX_ARRAY:
return "GX_TEX_MTX_ARRAY";
case GX_LIGHT_ARRAY:
return "GX_LIGHT_ARRAY";
case GX_VA_NBT:
return "GX_VA_NBT";
case GX_VA_NULL:
return "GX_VA_NULL";
default:
return fmt::format("GXAttr({})", underlying(attr));
}
}
inline std::string format_as(const GXCompCnt& cnt) {
switch (cnt) {
case GX_POS_XY:
return "GX_POS_XY|GX_NRM_XYZ|GX_CLR_RGB|GX_TEX_S";
case GX_POS_XYZ:
return "GX_POS_XYZ|GX_NRM_NBT|GX_CLR_RGBA|GX_TEX_ST";
case GX_NRM_NBT3:
return "GX_NRM_NBT3";
default:
return fmt::format("GXCompCnt({})", underlying(cnt));
}
}
inline std::string format_as(const GXCompType& type) {
switch (type) {
case GX_U8:
return "GX_U8|GX_RGB565";
case GX_S8:
return "GX_S8|GX_RGB8";
case GX_U16:
return "GX_U16|GX_RGBX8";
case GX_S16:
return "GX_S16|GX_RGBA4";
case GX_F32:
return "GX_F32|GX_RGBA6";
case GX_RGBA8:
return "GX_RGBA8";
default:
return fmt::format("GXCompType({})", underlying(type));
}
}
inline std::string format_as(const GXAttrType& type) {
switch (type) {
case GX_NONE:
return "GX_NONE";
case GX_DIRECT:
return "GX_DIRECT";
case GX_INDEX8:
return "GX_INDEX8";
case GX_INDEX16:
return "GX_INDEX16";
default:
return fmt::format("GXAttrType({})", underlying(type));
}
}
inline std::string format_as(const GXVtxFmt& fmt) {
switch (fmt) {
case GX_VTXFMT0:
return "GX_VTXFMT0";
case GX_VTXFMT1:
return "GX_VTXFMT1";
case GX_VTXFMT2:
return "GX_VTXFMT2";
case GX_VTXFMT3:
return "GX_VTXFMT3";
case GX_VTXFMT4:
return "GX_VTXFMT4";
case GX_VTXFMT5:
return "GX_VTXFMT5";
case GX_VTXFMT6:
return "GX_VTXFMT6";
case GX_VTXFMT7:
return "GX_VTXFMT7";
default:
return fmt::format("GXVtxFmt({})", underlying(fmt));
}
}
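// Usage sketch, assuming fmt >= 9's ADL-based format_as support: enum
// arguments now print symbolically, e.g.
//   fmt::format("{} {}", GX_VA_POS, GX_INDEX16) == "GX_VA_POS GX_INDEX16"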

File diff suppressed because it is too large

View File

@ -1,60 +1,29 @@
#include "shader.hpp"
#include "../../webgpu/gpu.hpp"
#include "../gx_fmt.hpp"
#include <absl/container/flat_hash_map.h>
namespace aurora::gfx::model {
static Module Log("aurora::gfx::model");
template <typename T>
constexpr T bswap16(T val) noexcept {
static_assert(sizeof(T) == sizeof(u16));
union {
u16 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}
template <typename T>
constexpr T bswap32(T val) noexcept {
static_assert(sizeof(T) == sizeof(u32));
union {
u32 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}
using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>;
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
IndexedAttrs indexedAttrs;
GXVtxFmt fmt;
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs) {}
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {}
};
static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;
static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount,
IndexedAttrs& indexedAttrs) {
using aurora::gfx::gx::g_gxState;
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
@ -66,14 +35,13 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", static_cast<int>(g_gxState.vtxDesc[attr]));
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", static_cast<int>(attr), static_cast<int>(attrFmt.cnt),
static_cast<int>(attrFmt.type));
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
@ -150,12 +118,10 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
u16 index = *ptr;
buf.append(&index, 2);
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
u16 index = bswap16(*reinterpret_cast<const u16*>(ptr));
buf.append(&index, 2);
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
@ -182,7 +148,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const u16*>(ptr)[i]);
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
@ -190,7 +156,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const s16*>(ptr)[i]);
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
@ -198,7 +164,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap32(reinterpret_cast<const f32*>(ptr)[i]);
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
@ -227,7 +193,7 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
@ -235,29 +201,26 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
continue;
}
const std::array<u16, 3> idxs{vtxStart, u16(idx - 1), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
const std::array<u16, 3> idxs{u16(idx - 2), u16(idx - 1), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
const std::array<u16, 3> idxs{u16(idx - 1), u16(idx - 2), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
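// Worked example (sketch): a 5-vertex strip at vtxStart=0 emits (0,1,2),
// then v=3 -> (2,1,3) and v=4 -> (2,3,4); swapping the first two indices on
// odd v preserves winding order in the expanded triangle list.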
@ -271,6 +234,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
Range vertRange, idxRange;
u32 numIndices = 0;
IndexedAttrs indexedAttrs{};
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
@ -278,6 +242,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
indexedAttrs = cache.indexedAttrs;
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
@ -302,8 +267,12 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto fmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
u16 vtxCount = bswap16(*reinterpret_cast<const u16*>(data + pos));
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
@ -319,22 +288,16 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs);
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt);
}
gx::BindGroupRanges ranges{};
int lastIndexedAttr = -1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (!indexedAttrs[i]) {
continue;
}
auto& array = gx::g_gxState.arrays[i];
if (lastIndexedAttr >= 0 && array == gx::g_gxState.arrays[lastIndexedAttr]) {
// Reuse range from last attribute in shader
// Don't set the output range, so it remains unbound
const auto range = gx::g_gxState.arrays[lastIndexedAttr].cachedRange;
array.cachedRange = range;
} else if (array.cachedRange.size > 0) {
if (array.cachedRange.size > 0) {
// Use the currently cached range
ranges.vaRanges[i] = array.cachedRange;
} else {
@ -343,11 +306,10 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
ranges.vaRanges[i] = range;
array.cachedRange = range;
}
lastIndexedAttr = i;
}
model::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES);
populate_pipeline_config(config, GX_TRIANGLES, fmt);
const auto info = gx::build_shader_info(config.shaderConfig);
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config);
@ -366,7 +328,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
State construct_state() { return {}; }
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) {
wgpu::RenderPipeline create_pipeline(const State& state, const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);
@ -385,7 +347,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
// Indexed attributes
for (u32 i = 0; i < num4xAttr; ++i) {
vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x4,
.format = wgpu::VertexFormat::Uint16x4,
.offset = offset,
.shaderLocation = shaderLocation,
};
@ -394,7 +356,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
}
for (u32 i = 0; i < num2xAttr; ++i) {
vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x2,
.format = wgpu::VertexFormat::Uint16x2,
.offset = offset,
.shaderLocation = shaderLocation,
};
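// Rationale (inferred): indices are written as unsigned u16 in
// prepare_vtx_buffer, so Uint16x2/x4 matches the buffer contents, where the
// old Sint16 formats would read indices >= 0x8000 as negative.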

View File

@ -1,82 +0,0 @@
#include "shader.hpp"
#include "../../webgpu/gpu.hpp"
namespace aurora::gfx::stream {
static Module Log("aurora::gfx::stream");
using webgpu::g_device;
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);
std::array<wgpu::VertexAttribute, 4> attributes{};
attributes[0] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = 0,
.shaderLocation = 0,
};
uint64_t offset = 12;
uint32_t shaderLocation = 1;
if (config.shaderConfig.vtxAttrs[GX_VA_NRM] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 12;
shaderLocation++;
}
if (config.shaderConfig.vtxAttrs[GX_VA_CLR0] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x4,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 16;
shaderLocation++;
}
for (int i = GX_VA_TEX0; i < GX_VA_TEX7; ++i) {
if (config.shaderConfig.vtxAttrs[i] != GX_DIRECT) {
continue;
}
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x2,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 8;
shaderLocation++;
}
const std::array vertexBuffers{wgpu::VertexBufferLayout{
.arrayStride = offset,
.attributeCount = shaderLocation,
.attributes = attributes.data(),
}};
return build_pipeline(config, info, vertexBuffers, shader, "Stream Pipeline");
}
State construct_state() { return {}; }
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
if (!bind_pipeline(data.pipeline, pass)) {
return;
}
const std::array offsets{data.uniformRange.offset};
pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
}
pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size);
pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.indexRange.offset, data.indexRange.size);
if (data.dstAlpha != UINT32_MAX) {
const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f};
pass.SetBlendConstant(&color);
}
pass.DrawIndexed(data.indexCount);
}
} // namespace aurora::gfx::stream

View File

@ -1,24 +0,0 @@
#pragma once
#include "../common.hpp"
#include "../gx.hpp"
namespace aurora::gfx::stream {
struct DrawData {
PipelineRef pipeline;
Range vertRange;
Range uniformRange;
Range indexRange;
uint32_t indexCount;
gx::GXBindGroups bindGroups;
u32 dstAlpha;
};
struct PipelineConfig : public gx::PipelineConfig {};
struct State {};
State construct_state();
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config);
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass);
} // namespace aurora::gfx::stream

View File

@ -66,17 +66,6 @@ static size_t ComputeMippedBlockCountDXT1(uint32_t w, uint32_t h, uint32_t mips)
return ret;
}
template <typename T>
constexpr T bswap16(T val) noexcept {
#if __GNUC__
return __builtin_bswap16(val);
#elif _WIN32
return _byteswap_ushort(val);
#else
return (val = (val << 8) | ((val >> 8) & 0xFF));
#endif
}
template <typename T>
concept TextureDecoder = requires(T) {
typename T::Source;
@ -178,15 +167,15 @@ struct TextureDecoderIA4 {
};
struct TextureDecoderIA8 {
using Source = uint8_t;
using Source = uint16_t;
using Target = RGBA8;
static constexpr uint32_t Frac = 1;
static constexpr uint32_t BlockWidth = 8;
static constexpr uint32_t BlockWidth = 4;
static constexpr uint32_t BlockHeight = 4;
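// IA8 texels are 16 bits wide, so a 32-byte GX block holds 4x4 texels (not
// 8x4 as for the 8-bit formats); hence the wider Source and narrower block.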
static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
const uint8_t intensity = texel >> 8;
target[x].r = intensity;
target[x].g = intensity;
@ -228,7 +217,7 @@ struct TextureDecoderRGB565 {
static constexpr uint32_t BlockHeight = 4;
static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
target[x].r = ExpandTo8<5>(texel >> 11 & 0x1f);
target[x].g = ExpandTo8<6>(texel >> 5 & 0x3f);
target[x].b = ExpandTo8<5>(texel & 0x1f);
@ -245,7 +234,7 @@ struct TextureDecoderRGB5A3 {
static constexpr uint32_t BlockHeight = 4;
static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
if ((texel & 0x8000) != 0) {
target[x].r = ExpandTo8<5>(texel >> 10 & 0x1f);
target[x].g = ExpandTo8<5>(texel >> 5 & 0x1f);
@ -322,8 +311,8 @@ static ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mip
for (uint32_t y = 0; y < 2; ++y) {
DXT1Block* target = targetMip + (baseY + y) * w + baseX;
for (size_t x = 0; x < 2; ++x) {
target[x].color1 = bswap16(in[x].color1);
target[x].color2 = bswap16(in[x].color2);
target[x].color1 = bswap(in[x].color1);
target[x].color2 = bswap(in[x].color2);
for (size_t i = 0; i < 4; ++i) {
std::array<uint8_t, 4> ind;
const uint8_t packed = in[x].lines[i];
@ -365,8 +354,8 @@ static ByteBuffer BuildRGBA8FromCMPR(uint32_t width, uint32_t height, uint32_t m
for (uint32_t yb = 0; yb < 8; yb += 4) {
for (uint32_t xb = 0; xb < 8; xb += 4) {
// CMPR difference: Big-endian color1/2
const uint16_t color1 = bswap16(*reinterpret_cast<const uint16_t*>(src));
const uint16_t color2 = bswap16(*reinterpret_cast<const uint16_t*>(src + 2));
const uint16_t color1 = bswap(*reinterpret_cast<const uint16_t*>(src));
const uint16_t color2 = bswap(*reinterpret_cast<const uint16_t*>(src + 2));
src += 4;
// Fill in first two colors in color table.
@ -480,4 +469,4 @@ ByteBuffer convert_tlut(u32 format, uint32_t width, ArrayRef<uint8_t> data) {
return DecodeLinear<TextureDecoderRGB5A3>(width, data);
}
}
} // namespace aurora::gfx
} // namespace aurora::gfx

View File

@ -6,6 +6,8 @@
#include <array>
#include <cassert>
#include <cstdint>
#include <type_traits>
#include <vector>
using namespace std::string_view_literals;
@ -21,6 +23,46 @@ using namespace std::string_view_literals;
#endif
#endif
template <typename T>
requires(sizeof(T) == sizeof(uint16_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint16_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}
template <typename T>
requires(sizeof(T) == sizeof(uint32_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint32_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}
template <typename T>
requires(std::is_enum_v<T>)
auto underlying(T value) -> std::underlying_type_t<T> {
return static_cast<std::underlying_type_t<T>>(value);
}
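// Usage sketch: bswap() dispatches on the argument width, underlying() unwraps
// enums for formatting, e.g. bswap(static_cast<uint16_t>(0x1234)) == 0x3412.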
#ifndef ALIGN
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
#endif
@ -33,11 +75,7 @@ using namespace std::string_view_literals;
#else
#define UNLIKELY
#endif
#define FATAL(msg, ...) \
{ \
Log.fatal(msg, ##__VA_ARGS__); \
unreachable(); \
}
#define FATAL(msg, ...) Log.fatal(msg, ##__VA_ARGS__)
#define ASSERT(cond, msg, ...) \
if (!(cond)) \
UNLIKELY FATAL(msg, ##__VA_ARGS__)

View File

@ -4,15 +4,9 @@
#include <fmt/base.h>
#include <fmt/format.h>
#include <string_view>
#ifdef __GNUC__
[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
#elif defined(_MSC_VER)
[[noreturn]] __forceinline void unreachable() { __assume(false); }
#else
#error Unknown compiler
#endif
#include <cstdlib>
#include <string_view>
namespace aurora {
void log_internal(AuroraLogLevel level, const char* module, const char* message, unsigned int len) noexcept;
@ -50,7 +44,7 @@ struct Module {
template <typename... T>
[[noreturn]] void fatal(fmt::format_string<T...> fmt, T&&... args) noexcept {
report(LOG_FATAL, fmt, std::forward<T>(args)...);
unreachable();
std::abort();
}
};
} // namespace aurora

View File

@ -385,15 +385,12 @@ bool initialize(AuroraBackend auroraBackend) {
g_adapter.GetLimits(&supportedLimits);
const wgpu::Limits requiredLimits{
// Use "best" supported alignments
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension2D,
.maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension3D,
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension2D,
.maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension3D,
.minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minUniformBufferOffsetAlignment,
@ -401,6 +398,12 @@ bool initialize(AuroraBackend auroraBackend) {
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minStorageBufferOffsetAlignment,
};
Log.info(
"Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n "
"minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}",
requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D,
requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment,
requiredLimits.minStorageBufferOffsetAlignment);
std::vector<wgpu::FeatureName> requiredFeatures;
wgpu::SupportedFeatures supportedFeatures;
g_adapter.GetFeatures(&supportedFeatures);
@ -442,22 +445,20 @@ bool initialize(AuroraBackend auroraBackend) {
});
deviceDescriptor.SetUncapturedErrorCallback(
[](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {
FATAL("WebGPU error {}: {}", static_cast<int>(type), message);
});
deviceDescriptor.SetDeviceLostCallback(
wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
Log.warn("Device lost: {}", message);
});
const auto future = g_adapter.RequestDevice(
&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
FATAL("WebGPU error {}: {}", underlying(type), message);
});
deviceDescriptor.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason,
wgpu::StringView message) { Log.warn("Device lost: {}", message); });
const auto future =
g_adapter.RequestDevice(&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
});
const auto status = g_instance.WaitAny(future, 5000000000);
if (status != wgpu::WaitStatus::Success) {
Log.error("Failed to create device: {}", magic_enum::enum_name(status));