Mirror of https://github.com/encounter/aurora.git (synced 2025-07-05 04:35:55 +00:00)
Rewrite attribute buffer, matrix & stream handling

Array attributes (GXSetArray) are now fetched properly based on the vertex format. Buffers are still assumed to be byte-swapped to little-endian. Stream handling has been completely redone and many issues resolved. Matrix transposes are eliminated; AURORA_NATIVE_MATRIX is no longer necessary and has been removed.
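Note: "fetched based on the vertex format" means an indexed attribute is looked up at data + stride * index and then decoded per the format registered with GXSetVtxAttrFmt. A minimal sketch (helper name hypothetical; data assumed already little-endian, per the message above):

    #include <cstdint>

    // Hypothetical sketch of a GXSetArray lookup: stride-based element fetch.
    // Decoding the element (component count/type/frac) follows the vertex format.
    const void* fetch_element(const void* data, uint8_t stride, uint16_t index) {
      return static_cast<const uint8_t*>(data) + static_cast<size_t>(stride) * index;
    }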
This commit is contained in:
parent 3316ad9a7f
commit a600b0b84c
@@ -3,8 +3,6 @@ project(aurora LANGUAGES C CXX)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 20)
 
-option(AURORA_NATIVE_MATRIX "Assume OpenGL-layout matrices, disables transposing" OFF)
-
 add_subdirectory(extern)
 
 include(cmake/aurora_core.cmake)
@@ -4,7 +4,6 @@ add_library(aurora_gx STATIC
   lib/gfx/gx.cpp
   lib/gfx/gx_shader.cpp
   lib/gfx/texture_convert.cpp
-  lib/gfx/stream/shader.cpp
   lib/gfx/model/shader.cpp
   lib/dolphin/gx/GXBump.cpp
   lib/dolphin/gx/GXCull.cpp
@@ -28,9 +27,6 @@ add_library(aurora::gx ALIAS aurora_gx)
 
 target_link_libraries(aurora_gx PUBLIC aurora::core xxhash)
 target_link_libraries(aurora_gx PRIVATE absl::btree absl::flat_hash_map)
-if (AURORA_NATIVE_MATRIX)
-  target_compile_definitions(aurora_gx PRIVATE AURORA_NATIVE_MATRIX)
-endif ()
 if (EMSCRIPTEN)
   target_link_options(aurora_gx PUBLIC -sUSE_WEBGPU=1 -sASYNCIFY -sEXIT_RUNTIME)
   target_compile_definitions(aurora_gx PRIVATE ENABLE_BACKEND_WEBGPU)
@@ -35,9 +35,6 @@ struct Vec2 {
   constexpr Vec2() = default;
   constexpr Vec2(T x, T y) : x(x), y(y) {}
   AURORA_VEC2_EXTRA
-#ifdef METAFORCE
-  constexpr Vec2(const zeus::CVector2f& vec) : x(vec.x()), y(vec.y()) {}
-#endif
 
   bool operator==(const Vec2& rhs) const { return x == rhs.x && y == rhs.y; }
   bool operator!=(const Vec2& rhs) const { return !(*this == rhs); }
@@ -51,10 +48,6 @@ struct Vec3 {
   constexpr Vec3() = default;
   constexpr Vec3(T x, T y, T z) : x(x), y(y), z(z) {}
   AURORA_VEC3_EXTRA
-#ifdef METAFORCE
-  constexpr Vec3(const zeus::CVector3f& vec) : x(vec.x()), y(vec.y()), z(vec.z()) {}
-  operator zeus::CVector3f() const { return {x, y, z}; }
-#endif
 
   bool operator==(const Vec3& rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; }
   bool operator!=(const Vec3& rhs) const { return !(*this == rhs); }
@@ -77,10 +70,6 @@ struct Vec4 {
   // For Vec3 -> Vec4
   constexpr Vec4(Vec3<T> v, T w) : m{v.x, v.y, v.z, w} {}
   AURORA_VEC4_EXTRA
-#ifdef METAFORCE
-  constexpr Vec4(const zeus::CVector4f& vec) : x(vec.x()), y(vec.y()), z(vec.z()), w(vec.w()) {}
-  constexpr Vec4(const zeus::CColor& color) : x(color.r()), y(color.g()), z(color.b()), w(color.a()) {}
-#endif
 
   inline Vec4& operator=(const Vec4& other) {
     memcpy(&m, &other.m, sizeof(Vt));
@@ -119,7 +108,7 @@ struct Vec4 {
   bool operator!=(const Vec4& rhs) const { return !(*this == rhs); }
 };
 template <typename T>
-[[nodiscard]] inline Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
+[[nodiscard]] Vec4<T> operator+(const Vec4<T>& a, const Vec4<T>& b) {
 #ifdef USE_GCC_VECTOR_EXTENSIONS
   return a.m + b.m;
 #else
@@ -127,7 +116,7 @@ template <typename T>
 #endif
 }
 template <typename T>
-[[nodiscard]] inline Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
+[[nodiscard]] Vec4<T> operator*(const Vec4<T>& a, const Vec4<T>& b) {
 #ifdef USE_GCC_VECTOR_EXTENSIONS
   return a.m * b.m;
 #else
@@ -170,6 +159,18 @@ struct Mat4x2 {
   bool operator!=(const Mat4x2& rhs) const { return !(*this == rhs); }
 };
+template <typename T>
+struct Mat2x4 {
+  Vec4<T> m0{};
+  Vec4<T> m1{};
+
+  constexpr Mat2x4() = default;
+  constexpr Mat2x4(const Vec4<T>& m0, const Vec4<T>& m1) : m0(m0), m1(m1) {}
+
+  bool operator==(const Mat2x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1; }
+  bool operator!=(const Mat2x4& rhs) const { return !(*this == rhs); }
+};
+static_assert(sizeof(Mat2x4<float>) == 32);
 template <typename T>
 struct Mat4x4;
 template <typename T>
 struct Mat3x4 {
@@ -180,10 +181,13 @@ struct Mat3x4 {
   constexpr Mat3x4() = default;
   constexpr Mat3x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2) : m0(m0), m1(m1), m2(m2) {}
 
-  inline Mat4x4<T> to4x4() const;
-  inline Mat4x4<T> toTransposed4x4() const;
+  [[nodiscard]] Mat4x4<T> to4x4() const;
+  [[nodiscard]] Mat4x4<T> toTransposed4x4() const;
 
   bool operator==(const Mat3x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2; }
   bool operator!=(const Mat3x4& rhs) const { return !(*this == rhs); }
 };
-static_assert(sizeof(Mat3x4<float>) == sizeof(float[3][4]));
+static_assert(sizeof(Mat3x4<float>) == 48);
 template <typename T>
 struct Mat4x4 {
   Vec4<T> m0{};
@@ -195,10 +199,6 @@ struct Mat4x4 {
   constexpr Mat4x4(const Vec4<T>& m0, const Vec4<T>& m1, const Vec4<T>& m2, const Vec4<T>& m3)
   : m0(m0), m1(m1), m2(m2), m3(m3) {}
   AURORA_MAT4X4_EXTRA
-#ifdef METAFORCE
-  constexpr Mat4x4(const zeus::CMatrix4f& m) : m0(m[0]), m1(m[1]), m2(m[2]), m3(m[3]) {}
-  constexpr Mat4x4(const zeus::CTransform& m) : Mat4x4(m.toMatrix4f()) {}
-#endif
 
   [[nodiscard]] Mat4x4 transpose() const {
     return {
@@ -208,23 +208,17 @@ struct Mat4x4 {
         {m0[3], m1[3], m2[3], m3[3]},
     };
   }
-  inline Mat4x4& operator=(const Mat4x4& other) {
-    m0 = other.m0;
-    m1 = other.m1;
-    m2 = other.m2;
-    m3 = other.m3;
-    return *this;
-  }
+  Mat4x4& operator=(const Mat4x4& other) = default;
 
-  inline Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
-  inline const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
+  Vec4<T>& operator[](size_t i) { return *(&m0 + i); }
+  const Vec4<T>& operator[](size_t i) const { return *(&m0 + i); }
 
   bool operator==(const Mat4x4& rhs) const { return m0 == rhs.m0 && m1 == rhs.m1 && m2 == rhs.m2 && m3 == rhs.m3; }
   bool operator!=(const Mat4x4& rhs) const { return !(*this == rhs); }
 };
-static_assert(sizeof(Mat4x4<float>) == sizeof(float[4][4]));
+static_assert(sizeof(Mat4x4<float>) == 64);
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
+[[nodiscard]] Mat4x4<T> operator*(const Mat4x4<T>& a, const Mat4x4<T>& b) {
   Mat4x4<T> out;
   for (size_t i = 0; i < 4; ++i) {
     *(&out.m0 + i) = a.m0 * b[i].template shuffle<0, 0, 0, 0>() + a.m1 * b[i].template shuffle<1, 1, 1, 1>() +
@@ -233,28 +227,27 @@ template <typename T>
   return out;
 }
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::to4x4() const {
+[[nodiscard]] Mat4x4<T> Mat3x4<T>::to4x4() const {
   return {
-      {m0.m[0], m0.m[1], m0.m[2], 0.f},
-      {m1.m[0], m1.m[1], m1.m[2], 0.f},
-      {m2.m[0], m2.m[1], m2.m[2], 0.f},
-      {m0.m[3], m1.m[3], m2.m[3], 1.f},
+      {m0[0], m0[1], m0[2], 0.f},
+      {m1[0], m1[1], m1[2], 0.f},
+      {m2[0], m2[1], m2[2], 0.f},
+      {m0[3], m1[3], m2[3], 1.f},
   };
 }
 template <typename T>
-[[nodiscard]] inline Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
-  return Mat4x4<T>{
-      m0,
-      m1,
-      m2,
-      {0.f, 0.f, 0.f, 1.f},
-  }
-      .transpose();
+[[nodiscard]] Mat4x4<T> Mat3x4<T>::toTransposed4x4() const {
+  return Mat4x4<T>{
+      {m0[0], m1[0], m2[0], 0.f},
+      {m0[1], m1[1], m2[1], 0.f},
+      {m0[2], m1[2], m2[2], 0.f},
+      {m0[3], m1[3], m2[3], 1.f},
+  };
 }
-constexpr Mat4x4<float> Mat4x4_Identity{
-    Vec4<float>{1.f, 0.f, 0.f, 0.f},
-    Vec4<float>{0.f, 1.f, 0.f, 0.f},
-    Vec4<float>{0.f, 0.f, 1.f, 0.f},
-    Vec4<float>{0.f, 0.f, 0.f, 1.f},
+constexpr Mat4x4 Mat4x4_Identity{
+    Vec4{1.f, 0.f, 0.f, 0.f},
+    Vec4{0.f, 1.f, 0.f, 0.f},
+    Vec4{0.f, 0.f, 1.f, 0.f},
+    Vec4{0.f, 0.f, 0.f, 1.f},
 };
 } // namespace aurora
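Note: Mat3x4::toTransposed4x4 now builds the column-major 4x4 directly instead of padding to 4x4 and then transposing. A standalone sketch of the equivalence (not part of the commit):

    #include <cassert>
    #include "aurora/math.hpp"

    // A GX 3x4 matrix is three row vectors with translation in column 3;
    // building the transposed 4x4 element-wise must match pad-then-transpose.
    void check_transposed(const aurora::Mat3x4<float>& m) {
      const auto direct = m.toTransposed4x4();
      const auto padded = aurora::Mat4x4<float>{m.m0, m.m1, m.m2, {0.f, 0.f, 0.f, 1.f}}.transpose();
      assert(direct == padded);
    }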
@@ -68,11 +68,11 @@ void GXTexCoord2s16(s16 s, s16 t);
 void GXTexCoord2u8(u8 s, u8 t);
 void GXTexCoord2s8(s8 s, s8 t);
 
-void GXTexCoord1f32(f32 s, f32 t);
-void GXTexCoord1u16(u16 s, u16 t);
-void GXTexCoord1s16(s16 s, s16 t);
-void GXTexCoord1u8(u8 s, u8 t);
-void GXTexCoord1s8(s8 s, s8 t);
+void GXTexCoord1f32(f32 s);
+void GXTexCoord1u16(u16 s);
+void GXTexCoord1s16(s16 s);
+void GXTexCoord1u8(u8 s);
+void GXTexCoord1s8(s8 s);
 
 void GXTexCoord1x16(u16 index);
 void GXTexCoord1x8(u8 index);
@@ -7,7 +7,6 @@ extern "C" {
 void GXSetVtxDesc(GXAttr attr, GXAttrType type) { update_gx_state(g_gxState.vtxDesc[attr], type); }
 
 void GXSetVtxDescv(GXVtxDescList* list) {
   g_gxState.vtxDesc.fill({});
   while (list->attr != GX_VA_NULL) {
     update_gx_state(g_gxState.vtxDesc[list->attr], list->type);
     ++list;
@@ -17,8 +16,8 @@ void GXSetVtxDescv(GXVtxDescList* list) {
 void GXClearVtxDesc() { g_gxState.vtxDesc.fill({}); }
 
 void GXSetVtxAttrFmt(GXVtxFmt vtxfmt, GXAttr attr, GXCompCnt cnt, GXCompType type, u8 frac) {
-  CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", static_cast<int>(vtxfmt));
-  CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", static_cast<int>(attr));
+  CHECK(vtxfmt >= GX_VTXFMT0 && vtxfmt < GX_MAX_VTXFMT, "invalid vtxfmt {}", underlying(vtxfmt));
+  CHECK(attr >= GX_VA_PNMTXIDX && attr < GX_VA_MAX_ATTR, "invalid attr {}", underlying(attr));
   auto& fmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
   update_gx_state(fmt.cnt, cnt);
   update_gx_state(fmt.type, type);
@@ -38,7 +37,7 @@ void GXSetArray(GXAttr attr, const void* data, u32 size, u8 stride) {
 // TODO move GXBegin, GXEnd here
 
 void GXSetTexCoordGen2(GXTexCoordID dst, GXTexGenType type, GXTexGenSrc src, u32 mtx, GXBool normalize, u32 postMtx) {
-  CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", static_cast<int>(dst));
+  CHECK(dst >= GX_TEXCOORD0 && dst <= GX_TEXCOORD7, "invalid tex coord {}", underlying(dst));
   update_gx_state(g_gxState.tcgs[dst],
                   {type, src, static_cast<GXTexMtx>(mtx), static_cast<GXPTTexMtx>(postMtx), normalize});
 }
@@ -20,7 +20,7 @@ void GXGetVtxAttrFmt(GXVtxFmt idx, GXAttr attr, GXCompCnt* compCnt, GXCompType*
 // TODO GXGetViewportv
 
 void GXGetProjectionv(f32* p) {
-  const auto& mtx = g_gxState.origProj;
+  const auto& mtx = g_gxState.proj;
   p[0] = static_cast<float>(g_gxState.projType);
   p[1] = mtx.m0[0];
   p[3] = mtx.m1[1];
@@ -4,15 +4,8 @@ extern "C" {
 
 void GXSetProjection(const void* mtx_, GXProjectionType type) {
   const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-  g_gxState.origProj = mtx;
   g_gxState.projType = type;
-  update_gx_state(g_gxState.proj,
-#ifdef AURORA_NATIVE_MATRIX
-                  mtx
-#else
-                  mtx.transpose()
-#endif
-  );
+  update_gx_state(g_gxState.proj, mtx);
 }
 
 // TODO GXSetProjectionv
@@ -20,13 +13,8 @@ void GXSetProjection(const void* mtx_, GXProjectionType type) {
 void GXLoadPosMtxImm(const void* mtx_, u32 id) {
   CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
   auto& state = g_gxState.pnMtx[id / 3];
-#ifdef AURORA_NATIVE_MATRIX
-  const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-  update_gx_state(state.pos, mtx);
-#else
-  const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-  update_gx_state(state.pos, mtx->toTransposed4x4());
-#endif
+  const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
+  update_gx_state(state.pos, mtx);
 }
 
 // TODO GXLoadPosMtxIndx
@@ -34,56 +22,37 @@ void GXLoadPosMtxImm(const void* mtx_, u32 id) {
 void GXLoadNrmMtxImm(const void* mtx_, u32 id) {
   CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
   auto& state = g_gxState.pnMtx[id / 3];
-#ifdef AURORA_NATIVE_MATRIX
-  const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-  update_gx_state(state.nrm, mtx);
-#else
-  const auto* mtx = reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-  update_gx_state(state.nrm, mtx->toTransposed4x4());
-#endif
+  const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
+  update_gx_state(state.nrm, mtx);
 }
 
 // TODO GXLoadNrmMtxImm3x3
 // TODO GXLoadNrmMtxIndx3x3
 
 void GXSetCurrentMtx(u32 id) {
-  CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", static_cast<int>(id));
+  CHECK(id >= GX_PNMTX0 && id <= GX_PNMTX9, "invalid pn mtx {}", id);
   update_gx_state(g_gxState.currentPnMtx, id / 3);
 }
 
 void GXLoadTexMtxImm(const void* mtx_, u32 id, GXTexMtxType type) {
   CHECK((id >= GX_TEXMTX0 && id <= GX_IDENTITY) || (id >= GX_PTTEXMTX0 && id <= GX_PTIDENTITY), "invalid tex mtx {}",
-        static_cast<int>(id));
+        id);
   if (id >= GX_PTTEXMTX0) {
-    CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", static_cast<int>(type));
+    CHECK(type == GX_MTX3x4, "invalid pt mtx type {}", underlying(type));
     const auto idx = (id - GX_PTTEXMTX0) / 3;
-#ifdef AURORA_NATIVE_MATRIX
-    const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-    update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx);
-#else
-    const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-    update_gx_state<aurora::Mat4x4<float>>(g_gxState.ptTexMtxs[idx], mtx.toTransposed4x4());
-#endif
+    const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
+    update_gx_state(g_gxState.ptTexMtxs[idx], mtx);
   } else {
     const auto idx = (id - GX_TEXMTX0) / 3;
    switch (type) {
    case GX_MTX3x4: {
-#ifdef AURORA_NATIVE_MATRIX
-      const auto& mtx = *reinterpret_cast<const aurora::Mat4x4<float>*>(mtx_);
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
-#else
-      const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.toTransposed4x4());
-#endif
+      const auto& mtx = *reinterpret_cast<const aurora::Mat3x4<float>*>(mtx_);
+      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
      break;
    }
    case GX_MTX2x4: {
-      const auto& mtx = *reinterpret_cast<const aurora::Mat4x2<float>*>(mtx_);
-#ifdef AURORA_NATIVE_MATRIX
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
-#else
-      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx.transpose());
-#endif
+      const auto& mtx = *reinterpret_cast<const aurora::Mat2x4<float>*>(mtx_);
+      update_gx_state<aurora::gfx::gx::TexMtxVariant>(g_gxState.texMtxs[idx], mtx);
      break;
    }
   }
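Note: with AURORA_NATIVE_MATRIX gone, matrix loads always treat the input as GX's row-major 3x4 layout and store it without transposing. A usage sketch (values illustrative):

    // GX model-view matrices are float[3][4], translation in the last column.
    float mv[3][4] = {
        {1.f, 0.f, 0.f, 10.f},
        {0.f, 1.f, 0.f, 20.f},
        {0.f, 0.f, 1.f, 30.f},
    };
    GXLoadPosMtxImm(mv, GX_PNMTX0);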
@@ -1,47 +1,113 @@
 #include "gx.hpp"
 
-#include "../../gfx/stream/shader.hpp"
-#include "aurora/math.hpp"
+#include "../../gfx/model/shader.hpp"
+#include "../../gfx/gx_fmt.hpp"
 
-#include <algorithm>
+#include <cstring>
 #include <optional>
 
-#ifndef NDEBUG
-static inline GXAttr next_attr(size_t begin) {
-  auto iter = std::find_if(g_gxState.vtxDesc.begin() + begin, g_gxState.vtxDesc.end(),
-                           [](const auto type) { return type != GX_NONE; });
-  if (begin > 0 && iter == g_gxState.vtxDesc.end()) {
-    // wrap around
-    iter = std::find_if(g_gxState.vtxDesc.begin(), g_gxState.vtxDesc.end(),
-                        [](const auto type) { return type != GX_NONE; });
-  }
-  return GXAttr(iter - g_gxState.vtxDesc.begin());
-}
-#endif
+struct Attribute {
+  uint32_t offset;
+  GXAttr attr;
+  GXAttrType type;
+  aurora::gfx::gx::VtxAttrFmt fmt;
+};
 
 struct SStreamState {
   GXPrimitive primitive;
   GXVtxFmt vtxFmt;
+  std::vector<Attribute> attrs;
+  u16 curAttr = 0;
   u16 vertexCount = 0;
-  u16 vertexStart = 0;
+  u16 vertexStart;
+  u16 vertexSize;
   aurora::ByteBuffer vertexBuffer;
+  uint8_t* vertexData = nullptr;
   std::vector<u16> indices;
-#ifndef NDEBUG
-  GXAttr nextAttr;
-#endif
 
-  explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept
-  : primitive(primitive), vtxFmt(vtxFmt), vertexStart(vertexStart) {
-    vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize);
+  explicit SStreamState(GXPrimitive primitive, GXVtxFmt vtxFmt, std::vector<Attribute> attrs, u16 numVerts,
+                        u16 vertexSize, u16 vertexStart) noexcept
+  : primitive(primitive), vtxFmt(vtxFmt), attrs(std::move(attrs)), vertexStart(vertexStart), vertexSize(vertexSize) {
+    vertexBuffer.reserve_extra(static_cast<size_t>(numVerts) * vertexSize);
     if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) {
-      indices.reserve((u32(numVerts) - 3) * 3 + 3);
+      indices.reserve(((static_cast<u32>(numVerts) - 3) * 3) + 3);
     } else if (numVerts > 4 && primitive == GX_QUADS) {
-      indices.reserve(u32(numVerts) / 4 * 6);
+      indices.reserve(static_cast<u32>(numVerts) / 4 * 6);
     } else {
       indices.reserve(numVerts);
     }
-#ifndef NDEBUG
-    nextAttr = next_attr(0);
-#endif
   }
+
+  [[maybe_unused]] u8 check_direct(GXAttr attr, GXCompCnt cnt, GXCompType type) noexcept {
+    const auto& curAttr = attrs[this->curAttr];
+    ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
+    ASSERT(curAttr.type == GX_DIRECT, "bad attribute type: GX_DIRECT, expected {}", curAttr.type);
+    ASSERT(curAttr.fmt.cnt == cnt, "bad attribute count: {}, expected {}", cnt, curAttr.fmt.cnt);
+    ASSERT(curAttr.fmt.type == type, "bad attribute type: {}, expected {}", type, curAttr.fmt.type);
+    return curAttr.fmt.frac;
+  }
+
+  void check_indexed(GXAttr attr, GXAttrType type) noexcept {
+    const auto& curAttr = attrs[this->curAttr];
+    ASSERT(curAttr.attr == attr, "bad attribute order: {}, expected {}", attr, curAttr.attr);
+    ASSERT(curAttr.type == type, "bad attribute type: {}, expected {}", type, curAttr.type);
+  }
+
+  template <typename T>
+  void append(const T& value) noexcept {
+    append_data(&value, sizeof(value), attrs[curAttr].offset);
+    next_attribute();
+  }
+
+private:
+  void append_data(const void* ptr, size_t size, uint32_t offset) {
+    if (vertexData == nullptr) {
+      const auto vertexStart = vertexBuffer.size();
+      vertexBuffer.append_zeroes(vertexSize);
+      vertexData = vertexBuffer.data() + vertexStart;
+      inc_vertex_count();
+    }
+    ASSERT(offset + size <= vertexSize, "bad attribute end: {}, expected {}", offset + size, vertexSize);
+    memcpy(vertexData + offset, ptr, size);
+  }
+
+  void next_attribute() noexcept {
+    curAttr = curAttr + 1;
+    if (curAttr >= attrs.size()) {
+      curAttr = 0;
+      vertexData = nullptr;
+    }
+  }
+
+  void inc_vertex_count() noexcept {
+    auto curVertex = vertexStart + vertexCount;
+    if (primitive == GX_LINES || primitive == GX_LINESTRIP || primitive == GX_POINTS) {
+      // Currently unsupported, skip
+      return;
+    }
+    if (primitive == GX_TRIANGLES || primitive == GX_TRIANGLESTRIP || vertexCount < 3) {
+      // pass
+    } else if (primitive == GX_TRIANGLEFAN) {
+      indices.push_back(vertexStart);
+      indices.push_back(curVertex - 1);
+    } /*else if (primitive == GX_TRIANGLESTRIP) {
+      if ((vertexCount & 1) == 0) {
+        indices.push_back(curVertex - 2);
+        indices.push_back(curVertex - 1);
+      } else {
+        indices.push_back(curVertex - 1);
+        indices.push_back(curVertex - 2);
+      }
+    }*/
+    else if (primitive == GX_QUADS) {
+      if ((vertexCount & 3) == 3) {
+        indices.push_back(curVertex - 3);
+        indices.push_back(curVertex - 1);
+      }
+    }
+    indices.push_back(curVertex);
+    ++vertexCount;
+  }
+};
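Note: inc_vertex_count expands fans and quads into triangle-list indices as each vertex arrives. A standalone sketch of the fan case (not part of the commit):

    #include <cstdint>
    #include <vector>

    // Vertex n >= 3 emits (start, n-1, n): 5 fan vertices -> 0 1 2 0 2 3 0 3 4.
    std::vector<uint16_t> fan_indices(uint16_t start, uint16_t count) {
      std::vector<uint16_t> out;
      for (uint16_t n = 0; n < count; ++n) {
        if (n >= 3) {
          out.push_back(start);          // fan center
          out.push_back(start + n - 1);  // previous rim vertex
        }
        out.push_back(start + n);
      }
      return out;
    }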
@@ -51,228 +117,319 @@ static u16 lastVertexStart = 0;
 extern "C" {
 void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
   CHECK(!sStreamState, "Stream began twice!");
 
   uint16_t vertexSize = 0;
+  uint16_t numDirectAttrs = 0;
+  uint16_t numIndexedAttrs = 0;
   for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
     if (type == GX_DIRECT) {
+      ++numDirectAttrs;
       if (attr == GX_VA_POS || attr == GX_VA_NRM) {
         vertexSize += 12;
       } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
         vertexSize += 16;
       } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
         vertexSize += 8;
-      } else UNLIKELY {
-        FATAL("dont know how to handle attr {}", static_cast<int>(attr));
-      }
+      } else
+        UNLIKELY { FATAL("dont know how to handle attr {}", attr); }
     } else if (type == GX_INDEX8 || type == GX_INDEX16) {
-      vertexSize += 2;
+      ++numIndexedAttrs;
     }
-    attr = GXAttr(attr + 1);
+    attr = static_cast<GXAttr>(attr + 1);
   }
+  auto [num4xAttr, rem] = std::div(numIndexedAttrs, 4);
+  u32 num2xAttr = 0;
+  if (rem > 2) {
+    ++num4xAttr;
+  } else if (rem > 0) {
+    ++num2xAttr;
+  }
+  u32 directStart = num4xAttr * 8 + num2xAttr * 4;
+  vertexSize += directStart;
+
+  u32 indexOffset = 0;
+  u32 directOffset = directStart;
+  std::vector<Attribute> attrs;
+  attrs.reserve(numDirectAttrs + numIndexedAttrs);
+  const auto& curVtxFmt = g_gxState.vtxFmts[vtxFmt];
+  for (GXAttr attr{}; const auto type : g_gxState.vtxDesc) {
+    if (type == GX_DIRECT) {
+      u32 attrSize;
+      if (attr == GX_VA_POS || attr == GX_VA_NRM) {
+        attrSize = 12;
+      } else if (attr == GX_VA_CLR0 || attr == GX_VA_CLR1) {
+        attrSize = 16;
+      } else if (attr >= GX_VA_TEX0 && attr <= GX_VA_TEX7) {
+        attrSize = 8;
+      } else
+        UNLIKELY { FATAL("dont know how to handle attr {}", attr); }
+      const auto& attrFmt = curVtxFmt.attrs[attr];
+      attrs.emplace_back(directOffset, attr, type, attrFmt);
+      directOffset += attrSize;
+    } else if (type == GX_INDEX8 || type == GX_INDEX16) {
+      attrs.emplace_back(indexOffset, attr, type);
+      indexOffset += 2;
+    }
+    attr = static_cast<GXAttr>(attr + 1);
+  }
 
   CHECK(vertexSize > 0, "no vtx attributes enabled?");
-  sStreamState.emplace(primitive, vtxFmt, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart);
+  sStreamState.emplace(primitive, vtxFmt, std::move(attrs), nVerts, vertexSize,
+                       /*g_gxState.stateDirty ? 0 : lastVertexStart*/ 0);
 }
 
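Note: each streamed vertex is laid out with its attribute indices first (2 bytes apiece, grouped so the index block rounds to 4- or 8-byte chunks) and direct data starting at directStart. A worked example, assuming POS and NRM are indexed and CLR0 is direct:

    // numIndexedAttrs = 2 -> one 2x group -> directStart = 4
    //   offset 0: POS index (u16)
    //   offset 2: NRM index (u16)
    //   offset 4: CLR0 as 4 x f32 (16 bytes)
    // vertexSize = 4 + 16 = 20 bytes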
-static inline void check_attr_order(GXAttr attr) noexcept {
-#ifndef NDEBUG
-  CHECK(sStreamState, "Stream not started!");
-  CHECK(sStreamState->nextAttr == attr, "bad attribute order: {}, expected {}", static_cast<int>(attr),
-        static_cast<int>(sStreamState->nextAttr));
-  sStreamState->nextAttr = next_attr(attr + 1);
-#endif
-}
-
-void GXPosition3f32(float x, float y, float z) {
-  check_attr_order(GX_VA_POS);
-  auto& state = *sStreamState;
-  state.vertexBuffer.append(&x, sizeof(float));
-  state.vertexBuffer.append(&y, sizeof(float));
-  state.vertexBuffer.append(&z, sizeof(float));
-  auto curVertex = state.vertexStart + state.vertexCount;
-  if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) {
-    // pass
-  } else if (state.primitive == GX_TRIANGLEFAN) {
-    state.indices.push_back(state.vertexStart);
-    state.indices.push_back(curVertex - 1);
-  } else if (state.primitive == GX_TRIANGLESTRIP) {
-    if ((state.vertexCount & 1) == 0) {
-      state.indices.push_back(curVertex - 2);
-      state.indices.push_back(curVertex - 1);
-    } else {
-      state.indices.push_back(curVertex - 1);
-      state.indices.push_back(curVertex - 2);
-    }
-  } else if (state.primitive == GX_QUADS) {
-    if ((state.vertexCount & 3) == 3) {
-      state.indices.push_back(curVertex - 3);
-      state.indices.push_back(curVertex - 1);
-    }
-  }
-  state.indices.push_back(curVertex);
-  ++state.vertexCount;
+void GXPosition3f32(f32 x, f32 y, f32 z) {
+  sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, z});
 }
 
 void GXPosition3u16(u16 x, u16 y, u16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3s16(s16 x, s16 y, s16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3u8(u8 x, u8 y, u8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_U8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXPosition3s8(s8 x, s8 y, s8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_POS];
-  GXPosition3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XYZ, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXPosition2f32(float x, float y) {
-  GXPosition3f32(x, y, 0.f);
+void GXPosition2f32(f32 x, f32 y) {
+  sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, 0.f});
 }
 
 void GXPosition2u16(u16 x, u16 y) {
-  GXPosition3u16(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2s16(s16 x, s16 y) {
-  GXPosition3s16(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2u8(u8 x, u8 y) {
-  GXPosition3u8(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_U8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition2s8(s8 x, s8 y) {
-  GXPosition3s8(x, y, 0);
+  const auto frac = sStreamState->check_direct(GX_VA_POS, GX_POS_XY, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      0.f,
+  });
 }
 
 void GXPosition1x16(u16 idx) {
-  check_attr_order(GX_VA_POS);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+  sStreamState->check_indexed(GX_VA_POS, GX_INDEX16);
+  sStreamState->append<u16>(idx);
 }
 
 void GXPosition1x8(u8 idx) {
-  GXPosition1x16(idx);
+  sStreamState->check_indexed(GX_VA_POS, GX_INDEX8);
+  sStreamState->append<u16>(idx);
 }
 
-void GXNormal3f32(float x, float y, float z) {
-  check_attr_order(GX_VA_NRM);
-  sStreamState->vertexBuffer.append(&x, 4);
-  sStreamState->vertexBuffer.append(&y, 4);
-  sStreamState->vertexBuffer.append(&z, 4);
+void GXNormal3f32(f32 x, f32 y, f32 z) {
+  sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_F32);
+  sStreamState->append(aurora::Vec3{x, y, z});
 }
 
 void GXNormal3s16(s16 x, s16 y, s16 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
-  GXNormal3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S16);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
 void GXNormal3s8(s8 x, s8 y, s8 z) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_NRM];
-  GXNormal3f32(
-      static_cast<float>(x) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(y) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(z) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_NRM, GX_NRM_XYZ, GX_S8);
+  sStreamState->append(aurora::Vec3{
+      static_cast<f32>(x) / static_cast<f32>(1 << frac),
+      static_cast<f32>(y) / static_cast<f32>(1 << frac),
+      static_cast<f32>(z) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXNormal1x16(u16 idx) {
-  check_attr_order(GX_VA_NRM);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXNormal1x16(u16 index) {
+  sStreamState->check_indexed(GX_VA_NRM, GX_INDEX16);
+  sStreamState->append<u16>(index);
 }
 
-void GXNormal1x8(u8 idx) {
-  GXNormal1x16(idx);
+void GXNormal1x8(u8 index) {
+  sStreamState->check_indexed(GX_VA_NRM, GX_INDEX8);
+  sStreamState->append<u16>(index);
 }
 
-void GXColor4f32(float r, float g, float b, float a) {
-  check_attr_order(GX_VA_CLR0);
-  sStreamState->vertexBuffer.append(&r, 4);
-  sStreamState->vertexBuffer.append(&g, 4);
-  sStreamState->vertexBuffer.append(&b, 4);
-  sStreamState->vertexBuffer.append(&a, 4);
+void GXColor4f32(f32 r, f32 g, f32 b, f32 a) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{r, g, b, a});
 }
 
 void GXColor4u8(u8 r, u8 g, u8 b, u8 a) {
-  GXColor4f32(static_cast<float>(r) / 255.f, static_cast<float>(g) / 255.f, static_cast<float>(b) / 255.f,
-              static_cast<float>(a) / 255.f);
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>(r) / 255.f,
+      static_cast<f32>(g) / 255.f,
+      static_cast<f32>(b) / 255.f,
+      static_cast<f32>(a) / 255.f,
+  });
 }
 
 void GXColor3u8(u8 r, u8 g, u8 b) {
-  GXColor4u8(r, g, b, 255);
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>(r) / 255.f,
+      static_cast<f32>(g) / 255.f,
+      static_cast<f32>(b) / 255.f,
+      1.f,
+  });
 }
 
-void GXColor1x16(u16 idx) {
-  check_attr_order(GX_VA_CLR0);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXColor1u32(u32 clr) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>((clr >> 24) & 0xff) / 255.f,
+      static_cast<f32>((clr >> 16) & 0xff) / 255.f,
+      static_cast<f32>((clr >> 8) & 0xff) / 255.f,
+      static_cast<f32>(clr & 0xff) / 255.f,
+  });
 }
 
-void GXColor1x8(u8 idx) {
-  GXColor1x16(idx);
+void GXColor1u16(u16 clr) {
+  sStreamState->check_direct(GX_VA_CLR0, GX_CLR_RGB, GX_RGB565);
+  sStreamState->append(aurora::Vec4{
+      static_cast<f32>((clr >> 11) & 0x1f) / 31.f,
+      static_cast<f32>((clr >> 5) & 0x3f) / 63.f,
+      static_cast<f32>(clr & 0x1f) / 31.f,
+      1.f,
+  });
 }
 
-void GXTexCoord2f32(float u, float v) {
-  check_attr_order(GX_VA_TEX0);
-  sStreamState->vertexBuffer.append(&u, 4);
-  sStreamState->vertexBuffer.append(&v, 4);
+void GXTexCoord2f32(f32 s, f32 t) {
+  sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_F32);
+  sStreamState->append(aurora::Vec2{s, t});
 }
 
+void GXTexCoord2u16(u16 s, u16 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
+}
+
 void GXTexCoord2s16(s16 s, s16 t) {
-  const auto& attrFmt = g_gxState.vtxFmts[sStreamState->vtxFmt].attrs[GX_VA_TEX0];
-  GXTexCoord2f32(
-      static_cast<float>(s) / static_cast<f32>(1 << attrFmt.frac),
-      static_cast<float>(t) / static_cast<f32>(1 << attrFmt.frac)
-  );
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXTexCoord1x16(u16 idx) {
-  check_attr_order(GX_VA_TEX0);
-  // keep aligned
-  if (sStreamState->vertexBuffer.size() % 4 != 0) {
-    sStreamState->vertexBuffer.append_zeroes(4 - (sStreamState->vertexBuffer.size() % 4));
-  }
-  sStreamState->vertexBuffer.append(&idx, 2);
+void GXTexCoord2u8(u8 s, u8 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_U8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
 }
 
-void GXTexCoord1x8(u8 idx) {
-  GXTexCoord1x16(idx);
+void GXTexCoord2s8(s8 s, s8 t) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_ST, GX_S8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      static_cast<f32>(t) / static_cast<f32>(1 << frac),
+  });
+}
+
+void GXTexCoord1f32(f32 s) {
+  sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_F32);
+  sStreamState->append(aurora::Vec2{s, 0.f});
+}
+
+void GXTexCoord1u16(u16 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1s16(s16 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S16);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1u8(u8 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_U8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1s8(s8 s) {
+  const auto frac = sStreamState->check_direct(GX_VA_TEX0, GX_TEX_S, GX_S8);
+  sStreamState->append(aurora::Vec2{
+      static_cast<f32>(s) / static_cast<f32>(1 << frac),
+      0.f,
+  });
+}
+
+void GXTexCoord1x16(u16 index) {
+  sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX16);
+  sStreamState->append(index);
+}
+
+void GXTexCoord1x8(u8 index) {
+  sStreamState->check_indexed(GX_VA_TEX0, GX_INDEX8);
+  sStreamState->append(static_cast<u16>(index));
 }
 
 void GXEnd() {
@@ -282,27 +439,55 @@ void GXEnd() {
   }
   const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
   const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices});
-  if (g_gxState.stateDirty) {
-    aurora::gfx::stream::PipelineConfig config{};
-    populate_pipeline_config(config, GX_TRIANGLES);
-    const auto info = build_shader_info(config.shaderConfig);
-    const auto pipeline = aurora::gfx::pipeline_ref(config);
-    aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{
-        .pipeline = pipeline,
-        .vertRange = vertRange,
-        .uniformRange = build_uniform(info),
-        .indexRange = indexRange,
-        .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
-        .bindGroups = build_bind_groups(info, config.shaderConfig, {}),
-        .dstAlpha = g_gxState.dstAlpha,
-    });
-  } else {
-    aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{
-        .vertRange = vertRange,
-        .indexRange = indexRange,
-        .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
-    });
-  }
+
+  aurora::gfx::gx::BindGroupRanges ranges{};
+  for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
+    if (g_gxState.vtxDesc[i] != GX_INDEX8 && g_gxState.vtxDesc[i] != GX_INDEX16) {
+      continue;
+    }
+    auto& array = g_gxState.arrays[i];
+    if (array.cachedRange.size > 0) {
+      // Use the currently cached range
+      ranges.vaRanges[i] = array.cachedRange;
+    } else {
+      // Push array data to storage and cache range
+      const auto range = aurora::gfx::push_storage(static_cast<const uint8_t*>(array.data), array.size);
+      ranges.vaRanges[i] = range;
+      array.cachedRange = range;
+    }
+  }
+
+  // if (g_gxState.stateDirty) {
+  aurora::gfx::model::PipelineConfig config{};
+  GXPrimitive primitive = GX_TRIANGLES;
+  switch (sStreamState->primitive) {
+  case GX_TRIANGLESTRIP:
+    primitive = GX_TRIANGLESTRIP;
+    break;
+  default:
+    break;
+  }
+  populate_pipeline_config(config, primitive, sStreamState->vtxFmt);
+  const auto info = build_shader_info(config.shaderConfig);
+  const auto bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, ranges);
+  const auto pipeline = aurora::gfx::pipeline_ref(config);
+  aurora::gfx::push_draw_command(aurora::gfx::model::DrawData{
+      .pipeline = pipeline,
+      .vertRange = vertRange,
+      .idxRange = indexRange,
+      .dataRanges = ranges,
+      .uniformRange = build_uniform(info),
+      .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
+      .bindGroups = bindGroups,
+      .dstAlpha = g_gxState.dstAlpha,
+  });
+  // } else {
+  //   aurora::gfx::merge_draw_command(aurora::gfx::model::DrawData{
+  //       .vertRange = vertRange,
+  //       .idxRange = indexRange,
+  //       .indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
+  //   });
+  // }
   lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount;
   sStreamState.reset();
 }
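Note: GXEnd now uploads each GXSetArray buffer to storage at most once per frame and reuses the cached range afterwards; end_frame resets the cache (see the gfx hunks below). A minimal sketch of the pattern, with a hypothetical upload helper:

    struct Range { uint32_t offset = 0, size = 0; };
    struct ArrayState { const void* data; uint32_t size; Range cachedRange; };

    Range get_or_push(ArrayState& array) {
      if (array.cachedRange.size > 0) {
        return array.cachedRange; // already uploaded this frame
      }
      const Range r = push_storage_bytes(array.data, array.size); // hypothetical helper
      array.cachedRange = r;
      return r;
    }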
@@ -3,7 +3,6 @@
 #include "../internal.hpp"
 #include "../webgpu/gpu.hpp"
 #include "model/shader.hpp"
-#include "stream/shader.hpp"
 #include "texture.hpp"
 
 #include <condition_variable>
@@ -11,7 +10,6 @@
 #include <fstream>
 #include <mutex>
 #include <thread>
 #include <variant>
 
 #include <absl/container/flat_hash_map.h>
 #include <magic_enum.hpp>
@@ -37,13 +35,11 @@ constexpr uint64_t StagingBufferSize =
     UniformBufferSize + VertexBufferSize + IndexBufferSize + StorageBufferSize + TextureUploadSize;
 
 struct ShaderState {
-  stream::State stream;
   model::State model;
 };
 struct ShaderDrawCommand {
   ShaderType type;
   union {
-    stream::DrawData stream;
     model::DrawData model;
   };
 };
@@ -168,10 +164,9 @@ static u32 g_serializedPipelineCount = 0;
 template <typename PipelineConfig>
 static void serialize_pipeline_config(ShaderType type, const PipelineConfig& config) {
   static_assert(std::has_unique_object_representations_v<PipelineConfig>);
-  g_serializedPipelines.append(&type, sizeof(type));
-  const u32 configSize = sizeof(config);
-  g_serializedPipelines.append(&configSize, sizeof(configSize));
-  g_serializedPipelines.append(&config, configSize);
+  g_serializedPipelines.append(type);
+  g_serializedPipelines.append<u32>(sizeof(config));
+  g_serializedPipelines.append(config);
   ++g_serializedPipelineCount;
 }
@@ -278,33 +273,19 @@ void resolve_pass(TextureHandle texture, ClipRect rect, bool clear, Vec4<float>
   ++g_currentRenderPass;
 }
 
-template <>
-const stream::State& get_state() {
-  return g_state.stream;
-}
-
-template <>
-void push_draw_command(stream::DrawData data) {
-  push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
-}
-
-template <>
-void merge_draw_command(stream::DrawData data) {
-  auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
-  CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
-        last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
-  CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} -> {}",
-        last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
-  last.vertRange.size += data.vertRange.size;
-  last.indexRange.size += data.indexRange.size;
-  last.indexCount += data.indexCount;
-  ++g_mergedDrawCallCount;
-}
-
-template <>
-PipelineRef pipeline_ref(stream::PipelineConfig config) {
-  return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
-}
+// template <>
+// void merge_draw_command(stream::DrawData data) {
+//   auto& last = get_last_draw_command(ShaderType::Stream).data.draw.stream;
+//   CHECK(last.vertRange.offset + last.vertRange.size == data.vertRange.offset, "Invalid vertex merge range: {} -> {}",
+//         last.vertRange.offset + last.vertRange.size, data.vertRange.offset);
+//   CHECK(last.indexRange.offset + last.indexRange.size == data.indexRange.offset, "Invalid index merge range: {} ->
+//   {}",
+//         last.indexRange.offset + last.indexRange.size, data.indexRange.offset);
+//   last.vertRange.size += data.vertRange.size;
+//   last.indexRange.size += data.indexRange.size;
+//   last.indexCount += data.indexCount;
+//   ++g_mergedDrawCallCount;
+// }
 
 template <>
 void push_draw_command(model::DrawData data) {
@@ -378,16 +359,6 @@ void load_pipeline_cache() {
     u32 size = *reinterpret_cast<const u32*>(pipelineCache.data() + offset);
     offset += sizeof(u32);
     switch (type) {
-    case ShaderType::Stream: {
-      if (size != sizeof(stream::PipelineConfig)) {
-        break;
-      }
-      const auto config = *reinterpret_cast<const stream::PipelineConfig*>(pipelineCache.data() + offset);
-      if (config.version != gx::GXPipelineConfigVersion) {
-        break;
-      }
-      find_pipeline(type, config, [=]() { return stream::create_pipeline(g_state.stream, config); }, true);
-    } break;
     case ShaderType::Model: {
       if (size != sizeof(model::PipelineConfig)) {
         break;
@@ -397,9 +368,10 @@ void load_pipeline_cache() {
         break;
       }
       find_pipeline(type, config, [=]() { return model::create_pipeline(g_state.model, config); }, true);
-    } break;
+      break;
+    }
     default:
-      Log.warn("Unknown pipeline type {}", static_cast<int>(type));
+      Log.warn("Unknown pipeline type {}", underlying(type));
       break;
     }
     offset += size;
@@ -459,7 +431,6 @@ void initialize() {
   }
   map_staging_buffer();
 
-  g_state.stream = stream::construct_state();
   g_state.model = model::construct_state();
 
   load_pipeline_cache();
@@ -581,6 +552,9 @@ void end_frame(const wgpu::CommandEncoder& cmd) {
   currentStagingBuffer = (currentStagingBuffer + 1) % g_stagingBuffers.size();
   map_staging_buffer();
   g_currentRenderPass = UINT32_MAX;
+  for (auto& array : gx::g_gxState.arrays) {
+    array.cachedRange = {};
+  }
 
   if (!g_hasPipelineThread) {
     pipeline_worker();
@@ -612,7 +586,7 @@ void render(wgpu::CommandEncoder& cmd) {
         .view = webgpu::g_depthBuffer.view,
         .depthLoadOp = passInfo.clear ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load,
         .depthStoreOp = wgpu::StoreOp::Store,
-        .depthClearValue = 1.f,
+        .depthClearValue = gx::UseReversedZ ? 0.f : 1.f,
     };
     const auto label = fmt::format("Render pass {}", i);
     const wgpu::RenderPassDescriptor renderPassDescriptor{
@@ -680,7 +654,9 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
     switch (cmd.type) {
     case CommandType::SetViewport: {
       const auto& vp = cmd.data.setViewport;
-      pass.SetViewport(vp.left, vp.top, vp.width, vp.height, vp.znear, vp.zfar);
+      const float minDepth = gx::UseReversedZ ? 1.f - vp.zfar : vp.znear;
+      const float maxDepth = gx::UseReversedZ ? 1.f - vp.znear : vp.zfar;
+      pass.SetViewport(vp.left, vp.top, vp.width, vp.height, minDepth, maxDepth);
    } break;
    case CommandType::SetScissor: {
      const auto& sc = cmd.data.setScissor;
@@ -694,9 +670,6 @@ void render_pass(const wgpu::RenderPassEncoder& pass, u32 idx) {
    case CommandType::Draw: {
      const auto& draw = cmd.data.draw;
      switch (draw.type) {
-      case ShaderType::Stream:
-        stream::render(g_state.stream, draw.stream, pass);
-        break;
      case ShaderType::Model:
        model::render(g_state.model, draw.model, pass);
        break;
@@ -56,8 +56,7 @@ public:
   ByteBuffer() noexcept = default;
   explicit ByteBuffer(size_t size) noexcept
   : m_data(static_cast<uint8_t*>(calloc(1, size))), m_length(size), m_capacity(size) {}
-  explicit ByteBuffer(uint8_t* data, size_t size) noexcept
-  : m_data(data), m_capacity(size), m_owned(false) {}
+  explicit ByteBuffer(uint8_t* data, size_t size) noexcept : m_data(data), m_capacity(size), m_owned(false) {}
   ~ByteBuffer() noexcept {
     if (m_data != nullptr && m_owned) {
       free(m_data);
@@ -98,6 +97,11 @@ public:
     m_length += size;
   }
 
+  template <typename T>
+  void append(const T& obj) {
+    append(&obj, sizeof(T));
+  }
+
   void append_zeroes(size_t size) {
     resize(m_length + size, true);
     m_length += size;
@@ -179,8 +183,7 @@ struct TextureRef;
 using TextureHandle = std::shared_ptr<TextureRef>;
 
 enum class ShaderType : uint8_t {
-  Stream,
-  Model,
+  Model = 1,
 };
 
 void initialize();
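Note: the typed ByteBuffer::append<T> deduces the byte count from the object, replacing pointer-and-size pairs. Usage sketch:

    aurora::ByteBuffer buf;
    const u32 magic = 0xAABBCCDDu;
    buf.append(magic);             // same bytes as buf.append(&magic, sizeof(u32))
    buf.append<u32>(sizeof(int));  // explicit T for expressions and literals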
lib/gfx/gx.cpp (115 changed lines)
@@ -7,7 +7,6 @@
 
 #include <absl/container/flat_hash_map.h>
 #include <cfloat>
 #include <cmath>
 
 using aurora::gfx::gx::g_gxState;
 static aurora::Module Log("aurora::gx");
@@ -25,7 +24,7 @@ const TextureBind& get_texture(GXTexMapID id) noexcept { return g_gxState.textur
 
 static inline wgpu::BlendFactor to_blend_factor(GXBlendFactor fac, bool isDst) {
   switch (fac) {
-    DEFAULT_FATAL("invalid blend factor {}", static_cast<int>(fac));
+    DEFAULT_FATAL("invalid blend factor {}", underlying(fac));
   case GX_BL_ZERO:
     return wgpu::BlendFactor::Zero;
   case GX_BL_ONE:
|
||||
|
||||
static inline wgpu::CompareFunction to_compare_function(GXCompare func) {
|
||||
switch (func) {
|
||||
DEFAULT_FATAL("invalid depth fn {}", static_cast<int>(func));
|
||||
DEFAULT_FATAL("invalid depth fn {}", underlying(func));
|
||||
case GX_NEVER:
|
||||
return wgpu::CompareFunction::Never;
|
||||
case GX_LESS:
|
||||
return wgpu::CompareFunction::Less;
|
||||
return UseReversedZ ? wgpu::CompareFunction::Greater : wgpu::CompareFunction::Less;
|
||||
case GX_EQUAL:
|
||||
return wgpu::CompareFunction::Equal;
|
||||
case GX_LEQUAL:
|
||||
return wgpu::CompareFunction::LessEqual;
|
||||
return UseReversedZ ? wgpu::CompareFunction::GreaterEqual : wgpu::CompareFunction::LessEqual;
|
||||
case GX_GREATER:
|
||||
return wgpu::CompareFunction::Greater;
|
||||
return UseReversedZ ? wgpu::CompareFunction::Less : wgpu::CompareFunction::Greater;
|
||||
case GX_NEQUAL:
|
||||
return wgpu::CompareFunction::NotEqual;
|
||||
case GX_GEQUAL:
|
||||
return wgpu::CompareFunction::GreaterEqual;
|
||||
return UseReversedZ ? wgpu::CompareFunction::LessEqual : wgpu::CompareFunction::GreaterEqual;
|
||||
case GX_ALWAYS:
|
||||
return wgpu::CompareFunction::Always;
|
||||
}
|
||||
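Note: reversed Z stores depth as 1 - z so float precision concentrates at the far plane, which is why the clear value and every ordered compare flip (see the depthClearValue and SetViewport hunks above). A quick illustration of the precision argument:

    #include <cmath>

    // Representable-value spacing is far finer near 0.0 than near 1.0, so
    // storing far depths near 0 (reversed) quantizes them much less coarsely.
    const float ulpNearOne = std::nextafter(0.999f, 2.f) - 0.999f;
    const float ulpNearZero = std::nextafter(0.001f, 2.f) - 0.001f; // much smaller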
@@ -79,7 +78,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
                                               GXLogicOp op, u32 dstAlpha) {
   wgpu::BlendComponent colorBlendComponent;
   switch (mode) {
-    DEFAULT_FATAL("unsupported blend mode {}", static_cast<int>(mode));
+    DEFAULT_FATAL("unsupported blend mode {}", underlying(mode));
   case GX_BM_NONE:
     colorBlendComponent = {
         .operation = wgpu::BlendOperation::Add,
@@ -103,7 +102,7 @@ static inline wgpu::BlendState to_blend_state(GXBlendMode mode, GXBlendFactor sr
     break;
   case GX_BM_LOGIC:
     switch (op) {
-      DEFAULT_FATAL("unsupported logic op {}", static_cast<int>(op));
+      DEFAULT_FATAL("unsupported logic op {}", underlying(op));
     case GX_LO_CLEAR:
       colorBlendComponent = {
           .operation = wgpu::BlendOperation::Add,
@@ -160,7 +159,7 @@ static inline wgpu::ColorWriteMask to_write_mask(bool colorUpdate, bool alphaUpd
 static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCullMode gx_cullMode) {
   wgpu::PrimitiveTopology primitive = wgpu::PrimitiveTopology::TriangleList;
   switch (gx_prim) {
-    DEFAULT_FATAL("unsupported primitive type {}", static_cast<int>(gx_prim));
+    DEFAULT_FATAL("unsupported primitive type {}", underlying(gx_prim));
   case GX_TRIANGLES:
     break;
   case GX_TRIANGLESTRIP:
@@ -169,7 +168,7 @@ static inline wgpu::PrimitiveState to_primitive_state(GXPrimitive gx_prim, GXCul
   }
   wgpu::CullMode cullMode = wgpu::CullMode::None;
   switch (gx_cullMode) {
-    DEFAULT_FATAL("unsupported cull mode {}", static_cast<int>(gx_cullMode));
+    DEFAULT_FATAL("unsupported cull mode {}", underlying(gx_cullMode));
   case GX_CULL_FRONT:
     cullMode = wgpu::CullMode::Front;
     break;
@@ -193,14 +192,6 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
       .format = g_graphicsConfig.depthFormat,
       .depthWriteEnabled = config.depthUpdate,
       .depthCompare = to_compare_function(config.depthFunc),
-      .stencilFront =
-          wgpu::StencilFaceState{
-              .compare = wgpu::CompareFunction::Always,
-          },
-      .stencilBack =
-          wgpu::StencilFaceState{
-              .compare = wgpu::CompareFunction::Always,
-          },
   };
   const auto blendState =
       to_blend_state(config.blendMode, config.blendFacSrc, config.blendFacDst, config.blendOp, config.dstAlpha);
@@ -249,25 +240,23 @@ wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderIn
   return g_device.CreateRenderPipeline(&descriptor);
 }
 
-void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept {
+void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept {
+  const auto& vtxFmt = g_gxState.vtxFmts[fmt];
   config.shaderConfig.fogType = g_gxState.fog.type;
   config.shaderConfig.vtxAttrs = g_gxState.vtxDesc;
-  int lastIndexedAttr = -1;
   for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
     const auto type = g_gxState.vtxDesc[i];
     if (type != GX_INDEX8 && type != GX_INDEX16) {
-      config.shaderConfig.attrMapping[i] = GX_VA_NULL;
+      config.shaderConfig.attrMapping[i] = {};
       continue;
     }
-    const auto& array = g_gxState.arrays[i];
-    if (lastIndexedAttr >= 0 && array == g_gxState.arrays[lastIndexedAttr]) {
-      // Map attribute to previous attribute
-      config.shaderConfig.attrMapping[i] = config.shaderConfig.attrMapping[lastIndexedAttr];
-    } else {
-      // Map attribute to its own storage
-      config.shaderConfig.attrMapping[i] = static_cast<GXAttr>(i);
-    }
-    lastIndexedAttr = i;
+    // Map attribute to its own storage
+    config.shaderConfig.attrMapping[i] = StorageConfig{
+        .attr = static_cast<GXAttr>(i),
+        .cnt = vtxFmt.attrs[i].cnt,
+        .compType = vtxFmt.attrs[i].type,
+        .frac = vtxFmt.attrs[i].frac,
+    };
   }
   config.shaderConfig.tevSwapTable = g_gxState.tevSwapTable;
   for (u8 i = 0; i < g_gxState.numTevStages; ++i) {
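Note: each indexed attribute's storage binding now carries the full vertex format (count, component type, fraction bits), so the generated shader can decode raw array elements itself instead of relying on pre-converted floats. A hedged CPU-side equivalent of that decode:

    // Sketch: how one component is unpacked given StorageConfig's compType/frac.
    float decode_component(const uint8_t* base, GXCompType type, uint8_t frac, size_t i) {
      switch (type) {
      case GX_F32:
        return reinterpret_cast<const float*>(base)[i];
      case GX_S16:
        return float(reinterpret_cast<const int16_t*>(base)[i]) / float(1 << frac);
      case GX_U8:
        return float(base[i]) / float(1 << frac);
      default:
        return 0.f; // remaining types omitted in this sketch
      }
    }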
@@ -328,14 +317,14 @@ void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noe
 Range build_uniform(const ShaderInfo& info) noexcept {
   auto [buf, range] = map_uniform(info.uniformSize);
   {
-    buf.append(&g_gxState.pnMtx[g_gxState.currentPnMtx], 128);
-    buf.append(&g_gxState.proj, 64);
+    buf.append(g_gxState.pnMtx[g_gxState.currentPnMtx]);
+    buf.append(g_gxState.proj);
   }
   for (int i = 0; i < info.loadsTevReg.size(); ++i) {
     if (!info.loadsTevReg.test(i)) {
       continue;
     }
-    buf.append(&g_gxState.colorRegs[i], 16);
+    buf.append(g_gxState.colorRegs[i]);
   }
   bool lightingEnabled = false;
   for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@@ -352,11 +341,10 @@ Range build_uniform(const ShaderInfo& info) noexcept {
   if (lightingEnabled) {
     // Lights
     static_assert(sizeof(g_gxState.lights) == 80 * GX::MaxLights);
-    buf.append(&g_gxState.lights, 80 * GX::MaxLights);
+    buf.append(g_gxState.lights);
     // Light state for all channels
     for (int i = 0; i < 4; ++i) {
-      u32 lightState = g_gxState.colorChannelState[i].lightMask.to_ulong();
-      buf.append(&lightState, 4);
+      buf.append<u32>(g_gxState.colorChannelState[i].lightMask.to_ulong());
     }
   }
   for (int i = 0; i < info.sampledColorChannels.size(); ++i) {
@@ -366,25 +354,25 @@ Range build_uniform(const ShaderInfo& info) noexcept {
     const auto& ccc = g_gxState.colorChannelConfig[i * 2];
     const auto& ccs = g_gxState.colorChannelState[i * 2];
     if (ccc.lightingEnabled && ccc.ambSrc == GX_SRC_REG) {
-      buf.append(&ccs.ambColor, 16);
+      buf.append(ccs.ambColor);
     }
     if (ccc.matSrc == GX_SRC_REG) {
-      buf.append(&ccs.matColor, 16);
+      buf.append(ccs.matColor);
     }
     const auto& ccca = g_gxState.colorChannelConfig[i * 2 + 1];
     const auto& ccsa = g_gxState.colorChannelState[i * 2 + 1];
     if (ccca.lightingEnabled && ccca.ambSrc == GX_SRC_REG) {
-      buf.append(&ccsa.ambColor, 16);
+      buf.append(ccsa.ambColor);
     }
     if (ccca.matSrc == GX_SRC_REG) {
-      buf.append(&ccsa.matColor, 16);
+      buf.append(ccsa.matColor);
     }
   }
   for (int i = 0; i < info.sampledKColors.size(); ++i) {
     if (!info.sampledKColors.test(i)) {
       continue;
     }
-    buf.append(&g_gxState.kcolors[i], 16);
+    buf.append(g_gxState.kcolors[i]);
   }
   for (int i = 0; i < info.usesTexMtx.size(); ++i) {
     if (!info.usesTexMtx.test(i)) {
@ -392,26 +380,16 @@ Range build_uniform(const ShaderInfo& info) noexcept {
|
||||
}
|
||||
const auto& state = g_gxState;
|
||||
switch (info.texMtxTypes[i]) {
|
||||
DEFAULT_FATAL("unhandled tex mtx type {}", static_cast<int>(info.texMtxTypes[i]));
|
||||
DEFAULT_FATAL("unhandled tex mtx type {}", underlying(info.texMtxTypes[i]));
|
||||
case GX_TG_MTX2x4:
|
||||
if (std::holds_alternative<Mat4x2<float>>(state.texMtxs[i])) {
|
||||
buf.append(&std::get<Mat4x2<float>>(state.texMtxs[i]), 32);
|
||||
} else if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) {
|
||||
// TODO: SMB hits this?
|
||||
Mat4x2<float> mtx{
|
||||
{1.f, 0.f},
|
||||
{0.f, 1.f},
|
||||
{0.f, 0.f},
|
||||
{0.f, 0.f},
|
||||
};
|
||||
buf.append(&mtx, 32);
|
||||
if (std::holds_alternative<Mat2x4<float>>(state.texMtxs[i])) {
|
||||
buf.append(std::get<Mat2x4<float>>(state.texMtxs[i]));
|
||||
} else
|
||||
UNLIKELY FATAL("expected 2x4 mtx in idx {}", i);
|
||||
break;
|
||||
case GX_TG_MTX3x4:
|
||||
if (std::holds_alternative<Mat4x4<float>>(g_gxState.texMtxs[i])) {
|
||||
const auto& mat = std::get<Mat4x4<float>>(g_gxState.texMtxs[i]);
|
||||
buf.append(&mat, 64);
|
||||
if (std::holds_alternative<Mat3x4<float>>(g_gxState.texMtxs[i])) {
|
||||
buf.append(std::get<Mat3x4<float>>(g_gxState.texMtxs[i]));
|
||||
} else
|
||||
UNLIKELY FATAL("expected 3x4 mtx in idx {}", i);
|
||||
break;
|
||||
@ -421,18 +399,11 @@ Range build_uniform(const ShaderInfo& info) noexcept {
|
||||
if (!info.usesPTTexMtx.test(i)) {
|
||||
continue;
|
||||
}
|
||||
buf.append(&g_gxState.ptTexMtxs[i], 64);
|
||||
buf.append(g_gxState.ptTexMtxs[i]);
|
||||
}
|
||||
if (info.usesFog) {
|
||||
const auto& state = g_gxState.fog;
|
||||
struct Fog {
|
||||
Vec4<float> color = state.color;
|
||||
float a = 0.f;
|
||||
float b = 0.5f;
|
||||
float c = 0.f;
|
||||
float pad = FLT_MAX;
|
||||
} fog{};
|
||||
static_assert(sizeof(Fog) == 32);
|
||||
Fog fog{.color = state.color};
|
||||
if (state.nearZ != state.farZ && state.startZ != state.endZ) {
|
||||
const float depthRange = state.farZ - state.nearZ;
|
||||
const float fogRange = state.endZ - state.startZ;
|
||||
@ -440,7 +411,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
|
||||
fog.b = state.farZ / depthRange;
|
||||
fog.c = state.startZ / fogRange;
|
||||
}
|
||||
buf.append(&fog, 32);
|
||||
buf.append(fog);
|
||||
}
|
||||
for (int i = 0; i < info.sampledTextures.size(); ++i) {
|
||||
if (!info.sampledTextures.test(i)) {
|
||||
@ -448,7 +419,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
|
||||
}
|
||||
const auto& tex = get_texture(static_cast<GXTexMapID>(i));
|
||||
CHECK(tex, "unbound texture {}", i);
|
||||
buf.append(&tex.texObj.lodBias, 4);
|
||||
buf.append(tex.texObj.lodBias);
|
||||
}
|
||||
g_gxState.stateDirty = false;
|
||||
return range;
|
||||
@ -564,7 +535,7 @@ GXBindGroupLayouts build_bind_group_layouts(const ShaderInfo& info, const Shader
|
||||
};
|
||||
u32 bindIdx = 1;
|
||||
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
|
||||
if (config.attrMapping[i] == static_cast<GXAttr>(i)) {
|
||||
if (config.attrMapping[i].attr == static_cast<GXAttr>(i)) {
|
||||
uniformLayoutEntries[bindIdx] = wgpu::BindGroupLayoutEntry{
|
||||
.binding = bindIdx,
|
||||
.visibility = wgpu::ShaderStage::Vertex,
|
||||
@ -688,7 +659,7 @@ void shutdown() noexcept {
|
||||
|
||||
static wgpu::AddressMode wgpu_address_mode(GXTexWrapMode mode) {
|
||||
switch (mode) {
|
||||
DEFAULT_FATAL("invalid wrap mode {}", static_cast<int>(mode));
|
||||
DEFAULT_FATAL("invalid wrap mode {}", underlying(mode));
|
||||
case GX_CLAMP:
|
||||
return wgpu::AddressMode::ClampToEdge;
|
||||
case GX_REPEAT:
|
||||
@ -735,8 +706,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
|
||||
.magFilter = wgpu::FilterMode::Nearest,
|
||||
.minFilter = wgpu::FilterMode::Nearest,
|
||||
.mipmapFilter = wgpu::MipmapFilterMode::Nearest,
|
||||
.lodMinClamp = 0.f,
|
||||
.lodMaxClamp = 1000.f,
|
||||
.maxAnisotropy = 1,
|
||||
};
|
||||
}
|
||||
@ -750,8 +719,6 @@ wgpu::SamplerDescriptor TextureBind::get_descriptor() const noexcept {
|
||||
.magFilter = magFilter,
|
||||
.minFilter = minFilter,
|
||||
.mipmapFilter = mipFilter,
|
||||
.lodMinClamp = 0.f,
|
||||
.lodMaxClamp = 1000.f,
|
||||
.maxAnisotropy = wgpu_aniso(texObj.maxAniso),
|
||||
};
|
||||
}
|
||||
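The buf.append calls above migrate from raw (pointer, byte count) pairs to a typed overload, so sizes come from the type system instead of hand-written literals like 128 or 16. A minimal sketch of what such an overload could look like, with a hypothetical stand-in for aurora's ByteBuffer (the real class lives elsewhere in the tree):

#include <cstddef>
#include <type_traits>
#include <vector>

// Simplified stand-in, for illustration only:
struct ByteBuffer {
  std::vector<unsigned char> data;
  // The raw overload the old code called directly:
  void append(const void* ptr, std::size_t size) {
    const auto* p = static_cast<const unsigned char*>(ptr);
    data.insert(data.end(), p, p + size);
  }
  // The typed overload the new calls assume; the size is derived from T:
  template <typename T>
  void append(const T& obj) {
    static_assert(std::is_trivially_copyable_v<T>, "must be safe to copy bytewise");
    append(&obj, sizeof(T));
  }
};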
@ -46,6 +46,11 @@ constexpr float GX_LARGE_NUMBER = -1048576.0f;
#endif

namespace aurora::gfx::gx {
constexpr bool EnableNormalVisualization = false;
constexpr bool EnableDebugPrints = false;
constexpr bool UsePerPixelLighting = true;
constexpr bool UseReversedZ = true;

constexpr u32 MaxTextures = GX_MAX_TEXMAP;
constexpr u32 MaxTluts = 20;
constexpr u32 MaxTevStages = GX_MAX_TEVSTAGE;
@ -144,8 +149,7 @@ struct ColorChannelState {
Vec4<float> ambColor;
GX::LightMask lightMask;
};
// Mat4x4 used instead of Mat4x3 for padding purposes
using TexMtxVariant = std::variant<std::monostate, Mat4x2<float>, Mat4x4<float>>;
using TexMtxVariant = std::variant<std::monostate, Mat2x4<float>, Mat3x4<float>>;
struct TcgConfig {
GXTexGenType type = GX_TG_MTX2x4;
GXTexGenSrc src = GX_MAX_TEXGENSRC;
@ -213,10 +217,10 @@ struct VtxFmt {
std::array<VtxAttrFmt, MaxVtxAttr> attrs;
};
struct PnMtx {
Mat4x4<float> pos;
Mat4x4<float> nrm;
Mat3x4<float> pos;
Mat3x4<float> nrm;
};
static_assert(sizeof(PnMtx) == sizeof(Mat4x4<float>) * 2);
static_assert(sizeof(PnMtx) == sizeof(Mat3x4<float>) * 2);
struct Light {
Vec4<float> pos{0.f, 0.f, 0.f};
Vec4<float> dir{0.f, 0.f, 0.f};
@ -230,6 +234,14 @@ struct Light {
bool operator!=(const Light& rhs) const { return !(*this == rhs); }
};
static_assert(sizeof(Light) == 80);
struct Fog {
Vec4<float> color;
float a = 0.f;
float b = 0.5f;
float c = 0.f;
float pad = FLT_MAX;
};
static_assert(sizeof(Fog) == 32);
struct AttrArray {
const void* data;
u32 size;
@ -245,7 +257,6 @@ struct GXState {
std::array<PnMtx, MaxPnMtx> pnMtx;
u32 currentPnMtx;
Mat4x4<float> proj;
Mat4x4<float> origProj; // for GXGetProjectionv
GXProjectionType projType; // for GXGetProjectionv
FogState fog;
GXCullMode cullMode = GX_CULL_BACK;
@ -266,7 +277,7 @@ struct GXState {
std::array<TextureBind, MaxTextures> textures;
std::array<GXTlutObj_, MaxTluts> tluts;
std::array<TexMtxVariant, MaxTexMtx> texMtxs;
std::array<Mat4x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<Mat3x4<float>, MaxPTTexMtx> ptTexMtxs;
std::array<TcgConfig, MaxTexCoord> tcgs;
std::array<GXAttrType, MaxVtxAttr> vtxDesc;
std::array<VtxFmt, MaxVtxFmt> vtxFmts;
@ -345,11 +356,18 @@ struct TextureConfig {
bool operator==(const TextureConfig& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; }
};
static_assert(std::has_unique_object_representations_v<TextureConfig>);
struct StorageConfig {
GXAttr attr = GX_VA_NULL;
GXCompCnt cnt = static_cast<GXCompCnt>(0xFF);
GXCompType compType = static_cast<GXCompType>(0xFF);
u8 frac = 0;
std::array<u8, 3> pad{};
};
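StorageConfig zero-fills its explicit tail padding and uses 0xFF sentinels for the unset enum fields because the enclosing ShaderConfig (below) must have unique object representations: configs can then be hashed and compared bytewise. A sketch of that assumption (xxhash is already a dependency of aurora_gx; the actual call site is elsewhere and may differ):

#include <xxhash.h>
// Any indeterminate padding byte inside the struct would poison this hash:
XXH64_hash_t hash_config(const ShaderConfig& config) {
  return XXH64(&config, sizeof(config), 0);
}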
struct ShaderConfig {
GXFogType fogType;
std::array<GXAttrType, MaxVtxAttr> vtxAttrs;
// Mapping for indexed attributes -> storage buffer
std::array<GXAttr, MaxVtxAttr> attrMapping;
std::array<StorageConfig, MaxVtxAttr> attrMapping;
std::array<TevSwap, MaxTevSwap> tevSwapTable;
std::array<TevStage, MaxTevStages> tevStages;
u32 tevStageCount = 0;
@ -363,7 +381,7 @@ struct ShaderConfig {
};
static_assert(std::has_unique_object_representations_v<ShaderConfig>);

constexpr u32 GXPipelineConfigVersion = 4;
constexpr u32 GXPipelineConfigVersion = 5;
struct PipelineConfig {
u32 version = GXPipelineConfigVersion;
ShaderConfig shaderConfig;
@ -405,7 +423,7 @@ struct ShaderInfo {
struct BindGroupRanges {
std::array<Range, GX_VA_MAX_ATTR> vaRanges{};
};
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive) noexcept;
void populate_pipeline_config(PipelineConfig& config, GXPrimitive primitive, GXVtxFmt fmt) noexcept;
wgpu::RenderPipeline build_pipeline(const PipelineConfig& config, const ShaderInfo& info,
ArrayRef<wgpu::VertexBufferLayout> vtxBuffers, wgpu::ShaderModule shader,
const char* label) noexcept;
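PnMtx and the post-transform matrices above shrink from Mat4x4 to Mat3x4, and the static_assert pins the packed size so they can be copied into the uniform buffer verbatim. A hedged sketch of the layout this implies (assumption: three Vec4 rows, 48 bytes for float; the real template lives in aurora's math headers):

template <typename T>
struct Mat3x4 {
  Vec4<T> m0, m1, m2; // three rows of four components each
};
static_assert(sizeof(Mat3x4<float>) == 48); // consistent with sizeof(PnMtx) == 2 * 48 above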
@ -1,3 +1,7 @@
#pragma once

#include "../internal.hpp"

#include <dolphin/gx/GXEnum.h>
#include <fmt/format.h>
#include <string>
@ -25,7 +29,7 @@ inline std::string format_as(const GXTevOp& op) {
case GX_TEV_COMP_RGB8_EQ:
return "GX_TEV_COMP_RGB8_EQ";
default:
return fmt::format("GXTevOp({})", static_cast<int>(op));
return fmt::format("GXTevOp({})", underlying(op));
}
}

@ -64,7 +68,7 @@ inline std::string format_as(const GXTevColorArg& arg) {
case GX_CC_ZERO:
return "GX_CC_ZERO";
default:
return fmt::format("GXTevColorArg({})", static_cast<int>(arg));
return fmt::format("GXTevColorArg({})", underlying(arg));
}
}

@ -87,7 +91,7 @@ inline std::string format_as(const GXTevAlphaArg& arg) {
case GX_CA_ZERO:
return "GX_CA_ZERO";
default:
return fmt::format("GXTevAlphaArg({})", static_cast<int>(arg));
return fmt::format("GXTevAlphaArg({})", underlying(arg));
}
}

@ -118,7 +122,7 @@ inline std::string format_as(const GXTexGenSrc& src) {
case GX_TG_TEX7:
return "GX_TG_TEX7";
default:
return fmt::format("GXTexGenSrc({})", static_cast<int>(src));
return fmt::format("GXTexGenSrc({})", underlying(src));
}
}

@ -133,7 +137,7 @@ inline std::string format_as(const GXTexGenType& type) {
case GX_TG_BUMP1:
return "GX_TG_BUMP1";
default:
return fmt::format("GXTexGenType({})", static_cast<int>(type));
return fmt::format("GXTexGenType({})", underlying(type));
}
}

@ -146,7 +150,7 @@ inline std::string format_as(const GXTevBias& bias) {
case GX_TB_SUBHALF:
return "GX_TB_SUBHALF";
default:
return fmt::format("GXTevBias({})", static_cast<int>(bias));
return fmt::format("GXTevBias({})", underlying(bias));
}
}

@ -161,7 +165,7 @@ inline std::string format_as(const GXTevScale& scale) {
case GX_CS_DIVIDE_2:
return "GX_CS_DIVIDE_2";
default:
return fmt::format("GXTevScale({})", static_cast<int>(scale));
return fmt::format("GXTevScale({})", underlying(scale));
}
}

@ -176,7 +180,7 @@ inline std::string format_as(const GXTevRegID& reg) {
case GX_TEVREG2:
return "GX_TEVREG2";
default:
return fmt::format("GXTevRegID({})", static_cast<int>(reg));
return fmt::format("GXTevRegID({})", underlying(reg));
}
}

@ -231,7 +235,7 @@ inline std::string format_as(const GXTevKColorSel& sel) {
case GX_TEV_KCSEL_K3_A:
return "GX_TEV_KCSEL_K3_A";
default:
return fmt::format("GXTevKColorSel({})", static_cast<int>(sel));
return fmt::format("GXTevKColorSel({})", underlying(sel));
}
}

@ -286,7 +290,7 @@ inline std::string format_as(const GXTevKAlphaSel& sel) {
case GX_TEV_KASEL_K3_A:
return "GX_TEV_KASEL_K3_A";
default:
return fmt::format("GXTevKAlphaSel({})", static_cast<int>(sel));
return fmt::format("GXTevKAlphaSel({})", underlying(sel));
}
}

@ -313,7 +317,7 @@ inline std::string format_as(const GXTexMapID& id) {
case GX_TEX_DISABLE:
return "GX_TEX_DISABLE";
default:
return fmt::format("GXTexMapID({})", static_cast<int>(id));
return fmt::format("GXTexMapID({})", underlying(id));
}
}

@ -340,7 +344,7 @@ inline std::string format_as(const GXChannelID& id) {
case GX_COLOR_NULL:
return "GX_COLOR_NULL";
default:
return fmt::format("GXChannelID({})", static_cast<int>(id));
return fmt::format("GXChannelID({})", underlying(id));
}
}

@ -351,7 +355,7 @@ inline std::string format_as(const GXColorSrc& src) {
case GX_SRC_VTX:
return "GX_SRC_VTX";
default:
return fmt::format("GXColorSrc({})", static_cast<int>(src));
return fmt::format("GXColorSrc({})", underlying(src));
}
}

@ -380,7 +384,7 @@ inline std::string format_as(const GXTexMtx& mtx) {
case GX_IDENTITY:
return "GX_IDENTITY";
default:
return fmt::format("GXTexMtx({})", static_cast<int>(mtx));
return fmt::format("GXTexMtx({})", underlying(mtx));
}
}

@ -429,7 +433,7 @@ inline std::string format_as(const GXPTTexMtx& mtx) {
case GX_PTIDENTITY:
return "GX_PTIDENTITY";
default:
return fmt::format("GXPTTexMtx({})", static_cast<int>(mtx));
return fmt::format("GXPTTexMtx({})", underlying(mtx));
}
}

@ -452,7 +456,7 @@ inline std::string format_as(const GXCompare& comp) {
case GX_ALWAYS:
return "GX_ALWAYS";
default:
return fmt::format("GXCompare({})", static_cast<int>(comp));
return fmt::format("GXCompare({})", underlying(comp));
}
}

@ -467,7 +471,7 @@ inline std::string format_as(const GXAlphaOp& op) {
case GX_AOP_XNOR:
return "GX_AOP_XNOR";
default:
return fmt::format("GXAlphaOp({})", static_cast<int>(op));
return fmt::format("GXAlphaOp({})", underlying(op));
}
}

@ -496,7 +500,7 @@ inline std::string format_as(const GXFogType& type) {
case GX_FOG_ORTHO_REVEXP2:
return "GX_FOG_ORTHO_REVEXP2";
default:
return fmt::format("GXFogType({})", static_cast<int>(type));
return fmt::format("GXFogType({})", underlying(type));
}
}

@ -521,6 +525,158 @@ inline std::string format_as(const GXTexCoordID& id) {
case GX_TEXCOORD_NULL:
return "GX_TEXCOORD_NULL";
default:
return fmt::format("GXTexCoordID({})", static_cast<int>(id));
return fmt::format("GXTexCoordID({})", underlying(id));
}
}

inline std::string format_as(const GXPrimitive& prim) {
switch (prim) {
case GX_QUADS:
return "GX_QUADS";
case GX_TRIANGLES:
return "GX_TRIANGLES";
case GX_TRIANGLESTRIP:
return "GX_TRIANGLESTRIP";
case GX_TRIANGLEFAN:
return "GX_TRIANGLEFAN";
case GX_LINES:
return "GX_LINES";
case GX_LINESTRIP:
return "GX_LINESTRIP";
case GX_POINTS:
return "GX_POINTS";
default:
return fmt::format("GXPrimitive({})", underlying(prim));
}
}

inline std::string format_as(const GXAttr& attr) {
switch (attr) {
case GX_VA_PNMTXIDX:
return "GX_VA_PNMTXIDX";
case GX_VA_TEX0MTXIDX:
return "GX_VA_TEX0MTXIDX";
case GX_VA_TEX1MTXIDX:
return "GX_VA_TEX1MTXIDX";
case GX_VA_TEX2MTXIDX:
return "GX_VA_TEX2MTXIDX";
case GX_VA_TEX3MTXIDX:
return "GX_VA_TEX3MTXIDX";
case GX_VA_TEX4MTXIDX:
return "GX_VA_TEX4MTXIDX";
case GX_VA_TEX5MTXIDX:
return "GX_VA_TEX5MTXIDX";
case GX_VA_TEX6MTXIDX:
return "GX_VA_TEX6MTXIDX";
case GX_VA_TEX7MTXIDX:
return "GX_VA_TEX7MTXIDX";
case GX_VA_POS:
return "GX_VA_POS";
case GX_VA_NRM:
return "GX_VA_NRM";
case GX_VA_CLR0:
return "GX_VA_CLR0";
case GX_VA_CLR1:
return "GX_VA_CLR1";
case GX_VA_TEX0:
return "GX_VA_TEX0";
case GX_VA_TEX1:
return "GX_VA_TEX1";
case GX_VA_TEX2:
return "GX_VA_TEX2";
case GX_VA_TEX3:
return "GX_VA_TEX3";
case GX_VA_TEX4:
return "GX_VA_TEX4";
case GX_VA_TEX5:
return "GX_VA_TEX5";
case GX_VA_TEX6:
return "GX_VA_TEX6";
case GX_VA_TEX7:
return "GX_VA_TEX7";
case GX_POS_MTX_ARRAY:
return "GX_POS_MTX_ARRAY";
case GX_NRM_MTX_ARRAY:
return "GX_NRM_MTX_ARRAY";
case GX_TEX_MTX_ARRAY:
return "GX_TEX_MTX_ARRAY";
case GX_LIGHT_ARRAY:
return "GX_LIGHT_ARRAY";
case GX_VA_NBT:
return "GX_VA_NBT";
case GX_VA_NULL:
return "GX_VA_NULL";
default:
return fmt::format("GXAttr({})", underlying(attr));
}
}

inline std::string format_as(const GXCompCnt& cnt) {
switch (cnt) {
case GX_POS_XY:
return "GX_POS_XY|GX_NRM_XYZ|GX_CLR_RGB|GX_TEX_S";
case GX_POS_XYZ:
return "GX_POS_XYZ|GX_NRM_NBT|GX_CLR_RGBA|GX_TEX_ST";
case GX_NRM_NBT3:
return "GX_NRM_NBT3";
default:
return fmt::format("GXCompCnt({})", underlying(cnt));
}
}

inline std::string format_as(const GXCompType& type) {
switch (type) {
case GX_U8:
return "GX_U8|GX_RGB565";
case GX_S8:
return "GX_S8|GX_RGB8";
case GX_U16:
return "GX_U16|GX_RGBX8";
case GX_S16:
return "GX_S16|GX_RGBA4";
case GX_F32:
return "GX_F32|GX_RGBA6";
case GX_RGBA8:
return "GX_RGBA8";
default:
return fmt::format("GXCompType({})", underlying(type));
}
}

inline std::string format_as(const GXAttrType& type) {
switch (type) {
case GX_NONE:
return "GX_NONE";
case GX_DIRECT:
return "GX_DIRECT";
case GX_INDEX8:
return "GX_INDEX8";
case GX_INDEX16:
return "GX_INDEX16";
default:
return fmt::format("GXAttrType({})", underlying(type));
}
}

inline std::string format_as(const GXVtxFmt& fmt) {
switch (fmt) {
case GX_VTXFMT0:
return "GX_VTXFMT0";
case GX_VTXFMT1:
return "GX_VTXFMT1";
case GX_VTXFMT2:
return "GX_VTXFMT2";
case GX_VTXFMT3:
return "GX_VTXFMT3";
case GX_VTXFMT4:
return "GX_VTXFMT4";
case GX_VTXFMT5:
return "GX_VTXFMT5";
case GX_VTXFMT6:
return "GX_VTXFMT6";
case GX_VTXFMT7:
return "GX_VTXFMT7";
default:
return fmt::format("GXVtxFmt({})", underlying(fmt));
}
}
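These format_as overloads let fmt print GX enums by name, falling back to the numeric value via underlying() for anything unlisted. A usage sketch (assuming fmt's ADL-based format_as extension point, available since fmt 9, which these overloads target):

fmt::print("{} {}\n", GX_VTXFMT0, static_cast<GXVtxFmt>(42));
// prints: GX_VTXFMT0 GXVtxFmt(42)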
File diff suppressed because it is too large
@ -1,60 +1,29 @@
#include "shader.hpp"

#include "../../webgpu/gpu.hpp"
#include "../gx_fmt.hpp"

#include <absl/container/flat_hash_map.h>

namespace aurora::gfx::model {
static Module Log("aurora::gfx::model");

template <typename T>
constexpr T bswap16(T val) noexcept {
static_assert(sizeof(T) == sizeof(u16));
union {
u16 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}
template <typename T>
constexpr T bswap32(T val) noexcept {
static_assert(sizeof(T) == sizeof(u32));
union {
u32 u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}

using IndexedAttrs = std::array<bool, GX_VA_MAX_ATTR>;
struct DisplayListCache {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
IndexedAttrs indexedAttrs;
GXVtxFmt fmt;

DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs) {}
DisplayListCache(ByteBuffer&& vtxBuf, ByteBuffer&& idxBuf, IndexedAttrs indexedAttrs, GXVtxFmt fmt)
: vtxBuf(std::move(vtxBuf)), idxBuf(std::move(idxBuf)), indexedAttrs(indexedAttrs), fmt(fmt) {}
};

static absl::flat_hash_map<HashType, DisplayListCache> sCachedDisplayLists;

static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u16 vtxCount,
IndexedAttrs& indexedAttrs) {
using aurora::gfx::gx::g_gxState;
using gx::g_gxState;
struct {
u8 count;
GXCompType type;
@ -66,14 +35,13 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
const auto& attrFmt = g_gxState.vtxFmts[vtxfmt].attrs[attr];
switch (g_gxState.vtxDesc[attr]) {
DEFAULT_FATAL("unhandled attribute type {}", static_cast<int>(g_gxState.vtxDesc[attr]));
DEFAULT_FATAL("unhandled attribute type {}", g_gxState.vtxDesc[attr]);
case GX_NONE:
break;
case GX_DIRECT:
#define COMBINE(val1, val2, val3) (((val1) << 16) | ((val2) << 8) | (val3))
switch (COMBINE(attr, attrFmt.cnt, attrFmt.type)) {
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", static_cast<int>(attr), static_cast<int>(attrFmt.cnt),
static_cast<int>(attrFmt.type));
DEFAULT_FATAL("not handled: attr {}, cnt {}, type {}", attr, attrFmt.cnt, attrFmt.type);
case COMBINE(GX_VA_POS, GX_POS_XYZ, GX_F32):
case COMBINE(GX_VA_NRM, GX_NRM_XYZ, GX_F32):
attrArrays[attr].count = 3;
@ -150,12 +118,10 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
for (u32 v = 0; v < vtxCount; ++v) {
for (int attr = 0; attr < GX_VA_MAX_ATTR; attr++) {
if (g_gxState.vtxDesc[attr] == GX_INDEX8) {
u16 index = *ptr;
buf.append(&index, 2);
buf.append(static_cast<u16>(*ptr));
++ptr;
} else if (g_gxState.vtxDesc[attr] == GX_INDEX16) {
u16 index = bswap16(*reinterpret_cast<const u16*>(ptr));
buf.append(&index, 2);
buf.append(bswap(*reinterpret_cast<const u16*>(ptr)));
ptr += 2;
}
if (g_gxState.vtxDesc[attr] != GX_DIRECT) {
@ -182,7 +148,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_U16:
for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const u16*>(ptr)[i]);
const auto value = bswap(reinterpret_cast<const u16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
@ -190,7 +156,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_S16:
for (int i = 0; i < count; ++i) {
const auto value = bswap16(reinterpret_cast<const s16*>(ptr)[i]);
const auto value = bswap(reinterpret_cast<const s16*>(ptr)[i]);
out[i] = static_cast<f32>(value) / static_cast<f32>(1 << attrFmt.frac);
}
buf.append(out.data(), sizeof(f32) * count);
@ -198,7 +164,7 @@ static u32 prepare_vtx_buffer(ByteBuffer& buf, GXVtxFmt vtxfmt, const u8* ptr, u
break;
case GX_F32:
for (int i = 0; i < count; ++i) {
out[i] = bswap32(reinterpret_cast<const f32*>(ptr)[i]);
out[i] = bswap(reinterpret_cast<const f32*>(ptr)[i]);
}
buf.append(out.data(), sizeof(f32) * count);
ptr += count * sizeof(f32);
@ -227,7 +193,7 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
buf.reserve_extra(vtxCount * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
}
} else if (prim == GX_TRIANGLEFAN) {
@ -235,29 +201,26 @@ static u16 prepare_idx_buffer(ByteBuffer& buf, GXPrimitive prim, u16 vtxStart, u
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
continue;
}
const std::array<u16, 3> idxs{vtxStart, u16(idx - 1), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{vtxStart, static_cast<u16>(idx - 1), idx});
numIndices += 3;
}
} else if (prim == GX_TRIANGLESTRIP) {
buf.reserve_extra(((u32(vtxCount) - 3) * 3 + 3) * sizeof(u16));
buf.reserve_extra(((static_cast<u32>(vtxCount) - 3) * 3 + 3) * sizeof(u16));
for (u16 v = 0; v < vtxCount; ++v) {
const u16 idx = vtxStart + v;
if (v < 3) {
buf.append(&idx, sizeof(u16));
buf.append(idx);
++numIndices;
continue;
}
if ((v & 1) == 0) {
const std::array<u16, 3> idxs{u16(idx - 2), u16(idx - 1), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{static_cast<u16>(idx - 2), static_cast<u16>(idx - 1), idx});
} else {
const std::array<u16, 3> idxs{u16(idx - 1), u16(idx - 2), idx};
buf.append(idxs.data(), sizeof(u16) * 3);
buf.append(std::array{static_cast<u16>(idx - 1), static_cast<u16>(idx - 2), idx});
}
numIndices += 3;
}
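A worked example of the two expansions above, with vtxStart = 100 and vtxCount = 5: a GX_TRIANGLEFAN emits the triangle-list indices 100,101,102, 100,102,103, 100,103,104, while a GX_TRIANGLESTRIP emits 100,101,102, 102,101,103, 102,103,104 — odd-numbered triangles swap their first two indices to keep a consistent winding.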
@ -271,6 +234,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
Range vertRange, idxRange;
u32 numIndices = 0;
IndexedAttrs indexedAttrs{};
GXVtxFmt fmt = GX_MAX_VTXFMT;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& cache = it->second;
@ -278,6 +242,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
vertRange = push_verts(cache.vtxBuf.data(), cache.vtxBuf.size());
idxRange = push_indices(cache.idxBuf.data(), cache.idxBuf.size());
indexedAttrs = cache.indexedAttrs;
fmt = cache.fmt;
} else {
const u8* data = dlStart;
u32 pos = 0;
@ -302,8 +267,12 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN: {
const auto prim = static_cast<GXPrimitive>(opcode);
const auto fmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
u16 vtxCount = bswap16(*reinterpret_cast<const u16*>(data + pos));
const auto newFmt = static_cast<GXVtxFmt>(cmd & GX_VAT_MASK);
if (fmt != GX_MAX_VTXFMT && fmt != newFmt) {
FATAL("Vertex format changed mid-display list: {} -> {}", fmt, newFmt);
}
fmt = newFmt;
u16 vtxCount = bswap(*reinterpret_cast<const u16*>(data + pos));
pos += 2;
pos += vtxCount * prepare_vtx_buffer(vtxBuf, fmt, data + pos, vtxCount, indexedAttrs);
numIndices += prepare_idx_buffer(idxBuf, prim, vtxStart, vtxCount);
@ -319,22 +288,16 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs);
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf), indexedAttrs, fmt);
}

gx::BindGroupRanges ranges{};
int lastIndexedAttr = -1;
for (int i = 0; i < GX_VA_MAX_ATTR; ++i) {
if (!indexedAttrs[i]) {
continue;
}
auto& array = gx::g_gxState.arrays[i];
if (lastIndexedAttr >= 0 && array == gx::g_gxState.arrays[lastIndexedAttr]) {
// Reuse range from last attribute in shader
// Don't set the output range, so it remains unbound
const auto range = gx::g_gxState.arrays[lastIndexedAttr].cachedRange;
array.cachedRange = range;
} else if (array.cachedRange.size > 0) {
if (array.cachedRange.size > 0) {
// Use the currently cached range
ranges.vaRanges[i] = array.cachedRange;
} else {
@ -343,11 +306,10 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
ranges.vaRanges[i] = range;
array.cachedRange = range;
}
lastIndexedAttr = i;
}

model::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES);
populate_pipeline_config(config, GX_TRIANGLES, fmt);
const auto info = gx::build_shader_info(config.shaderConfig);
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config);
@ -366,7 +328,7 @@ void queue_surface(const u8* dlStart, u32 dlSize) noexcept {

State construct_state() { return {}; }

wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) {
wgpu::RenderPipeline create_pipeline(const State& state, const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);

@ -385,7 +347,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
// Indexed attributes
for (u32 i = 0; i < num4xAttr; ++i) {
vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x4,
.format = wgpu::VertexFormat::Uint16x4,
.offset = offset,
.shaderLocation = shaderLocation,
};
@ -394,7 +356,7 @@ wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const
}
for (u32 i = 0; i < num2xAttr; ++i) {
vtxAttrs[shaderLocation] = {
.format = wgpu::VertexFormat::Sint16x2,
.format = wgpu::VertexFormat::Uint16x2,
.offset = offset,
.shaderLocation = shaderLocation,
};
@ -1,82 +0,0 @@
#include "shader.hpp"

#include "../../webgpu/gpu.hpp"

namespace aurora::gfx::stream {
static Module Log("aurora::gfx::stream");

using webgpu::g_device;

wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);

std::array<wgpu::VertexAttribute, 4> attributes{};
attributes[0] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = 0,
.shaderLocation = 0,
};
uint64_t offset = 12;
uint32_t shaderLocation = 1;
if (config.shaderConfig.vtxAttrs[GX_VA_NRM] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x3,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 12;
shaderLocation++;
}
if (config.shaderConfig.vtxAttrs[GX_VA_CLR0] == GX_DIRECT) {
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x4,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 16;
shaderLocation++;
}
for (int i = GX_VA_TEX0; i < GX_VA_TEX7; ++i) {
if (config.shaderConfig.vtxAttrs[i] != GX_DIRECT) {
continue;
}
attributes[shaderLocation] = wgpu::VertexAttribute{
.format = wgpu::VertexFormat::Float32x2,
.offset = offset,
.shaderLocation = shaderLocation,
};
offset += 8;
shaderLocation++;
}
const std::array vertexBuffers{wgpu::VertexBufferLayout{
.arrayStride = offset,
.attributeCount = shaderLocation,
.attributes = attributes.data(),
}};

return build_pipeline(config, info, vertexBuffers, shader, "Stream Pipeline");
}

State construct_state() { return {}; }

void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
if (!bind_pipeline(data.pipeline, pass)) {
return;
}

const std::array offsets{data.uniformRange.offset};
pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
}
pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size);
pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.indexRange.offset, data.indexRange.size);
if (data.dstAlpha != UINT32_MAX) {
const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f};
pass.SetBlendConstant(&color);
}
pass.DrawIndexed(data.indexCount);
}
} // namespace aurora::gfx::stream
@ -1,24 +0,0 @@
#pragma once

#include "../common.hpp"
#include "../gx.hpp"

namespace aurora::gfx::stream {
struct DrawData {
PipelineRef pipeline;
Range vertRange;
Range uniformRange;
Range indexRange;
uint32_t indexCount;
gx::GXBindGroups bindGroups;
u32 dstAlpha;
};

struct PipelineConfig : public gx::PipelineConfig {};

struct State {};

State construct_state();
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] const PipelineConfig& config);
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass);
} // namespace aurora::gfx::stream
@ -66,17 +66,6 @@ static size_t ComputeMippedBlockCountDXT1(uint32_t w, uint32_t h, uint32_t mips)
return ret;
}

template <typename T>
constexpr T bswap16(T val) noexcept {
#if __GNUC__
return __builtin_bswap16(val);
#elif _WIN32
return _byteswap_ushort(val);
#else
return (val = (val << 8) | ((val >> 8) & 0xFF));
#endif
}

template <typename T>
concept TextureDecoder = requires(T) {
typename T::Source;
@ -178,15 +167,15 @@ struct TextureDecoderIA4 {
};

struct TextureDecoderIA8 {
using Source = uint8_t;
using Source = uint16_t;
using Target = RGBA8;

static constexpr uint32_t Frac = 1;
static constexpr uint32_t BlockWidth = 8;
static constexpr uint32_t BlockWidth = 4;
static constexpr uint32_t BlockHeight = 4;

static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
const uint8_t intensity = texel >> 8;
target[x].r = intensity;
target[x].g = intensity;
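Concretely, for the decoder above: an IA8 texel stored big-endian as the bytes {0xC0, 0x40} is read on a little-endian host as 0x40C0; bswap restores 0xC040, so texel >> 8 recovers the leading byte 0xC0 as the intensity.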
@ -228,7 +217,7 @@ struct TextureDecoderRGB565 {
static constexpr uint32_t BlockHeight = 4;

static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
target[x].r = ExpandTo8<5>(texel >> 11 & 0x1f);
target[x].g = ExpandTo8<6>(texel >> 5 & 0x3f);
target[x].b = ExpandTo8<5>(texel & 0x1f);
@ -245,7 +234,7 @@ struct TextureDecoderRGB5A3 {
static constexpr uint32_t BlockHeight = 4;

static void decode_texel(Target* target, const Source* in, const uint32_t x) {
const auto texel = bswap16(in[x]);
const auto texel = bswap(in[x]);
if ((texel & 0x8000) != 0) {
target[x].r = ExpandTo8<5>(texel >> 10 & 0x1f);
target[x].g = ExpandTo8<5>(texel >> 5 & 0x1f);
@ -322,8 +311,8 @@ static ByteBuffer BuildDXT1FromGCN(uint32_t width, uint32_t height, uint32_t mip
for (uint32_t y = 0; y < 2; ++y) {
DXT1Block* target = targetMip + (baseY + y) * w + baseX;
for (size_t x = 0; x < 2; ++x) {
target[x].color1 = bswap16(in[x].color1);
target[x].color2 = bswap16(in[x].color2);
target[x].color1 = bswap(in[x].color1);
target[x].color2 = bswap(in[x].color2);
for (size_t i = 0; i < 4; ++i) {
std::array<uint8_t, 4> ind;
const uint8_t packed = in[x].lines[i];
@ -365,8 +354,8 @@ static ByteBuffer BuildRGBA8FromCMPR(uint32_t width, uint32_t height, uint32_t m
for (uint32_t yb = 0; yb < 8; yb += 4) {
for (uint32_t xb = 0; xb < 8; xb += 4) {
// CMPR difference: Big-endian color1/2
const uint16_t color1 = bswap16(*reinterpret_cast<const uint16_t*>(src));
const uint16_t color2 = bswap16(*reinterpret_cast<const uint16_t*>(src + 2));
const uint16_t color1 = bswap(*reinterpret_cast<const uint16_t*>(src));
const uint16_t color2 = bswap(*reinterpret_cast<const uint16_t*>(src + 2));
src += 4;

// Fill in first two colors in color table.
@ -480,4 +469,4 @@ ByteBuffer convert_tlut(u32 format, uint32_t width, ArrayRef<uint8_t> data) {
return DecodeLinear<TextureDecoderRGB5A3>(width, data);
}
}
} // namespace aurora::gfx
} // namespace aurora::gfx
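The ExpandTo8<N> helper used by the RGB565 and RGB5A3 decoders widens an N-bit channel to 8 bits by replicating its high bits, so a full-scale input maps to exactly 255. A hedged sketch of the idea (the in-tree definition may differ):

#include <cstdint>

template <uint32_t Bits>
constexpr uint8_t ExpandTo8(uint8_t v) {
  static_assert(Bits >= 4 && Bits < 8, "shift below assumes 2 * Bits >= 8");
  // e.g. Bits == 5: (v << 3) | (v >> 2), so 0x1F expands to 0xFF
  return (v << (8 - Bits)) | (v >> (2 * Bits - 8));
}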
@ -6,6 +2,8 @@

#include <array>
#include <cassert>
#include <cstdint>
#include <type_traits>
#include <vector>

using namespace std::string_view_literals;
@ -21,6 +23,46 @@ using namespace std::string_view_literals;
#endif
#endif

template <typename T>
requires(sizeof(T) == sizeof(uint16_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint16_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap16(v.u);
#elif _WIN32
v.u = _byteswap_ushort(v.u);
#else
v.u = (v.u << 8) | ((v.u >> 8) & 0xFF);
#endif
return v.t;
}

template <typename T>
requires(sizeof(T) == sizeof(uint32_t) && std::is_arithmetic_v<T>)
constexpr T bswap(T val) noexcept {
union {
uint32_t u;
T t;
} v{.t = val};
#if __GNUC__
v.u = __builtin_bswap32(v.u);
#elif _WIN32
v.u = _byteswap_ulong(v.u);
#else
v.u = ((v.u & 0x0000FFFF) << 16) | ((v.u & 0xFFFF0000) >> 16) | ((v.u & 0x00FF00FF) << 8) | ((v.u & 0xFF00FF00) >> 8);
#endif
return v.t;
}

template <typename T>
requires(std::is_enum_v<T>)
auto underlying(T value) -> std::underlying_type_t<T> {
return static_cast<std::underlying_type_t<T>>(value);
}

#ifndef ALIGN
#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((a) - 1))
#endif
@ -33,11 +75,7 @@ using namespace std::string_view_literals;
#else
#define UNLIKELY
#endif
#define FATAL(msg, ...) \
{ \
Log.fatal(msg, ##__VA_ARGS__); \
unreachable(); \
}
#define FATAL(msg, ...) Log.fatal(msg, ##__VA_ARGS__);
#define ASSERT(cond, msg, ...) \
if (!(cond)) \
UNLIKELY FATAL(msg, ##__VA_ARGS__)
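The two bswap overloads above route through a union so the same helper can swap 16- and 32-bit integers and IEEE floats alike, without the pointer-based type punning that would be undefined behavior in C++ (they are constrained by size and std::is_arithmetic_v, so floats qualify for the 32-bit overload). Usage sketch:

const uint16_t idx = bswap(static_cast<uint16_t>(0x1234)); // yields 0x3412
const float f = bswap(1.0f); // the four bytes of 1.0f, reversed, as read from GCN data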
@ -4,15 +4,9 @@

#include <fmt/base.h>
#include <fmt/format.h>
#include <string_view>

#ifdef __GNUC__
[[noreturn]] inline __attribute__((always_inline)) void unreachable() { __builtin_unreachable(); }
#elif defined(_MSC_VER)
[[noreturn]] __forceinline void unreachable() { __assume(false); }
#else
#error Unknown compiler
#endif
#include <cstdlib>
#include <string_view>

namespace aurora {
void log_internal(AuroraLogLevel level, const char* module, const char* message, unsigned int len) noexcept;
@ -50,7 +44,7 @@ struct Module {
template <typename... T>
[[noreturn]] void fatal(fmt::format_string<T...> fmt, T&&... args) noexcept {
report(LOG_FATAL, fmt, std::forward<T>(args)...);
unreachable();
std::abort();
}
};
} // namespace aurora
@ -385,15 +385,12 @@ bool initialize(AuroraBackend auroraBackend) {
g_adapter.GetLimits(&supportedLimits);
const wgpu::Limits requiredLimits{
// Use "best" supported alignments
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension2D,
.maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension3D,
.maxTextureDimension1D = supportedLimits.maxTextureDimension1D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension1D,
.maxTextureDimension2D = supportedLimits.maxTextureDimension2D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension2D,
.maxTextureDimension3D = supportedLimits.maxTextureDimension3D == 0 ? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.maxTextureDimension3D,
.minUniformBufferOffsetAlignment = supportedLimits.minUniformBufferOffsetAlignment == 0
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minUniformBufferOffsetAlignment,
@ -401,6 +398,12 @@ bool initialize(AuroraBackend auroraBackend) {
? WGPU_LIMIT_U32_UNDEFINED
: supportedLimits.minStorageBufferOffsetAlignment,
};
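Each field above falls back to WGPU_LIMIT_U32_UNDEFINED when the adapter reports 0; the repeated pattern could be captured by a small helper like the following (hypothetical, not part of the change):

constexpr uint32_t limit_or_undefined(uint32_t supported) {
  return supported == 0 ? WGPU_LIMIT_U32_UNDEFINED : supported;
}
// e.g. .maxTextureDimension2D = limit_or_undefined(supportedLimits.maxTextureDimension2D),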
Log.info(
"Using limits\n maxTextureDimension1D: {}\n maxTextureDimension2D: {}\n maxTextureDimension3D: {}\n "
"minUniformBufferOffsetAlignment: {}\n minStorageBufferOffsetAlignment: {}",
requiredLimits.maxTextureDimension1D, requiredLimits.maxTextureDimension2D,
requiredLimits.maxTextureDimension3D, requiredLimits.minUniformBufferOffsetAlignment,
requiredLimits.minStorageBufferOffsetAlignment);
std::vector<wgpu::FeatureName> requiredFeatures;
wgpu::SupportedFeatures supportedFeatures;
g_adapter.GetFeatures(&supportedFeatures);
@ -442,22 +445,20 @@ bool initialize(AuroraBackend auroraBackend) {
});
deviceDescriptor.SetUncapturedErrorCallback(
[](const wgpu::Device& device, wgpu::ErrorType type, wgpu::StringView message) {
FATAL("WebGPU error {}: {}", static_cast<int>(type), message);
});
deviceDescriptor.SetDeviceLostCallback(
wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
Log.warn("Device lost: {}", message);
});
const auto future = g_adapter.RequestDevice(
&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
FATAL("WebGPU error {}: {}", underlying(type), message);
});
deviceDescriptor.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
[](const wgpu::Device& device, wgpu::DeviceLostReason reason,
wgpu::StringView message) { Log.warn("Device lost: {}", message); });
const auto future =
g_adapter.RequestDevice(&deviceDescriptor, wgpu::CallbackMode::WaitAnyOnly,
[](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) {
if (status == wgpu::RequestDeviceStatus::Success) {
g_device = std::move(device);
} else {
Log.warn("Device request failed: {}", message);
}
});
const auto status = g_instance.WaitAny(future, 5000000000);
if (status != wgpu::WaitStatus::Success) {
Log.error("Failed to create device: {}", magic_enum::enum_name(status));