metaforce/aurora/lib/gfx/model/shader.cpp

292 lines
9.6 KiB
C++

#include "shader.hpp"
#include "../../gpu.hpp"
#include "../common.hpp"
#include <absl/container/flat_hash_map.h>
#include <aurora/model.hpp>
namespace aurora::gfx::model {
static logvisor::Module Log("aurora::gfx::model");
static const std::vector<zeus::CVector3f>* vtxData;
static const std::vector<zeus::CVector3f>* nrmData;
static const std::vector<Vec2<float>>* tex0TcData;
static const std::vector<Vec2<float>>* tcData;
static std::optional<Range> cachedVtxRange;
static std::optional<Range> cachedNrmRange;
static std::optional<Range> cachedPackedTcRange;
static std::optional<Range> cachedTcRange;
static inline void read_vert(ByteBuffer& out, const u8* data) noexcept {
size_t offset = 0;
for (const auto& type : gx::g_gxState.vtxDesc) {
if (type == GX::INDEX8) {
const auto v = static_cast<s16>(data[offset]); // expand to s16
out.append(&v, 2);
++offset;
} else if (type == GX::INDEX16) {
const s16 v = metaforce::SBig(*reinterpret_cast<const s16*>(data + offset));
out.append(&v, 2);
offset += 2;
}
}
constexpr size_t align = 4; // Sint16x2
if (offset % align != 0) {
out.append_zeroes(align - (offset % align));
}
}
static absl::flat_hash_map<XXH64_hash_t, std::pair<ByteBuffer, ByteBuffer>> sCachedDisplayLists;
void queue_surface(const u8* dlStart, u32 dlSize) noexcept {
const auto hash = xxh3_hash_s(dlStart, dlSize, 0);
Range vertRange, idxRange;
u32 numIndices = 0;
auto it = sCachedDisplayLists.find(hash);
if (it != sCachedDisplayLists.end()) {
const auto& [verts, indices] = it->second;
numIndices = indices.size() / 2;
vertRange = push_verts(verts.data(), verts.size());
idxRange = push_indices(indices.data(), indices.size());
} else {
ByteBuffer vtxBuf;
ByteBuffer idxBuf;
u8 inVtxSize = 0;
u8 outVtxSize = 0;
for (const auto& type : gx::g_gxState.vtxDesc) {
if (type == GX::NONE || type == GX::DIRECT) {
continue;
}
if (type == GX::INDEX8) {
++inVtxSize;
outVtxSize += 2;
} else if (type == GX::INDEX16) {
inVtxSize += 2;
outVtxSize += 2;
} else {
Log.report(logvisor::Fatal, FMT_STRING("unexpected vtx type {}"), type);
unreachable();
}
}
outVtxSize = ALIGN(outVtxSize, 4);
u16 vtxStart = 0;
size_t offset = 0;
while (offset < dlSize - 6) {
const auto header = dlStart[offset];
const auto primitive = static_cast<GX::Primitive>(header & 0xF8);
const auto dlVtxCount = metaforce::SBig(*reinterpret_cast<const u16*>(dlStart + offset + 1));
offset += 3;
if (primitive == 0) {
break;
}
if (primitive != GX::TRIANGLES && primitive != GX::TRIANGLESTRIP && primitive != GX::TRIANGLEFAN) {
Log.report(logvisor::Fatal, FMT_STRING("queue_surface: unsupported primitive type {}"), primitive);
unreachable();
}
vtxBuf.reserve_extra(dlVtxCount * outVtxSize);
if (dlVtxCount > 3 && (primitive == GX::TRIANGLEFAN || primitive == GX::TRIANGLESTRIP)) {
idxBuf.reserve_extra(((u32(dlVtxCount) - 3) * 3 + 3) * 2);
} else {
idxBuf.reserve_extra(dlVtxCount * 2);
}
u16 curVert = vtxStart;
for (u16 v = 0; v < dlVtxCount; ++v) {
read_vert(vtxBuf, dlStart + offset);
offset += inVtxSize;
if (primitive == GX::TRIANGLES || v < 3) {
idxBuf.append(&curVert, 2);
++numIndices;
} else if (primitive == GX::TRIANGLEFAN) {
const std::array<u16, 3> idxs{
vtxStart,
u16(curVert - 1),
curVert,
};
idxBuf.append(idxs.data(), 6);
numIndices += 3;
} else if (primitive == GX::TRIANGLESTRIP) {
if ((v & 1) == 0) {
const std::array<u16, 3> idxs{
u16(curVert - 2),
u16(curVert - 1),
curVert,
};
idxBuf.append(idxs.data(), 6);
} else {
const std::array<u16, 3> idxs{
u16(curVert - 1),
u16(curVert - 2),
curVert,
};
idxBuf.append(idxs.data(), 6);
}
numIndices += 3;
}
++curVert;
}
vtxStart += dlVtxCount;
}
vertRange = push_verts(vtxBuf.data(), vtxBuf.size());
idxRange = push_indices(idxBuf.data(), idxBuf.size());
sCachedDisplayLists.try_emplace(hash, std::move(vtxBuf), std::move(idxBuf));
}
Range sVtxRange, sNrmRange, sTcRange, sPackedTcRange;
if (cachedVtxRange) {
sVtxRange = *cachedVtxRange;
} else {
sVtxRange = push_storage(reinterpret_cast<const uint8_t*>(vtxData->data()), vtxData->size() * 16);
cachedVtxRange = sVtxRange;
}
if (cachedNrmRange) {
sNrmRange = *cachedNrmRange;
} else {
sNrmRange = push_storage(reinterpret_cast<const uint8_t*>(nrmData->data()), nrmData->size() * 16);
cachedNrmRange = sNrmRange;
}
if (cachedTcRange) {
sTcRange = *cachedTcRange;
} else {
sTcRange = push_storage(reinterpret_cast<const uint8_t*>(tcData->data()), tcData->size() * 8);
cachedTcRange = sTcRange;
}
if (cachedPackedTcRange) {
sPackedTcRange = *cachedPackedTcRange;
} else if (tcData == tex0TcData) {
sPackedTcRange = sTcRange;
} else {
sPackedTcRange = push_storage(reinterpret_cast<const uint8_t*>(tex0TcData->data()), tex0TcData->size() * 8);
cachedPackedTcRange = sPackedTcRange;
}
model::PipelineConfig config{};
populate_pipeline_config(config, GX::TRIANGLES);
const auto info = gx::build_shader_info(config.shaderConfig);
const gx::BindGroupRanges ranges{
.vtxDataRange = sVtxRange,
.nrmDataRange = sNrmRange,
.tcDataRange = sTcRange,
.packedTcDataRange = sPackedTcRange,
};
const auto bindGroups = gx::build_bind_groups(info, config.shaderConfig, ranges);
const auto pipeline = pipeline_ref(config);
push_draw_command(model::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.idxRange = idxRange,
.dataRanges = ranges,
.uniformRange = build_uniform(info),
.indexCount = numIndices,
.bindGroups = bindGroups,
.dstAlpha = gx::g_gxState.dstAlpha,
});
}
State construct_state() { return {}; }
wgpu::RenderPipeline create_pipeline(const State& state, [[maybe_unused]] PipelineConfig config) {
const auto info = build_shader_info(config.shaderConfig); // TODO remove
const auto shader = build_shader(config.shaderConfig, info);
std::array<wgpu::VertexAttribute, gx::MaxVtxAttr> vtxAttrs{};
auto [num4xAttr, rem] = std::div(config.shaderConfig.indexedAttributeCount, 4);
u32 num2xAttr = 0;
if (rem > 2) {
++num4xAttr;
} else if (rem > 0) {
++num2xAttr;
}
u32 offset = 0;
for (u32 i = 0; i < num4xAttr; ++i) {
vtxAttrs[i] = {
.format = wgpu::VertexFormat::Sint16x4,
.offset = offset,
.shaderLocation = i,
};
offset += 8;
}
for (u32 i = 0; i < num2xAttr; ++i) {
const u32 idx = num4xAttr + i;
vtxAttrs[idx] = {
.format = wgpu::VertexFormat::Sint16x2,
.offset = offset,
.shaderLocation = idx,
};
offset += 4;
}
const std::array vtxBuffers{wgpu::VertexBufferLayout{
.arrayStride = offset,
.stepMode = wgpu::VertexStepMode::Vertex,
.attributeCount = num4xAttr + num2xAttr,
.attributes = vtxAttrs.data(),
}};
return build_pipeline(config, info, vtxBuffers, shader, "Model Pipeline");
}
void render(const State& state, const DrawData& data, const wgpu::RenderPassEncoder& pass) {
if (!bind_pipeline(data.pipeline, pass)) {
return;
}
const std::array offsets{
data.uniformRange.offset,
storage_offset(data.dataRanges.vtxDataRange),
storage_offset(data.dataRanges.nrmDataRange),
storage_offset(data.dataRanges.packedTcDataRange),
storage_offset(data.dataRanges.tcDataRange),
};
pass.SetBindGroup(0, find_bind_group(data.bindGroups.uniformBindGroup), offsets.size(), offsets.data());
if (data.bindGroups.samplerBindGroup && data.bindGroups.textureBindGroup) {
pass.SetBindGroup(1, find_bind_group(data.bindGroups.samplerBindGroup));
pass.SetBindGroup(2, find_bind_group(data.bindGroups.textureBindGroup));
}
pass.SetVertexBuffer(0, g_vertexBuffer, data.vertRange.offset, data.vertRange.size);
pass.SetIndexBuffer(g_indexBuffer, wgpu::IndexFormat::Uint16, data.idxRange.offset, data.idxRange.size);
if (data.dstAlpha != UINT32_MAX) {
const wgpu::Color color{0.f, 0.f, 0.f, data.dstAlpha / 255.f};
pass.SetBlendConstant(&color);
}
pass.DrawIndexed(data.indexCount);
}
} // namespace aurora::gfx::model
static absl::flat_hash_map<XXH64_hash_t, aurora::gfx::Range> sCachedRanges;
template <typename Vec>
static inline void cache_array(const void* data, Vec*& outPtr, std::optional<aurora::gfx::Range>& outRange, u8 stride) {
Vec* vecPtr = static_cast<Vec*>(data);
outPtr = vecPtr;
outRange.reset();
}
void GXSetArray(GX::Attr attr, const void* data, u8 stride) noexcept {
using namespace aurora::gfx::model;
switch (attr) {
case GX::VA_POS:
cache_array(data, vtxData, cachedVtxRange, stride);
break;
case GX::VA_NRM:
cache_array(data, nrmData, cachedNrmRange, stride);
break;
case GX::VA_TEX0:
cache_array(data, tex0TcData, cachedPackedTcRange, stride);
break;
case GX::VA_TEX1:
cache_array(data, tcData, cachedTcRange, stride);
break;
default:
Log.report(logvisor::Fatal, FMT_STRING("GXSetArray: invalid attr {}"), attr);
unreachable();
}
}
void GXCallDisplayList(const void* data, u32 nbytes) noexcept {
aurora::gfx::model::queue_surface(static_cast<const u8*>(data), nbytes);
}