Merge GXBegin/GXEnd draws if GX state unchanged

This commit is contained in:
Luke Street 2022-07-28 19:48:02 -04:00
parent a9cf9271c4
commit 0da998450b
6 changed files with 90 additions and 39 deletions

View File

@ -134,9 +134,9 @@ void GXLoadLightObjImm(GXLightObj* light_, GXLightID id) {
auto* light = reinterpret_cast<const GXLightObj_*>(light_); auto* light = reinterpret_cast<const GXLightObj_*>(light_);
realLight.pos = {light->px, light->py, light->pz}; realLight.pos = {light->px, light->py, light->pz};
realLight.dir = {light->nx, light->ny, light->nz}; realLight.dir = {light->nx, light->ny, light->nz};
realLight.color = from_gx_color(light->color);
realLight.cosAtt = {light->a0, light->a1, light->a2}; realLight.cosAtt = {light->a0, light->a1, light->a2};
realLight.distAtt = {light->k0, light->k1, light->k2}; realLight.distAtt = {light->k0, light->k1, light->k2};
realLight.color = from_gx_color(light->color);
update_gx_state(g_gxState.lights[idx], realLight); update_gx_state(g_gxState.lights[idx], realLight);
} }

View File

@ -109,7 +109,7 @@ void GXLoadTexObj(GXTexObj* obj_, GXTexMapID id) {
obj->dataInvalidated = false; obj->dataInvalidated = false;
} }
g_gxState.textures[id] = {*obj}; g_gxState.textures[id] = {*obj};
// TODO stateDirty? g_gxState.stateDirty = true; // TODO only if changed?
} }
u32 GXGetTexBufferSize(u16 width, u16 height, u32 fmt, GXBool mips, u8 maxLod) { u32 GXGetTexBufferSize(u16 width, u16 height, u32 fmt, GXBool mips, u8 maxLod) {

View File

@ -21,13 +21,15 @@ static inline GXAttr next_attr(size_t begin) {
struct SStreamState { struct SStreamState {
GXPrimitive primitive; GXPrimitive primitive;
u16 vertexCount = 0; u16 vertexCount = 0;
u16 vertexStart = 0;
aurora::ByteBuffer vertexBuffer; aurora::ByteBuffer vertexBuffer;
std::vector<u16> indices; std::vector<u16> indices;
#ifndef NDEBUG #ifndef NDEBUG
GXAttr nextAttr; GXAttr nextAttr;
#endif #endif
explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize) noexcept : primitive(primitive) { explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept
: primitive(primitive), vertexStart(vertexStart) {
vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize); vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize);
if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) { if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) {
indices.reserve((u32(numVerts) - 3) * 3 + 3); indices.reserve((u32(numVerts) - 3) * 3 + 3);
@ -43,6 +45,7 @@ struct SStreamState {
}; };
static std::optional<SStreamState> sStreamState; static std::optional<SStreamState> sStreamState;
static u16 lastVertexStart = 0;
void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) { void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
#ifndef NDEBUG #ifndef NDEBUG
@ -73,7 +76,7 @@ void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
Log.report(LOG_FATAL, FMT_STRING("no vtx attributes enabled?")); Log.report(LOG_FATAL, FMT_STRING("no vtx attributes enabled?"));
unreachable(); unreachable();
} }
sStreamState.emplace(primitive, nVerts, vertexSize); sStreamState.emplace(primitive, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart);
} }
static inline void check_attr_order(GXAttr attr) noexcept { static inline void check_attr_order(GXAttr attr) noexcept {
@ -96,26 +99,27 @@ void GXPosition3f32(float x, float y, float z) {
state.vertexBuffer.append(&x, sizeof(float)); state.vertexBuffer.append(&x, sizeof(float));
state.vertexBuffer.append(&y, sizeof(float)); state.vertexBuffer.append(&y, sizeof(float));
state.vertexBuffer.append(&z, sizeof(float)); state.vertexBuffer.append(&z, sizeof(float));
auto curVertex = state.vertexStart + state.vertexCount;
if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) { if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) {
// pass // pass
} else if (state.primitive == GX_TRIANGLEFAN) { } else if (state.primitive == GX_TRIANGLEFAN) {
state.indices.push_back(0); state.indices.push_back(state.vertexStart);
state.indices.push_back(state.vertexCount - 1); state.indices.push_back(curVertex - 1);
} else if (state.primitive == GX_TRIANGLESTRIP) { } else if (state.primitive == GX_TRIANGLESTRIP) {
if ((state.vertexCount & 1) == 0) { if ((state.vertexCount & 1) == 0) {
state.indices.push_back(state.vertexCount - 2); state.indices.push_back(curVertex - 2);
state.indices.push_back(state.vertexCount - 1); state.indices.push_back(curVertex - 1);
} else { } else {
state.indices.push_back(state.vertexCount - 1); state.indices.push_back(curVertex - 1);
state.indices.push_back(state.vertexCount - 2); state.indices.push_back(curVertex - 2);
} }
} else if (state.primitive == GX_QUADS) { } else if (state.primitive == GX_QUADS) {
if ((state.vertexCount & 3) == 3) { if ((state.vertexCount & 3) == 3) {
state.indices.push_back(state.vertexCount - 3); state.indices.push_back(curVertex - 3);
state.indices.push_back(state.vertexCount - 1); state.indices.push_back(curVertex - 1);
} }
} }
state.indices.push_back(state.vertexCount); state.indices.push_back(curVertex);
++state.vertexCount; ++state.vertexCount;
} }
@ -171,6 +175,7 @@ void GXEnd() {
} }
const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size()); const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices}); const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices});
if (g_gxState.stateDirty) {
aurora::gfx::stream::PipelineConfig config{}; aurora::gfx::stream::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES); populate_pipeline_config(config, GX_TRIANGLES);
const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig); const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig);
@ -184,5 +189,13 @@ void GXEnd() {
.bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}), .bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}),
.dstAlpha = g_gxState.dstAlpha, .dstAlpha = g_gxState.dstAlpha,
}); });
} else {
aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{
.vertRange = vertRange,
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
});
}
lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount;
sStreamState.reset(); sStreamState.reset();
} }

View File

@ -122,19 +122,25 @@ static ByteBuffer g_verts;
static ByteBuffer g_uniforms; static ByteBuffer g_uniforms;
static ByteBuffer g_indices; static ByteBuffer g_indices;
static ByteBuffer g_storage; static ByteBuffer g_storage;
static ByteBuffer g_staticStorage;
static ByteBuffer g_textureUpload; static ByteBuffer g_textureUpload;
WGPUBuffer g_vertexBuffer; WGPUBuffer g_vertexBuffer;
WGPUBuffer g_uniformBuffer; WGPUBuffer g_uniformBuffer;
WGPUBuffer g_indexBuffer; WGPUBuffer g_indexBuffer;
WGPUBuffer g_storageBuffer; WGPUBuffer g_storageBuffer;
size_t g_staticStorageLastSize = 0;
static std::array<WGPUBuffer, 3> g_stagingBuffers; static std::array<WGPUBuffer, 3> g_stagingBuffers;
static WGPUSupportedLimits g_cachedLimits; static WGPUSupportedLimits g_cachedLimits;
static ShaderState g_state; static ShaderState g_state;
static PipelineRef g_currentPipeline; static PipelineRef g_currentPipeline;
// for imgui debug
size_t g_drawCallCount;
size_t g_mergedDrawCallCount;
size_t g_lastVertSize;
size_t g_lastUniformSize;
size_t g_lastIndexSize;
size_t g_lastStorageSize;
using CommandList = std::vector<Command>; using CommandList = std::vector<Command>;
struct ClipRect { struct ClipRect {
int32_t x; int32_t x;
@ -217,8 +223,25 @@ static inline void push_command(CommandType type, const Command::Data& data) {
.data = data, .data = data,
}); });
} }
// Returns a mutable reference to the most recently recorded command of the current
// render pass. That command must be a Draw command whose shader type equals `type`;
// every other situation is reported through Log.report(LOG_FATAL, ...).
static inline Command& get_last_draw_command(ShaderType type) {
  // Calling back() on an empty container is undefined behavior, so a render pass that
  // has not recorded anything yet is handled the same way as having no render pass.
  // The || short-circuits, so g_renderPasses is never indexed with UINT32_MAX.
  if (g_currentRenderPass == UINT32_MAX || g_renderPasses[g_currentRenderPass].commands.empty()) {
    Log.report(LOG_FATAL, FMT_STRING("No last command"));
    unreachable();
  }
  auto& last = g_renderPasses[g_currentRenderPass].commands.back();
  if (last.type != CommandType::Draw || last.data.draw.type != type) {
    Log.report(LOG_FATAL, FMT_STRING("Last command invalid: {} {}, expected {} {}"), magic_enum::enum_name(last.type),
               magic_enum::enum_name(last.data.draw.type), magic_enum::enum_name(CommandType::Draw),
               magic_enum::enum_name(type));
    unreachable();
  }
  return last;
}
static void push_draw_command(ShaderDrawCommand data) { push_command(CommandType::Draw, Command::Data{.draw = data}); } static void push_draw_command(ShaderDrawCommand data) {
push_command(CommandType::Draw, Command::Data{.draw = data});
++g_drawCallCount;
}
static Command::Data::SetViewportCommand g_cachedViewport; static Command::Data::SetViewportCommand g_cachedViewport;
void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept { void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept {
@ -272,6 +295,22 @@ void push_draw_command(stream::DrawData data) {
push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data}); push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
} }
template <> template <>
// Folds `data` into the previous Stream draw command instead of recording a new one.
// The incoming vertex and index ranges are expected to start exactly where the previous
// command's ranges end; a mismatch is reported with LOG_FATAL.
// NOTE(review): no unreachable() follows the reports here, so this presumably relies on
// LOG_FATAL terminating the process - confirm.
void merge_draw_command(stream::DrawData data) {
  auto& prev = get_last_draw_command(ShaderType::Stream).data.draw.stream;

  // Vertex data must be contiguous with the previous draw.
  const auto vertEnd = prev.vertRange.offset + prev.vertRange.size;
  if (vertEnd != data.vertRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), vertEnd, data.vertRange.offset);
  }

  // Index data must be contiguous with the previous draw as well.
  const auto indexEnd = prev.indexRange.offset + prev.indexRange.size;
  if (indexEnd != data.indexRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), indexEnd, data.indexRange.offset);
  }

  // Extend the previous command so that it also covers the new data.
  prev.indexCount += data.indexCount;
  prev.indexRange.size += data.indexRange.size;
  prev.vertRange.size += data.vertRange.size;
  ++g_mergedDrawCallCount;
}
template <>
PipelineRef pipeline_ref(stream::PipelineConfig config) { PipelineRef pipeline_ref(stream::PipelineConfig config) {
return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); }); return find_pipeline(ShaderType::Stream, config, [=]() { return create_pipeline(g_state.stream, config); });
} }
@ -515,6 +554,9 @@ void begin_frame() {
mapBuffer(g_storage, StorageBufferSize); mapBuffer(g_storage, StorageBufferSize);
mapBuffer(g_textureUpload, TextureUploadSize); mapBuffer(g_textureUpload, TextureUploadSize);
g_drawCallCount = 0;
g_mergedDrawCallCount = 0;
g_renderPasses.emplace_back(); g_renderPasses.emplace_back();
g_renderPasses[0].clearColor = gx::g_gxState.clearColor; g_renderPasses[0].clearColor = gx::g_gxState.clearColor;
g_currentRenderPass = 0; g_currentRenderPass = 0;
@ -522,19 +564,13 @@ void begin_frame() {
// push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor}); // push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor});
} }
// for imgui debug
size_t g_lastVertSize;
size_t g_lastUniformSize;
size_t g_lastIndexSize;
size_t g_lastStorageSize;
void end_frame(WGPUCommandEncoder cmd) { void end_frame(WGPUCommandEncoder cmd) {
uint64_t bufferOffset = 0; uint64_t bufferOffset = 0;
const auto writeBuffer = [&](ByteBuffer& buf, WGPUBuffer& out, uint64_t size, std::string_view label) { const auto writeBuffer = [&](ByteBuffer& buf, WGPUBuffer& out, uint64_t size, std::string_view label) {
const auto writeSize = buf.size(); // Only need to copy this many bytes const auto writeSize = buf.size(); // Only need to copy this many bytes
if (writeSize > 0) { if (writeSize > 0) {
wgpuCommandEncoderCopyBufferToBuffer(cmd, g_stagingBuffers[currentStagingBuffer], bufferOffset, out, 0, wgpuCommandEncoderCopyBufferToBuffer(cmd, g_stagingBuffers[currentStagingBuffer], bufferOffset, out, 0,
writeSize); ALIGN(writeSize, 4));
buf.clear(); buf.clear();
} }
bufferOffset += size; bufferOffset += size;
@ -736,8 +772,8 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
target.append_zeroes(length + padding); target.append_zeroes(length + padding);
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)}; return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
} }
Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 4); } Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0); }
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 4); } Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0); }
Range push_uniform(const uint8_t* data, size_t length) { Range push_uniform(const uint8_t* data, size_t length) {
return push(g_uniforms, data, length, g_cachedLimits.limits.minUniformBufferOffsetAlignment); return push(g_uniforms, data, length, g_cachedLimits.limits.minUniformBufferOffsetAlignment);
} }

View File

@ -127,7 +127,6 @@ extern WGPUBuffer g_vertexBuffer;
extern WGPUBuffer g_uniformBuffer; extern WGPUBuffer g_uniformBuffer;
extern WGPUBuffer g_indexBuffer; extern WGPUBuffer g_indexBuffer;
extern WGPUBuffer g_storageBuffer; extern WGPUBuffer g_storageBuffer;
extern size_t g_staticStorageLastSize;
using BindGroupRef = HashType; using BindGroupRef = HashType;
using PipelineRef = HashType; using PipelineRef = HashType;
@ -188,6 +187,8 @@ template <typename State>
const State& get_state(); const State& get_state();
template <typename DrawData> template <typename DrawData>
void push_draw_command(DrawData data); void push_draw_command(DrawData data);
template <typename DrawData>
void merge_draw_command(DrawData data);
template <typename PipelineConfig> template <typename PipelineConfig>
PipelineRef pipeline_ref(PipelineConfig config); PipelineRef pipeline_ref(PipelineConfig config);

View File

@ -473,6 +473,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
} }
buf.append(&tex.texObj.lodBias, 4); buf.append(&tex.texObj.lodBias, 4);
} }
g_gxState.stateDirty = false;
return range; return range;
} }