Merge GXBegin/GXEnd draws if GX state unchanged

This commit is contained in:
Luke Street 2022-07-28 19:48:02 -04:00
parent a9cf9271c4
commit 0da998450b
6 changed files with 90 additions and 39 deletions

View File

@ -134,9 +134,9 @@ void GXLoadLightObjImm(GXLightObj* light_, GXLightID id) {
auto* light = reinterpret_cast<const GXLightObj_*>(light_);
realLight.pos = {light->px, light->py, light->pz};
realLight.dir = {light->nx, light->ny, light->nz};
realLight.color = from_gx_color(light->color);
realLight.cosAtt = {light->a0, light->a1, light->a2};
realLight.distAtt = {light->k0, light->k1, light->k2};
realLight.color = from_gx_color(light->color);
update_gx_state(g_gxState.lights[idx], realLight);
}

View File

@ -109,7 +109,7 @@ void GXLoadTexObj(GXTexObj* obj_, GXTexMapID id) {
obj->dataInvalidated = false;
}
g_gxState.textures[id] = {*obj};
// TODO stateDirty?
g_gxState.stateDirty = true; // TODO only if changed?
}
u32 GXGetTexBufferSize(u16 width, u16 height, u32 fmt, GXBool mips, u8 maxLod) {

View File

@ -21,13 +21,15 @@ static inline GXAttr next_attr(size_t begin) {
struct SStreamState {
GXPrimitive primitive;
u16 vertexCount = 0;
u16 vertexStart = 0;
aurora::ByteBuffer vertexBuffer;
std::vector<u16> indices;
#ifndef NDEBUG
GXAttr nextAttr;
#endif
explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize) noexcept : primitive(primitive) {
explicit SStreamState(GXPrimitive primitive, u16 numVerts, u16 vertexSize, u16 vertexStart) noexcept
: primitive(primitive), vertexStart(vertexStart) {
vertexBuffer.reserve_extra(size_t(numVerts) * vertexSize);
if (numVerts > 3 && (primitive == GX_TRIANGLEFAN || primitive == GX_TRIANGLESTRIP)) {
indices.reserve((u32(numVerts) - 3) * 3 + 3);
@ -43,6 +45,7 @@ struct SStreamState {
};
static std::optional<SStreamState> sStreamState;
static u16 lastVertexStart = 0;
void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
#ifndef NDEBUG
@ -73,7 +76,7 @@ void GXBegin(GXPrimitive primitive, GXVtxFmt vtxFmt, u16 nVerts) {
Log.report(LOG_FATAL, FMT_STRING("no vtx attributes enabled?"));
unreachable();
}
sStreamState.emplace(primitive, nVerts, vertexSize);
sStreamState.emplace(primitive, nVerts, vertexSize, g_gxState.stateDirty ? 0 : lastVertexStart);
}
static inline void check_attr_order(GXAttr attr) noexcept {
@ -96,26 +99,27 @@ void GXPosition3f32(float x, float y, float z) {
state.vertexBuffer.append(&x, sizeof(float));
state.vertexBuffer.append(&y, sizeof(float));
state.vertexBuffer.append(&z, sizeof(float));
auto curVertex = state.vertexStart + state.vertexCount;
if (state.primitive == GX_TRIANGLES || state.vertexCount < 3) {
// pass
} else if (state.primitive == GX_TRIANGLEFAN) {
state.indices.push_back(0);
state.indices.push_back(state.vertexCount - 1);
state.indices.push_back(state.vertexStart);
state.indices.push_back(curVertex - 1);
} else if (state.primitive == GX_TRIANGLESTRIP) {
if ((state.vertexCount & 1) == 0) {
state.indices.push_back(state.vertexCount - 2);
state.indices.push_back(state.vertexCount - 1);
state.indices.push_back(curVertex - 2);
state.indices.push_back(curVertex - 1);
} else {
state.indices.push_back(state.vertexCount - 1);
state.indices.push_back(state.vertexCount - 2);
state.indices.push_back(curVertex - 1);
state.indices.push_back(curVertex - 2);
}
} else if (state.primitive == GX_QUADS) {
if ((state.vertexCount & 3) == 3) {
state.indices.push_back(state.vertexCount - 3);
state.indices.push_back(state.vertexCount - 1);
state.indices.push_back(curVertex - 3);
state.indices.push_back(curVertex - 1);
}
}
state.indices.push_back(state.vertexCount);
state.indices.push_back(curVertex);
++state.vertexCount;
}
@ -171,18 +175,27 @@ void GXEnd() {
}
const auto vertRange = aurora::gfx::push_verts(sStreamState->vertexBuffer.data(), sStreamState->vertexBuffer.size());
const auto indexRange = aurora::gfx::push_indices(aurora::ArrayRef{sStreamState->indices});
aurora::gfx::stream::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES);
const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig);
const auto pipeline = aurora::gfx::pipeline_ref(config);
aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.uniformRange = build_uniform(info),
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
.bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}),
.dstAlpha = g_gxState.dstAlpha,
});
if (g_gxState.stateDirty) {
aurora::gfx::stream::PipelineConfig config{};
populate_pipeline_config(config, GX_TRIANGLES);
const auto info = aurora::gfx::gx::build_shader_info(config.shaderConfig);
const auto pipeline = aurora::gfx::pipeline_ref(config);
aurora::gfx::push_draw_command(aurora::gfx::stream::DrawData{
.pipeline = pipeline,
.vertRange = vertRange,
.uniformRange = build_uniform(info),
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
.bindGroups = aurora::gfx::gx::build_bind_groups(info, config.shaderConfig, {}),
.dstAlpha = g_gxState.dstAlpha,
});
} else {
aurora::gfx::merge_draw_command(aurora::gfx::stream::DrawData{
.vertRange = vertRange,
.indexRange = indexRange,
.indexCount = static_cast<uint32_t>(sStreamState->indices.size()),
});
}
lastVertexStart = sStreamState->vertexStart + sStreamState->vertexCount;
sStreamState.reset();
}

View File

@ -122,19 +122,25 @@ static ByteBuffer g_verts;
static ByteBuffer g_uniforms;
static ByteBuffer g_indices;
static ByteBuffer g_storage;
static ByteBuffer g_staticStorage;
static ByteBuffer g_textureUpload;
WGPUBuffer g_vertexBuffer;
WGPUBuffer g_uniformBuffer;
WGPUBuffer g_indexBuffer;
WGPUBuffer g_storageBuffer;
size_t g_staticStorageLastSize = 0;
static std::array<WGPUBuffer, 3> g_stagingBuffers;
static WGPUSupportedLimits g_cachedLimits;
static ShaderState g_state;
static PipelineRef g_currentPipeline;
// for imgui debug
size_t g_drawCallCount;
size_t g_mergedDrawCallCount;
size_t g_lastVertSize;
size_t g_lastUniformSize;
size_t g_lastIndexSize;
size_t g_lastStorageSize;
using CommandList = std::vector<Command>;
struct ClipRect {
int32_t x;
@ -217,8 +223,25 @@ static inline void push_command(CommandType type, const Command::Data& data) {
.data = data,
});
}
// Returns a mutable reference to the most recently recorded command of the current render pass,
// which must be a draw command whose shader type equals `type`.
//
// Reports LOG_FATAL when:
//   - no render pass is current (g_currentRenderPass == UINT32_MAX),
//   - the current render pass has not recorded any command yet, or
//   - the last recorded command is not a draw of the requested shader type.
static inline Command& get_last_draw_command(ShaderType type) {
  if (g_currentRenderPass == UINT32_MAX) {
    Log.report(LOG_FATAL, FMT_STRING("No last command"));
    unreachable();
  }
  auto& commands = g_renderPasses[g_currentRenderPass].commands;
  if (commands.empty()) {
    // back() on an empty container is undefined behavior; fail loudly instead of reading garbage.
    Log.report(LOG_FATAL, FMT_STRING("No last command"));
    unreachable();
  }
  auto& last = commands.back();
  if (last.type != CommandType::Draw || last.data.draw.type != type) {
    Log.report(LOG_FATAL, FMT_STRING("Last command invalid: {} {}, expected {} {}"), magic_enum::enum_name(last.type),
               magic_enum::enum_name(last.data.draw.type), magic_enum::enum_name(CommandType::Draw),
               magic_enum::enum_name(type));
    unreachable();
  }
  return last;
}
static void push_draw_command(ShaderDrawCommand data) { push_command(CommandType::Draw, Command::Data{.draw = data}); }
static void push_draw_command(ShaderDrawCommand data) {
push_command(CommandType::Draw, Command::Data{.draw = data});
++g_drawCallCount;
}
static Command::Data::SetViewportCommand g_cachedViewport;
void set_viewport(float left, float top, float width, float height, float znear, float zfar) noexcept {
@ -272,6 +295,22 @@ void push_draw_command(stream::DrawData data) {
push_draw_command(ShaderDrawCommand{.type = ShaderType::Stream, .stream = data});
}
// Stream specialization: instead of recording a new draw, extends the most recently recorded
// stream draw command so that it also covers the vertices and indices described by `data`.
// Only the vertRange, indexRange and indexCount fields of `data` are read here.
template <>
void merge_draw_command(stream::DrawData data) {
  auto& prev = get_last_draw_command(ShaderType::Stream).data.draw.stream;

  // The incoming vertex data has to begin exactly where the previous draw's vertex data ends.
  const auto expectedVertOffset = prev.vertRange.offset + prev.vertRange.size;
  if (expectedVertOffset != data.vertRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), expectedVertOffset, data.vertRange.offset);
  }

  // Same contiguity requirement for the index data.
  const auto expectedIndexOffset = prev.indexRange.offset + prev.indexRange.size;
  if (expectedIndexOffset != data.indexRange.offset) {
    Log.report(LOG_FATAL, FMT_STRING("Invalid merge range: {} -> {}"), expectedIndexOffset, data.indexRange.offset);
  }

  // Grow the previous draw in place and count the merge for the debug statistics.
  prev.indexCount += data.indexCount;
  prev.indexRange.size += data.indexRange.size;
  prev.vertRange.size += data.vertRange.size;
  ++g_mergedDrawCallCount;
}
// Stream specialization: looks up the pipeline reference for `config` through find_pipeline,
// handing it a factory that builds the pipeline from g_state.stream and a copy of `config`.
template <>
PipelineRef pipeline_ref(stream::PipelineConfig config) {
  const auto makeStreamPipeline = [config]() { return create_pipeline(g_state.stream, config); };
  return find_pipeline(ShaderType::Stream, config, makeStreamPipeline);
}
@ -515,6 +554,9 @@ void begin_frame() {
mapBuffer(g_storage, StorageBufferSize);
mapBuffer(g_textureUpload, TextureUploadSize);
g_drawCallCount = 0;
g_mergedDrawCallCount = 0;
g_renderPasses.emplace_back();
g_renderPasses[0].clearColor = gx::g_gxState.clearColor;
g_currentRenderPass = 0;
@ -522,19 +564,13 @@ void begin_frame() {
// push_command(CommandType::SetScissor, Command::Data{.setScissor = g_cachedScissor});
}
// for imgui debug
size_t g_lastVertSize;
size_t g_lastUniformSize;
size_t g_lastIndexSize;
size_t g_lastStorageSize;
void end_frame(WGPUCommandEncoder cmd) {
uint64_t bufferOffset = 0;
const auto writeBuffer = [&](ByteBuffer& buf, WGPUBuffer& out, uint64_t size, std::string_view label) {
const auto writeSize = buf.size(); // Only need to copy this many bytes
if (writeSize > 0) {
wgpuCommandEncoderCopyBufferToBuffer(cmd, g_stagingBuffers[currentStagingBuffer], bufferOffset, out, 0,
writeSize);
ALIGN(writeSize, 4));
buf.clear();
}
bufferOffset += size;
@ -736,8 +772,8 @@ static inline Range map(ByteBuffer& target, size_t length, size_t alignment) {
target.append_zeroes(length + padding);
return {static_cast<uint32_t>(begin), static_cast<uint32_t>(length + padding)};
}
Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 4); }
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 4); }
Range push_verts(const uint8_t* data, size_t length) { return push(g_verts, data, length, 0); }
Range push_indices(const uint8_t* data, size_t length) { return push(g_indices, data, length, 0); }
// Appends `length` bytes from `data` to the uniform staging buffer (g_uniforms) via push, passing
// the cached device limit minUniformBufferOffsetAlignment as the alignment argument.
// Returns the Range produced by push.
Range push_uniform(const uint8_t* data, size_t length) {
  const auto uniformAlignment = g_cachedLimits.limits.minUniformBufferOffsetAlignment;
  return push(g_uniforms, data, length, uniformAlignment);
}

View File

@ -127,7 +127,6 @@ extern WGPUBuffer g_vertexBuffer;
extern WGPUBuffer g_uniformBuffer;
extern WGPUBuffer g_indexBuffer;
extern WGPUBuffer g_storageBuffer;
extern size_t g_staticStorageLastSize;
using BindGroupRef = HashType;
using PipelineRef = HashType;
@ -188,6 +187,8 @@ template <typename State>
const State& get_state();
template <typename DrawData>
void push_draw_command(DrawData data);
template <typename DrawData>
void merge_draw_command(DrawData data);
template <typename PipelineConfig>
PipelineRef pipeline_ref(PipelineConfig config);

View File

@ -473,6 +473,7 @@ Range build_uniform(const ShaderInfo& info) noexcept {
}
buf.append(&tex.texObj.lodBias, 4);
}
g_gxState.stateDirty = false;
return range;
}