SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:31:11 -10:00
parent 1b073abb76
commit 54c466276b
13 changed files with 107 additions and 81 deletions

2
hecl/extern/athena vendored

@ -1 +1 @@
Subproject commit 0cdfd0ad9f14599a53bb3577ef6481807191616d Subproject commit e1b29fda7acf3a17a297a02a63a5f11e94eb2328

2
hecl/extern/boo vendored

@ -1 +1 @@
Subproject commit 54676aff916dfee736132bfd06cd4ad71ac45b81 Subproject commit 2c2c72bfd1e59815bfb90041974ef1e9f57325cb

View File

@ -35,7 +35,8 @@ private:
std::string EmitVec3(const atVec4f& vec) const std::string EmitVec3(const atVec4f& vec) const
{ {
return hecl::Format("vec3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); athena::simd_floats f(vec.simd);
return hecl::Format("vec3(%g,%g,%g)", f[0], f[1], f[2]);
} }
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@ -431,17 +431,19 @@ struct GX final : IBackend
Color() = default; Color() = default;
Color& operator=(const atVec4f& vec) Color& operator=(const atVec4f& vec)
{ {
color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f)); athena::simd_floats f(vec.simd);
color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f)); color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f)); color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
color[3] = uint8_t(std::min(std::max(vec.vec[3] * 255.f, 0.f), 255.f)); color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
color[3] = uint8_t(std::min(std::max(f[3] * 255.f, 0.f), 255.f));
return *this; return *this;
} }
Color& operator=(const atVec3f& vec) Color& operator=(const atVec3f& vec)
{ {
color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f)); athena::simd_floats f(vec.simd);
color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f)); color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f)); color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
color[3] = 0xff; color[3] = 0xff;
return *this; return *this;
} }
@ -456,10 +458,12 @@ struct GX final : IBackend
atVec4f toVec4f() const atVec4f toVec4f() const
{ {
atVec4f out; atVec4f out;
out.vec[0] = color[0] / 255.f; athena::simd_floats f;
out.vec[1] = color[1] / 255.f; f[0] = color[0] / 255.f;
out.vec[2] = color[2] / 255.f; f[1] = color[1] / 255.f;
out.vec[3] = color[3] / 255.f; f[2] = color[2] / 255.f;
f[3] = color[3] / 255.f;
out.simd.copy_from(f);
return out; return out;
} }
Color(const atVec4f& vec) {*this = vec;} Color(const atVec4f& vec) {*this = vec;}

View File

@ -31,7 +31,8 @@ private:
std::string EmitVec3(const atVec4f& vec) const std::string EmitVec3(const atVec4f& vec) const
{ {
return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); athena::simd_floats f(vec.simd);
return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
} }
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@ -32,7 +32,8 @@ private:
std::string EmitVec3(const atVec4f& vec) const std::string EmitVec3(const atVec4f& vec) const
{ {
return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); athena::simd_floats f(vec.simd);
return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
} }
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@ -104,7 +104,7 @@ unsigned GX::RecursiveTraceTexGen(const IR& ir, Diagnostics& diag, const IR::Ins
diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument"); diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
auto& idxImm = idxInst.getImmVec(); auto& idxImm = idxInst.getImmVec();
return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.vec[0])), mtx, normalize, pmtx); return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.simd[0])), mtx, normalize, pmtx);
} }
else if (!tcgName.compare("Normal")) else if (!tcgName.compare("Normal"))
return addTexCoordGen(diag, inst.m_loc, TG_NRM, mtx, normalize, pmtx); return addTexCoordGen(diag, inst.m_loc, TG_NRM, mtx, normalize, pmtx);
@ -147,7 +147,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
const IR::Instruction& mapInst = inst.getChildInst(ir, 0); const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
auto& mapImm = mapInst.getImmVec(); auto& mapImm = mapInst.getImmVec();
newStage.m_texMapIdx = unsigned(mapImm.vec[0]); newStage.m_texMapIdx = unsigned(mapImm.simd[0]);
newStage.m_color[0] = swizzleAlpha ? CC_TEXA : CC_TEXC; newStage.m_color[0] = swizzleAlpha ? CC_TEXA : CC_TEXC;
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
@ -158,7 +158,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
else if (!name.compare("ColorReg")) else if (!name.compare("ColorReg"))
{ {
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
unsigned idx = unsigned(idxInst.getImmVec().vec[0]); unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
if (swizzleAlpha) if (swizzleAlpha)
m_aRegMask |= 1 << idx; m_aRegMask |= 1 << idx;
else else
@ -176,9 +176,9 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
case IR::OpType::LoadImm: case IR::OpType::LoadImm:
{ {
const atVec4f& vec = inst.m_loadImm.m_immVec; const atVec4f& vec = inst.m_loadImm.m_immVec;
if (vec.vec[0] == 0.f && vec.vec[1] == 0.f && vec.vec[2] == 0.f) if (vec.simd[0] == 0.f && vec.simd[1] == 0.f && vec.simd[2] == 0.f)
return TraceResult(CC_ZERO); return TraceResult(CC_ZERO);
else if (vec.vec[0] == 1.f && vec.vec[1] == 1.f && vec.vec[2] == 1.f) else if (vec.simd[0] == 1.f && vec.simd[1] == 1.f && vec.simd[2] == 1.f)
return TraceResult(CC_ONE); return TraceResult(CC_ONE);
unsigned idx = addKColor(diag, inst.m_loc, vec); unsigned idx = addKColor(diag, inst.m_loc, vec);
return TraceResult(TevKColorSel(TEV_KCSEL_K0 + idx)); return TraceResult(TevKColorSel(TEV_KCSEL_K0 + idx));
@ -438,7 +438,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
const IR::Instruction& mapInst = inst.getChildInst(ir, 0); const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
const atVec4f& mapImm = mapInst.getImmVec(); const atVec4f& mapImm = mapInst.getImmVec();
unsigned mapIdx = unsigned(mapImm.vec[0]); unsigned mapIdx = unsigned(mapImm.simd[0]);
int foundStage = -1; int foundStage = -1;
for (int i=m_alphaTraceStage+1 ; i<int(m_tevCount) ; ++i) for (int i=m_alphaTraceStage+1 ; i<int(m_tevCount) ; ++i)
@ -473,7 +473,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
else if (!name.compare("ColorReg")) else if (!name.compare("ColorReg"))
{ {
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
unsigned idx = unsigned(idxInst.getImmVec().vec[0]); unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
m_aRegMask |= 1 << idx; m_aRegMask |= 1 << idx;
return TraceResult(TevAlphaArg(CA_A0 + idx)); return TraceResult(TevAlphaArg(CA_A0 + idx));
} }
@ -488,11 +488,11 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
case IR::OpType::LoadImm: case IR::OpType::LoadImm:
{ {
const atVec4f& vec = inst.m_loadImm.m_immVec; const atVec4f& vec = inst.m_loadImm.m_immVec;
if (vec.vec[0] == 0.f) if (vec.simd[0] == 0.f)
return TraceResult(CA_ZERO); return TraceResult(CA_ZERO);
else if (vec.vec[0] == 1.f) else if (vec.simd[0] == 1.f)
return TraceResult(TEV_KASEL_1); return TraceResult(TEV_KASEL_1);
unsigned idx = addKAlpha(diag, inst.m_loc, vec.vec[0]); unsigned idx = addKAlpha(diag, inst.m_loc, vec.simd[0]);
return TraceResult(TevKAlphaSel(TEV_KASEL_K0_A + idx)); return TraceResult(TevKAlphaSel(TEV_KASEL_K0_A + idx));
} }
case IR::OpType::Arithmetic: case IR::OpType::Arithmetic:

View File

@ -183,23 +183,23 @@ std::string Metal::makeVert(unsigned col, unsigned uv, unsigned w,
retval += " float4 objPos = float4(0.0,0.0,0.0,0.0);\n" retval += " float4 objPos = float4(0.0,0.0,0.0,0.0);\n"
" float4 objNorm = float4(0.0,0.0,0.0,0.0);\n"; " float4 objNorm = float4(0.0,0.0,0.0,0.0);\n";
for (size_t i=0 ; i<s ; ++i) for (size_t i=0 ; i<s ; ++i)
retval += hecl::Format(" objPos += (vu.mv[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n" retval += hecl::Format(" objPos += (vu.objs[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n"
" objNorm += (vu.mvInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n", " objNorm += (vu.objsInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n",
i, i/4, i%4, i, i/4, i%4); i, i/4, i%4, i, i/4, i%4);
retval += " objPos[3] = 1.0;\n" retval += " objPos[3] = 1.0;\n"
" objNorm = float4(normalize(objNorm.xyz), 0.0);\n" " objNorm = float4(normalize(objNorm.xyz), 0.0);\n"
" vtf.mvPos = mv * objPos;\n" " vtf.mvPos = vu.mv * objPos;\n"
" vtf.mvNorm = float4(normalize((mvInv * objNorm).xyz), 0.0);\n" " vtf.mvNorm = float4(normalize((vu.mvInv * objNorm).xyz), 0.0);\n"
" vtf.mvpPos = proj * vtf.mvPos;\n"; " vtf.mvpPos = vu.proj * vtf.mvPos;\n";
} }
else else
{ {
/* non-skinned */ /* non-skinned */
retval += " float4 objPos = float4(posIn, 1.0);\n" retval += " float4 objPos = float4(v.posIn, 1.0);\n"
" float4 objNorm = float4(normIn, 0.0);\n" " float4 objNorm = float4(v.normIn, 0.0);\n"
" vtf.mvPos = mv * objPos;\n" " vtf.mvPos = vu.mv * objPos;\n"
" vtf.mvNorm = mvInv * objNorm;\n" " vtf.mvNorm = vu.mvInv * objNorm;\n"
" vtf.mvpPos = proj * vtf.mvPos;\n"; " vtf.mvpPos = vu.proj * vtf.mvPos;\n";
} }
retval += " float4 tmpProj;\n"; retval += " float4 tmpProj;\n";

View File

@ -84,7 +84,7 @@ unsigned ProgrammableCommon::RecursiveTraceTexGen(const IR& ir, Diagnostics& dia
diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument"); diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
auto& idxImm = idxInst.getImmVec(); auto& idxImm = idxInst.getImmVec();
return addTexCoordGen(TexGenSrc::UV, idxImm.vec[0], mtx, normalize); return addTexCoordGen(TexGenSrc::UV, int(idxImm.simd[0]), mtx, normalize);
} }
else if (!tcgName.compare("Normal")) else if (!tcgName.compare("Normal"))
return addTexCoordGen(TexGenSrc::Normal, -1, mtx, normalize); return addTexCoordGen(TexGenSrc::Normal, -1, mtx, normalize);
@ -123,7 +123,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
const IR::Instruction& mapInst = inst.getChildInst(ir, 0); const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
auto& mapImm = mapInst.getImmVec(); auto& mapImm = mapInst.getImmVec();
unsigned mapIdx = unsigned(mapImm.vec[0]); unsigned mapIdx = unsigned(mapImm.simd[0]);
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize); unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
@ -134,7 +134,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
else if (!name.compare("ColorReg")) else if (!name.compare("ColorReg"))
{ {
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
unsigned idx = unsigned(idxInst.getImmVec().vec[0]); unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseRGB(idx); return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseRGB(idx);
} }
else if (!name.compare("Lighting")) else if (!name.compare("Lighting"))
@ -221,7 +221,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
const IR::Instruction& mapInst = inst.getChildInst(ir, 0); const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
const atVec4f& mapImm = mapInst.getImmVec(); const atVec4f& mapImm = mapInst.getImmVec();
unsigned mapIdx = unsigned(mapImm.vec[0]); unsigned mapIdx = unsigned(mapImm.simd[0]);
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize); unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
@ -232,7 +232,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
else if (!name.compare("ColorReg")) else if (!name.compare("ColorReg"))
{ {
const IR::Instruction& idxInst = inst.getChildInst(ir, 0); const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
unsigned idx = unsigned(idxInst.getImmVec().vec[0]); unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseAlpha(idx); return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseAlpha(idx);
} }
else if (!name.compare("Lighting")) else if (!name.compare("Lighting"))
@ -247,7 +247,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
case IR::OpType::LoadImm: case IR::OpType::LoadImm:
{ {
const atVec4f& vec = inst.m_loadImm.m_immVec; const atVec4f& vec = inst.m_loadImm.m_immVec;
return EmitVal(vec.vec[0]); return EmitVal(vec.simd[0]);
} }
case IR::OpType::Arithmetic: case IR::OpType::Arithmetic:
{ {

View File

@ -830,6 +830,8 @@ void PyOutStream::linkBackground(const char* target, const char* sceneName)
void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max) void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
{ {
athena::simd_floats minf(min.simd);
athena::simd_floats maxf(max.simd);
format("bm = bmesh.new()\n" format("bm = bmesh.new()\n"
"bm.verts.new((%f,%f,%f))\n" "bm.verts.new((%f,%f,%f))\n"
"bm.verts.new((%f,%f,%f))\n" "bm.verts.new((%f,%f,%f))\n"
@ -852,14 +854,14 @@ void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
"bm.edges.new((bm.verts[6], bm.verts[2]))\n" "bm.edges.new((bm.verts[6], bm.verts[2]))\n"
"bm.edges.new((bm.verts[6], bm.verts[4]))\n" "bm.edges.new((bm.verts[6], bm.verts[4]))\n"
"bm.edges.new((bm.verts[6], bm.verts[7]))\n", "bm.edges.new((bm.verts[6], bm.verts[7]))\n",
min.vec[0], min.vec[1], min.vec[2], minf[0], minf[1], minf[2],
max.vec[0], min.vec[1], min.vec[2], maxf[0], minf[1], minf[2],
min.vec[0], max.vec[1], min.vec[2], minf[0], maxf[1], minf[2],
max.vec[0], max.vec[1], min.vec[2], maxf[0], maxf[1], minf[2],
min.vec[0], min.vec[1], max.vec[2], minf[0], minf[1], maxf[2],
max.vec[0], min.vec[1], max.vec[2], maxf[0], minf[1], maxf[2],
min.vec[0], max.vec[1], max.vec[2], minf[0], maxf[1], maxf[2],
max.vec[0], max.vec[1], max.vec[2]); maxf[0], maxf[1], maxf[2]);
} }
void PyOutStream::centerView() void PyOutStream::centerView()
@ -2333,9 +2335,9 @@ DataStream::getBoneMatrices(std::string_view name)
{ {
float val; float val;
m_parent->_readBuf(&val, 4); m_parent->_readBuf(&val, 4);
matOut[i].vec[j] = val; matOut[i].simd[j] = val;
} }
reinterpret_cast<atVec4f&>(matOut[i]).vec[3] = 0.f; reinterpret_cast<atVec4f&>(matOut[i]).simd[3] = 0.f;
} }
ret.emplace(std::make_pair(std::move(name), std::move(matOut))); ret.emplace(std::make_pair(std::move(name), std::move(matOut)));
@ -2355,8 +2357,8 @@ bool DataStream::renderPvs(std::string_view path, const atVec3f& location)
m_parent->getBlendPath().getAbsolutePath().data()); m_parent->getBlendPath().getAbsolutePath().data());
char req[256]; char req[256];
snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), athena::simd_floats f(location.simd);
location.vec[0], location.vec[1], location.vec[2]); snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), f[0], f[1], f[2]);
m_parent->_writeStr(req); m_parent->_writeStr(req);
char readBuf[256]; char readBuf[256];

View File

@ -11,18 +11,30 @@ namespace hecl::blender
atVec3f MtxVecMul4RM(const Matrix4f& mtx, const Vector3f& vec) atVec3f MtxVecMul4RM(const Matrix4f& mtx, const Vector3f& vec)
{ {
atVec3f res; atVec3f res;
res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2] + mtx[0].vec[3]; athena::simd_floats resf;
res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2] + mtx[1].vec[3]; athena::simd_floats mtxf[3];
res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2] + mtx[2].vec[3]; for (int i = 0; i < 3; ++i)
mtx[i].simd.copy_to(mtxf[i]);
athena::simd_floats vecf(vec.val.simd);
resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2] + mtxf[0][3];
resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2] + mtxf[1][3];
resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2] + mtxf[2][3];
res.simd.copy_from(resf);
return res; return res;
} }
atVec3f MtxVecMul3RM(const Matrix4f& mtx, const Vector3f& vec) atVec3f MtxVecMul3RM(const Matrix4f& mtx, const Vector3f& vec)
{ {
atVec3f res; atVec3f res;
res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2]; athena::simd_floats resf;
res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2]; athena::simd_floats mtxf[3];
res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2]; for (int i = 0; i < 3; ++i)
mtx[i].simd.copy_to(mtxf[i]);
athena::simd_floats vecf(vec.val.simd);
resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2];
resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2];
resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2];
res.simd.copy_from(resf);
return res; return res;
} }
@ -113,15 +125,11 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
vboW.writeVec3fLittle(preXfPos); vboW.writeVec3fLittle(preXfPos);
atVec3f preXfNorm = MtxVecMul3RM(sceneXf, norm[v.iNorm]); atVec3f preXfNorm = MtxVecMul3RM(sceneXf, norm[v.iNorm]);
float mag = athena::simd_floats f(preXfNorm.simd * preXfNorm.simd);
preXfNorm.vec[0] * preXfNorm.vec[0] + float mag = f[0] + f[1] + f[2];
preXfNorm.vec[1] * preXfNorm.vec[1] +
preXfNorm.vec[2] * preXfNorm.vec[2];
if (mag > FLT_EPSILON) if (mag > FLT_EPSILON)
mag = 1.f / std::sqrt(mag); mag = 1.f / std::sqrt(mag);
preXfNorm.vec[0] *= mag; preXfNorm.simd *= mag;
preXfNorm.vec[1] *= mag;
preXfNorm.vec[2] *= mag;
vboW.writeVec3fLittle(preXfNorm); vboW.writeVec3fLittle(preXfNorm);
} }
else else
@ -133,9 +141,10 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
for (size_t i=0 ; i<colorLayerCount ; ++i) for (size_t i=0 ; i<colorLayerCount ; ++i)
{ {
const Vector3f& c = color[v.iColor[i]]; const Vector3f& c = color[v.iColor[i]];
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[0] * 255)))); athena::simd_floats f(c.val.simd);
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[1] * 255)))); vboW.writeUByte(std::max(0, std::min(255, int(f[0] * 255))));
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[2] * 255)))); vboW.writeUByte(std::max(0, std::min(255, int(f[1] * 255))));
vboW.writeUByte(std::max(0, std::min(255, int(f[2] * 255))));
vboW.writeUByte(255); vboW.writeUByte(255);
} }
@ -158,7 +167,7 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
for (const SkinBind& bind : binds) for (const SkinBind& bind : binds)
if (bind.boneIdx == *it) if (bind.boneIdx == *it)
{ {
vec.vec[j] = bind.weight; vec.simd[j] = bind.weight;
break; break;
} }
++it; ++it;

View File

@ -144,7 +144,9 @@ atVec4f CVar::toVec4f(bool* isValid) const
*isValid = true; *isValid = true;
atVec4f vec; atVec4f vec;
std::sscanf(m_value.c_str(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); athena::simd_floats f;
std::sscanf(m_value.c_str(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
vec.simd.copy_from(f);
return vec; return vec;
} }
@ -247,7 +249,8 @@ bool CVar::fromVec4f(const atVec4f& val)
if (isReadOnly() && (com_developer && !com_developer->toBoolean())) if (isReadOnly() && (com_developer && !com_developer->toBoolean()))
return false; return false;
m_value.assign(hecl::Format("%f %f %f %f", val.vec[0], val.vec[1], val.vec[2], val.vec[3])); athena::simd_floats f(val.simd);
m_value.assign(hecl::Format("%f %f %f %f", f[0], f[1], f[2], f[3]));
m_flags |= EFlags::Modified; m_flags |= EFlags::Modified;
return true; return true;
} }
@ -378,7 +381,9 @@ bool CVar::fromLiteralToType(std::string_view val, bool setDefault)
case EType::Vec4f: case EType::Vec4f:
{ {
atVec4f vec; atVec4f vec;
std::sscanf(val.data(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); athena::simd_floats f;
std::sscanf(val.data(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
vec.simd.copy_from(f);
return fromVec4f(vec); return fromVec4f(vec);
} }
} }
@ -419,7 +424,9 @@ bool CVar::fromLiteralToType(std::wstring_view val, bool setDefault)
case EType::Vec4f: case EType::Vec4f:
{ {
atVec4f vec; atVec4f vec;
std::swscanf(val.data(), L"%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); athena::simd_floats f;
std::swscanf(val.data(), L"%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
vec.simd.copy_from(f);
return fromVec4f(vec); return fromVec4f(vec);
} }
} }

View File

@ -104,12 +104,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
atVec4f vec = {}; atVec4f vec = {};
auto it = n.children.cbegin(); auto it = n.children.cbegin();
int i; int i;
athena::simd_floats f;
for (i=0 ; i<3 ; ++i, ++it) for (i=0 ; i<3 ; ++i, ++it)
{ {
if (it->kind != IRNode::Kind::Imm) if (it->kind != IRNode::Kind::Imm)
break; break;
vec.vec[i] = it->val; f[i] = it->val;
} }
vec.simd.copy_from(f);
if (i == 3) if (i == 3)
{ {
m_instructions.emplace_back(OpType::LoadImm, target, n.loc); m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
@ -123,12 +125,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
atVec4f vec = {}; atVec4f vec = {};
auto it = n.children.cbegin(); auto it = n.children.cbegin();
int i; int i;
athena::simd_floats f;
for (i=0 ; i<4 ; ++i, ++it) for (i=0 ; i<4 ; ++i, ++it)
{ {
if (it->kind != IRNode::Kind::Imm) if (it->kind != IRNode::Kind::Imm)
break; break;
vec.vec[i] = it->val; f[i] = it->val;
} }
vec.simd.copy_from(f);
if (i == 4) if (i == 4)
{ {
m_instructions.emplace_back(OpType::LoadImm, target, n.loc); m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
@ -154,10 +158,7 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
{ {
m_instructions.emplace_back(OpType::LoadImm, target, n.loc); m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
Instruction::LoadImm& inst = m_instructions.back().m_loadImm; Instruction::LoadImm& inst = m_instructions.back().m_loadImm;
inst.m_immVec.vec[0] = n.val; inst.m_immVec.simd = athena::simd<float>(n.val);
inst.m_immVec.vec[1] = n.val;
inst.m_immVec.vec[2] = n.val;
inst.m_immVec.vec[3] = n.val;
return m_instructions.size() - 1; return m_instructions.size() - 1;
} }
case IRNode::Kind::Binop: case IRNode::Kind::Binop: