mirror of https://github.com/AxioDL/metaforce.git
SIMD refactor
This commit is contained in:
parent
1b073abb76
commit
54c466276b
|
@ -1 +1 @@
|
|||
Subproject commit 0cdfd0ad9f14599a53bb3577ef6481807191616d
|
||||
Subproject commit e1b29fda7acf3a17a297a02a63a5f11e94eb2328
|
|
@ -1 +1 @@
|
|||
Subproject commit 54676aff916dfee736132bfd06cd4ad71ac45b81
|
||||
Subproject commit 2c2c72bfd1e59815bfb90041974ef1e9f57325cb
|
|
@ -35,7 +35,8 @@ private:
|
|||
|
||||
std::string EmitVec3(const atVec4f& vec) const
|
||||
{
|
||||
return hecl::Format("vec3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
|
||||
athena::simd_floats f(vec.simd);
|
||||
return hecl::Format("vec3(%g,%g,%g)", f[0], f[1], f[2]);
|
||||
}
|
||||
|
||||
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const
|
||||
|
|
|
@ -431,17 +431,19 @@ struct GX final : IBackend
|
|||
Color() = default;
|
||||
Color& operator=(const atVec4f& vec)
|
||||
{
|
||||
color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f));
|
||||
color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f));
|
||||
color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f));
|
||||
color[3] = uint8_t(std::min(std::max(vec.vec[3] * 255.f, 0.f), 255.f));
|
||||
athena::simd_floats f(vec.simd);
|
||||
color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
|
||||
color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
|
||||
color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
|
||||
color[3] = uint8_t(std::min(std::max(f[3] * 255.f, 0.f), 255.f));
|
||||
return *this;
|
||||
}
|
||||
Color& operator=(const atVec3f& vec)
|
||||
{
|
||||
color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f));
|
||||
color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f));
|
||||
color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f));
|
||||
athena::simd_floats f(vec.simd);
|
||||
color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
|
||||
color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
|
||||
color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
|
||||
color[3] = 0xff;
|
||||
return *this;
|
||||
}
|
||||
|
@ -456,10 +458,12 @@ struct GX final : IBackend
|
|||
atVec4f toVec4f() const
|
||||
{
|
||||
atVec4f out;
|
||||
out.vec[0] = color[0] / 255.f;
|
||||
out.vec[1] = color[1] / 255.f;
|
||||
out.vec[2] = color[2] / 255.f;
|
||||
out.vec[3] = color[3] / 255.f;
|
||||
athena::simd_floats f;
|
||||
f[0] = color[0] / 255.f;
|
||||
f[1] = color[1] / 255.f;
|
||||
f[2] = color[2] / 255.f;
|
||||
f[3] = color[3] / 255.f;
|
||||
out.simd.copy_from(f);
|
||||
return out;
|
||||
}
|
||||
Color(const atVec4f& vec) {*this = vec;}
|
||||
|
|
|
@ -31,7 +31,8 @@ private:
|
|||
|
||||
std::string EmitVec3(const atVec4f& vec) const
|
||||
{
|
||||
return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
|
||||
athena::simd_floats f(vec.simd);
|
||||
return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
|
||||
}
|
||||
|
||||
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const
|
||||
|
|
|
@ -32,7 +32,8 @@ private:
|
|||
|
||||
std::string EmitVec3(const atVec4f& vec) const
|
||||
{
|
||||
return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
|
||||
athena::simd_floats f(vec.simd);
|
||||
return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
|
||||
}
|
||||
|
||||
std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const
|
||||
|
|
|
@ -104,7 +104,7 @@ unsigned GX::RecursiveTraceTexGen(const IR& ir, Diagnostics& diag, const IR::Ins
|
|||
diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
auto& idxImm = idxInst.getImmVec();
|
||||
return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.vec[0])), mtx, normalize, pmtx);
|
||||
return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.simd[0])), mtx, normalize, pmtx);
|
||||
}
|
||||
else if (!tcgName.compare("Normal"))
|
||||
return addTexCoordGen(diag, inst.m_loc, TG_NRM, mtx, normalize, pmtx);
|
||||
|
@ -147,7 +147,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
|
|||
|
||||
const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
|
||||
auto& mapImm = mapInst.getImmVec();
|
||||
newStage.m_texMapIdx = unsigned(mapImm.vec[0]);
|
||||
newStage.m_texMapIdx = unsigned(mapImm.simd[0]);
|
||||
newStage.m_color[0] = swizzleAlpha ? CC_TEXA : CC_TEXC;
|
||||
|
||||
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
|
||||
|
@ -158,7 +158,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
|
|||
else if (!name.compare("ColorReg"))
|
||||
{
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
|
||||
if (swizzleAlpha)
|
||||
m_aRegMask |= 1 << idx;
|
||||
else
|
||||
|
@ -176,9 +176,9 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
|
|||
case IR::OpType::LoadImm:
|
||||
{
|
||||
const atVec4f& vec = inst.m_loadImm.m_immVec;
|
||||
if (vec.vec[0] == 0.f && vec.vec[1] == 0.f && vec.vec[2] == 0.f)
|
||||
if (vec.simd[0] == 0.f && vec.simd[1] == 0.f && vec.simd[2] == 0.f)
|
||||
return TraceResult(CC_ZERO);
|
||||
else if (vec.vec[0] == 1.f && vec.vec[1] == 1.f && vec.vec[2] == 1.f)
|
||||
else if (vec.simd[0] == 1.f && vec.simd[1] == 1.f && vec.simd[2] == 1.f)
|
||||
return TraceResult(CC_ONE);
|
||||
unsigned idx = addKColor(diag, inst.m_loc, vec);
|
||||
return TraceResult(TevKColorSel(TEV_KCSEL_K0 + idx));
|
||||
|
@ -438,7 +438,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
|
|||
|
||||
const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
|
||||
const atVec4f& mapImm = mapInst.getImmVec();
|
||||
unsigned mapIdx = unsigned(mapImm.vec[0]);
|
||||
unsigned mapIdx = unsigned(mapImm.simd[0]);
|
||||
|
||||
int foundStage = -1;
|
||||
for (int i=m_alphaTraceStage+1 ; i<int(m_tevCount) ; ++i)
|
||||
|
@ -473,7 +473,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
|
|||
else if (!name.compare("ColorReg"))
|
||||
{
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
|
||||
m_aRegMask |= 1 << idx;
|
||||
return TraceResult(TevAlphaArg(CA_A0 + idx));
|
||||
}
|
||||
|
@ -488,11 +488,11 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
|
|||
case IR::OpType::LoadImm:
|
||||
{
|
||||
const atVec4f& vec = inst.m_loadImm.m_immVec;
|
||||
if (vec.vec[0] == 0.f)
|
||||
if (vec.simd[0] == 0.f)
|
||||
return TraceResult(CA_ZERO);
|
||||
else if (vec.vec[0] == 1.f)
|
||||
else if (vec.simd[0] == 1.f)
|
||||
return TraceResult(TEV_KASEL_1);
|
||||
unsigned idx = addKAlpha(diag, inst.m_loc, vec.vec[0]);
|
||||
unsigned idx = addKAlpha(diag, inst.m_loc, vec.simd[0]);
|
||||
return TraceResult(TevKAlphaSel(TEV_KASEL_K0_A + idx));
|
||||
}
|
||||
case IR::OpType::Arithmetic:
|
||||
|
|
|
@ -183,23 +183,23 @@ std::string Metal::makeVert(unsigned col, unsigned uv, unsigned w,
|
|||
retval += " float4 objPos = float4(0.0,0.0,0.0,0.0);\n"
|
||||
" float4 objNorm = float4(0.0,0.0,0.0,0.0);\n";
|
||||
for (size_t i=0 ; i<s ; ++i)
|
||||
retval += hecl::Format(" objPos += (vu.mv[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n"
|
||||
" objNorm += (vu.mvInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n",
|
||||
retval += hecl::Format(" objPos += (vu.objs[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n"
|
||||
" objNorm += (vu.objsInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n",
|
||||
i, i/4, i%4, i, i/4, i%4);
|
||||
retval += " objPos[3] = 1.0;\n"
|
||||
" objNorm = float4(normalize(objNorm.xyz), 0.0);\n"
|
||||
" vtf.mvPos = mv * objPos;\n"
|
||||
" vtf.mvNorm = float4(normalize((mvInv * objNorm).xyz), 0.0);\n"
|
||||
" vtf.mvpPos = proj * vtf.mvPos;\n";
|
||||
" vtf.mvPos = vu.mv * objPos;\n"
|
||||
" vtf.mvNorm = float4(normalize((vu.mvInv * objNorm).xyz), 0.0);\n"
|
||||
" vtf.mvpPos = vu.proj * vtf.mvPos;\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
/* non-skinned */
|
||||
retval += " float4 objPos = float4(posIn, 1.0);\n"
|
||||
" float4 objNorm = float4(normIn, 0.0);\n"
|
||||
" vtf.mvPos = mv * objPos;\n"
|
||||
" vtf.mvNorm = mvInv * objNorm;\n"
|
||||
" vtf.mvpPos = proj * vtf.mvPos;\n";
|
||||
retval += " float4 objPos = float4(v.posIn, 1.0);\n"
|
||||
" float4 objNorm = float4(v.normIn, 0.0);\n"
|
||||
" vtf.mvPos = vu.mv * objPos;\n"
|
||||
" vtf.mvNorm = vu.mvInv * objNorm;\n"
|
||||
" vtf.mvpPos = vu.proj * vtf.mvPos;\n";
|
||||
}
|
||||
|
||||
retval += " float4 tmpProj;\n";
|
||||
|
|
|
@ -84,7 +84,7 @@ unsigned ProgrammableCommon::RecursiveTraceTexGen(const IR& ir, Diagnostics& dia
|
|||
diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
auto& idxImm = idxInst.getImmVec();
|
||||
return addTexCoordGen(TexGenSrc::UV, idxImm.vec[0], mtx, normalize);
|
||||
return addTexCoordGen(TexGenSrc::UV, int(idxImm.simd[0]), mtx, normalize);
|
||||
}
|
||||
else if (!tcgName.compare("Normal"))
|
||||
return addTexCoordGen(TexGenSrc::Normal, -1, mtx, normalize);
|
||||
|
@ -123,7 +123,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
|
|||
|
||||
const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
|
||||
auto& mapImm = mapInst.getImmVec();
|
||||
unsigned mapIdx = unsigned(mapImm.vec[0]);
|
||||
unsigned mapIdx = unsigned(mapImm.simd[0]);
|
||||
|
||||
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
|
||||
unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
|
||||
|
@ -134,7 +134,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
|
|||
else if (!name.compare("ColorReg"))
|
||||
{
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
|
||||
return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseRGB(idx);
|
||||
}
|
||||
else if (!name.compare("Lighting"))
|
||||
|
@ -221,7 +221,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
|
|||
|
||||
const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
|
||||
const atVec4f& mapImm = mapInst.getImmVec();
|
||||
unsigned mapIdx = unsigned(mapImm.vec[0]);
|
||||
unsigned mapIdx = unsigned(mapImm.simd[0]);
|
||||
|
||||
const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
|
||||
unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
|
||||
|
@ -232,7 +232,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
|
|||
else if (!name.compare("ColorReg"))
|
||||
{
|
||||
const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
|
||||
unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
|
||||
return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseAlpha(idx);
|
||||
}
|
||||
else if (!name.compare("Lighting"))
|
||||
|
@ -247,7 +247,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
|
|||
case IR::OpType::LoadImm:
|
||||
{
|
||||
const atVec4f& vec = inst.m_loadImm.m_immVec;
|
||||
return EmitVal(vec.vec[0]);
|
||||
return EmitVal(vec.simd[0]);
|
||||
}
|
||||
case IR::OpType::Arithmetic:
|
||||
{
|
||||
|
|
|
@ -830,6 +830,8 @@ void PyOutStream::linkBackground(const char* target, const char* sceneName)
|
|||
|
||||
void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
|
||||
{
|
||||
athena::simd_floats minf(min.simd);
|
||||
athena::simd_floats maxf(max.simd);
|
||||
format("bm = bmesh.new()\n"
|
||||
"bm.verts.new((%f,%f,%f))\n"
|
||||
"bm.verts.new((%f,%f,%f))\n"
|
||||
|
@ -852,14 +854,14 @@ void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
|
|||
"bm.edges.new((bm.verts[6], bm.verts[2]))\n"
|
||||
"bm.edges.new((bm.verts[6], bm.verts[4]))\n"
|
||||
"bm.edges.new((bm.verts[6], bm.verts[7]))\n",
|
||||
min.vec[0], min.vec[1], min.vec[2],
|
||||
max.vec[0], min.vec[1], min.vec[2],
|
||||
min.vec[0], max.vec[1], min.vec[2],
|
||||
max.vec[0], max.vec[1], min.vec[2],
|
||||
min.vec[0], min.vec[1], max.vec[2],
|
||||
max.vec[0], min.vec[1], max.vec[2],
|
||||
min.vec[0], max.vec[1], max.vec[2],
|
||||
max.vec[0], max.vec[1], max.vec[2]);
|
||||
minf[0], minf[1], minf[2],
|
||||
maxf[0], minf[1], minf[2],
|
||||
minf[0], maxf[1], minf[2],
|
||||
maxf[0], maxf[1], minf[2],
|
||||
minf[0], minf[1], maxf[2],
|
||||
maxf[0], minf[1], maxf[2],
|
||||
minf[0], maxf[1], maxf[2],
|
||||
maxf[0], maxf[1], maxf[2]);
|
||||
}
|
||||
|
||||
void PyOutStream::centerView()
|
||||
|
@ -2333,9 +2335,9 @@ DataStream::getBoneMatrices(std::string_view name)
|
|||
{
|
||||
float val;
|
||||
m_parent->_readBuf(&val, 4);
|
||||
matOut[i].vec[j] = val;
|
||||
matOut[i].simd[j] = val;
|
||||
}
|
||||
reinterpret_cast<atVec4f&>(matOut[i]).vec[3] = 0.f;
|
||||
reinterpret_cast<atVec4f&>(matOut[i]).simd[3] = 0.f;
|
||||
}
|
||||
|
||||
ret.emplace(std::make_pair(std::move(name), std::move(matOut)));
|
||||
|
@ -2355,8 +2357,8 @@ bool DataStream::renderPvs(std::string_view path, const atVec3f& location)
|
|||
m_parent->getBlendPath().getAbsolutePath().data());
|
||||
|
||||
char req[256];
|
||||
snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(),
|
||||
location.vec[0], location.vec[1], location.vec[2]);
|
||||
athena::simd_floats f(location.simd);
|
||||
snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), f[0], f[1], f[2]);
|
||||
m_parent->_writeStr(req);
|
||||
|
||||
char readBuf[256];
|
||||
|
|
|
@ -11,18 +11,30 @@ namespace hecl::blender
|
|||
atVec3f MtxVecMul4RM(const Matrix4f& mtx, const Vector3f& vec)
|
||||
{
|
||||
atVec3f res;
|
||||
res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2] + mtx[0].vec[3];
|
||||
res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2] + mtx[1].vec[3];
|
||||
res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2] + mtx[2].vec[3];
|
||||
athena::simd_floats resf;
|
||||
athena::simd_floats mtxf[3];
|
||||
for (int i = 0; i < 3; ++i)
|
||||
mtx[i].simd.copy_to(mtxf[i]);
|
||||
athena::simd_floats vecf(vec.val.simd);
|
||||
resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2] + mtxf[0][3];
|
||||
resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2] + mtxf[1][3];
|
||||
resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2] + mtxf[2][3];
|
||||
res.simd.copy_from(resf);
|
||||
return res;
|
||||
}
|
||||
|
||||
atVec3f MtxVecMul3RM(const Matrix4f& mtx, const Vector3f& vec)
|
||||
{
|
||||
atVec3f res;
|
||||
res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2];
|
||||
res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2];
|
||||
res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2];
|
||||
athena::simd_floats resf;
|
||||
athena::simd_floats mtxf[3];
|
||||
for (int i = 0; i < 3; ++i)
|
||||
mtx[i].simd.copy_to(mtxf[i]);
|
||||
athena::simd_floats vecf(vec.val.simd);
|
||||
resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2];
|
||||
resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2];
|
||||
resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2];
|
||||
res.simd.copy_from(resf);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -113,15 +125,11 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
|
|||
vboW.writeVec3fLittle(preXfPos);
|
||||
|
||||
atVec3f preXfNorm = MtxVecMul3RM(sceneXf, norm[v.iNorm]);
|
||||
float mag =
|
||||
preXfNorm.vec[0] * preXfNorm.vec[0] +
|
||||
preXfNorm.vec[1] * preXfNorm.vec[1] +
|
||||
preXfNorm.vec[2] * preXfNorm.vec[2];
|
||||
athena::simd_floats f(preXfNorm.simd * preXfNorm.simd);
|
||||
float mag = f[0] + f[1] + f[2];
|
||||
if (mag > FLT_EPSILON)
|
||||
mag = 1.f / std::sqrt(mag);
|
||||
preXfNorm.vec[0] *= mag;
|
||||
preXfNorm.vec[1] *= mag;
|
||||
preXfNorm.vec[2] *= mag;
|
||||
preXfNorm.simd *= mag;
|
||||
vboW.writeVec3fLittle(preXfNorm);
|
||||
}
|
||||
else
|
||||
|
@ -133,9 +141,10 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
|
|||
for (size_t i=0 ; i<colorLayerCount ; ++i)
|
||||
{
|
||||
const Vector3f& c = color[v.iColor[i]];
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[0] * 255))));
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[1] * 255))));
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[2] * 255))));
|
||||
athena::simd_floats f(c.val.simd);
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(f[0] * 255))));
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(f[1] * 255))));
|
||||
vboW.writeUByte(std::max(0, std::min(255, int(f[2] * 255))));
|
||||
vboW.writeUByte(255);
|
||||
}
|
||||
|
||||
|
@ -158,7 +167,7 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
|
|||
for (const SkinBind& bind : binds)
|
||||
if (bind.boneIdx == *it)
|
||||
{
|
||||
vec.vec[j] = bind.weight;
|
||||
vec.simd[j] = bind.weight;
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
|
|
|
@ -144,7 +144,9 @@ atVec4f CVar::toVec4f(bool* isValid) const
|
|||
*isValid = true;
|
||||
|
||||
atVec4f vec;
|
||||
std::sscanf(m_value.c_str(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
|
||||
athena::simd_floats f;
|
||||
std::sscanf(m_value.c_str(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
|
||||
vec.simd.copy_from(f);
|
||||
|
||||
return vec;
|
||||
}
|
||||
|
@ -247,7 +249,8 @@ bool CVar::fromVec4f(const atVec4f& val)
|
|||
if (isReadOnly() && (com_developer && !com_developer->toBoolean()))
|
||||
return false;
|
||||
|
||||
m_value.assign(hecl::Format("%f %f %f %f", val.vec[0], val.vec[1], val.vec[2], val.vec[3]));
|
||||
athena::simd_floats f(val.simd);
|
||||
m_value.assign(hecl::Format("%f %f %f %f", f[0], f[1], f[2], f[3]));
|
||||
m_flags |= EFlags::Modified;
|
||||
return true;
|
||||
}
|
||||
|
@ -378,7 +381,9 @@ bool CVar::fromLiteralToType(std::string_view val, bool setDefault)
|
|||
case EType::Vec4f:
|
||||
{
|
||||
atVec4f vec;
|
||||
std::sscanf(val.data(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
|
||||
athena::simd_floats f;
|
||||
std::sscanf(val.data(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
|
||||
vec.simd.copy_from(f);
|
||||
return fromVec4f(vec);
|
||||
}
|
||||
}
|
||||
|
@ -419,7 +424,9 @@ bool CVar::fromLiteralToType(std::wstring_view val, bool setDefault)
|
|||
case EType::Vec4f:
|
||||
{
|
||||
atVec4f vec;
|
||||
std::swscanf(val.data(), L"%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
|
||||
athena::simd_floats f;
|
||||
std::swscanf(val.data(), L"%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
|
||||
vec.simd.copy_from(f);
|
||||
return fromVec4f(vec);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -104,12 +104,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
|
|||
atVec4f vec = {};
|
||||
auto it = n.children.cbegin();
|
||||
int i;
|
||||
athena::simd_floats f;
|
||||
for (i=0 ; i<3 ; ++i, ++it)
|
||||
{
|
||||
if (it->kind != IRNode::Kind::Imm)
|
||||
break;
|
||||
vec.vec[i] = it->val;
|
||||
f[i] = it->val;
|
||||
}
|
||||
vec.simd.copy_from(f);
|
||||
if (i == 3)
|
||||
{
|
||||
m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
|
||||
|
@ -123,12 +125,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
|
|||
atVec4f vec = {};
|
||||
auto it = n.children.cbegin();
|
||||
int i;
|
||||
athena::simd_floats f;
|
||||
for (i=0 ; i<4 ; ++i, ++it)
|
||||
{
|
||||
if (it->kind != IRNode::Kind::Imm)
|
||||
break;
|
||||
vec.vec[i] = it->val;
|
||||
f[i] = it->val;
|
||||
}
|
||||
vec.simd.copy_from(f);
|
||||
if (i == 4)
|
||||
{
|
||||
m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
|
||||
|
@ -154,10 +158,7 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
|
|||
{
|
||||
m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
|
||||
Instruction::LoadImm& inst = m_instructions.back().m_loadImm;
|
||||
inst.m_immVec.vec[0] = n.val;
|
||||
inst.m_immVec.vec[1] = n.val;
|
||||
inst.m_immVec.vec[2] = n.val;
|
||||
inst.m_immVec.vec[3] = n.val;
|
||||
inst.m_immVec.simd = athena::simd<float>(n.val);
|
||||
return m_instructions.size() - 1;
|
||||
}
|
||||
case IRNode::Kind::Binop:
|
||||
|
|
Loading…
Reference in New Issue