SIMD refactor

This commit is contained in:
Jack Andersen 2018-12-07 15:31:11 -10:00
parent 1b073abb76
commit 54c466276b
13 changed files with 107 additions and 81 deletions

2
hecl/extern/athena vendored

@@ -1 +1 @@
-Subproject commit 0cdfd0ad9f14599a53bb3577ef6481807191616d
+Subproject commit e1b29fda7acf3a17a297a02a63a5f11e94eb2328

2
hecl/extern/boo vendored

@@ -1 +1 @@
-Subproject commit 54676aff916dfee736132bfd06cd4ad71ac45b81
+Subproject commit 2c2c72bfd1e59815bfb90041974ef1e9f57325cb

View File

@@ -35,7 +35,8 @@ private:
 std::string EmitVec3(const atVec4f& vec) const
 {
-return hecl::Format("vec3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
+athena::simd_floats f(vec.simd);
+return hecl::Format("vec3(%g,%g,%g)", f[0], f[1], f[2]);
 }
 std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@@ -431,17 +431,19 @@ struct GX final : IBackend
 Color() = default;
 Color& operator=(const atVec4f& vec)
 {
-color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f));
-color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f));
-color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f));
-color[3] = uint8_t(std::min(std::max(vec.vec[3] * 255.f, 0.f), 255.f));
+athena::simd_floats f(vec.simd);
+color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
+color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
+color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
+color[3] = uint8_t(std::min(std::max(f[3] * 255.f, 0.f), 255.f));
 return *this;
 }
 Color& operator=(const atVec3f& vec)
 {
-color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f));
-color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f));
-color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f));
+athena::simd_floats f(vec.simd);
+color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f));
+color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f));
+color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f));
 color[3] = 0xff;
 return *this;
 }
@@ -456,10 +458,12 @@ struct GX final : IBackend
 atVec4f toVec4f() const
 {
 atVec4f out;
-out.vec[0] = color[0] / 255.f;
-out.vec[1] = color[1] / 255.f;
-out.vec[2] = color[2] / 255.f;
-out.vec[3] = color[3] / 255.f;
+athena::simd_floats f;
+f[0] = color[0] / 255.f;
+f[1] = color[1] / 255.f;
+f[2] = color[2] / 255.f;
+f[3] = color[3] / 255.f;
+out.simd.copy_from(f);
 return out;
 }
 Color(const atVec4f& vec) {*this = vec;}

View File

@@ -31,7 +31,8 @@ private:
 std::string EmitVec3(const atVec4f& vec) const
 {
-return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
+athena::simd_floats f(vec.simd);
+return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
 }
 std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@@ -32,7 +32,8 @@ private:
 std::string EmitVec3(const atVec4f& vec) const
 {
-return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]);
+athena::simd_floats f(vec.simd);
+return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]);
 }
 std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const

View File

@@ -104,7 +104,7 @@ unsigned GX::RecursiveTraceTexGen(const IR& ir, Diagnostics& diag, const IR::Ins
 diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
 auto& idxImm = idxInst.getImmVec();
-return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.vec[0])), mtx, normalize, pmtx);
+return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.simd[0])), mtx, normalize, pmtx);
 }
 else if (!tcgName.compare("Normal"))
 return addTexCoordGen(diag, inst.m_loc, TG_NRM, mtx, normalize, pmtx);
@@ -147,7 +147,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
 const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
 auto& mapImm = mapInst.getImmVec();
-newStage.m_texMapIdx = unsigned(mapImm.vec[0]);
+newStage.m_texMapIdx = unsigned(mapImm.simd[0]);
 newStage.m_color[0] = swizzleAlpha ? CC_TEXA : CC_TEXC;
 const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
@@ -158,7 +158,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
 else if (!name.compare("ColorReg"))
 {
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
-unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
+unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
 if (swizzleAlpha)
 m_aRegMask |= 1 << idx;
 else
@@ -176,9 +176,9 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I
 case IR::OpType::LoadImm:
 {
 const atVec4f& vec = inst.m_loadImm.m_immVec;
-if (vec.vec[0] == 0.f && vec.vec[1] == 0.f && vec.vec[2] == 0.f)
+if (vec.simd[0] == 0.f && vec.simd[1] == 0.f && vec.simd[2] == 0.f)
 return TraceResult(CC_ZERO);
-else if (vec.vec[0] == 1.f && vec.vec[1] == 1.f && vec.vec[2] == 1.f)
+else if (vec.simd[0] == 1.f && vec.simd[1] == 1.f && vec.simd[2] == 1.f)
 return TraceResult(CC_ONE);
 unsigned idx = addKColor(diag, inst.m_loc, vec);
 return TraceResult(TevKColorSel(TEV_KCSEL_K0 + idx));
@@ -438,7 +438,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
 const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
 const atVec4f& mapImm = mapInst.getImmVec();
-unsigned mapIdx = unsigned(mapImm.vec[0]);
+unsigned mapIdx = unsigned(mapImm.simd[0]);
 int foundStage = -1;
 for (int i=m_alphaTraceStage+1 ; i<int(m_tevCount) ; ++i)
@@ -473,7 +473,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
 else if (!name.compare("ColorReg"))
 {
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
-unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
+unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
 m_aRegMask |= 1 << idx;
 return TraceResult(TevAlphaArg(CA_A0 + idx));
 }
@@ -488,11 +488,11 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I
 case IR::OpType::LoadImm:
 {
 const atVec4f& vec = inst.m_loadImm.m_immVec;
-if (vec.vec[0] == 0.f)
+if (vec.simd[0] == 0.f)
 return TraceResult(CA_ZERO);
-else if (vec.vec[0] == 1.f)
+else if (vec.simd[0] == 1.f)
 return TraceResult(TEV_KASEL_1);
-unsigned idx = addKAlpha(diag, inst.m_loc, vec.vec[0]);
+unsigned idx = addKAlpha(diag, inst.m_loc, vec.simd[0]);
 return TraceResult(TevKAlphaSel(TEV_KASEL_K0_A + idx));
 }
 case IR::OpType::Arithmetic:

View File

@@ -183,23 +183,23 @@ std::string Metal::makeVert(unsigned col, unsigned uv, unsigned w,
 retval += " float4 objPos = float4(0.0,0.0,0.0,0.0);\n"
 " float4 objNorm = float4(0.0,0.0,0.0,0.0);\n";
 for (size_t i=0 ; i<s ; ++i)
-retval += hecl::Format(" objPos += (vu.mv[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n"
-" objNorm += (vu.mvInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n",
+retval += hecl::Format(" objPos += (vu.objs[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n"
+" objNorm += (vu.objsInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n",
 i, i/4, i%4, i, i/4, i%4);
 retval += " objPos[3] = 1.0;\n"
 " objNorm = float4(normalize(objNorm.xyz), 0.0);\n"
-" vtf.mvPos = mv * objPos;\n"
-" vtf.mvNorm = float4(normalize((mvInv * objNorm).xyz), 0.0);\n"
-" vtf.mvpPos = proj * vtf.mvPos;\n";
+" vtf.mvPos = vu.mv * objPos;\n"
+" vtf.mvNorm = float4(normalize((vu.mvInv * objNorm).xyz), 0.0);\n"
+" vtf.mvpPos = vu.proj * vtf.mvPos;\n";
 }
 else
 {
 /* non-skinned */
-retval += " float4 objPos = float4(posIn, 1.0);\n"
-" float4 objNorm = float4(normIn, 0.0);\n"
-" vtf.mvPos = mv * objPos;\n"
-" vtf.mvNorm = mvInv * objNorm;\n"
-" vtf.mvpPos = proj * vtf.mvPos;\n";
+retval += " float4 objPos = float4(v.posIn, 1.0);\n"
+" float4 objNorm = float4(v.normIn, 0.0);\n"
+" vtf.mvPos = vu.mv * objPos;\n"
+" vtf.mvNorm = vu.mvInv * objNorm;\n"
+" vtf.mvpPos = vu.proj * vtf.mvPos;\n";
 }
 retval += " float4 tmpProj;\n";

View File

@@ -84,7 +84,7 @@ unsigned ProgrammableCommon::RecursiveTraceTexGen(const IR& ir, Diagnostics& dia
 diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument");
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
 auto& idxImm = idxInst.getImmVec();
-return addTexCoordGen(TexGenSrc::UV, idxImm.vec[0], mtx, normalize);
+return addTexCoordGen(TexGenSrc::UV, int(idxImm.simd[0]), mtx, normalize);
 }
 else if (!tcgName.compare("Normal"))
 return addTexCoordGen(TexGenSrc::Normal, -1, mtx, normalize);
@@ -123,7 +123,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
 const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
 auto& mapImm = mapInst.getImmVec();
-unsigned mapIdx = unsigned(mapImm.vec[0]);
+unsigned mapIdx = unsigned(mapImm.simd[0]);
 const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
 unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
@@ -134,7 +134,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d
 else if (!name.compare("ColorReg"))
 {
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
-unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
+unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
 return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseRGB(idx);
 }
 else if (!name.compare("Lighting"))
@@ -221,7 +221,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
 const IR::Instruction& mapInst = inst.getChildInst(ir, 0);
 const atVec4f& mapImm = mapInst.getImmVec();
-unsigned mapIdx = unsigned(mapImm.vec[0]);
+unsigned mapIdx = unsigned(mapImm.simd[0]);
 const IR::Instruction& tcgInst = inst.getChildInst(ir, 1);
 unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize);
@@ -232,7 +232,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
 else if (!name.compare("ColorReg"))
 {
 const IR::Instruction& idxInst = inst.getChildInst(ir, 0);
-unsigned idx = unsigned(idxInst.getImmVec().vec[0]);
+unsigned idx = unsigned(idxInst.getImmVec().simd[0]);
 return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseAlpha(idx);
 }
 else if (!name.compare("Lighting"))
@@ -247,7 +247,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d
 case IR::OpType::LoadImm:
 {
 const atVec4f& vec = inst.m_loadImm.m_immVec;
-return EmitVal(vec.vec[0]);
+return EmitVal(vec.simd[0]);
 }
 case IR::OpType::Arithmetic:
 {

View File

@@ -830,6 +830,8 @@ void PyOutStream::linkBackground(const char* target, const char* sceneName)
 void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
 {
+athena::simd_floats minf(min.simd);
+athena::simd_floats maxf(max.simd);
 format("bm = bmesh.new()\n"
 "bm.verts.new((%f,%f,%f))\n"
 "bm.verts.new((%f,%f,%f))\n"
@@ -852,14 +854,14 @@ void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max)
 "bm.edges.new((bm.verts[6], bm.verts[2]))\n"
 "bm.edges.new((bm.verts[6], bm.verts[4]))\n"
 "bm.edges.new((bm.verts[6], bm.verts[7]))\n",
-min.vec[0], min.vec[1], min.vec[2],
-max.vec[0], min.vec[1], min.vec[2],
-min.vec[0], max.vec[1], min.vec[2],
-max.vec[0], max.vec[1], min.vec[2],
-min.vec[0], min.vec[1], max.vec[2],
-max.vec[0], min.vec[1], max.vec[2],
-min.vec[0], max.vec[1], max.vec[2],
-max.vec[0], max.vec[1], max.vec[2]);
+minf[0], minf[1], minf[2],
+maxf[0], minf[1], minf[2],
+minf[0], maxf[1], minf[2],
+maxf[0], maxf[1], minf[2],
+minf[0], minf[1], maxf[2],
+maxf[0], minf[1], maxf[2],
+minf[0], maxf[1], maxf[2],
+maxf[0], maxf[1], maxf[2]);
 }
 void PyOutStream::centerView()
@@ -2333,9 +2335,9 @@ DataStream::getBoneMatrices(std::string_view name)
 {
 float val;
 m_parent->_readBuf(&val, 4);
-matOut[i].vec[j] = val;
+matOut[i].simd[j] = val;
 }
-reinterpret_cast<atVec4f&>(matOut[i]).vec[3] = 0.f;
+reinterpret_cast<atVec4f&>(matOut[i]).simd[3] = 0.f;
 }
 ret.emplace(std::make_pair(std::move(name), std::move(matOut)));
@@ -2355,8 +2357,8 @@ bool DataStream::renderPvs(std::string_view path, const atVec3f& location)
 m_parent->getBlendPath().getAbsolutePath().data());
 char req[256];
-snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(),
-location.vec[0], location.vec[1], location.vec[2]);
+athena::simd_floats f(location.simd);
+snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), f[0], f[1], f[2]);
 m_parent->_writeStr(req);
 char readBuf[256];

View File

@@ -11,18 +11,30 @@ namespace hecl::blender
 atVec3f MtxVecMul4RM(const Matrix4f& mtx, const Vector3f& vec)
 {
 atVec3f res;
-res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2] + mtx[0].vec[3];
-res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2] + mtx[1].vec[3];
-res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2] + mtx[2].vec[3];
+athena::simd_floats resf;
+athena::simd_floats mtxf[3];
+for (int i = 0; i < 3; ++i)
+mtx[i].simd.copy_to(mtxf[i]);
+athena::simd_floats vecf(vec.val.simd);
+resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2] + mtxf[0][3];
+resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2] + mtxf[1][3];
+resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2] + mtxf[2][3];
+res.simd.copy_from(resf);
 return res;
 }
 atVec3f MtxVecMul3RM(const Matrix4f& mtx, const Vector3f& vec)
 {
 atVec3f res;
-res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2];
-res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2];
-res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2];
+athena::simd_floats resf;
+athena::simd_floats mtxf[3];
+for (int i = 0; i < 3; ++i)
+mtx[i].simd.copy_to(mtxf[i]);
+athena::simd_floats vecf(vec.val.simd);
+resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2];
+resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2];
+resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2];
+res.simd.copy_from(resf);
 return res;
 }
@@ -113,15 +125,11 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
 vboW.writeVec3fLittle(preXfPos);
 atVec3f preXfNorm = MtxVecMul3RM(sceneXf, norm[v.iNorm]);
-float mag =
-preXfNorm.vec[0] * preXfNorm.vec[0] +
-preXfNorm.vec[1] * preXfNorm.vec[1] +
-preXfNorm.vec[2] * preXfNorm.vec[2];
+athena::simd_floats f(preXfNorm.simd * preXfNorm.simd);
+float mag = f[0] + f[1] + f[2];
 if (mag > FLT_EPSILON)
 mag = 1.f / std::sqrt(mag);
-preXfNorm.vec[0] *= mag;
-preXfNorm.vec[1] *= mag;
-preXfNorm.vec[2] *= mag;
+preXfNorm.simd *= mag;
 vboW.writeVec3fLittle(preXfNorm);
 }
 else
@@ -133,9 +141,10 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
 for (size_t i=0 ; i<colorLayerCount ; ++i)
 {
 const Vector3f& c = color[v.iColor[i]];
-vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[0] * 255))));
-vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[1] * 255))));
-vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[2] * 255))));
+athena::simd_floats f(c.val.simd);
+vboW.writeUByte(std::max(0, std::min(255, int(f[0] * 255))));
+vboW.writeUByte(std::max(0, std::min(255, int(f[1] * 255))));
+vboW.writeUByte(std::max(0, std::min(255, int(f[2] * 255))));
 vboW.writeUByte(255);
 }
@@ -158,7 +167,7 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd
 for (const SkinBind& bind : binds)
 if (bind.boneIdx == *it)
 {
-vec.vec[j] = bind.weight;
+vec.simd[j] = bind.weight;
 break;
 }
 ++it;

View File

@@ -144,7 +144,9 @@ atVec4f CVar::toVec4f(bool* isValid) const
 *isValid = true;
 atVec4f vec;
-std::sscanf(m_value.c_str(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
+athena::simd_floats f;
+std::sscanf(m_value.c_str(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
+vec.simd.copy_from(f);
 return vec;
 }
@@ -247,7 +249,8 @@ bool CVar::fromVec4f(const atVec4f& val)
 if (isReadOnly() && (com_developer && !com_developer->toBoolean()))
 return false;
-m_value.assign(hecl::Format("%f %f %f %f", val.vec[0], val.vec[1], val.vec[2], val.vec[3]));
+athena::simd_floats f(val.simd);
+m_value.assign(hecl::Format("%f %f %f %f", f[0], f[1], f[2], f[3]));
 m_flags |= EFlags::Modified;
 return true;
 }
@@ -378,7 +381,9 @@ bool CVar::fromLiteralToType(std::string_view val, bool setDefault)
 case EType::Vec4f:
 {
 atVec4f vec;
-std::sscanf(val.data(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
+athena::simd_floats f;
+std::sscanf(val.data(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
+vec.simd.copy_from(f);
 return fromVec4f(vec);
 }
 }
@@ -419,7 +424,9 @@ bool CVar::fromLiteralToType(std::wstring_view val, bool setDefault)
 case EType::Vec4f:
 {
 atVec4f vec;
-std::swscanf(val.data(), L"%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]);
+athena::simd_floats f;
+std::swscanf(val.data(), L"%f %f %f %f", &f[0], &f[1], &f[2], &f[3]);
+vec.simd.copy_from(f);
 return fromVec4f(vec);
 }
 }

View File

@@ -104,12 +104,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
 atVec4f vec = {};
 auto it = n.children.cbegin();
 int i;
+athena::simd_floats f;
 for (i=0 ; i<3 ; ++i, ++it)
 {
 if (it->kind != IRNode::Kind::Imm)
 break;
-vec.vec[i] = it->val;
+f[i] = it->val;
 }
+vec.simd.copy_from(f);
 if (i == 3)
 {
 m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
@@ -123,12 +125,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
 atVec4f vec = {};
 auto it = n.children.cbegin();
 int i;
+athena::simd_floats f;
 for (i=0 ; i<4 ; ++i, ++it)
 {
 if (it->kind != IRNode::Kind::Imm)
 break;
-vec.vec[i] = it->val;
+f[i] = it->val;
 }
+vec.simd.copy_from(f);
 if (i == 4)
 {
 m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
@@ -154,10 +158,7 @@ int IR::addInstruction(const IRNode& n, IR::RegID target)
 {
 m_instructions.emplace_back(OpType::LoadImm, target, n.loc);
 Instruction::LoadImm& inst = m_instructions.back().m_loadImm;
-inst.m_immVec.vec[0] = n.val;
-inst.m_immVec.vec[1] = n.val;
-inst.m_immVec.vec[2] = n.val;
-inst.m_immVec.vec[3] = n.val;
+inst.m_immVec.simd = athena::simd<float>(n.val);
 return m_instructions.size() - 1;
 }
 case IRNode::Kind::Binop: