mirror of
				https://github.com/AxioDL/metaforce.git
				synced 2025-10-25 04:50:25 +00:00 
			
		
		
		
	SIMD refactor
This commit is contained in:
		
							parent
							
								
									1b073abb76
								
							
						
					
					
						commit
						54c466276b
					
				
							
								
								
									
										2
									
								
								hecl/extern/athena
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										2
									
								
								hecl/extern/athena
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| Subproject commit 0cdfd0ad9f14599a53bb3577ef6481807191616d | ||||
| Subproject commit e1b29fda7acf3a17a297a02a63a5f11e94eb2328 | ||||
							
								
								
									
										2
									
								
								hecl/extern/boo
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
								
									
									
								
							
						
						
									
										2
									
								
								hecl/extern/boo
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| Subproject commit 54676aff916dfee736132bfd06cd4ad71ac45b81 | ||||
| Subproject commit 2c2c72bfd1e59815bfb90041974ef1e9f57325cb | ||||
| @ -35,7 +35,8 @@ private: | ||||
| 
 | ||||
|     std::string EmitVec3(const atVec4f& vec) const | ||||
|     { | ||||
|         return hecl::Format("vec3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); | ||||
|         athena::simd_floats f(vec.simd); | ||||
|         return hecl::Format("vec3(%g,%g,%g)", f[0], f[1], f[2]); | ||||
|     } | ||||
| 
 | ||||
|     std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const | ||||
|  | ||||
| @ -431,17 +431,19 @@ struct GX final : IBackend | ||||
|         Color() = default; | ||||
|         Color& operator=(const atVec4f& vec) | ||||
|         { | ||||
|             color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f)); | ||||
|             color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f)); | ||||
|             color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f)); | ||||
|             color[3] = uint8_t(std::min(std::max(vec.vec[3] * 255.f, 0.f), 255.f)); | ||||
|             athena::simd_floats f(vec.simd); | ||||
|             color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f)); | ||||
|             color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f)); | ||||
|             color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f)); | ||||
|             color[3] = uint8_t(std::min(std::max(f[3] * 255.f, 0.f), 255.f)); | ||||
|             return *this; | ||||
|         } | ||||
|         Color& operator=(const atVec3f& vec) | ||||
|         { | ||||
|             color[0] = uint8_t(std::min(std::max(vec.vec[0] * 255.f, 0.f), 255.f)); | ||||
|             color[1] = uint8_t(std::min(std::max(vec.vec[1] * 255.f, 0.f), 255.f)); | ||||
|             color[2] = uint8_t(std::min(std::max(vec.vec[2] * 255.f, 0.f), 255.f)); | ||||
|             athena::simd_floats f(vec.simd); | ||||
|             color[0] = uint8_t(std::min(std::max(f[0] * 255.f, 0.f), 255.f)); | ||||
|             color[1] = uint8_t(std::min(std::max(f[1] * 255.f, 0.f), 255.f)); | ||||
|             color[2] = uint8_t(std::min(std::max(f[2] * 255.f, 0.f), 255.f)); | ||||
|             color[3] = 0xff; | ||||
|             return *this; | ||||
|         } | ||||
| @ -456,10 +458,12 @@ struct GX final : IBackend | ||||
|         atVec4f toVec4f() const | ||||
|         { | ||||
|             atVec4f out; | ||||
|             out.vec[0] = color[0] / 255.f; | ||||
|             out.vec[1] = color[1] / 255.f; | ||||
|             out.vec[2] = color[2] / 255.f; | ||||
|             out.vec[3] = color[3] / 255.f; | ||||
|             athena::simd_floats f; | ||||
|             f[0] = color[0] / 255.f; | ||||
|             f[1] = color[1] / 255.f; | ||||
|             f[2] = color[2] / 255.f; | ||||
|             f[3] = color[3] / 255.f; | ||||
|             out.simd.copy_from(f); | ||||
|             return out; | ||||
|         } | ||||
|         Color(const atVec4f& vec) {*this = vec;} | ||||
|  | ||||
| @ -31,7 +31,8 @@ private: | ||||
| 
 | ||||
|     std::string EmitVec3(const atVec4f& vec) const | ||||
|     { | ||||
|         return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); | ||||
|         athena::simd_floats f(vec.simd); | ||||
|         return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]); | ||||
|     } | ||||
| 
 | ||||
|     std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const | ||||
|  | ||||
| @ -32,7 +32,8 @@ private: | ||||
| 
 | ||||
|     std::string EmitVec3(const atVec4f& vec) const | ||||
|     { | ||||
|         return hecl::Format("float3(%g,%g,%g)", vec.vec[0], vec.vec[1], vec.vec[2]); | ||||
|         athena::simd_floats f(vec.simd); | ||||
|         return hecl::Format("float3(%g,%g,%g)", f[0], f[1], f[2]); | ||||
|     } | ||||
| 
 | ||||
|     std::string EmitVec3(const std::string& a, const std::string& b, const std::string& c) const | ||||
|  | ||||
| @ -104,7 +104,7 @@ unsigned GX::RecursiveTraceTexGen(const IR& ir, Diagnostics& diag, const IR::Ins | ||||
|             diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument"); | ||||
|         const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|         auto& idxImm = idxInst.getImmVec(); | ||||
|         return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.vec[0])), mtx, normalize, pmtx); | ||||
|         return addTexCoordGen(diag, inst.m_loc, TexGenSrc(TG_TEX0 + unsigned(idxImm.simd[0])), mtx, normalize, pmtx); | ||||
|     } | ||||
|     else if (!tcgName.compare("Normal")) | ||||
|         return addTexCoordGen(diag, inst.m_loc, TG_NRM, mtx, normalize, pmtx); | ||||
| @ -147,7 +147,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I | ||||
| 
 | ||||
|             const IR::Instruction& mapInst = inst.getChildInst(ir, 0); | ||||
|             auto& mapImm = mapInst.getImmVec(); | ||||
|             newStage.m_texMapIdx = unsigned(mapImm.vec[0]); | ||||
|             newStage.m_texMapIdx = unsigned(mapImm.simd[0]); | ||||
|             newStage.m_color[0] = swizzleAlpha ? CC_TEXA : CC_TEXC; | ||||
| 
 | ||||
|             const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); | ||||
| @ -158,7 +158,7 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I | ||||
|         else if (!name.compare("ColorReg")) | ||||
|         { | ||||
|             const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().vec[0]); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().simd[0]); | ||||
|             if (swizzleAlpha) | ||||
|                 m_aRegMask |= 1 << idx; | ||||
|             else | ||||
| @ -176,9 +176,9 @@ GX::TraceResult GX::RecursiveTraceColor(const IR& ir, Diagnostics& diag, const I | ||||
|     case IR::OpType::LoadImm: | ||||
|     { | ||||
|         const atVec4f& vec = inst.m_loadImm.m_immVec; | ||||
|         if (vec.vec[0] == 0.f && vec.vec[1] == 0.f && vec.vec[2] == 0.f) | ||||
|         if (vec.simd[0] == 0.f && vec.simd[1] == 0.f && vec.simd[2] == 0.f) | ||||
|             return TraceResult(CC_ZERO); | ||||
|         else if (vec.vec[0] == 1.f && vec.vec[1] == 1.f && vec.vec[2] == 1.f) | ||||
|         else if (vec.simd[0] == 1.f && vec.simd[1] == 1.f && vec.simd[2] == 1.f) | ||||
|             return TraceResult(CC_ONE); | ||||
|         unsigned idx = addKColor(diag, inst.m_loc, vec); | ||||
|         return TraceResult(TevKColorSel(TEV_KCSEL_K0 + idx)); | ||||
| @ -438,7 +438,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I | ||||
| 
 | ||||
|             const IR::Instruction& mapInst = inst.getChildInst(ir, 0); | ||||
|             const atVec4f& mapImm = mapInst.getImmVec(); | ||||
|             unsigned mapIdx = unsigned(mapImm.vec[0]); | ||||
|             unsigned mapIdx = unsigned(mapImm.simd[0]); | ||||
| 
 | ||||
|             int foundStage = -1; | ||||
|             for (int i=m_alphaTraceStage+1 ; i<int(m_tevCount) ; ++i) | ||||
| @ -473,7 +473,7 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I | ||||
|         else if (!name.compare("ColorReg")) | ||||
|         { | ||||
|             const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().vec[0]); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().simd[0]); | ||||
|             m_aRegMask |= 1 << idx; | ||||
|             return TraceResult(TevAlphaArg(CA_A0 + idx)); | ||||
|         } | ||||
| @ -488,11 +488,11 @@ GX::TraceResult GX::RecursiveTraceAlpha(const IR& ir, Diagnostics& diag, const I | ||||
|     case IR::OpType::LoadImm: | ||||
|     { | ||||
|         const atVec4f& vec = inst.m_loadImm.m_immVec; | ||||
|         if (vec.vec[0] == 0.f) | ||||
|         if (vec.simd[0] == 0.f) | ||||
|             return TraceResult(CA_ZERO); | ||||
|         else if (vec.vec[0] == 1.f) | ||||
|         else if (vec.simd[0] == 1.f) | ||||
|             return TraceResult(TEV_KASEL_1); | ||||
|         unsigned idx = addKAlpha(diag, inst.m_loc, vec.vec[0]); | ||||
|         unsigned idx = addKAlpha(diag, inst.m_loc, vec.simd[0]); | ||||
|         return TraceResult(TevKAlphaSel(TEV_KASEL_K0_A + idx)); | ||||
|     } | ||||
|     case IR::OpType::Arithmetic: | ||||
|  | ||||
| @ -183,23 +183,23 @@ std::string Metal::makeVert(unsigned col, unsigned uv, unsigned w, | ||||
|         retval += "    float4 objPos = float4(0.0,0.0,0.0,0.0);\n" | ||||
|                   "    float4 objNorm = float4(0.0,0.0,0.0,0.0);\n"; | ||||
|         for (size_t i=0 ; i<s ; ++i) | ||||
|             retval += hecl::Format("    objPos += (vu.mv[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n" | ||||
|                                    "    objNorm += (vu.mvInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n", | ||||
|             retval += hecl::Format("    objPos += (vu.objs[%" PRISize "] * float4(v.posIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n" | ||||
|                                    "    objNorm += (vu.objsInv[%" PRISize "] * float4(v.normIn, 1.0)) * v.weightIn%" PRISize "[%" PRISize "];\n", | ||||
|                                    i, i/4, i%4, i, i/4, i%4); | ||||
|         retval += "    objPos[3] = 1.0;\n" | ||||
|                   "    objNorm = float4(normalize(objNorm.xyz), 0.0);\n" | ||||
|                   "    vtf.mvPos = mv * objPos;\n" | ||||
|                   "    vtf.mvNorm = float4(normalize((mvInv * objNorm).xyz), 0.0);\n" | ||||
|                   "    vtf.mvpPos = proj * vtf.mvPos;\n"; | ||||
|                   "    vtf.mvPos = vu.mv * objPos;\n" | ||||
|                   "    vtf.mvNorm = float4(normalize((vu.mvInv * objNorm).xyz), 0.0);\n" | ||||
|                   "    vtf.mvpPos = vu.proj * vtf.mvPos;\n"; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         /* non-skinned */ | ||||
|         retval += "    float4 objPos = float4(posIn, 1.0);\n" | ||||
|                   "    float4 objNorm = float4(normIn, 0.0);\n" | ||||
|                   "    vtf.mvPos = mv * objPos;\n" | ||||
|                   "    vtf.mvNorm = mvInv * objNorm;\n" | ||||
|                   "    vtf.mvpPos = proj * vtf.mvPos;\n"; | ||||
|         retval += "    float4 objPos = float4(v.posIn, 1.0);\n" | ||||
|                   "    float4 objNorm = float4(v.normIn, 0.0);\n" | ||||
|                   "    vtf.mvPos = vu.mv * objPos;\n" | ||||
|                   "    vtf.mvNorm = vu.mvInv * objNorm;\n" | ||||
|                   "    vtf.mvpPos = vu.proj * vtf.mvPos;\n"; | ||||
|     } | ||||
| 
 | ||||
|     retval += "    float4 tmpProj;\n"; | ||||
|  | ||||
| @ -84,7 +84,7 @@ unsigned ProgrammableCommon::RecursiveTraceTexGen(const IR& ir, Diagnostics& dia | ||||
|             diag.reportBackendErr(inst.m_loc, "TexCoordGen UV(layerIdx) requires one argument"); | ||||
|         const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|         auto& idxImm = idxInst.getImmVec(); | ||||
|         return addTexCoordGen(TexGenSrc::UV, idxImm.vec[0], mtx, normalize); | ||||
|         return addTexCoordGen(TexGenSrc::UV, int(idxImm.simd[0]), mtx, normalize); | ||||
|     } | ||||
|     else if (!tcgName.compare("Normal")) | ||||
|         return addTexCoordGen(TexGenSrc::Normal, -1, mtx, normalize); | ||||
| @ -123,7 +123,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d | ||||
| 
 | ||||
|             const IR::Instruction& mapInst = inst.getChildInst(ir, 0); | ||||
|             auto& mapImm = mapInst.getImmVec(); | ||||
|             unsigned mapIdx = unsigned(mapImm.vec[0]); | ||||
|             unsigned mapIdx = unsigned(mapImm.simd[0]); | ||||
| 
 | ||||
|             const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); | ||||
|             unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize); | ||||
| @ -134,7 +134,7 @@ std::string ProgrammableCommon::RecursiveTraceColor(const IR& ir, Diagnostics& d | ||||
|         else if (!name.compare("ColorReg")) | ||||
|         { | ||||
|             const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().vec[0]); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().simd[0]); | ||||
|             return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseRGB(idx); | ||||
|         } | ||||
|         else if (!name.compare("Lighting")) | ||||
| @ -221,7 +221,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d | ||||
| 
 | ||||
|             const IR::Instruction& mapInst = inst.getChildInst(ir, 0); | ||||
|             const atVec4f& mapImm = mapInst.getImmVec(); | ||||
|             unsigned mapIdx = unsigned(mapImm.vec[0]); | ||||
|             unsigned mapIdx = unsigned(mapImm.simd[0]); | ||||
| 
 | ||||
|             const IR::Instruction& tcgInst = inst.getChildInst(ir, 1); | ||||
|             unsigned texGenIdx = RecursiveTraceTexGen(ir, diag, tcgInst, -1, normalize); | ||||
| @ -232,7 +232,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d | ||||
|         else if (!name.compare("ColorReg")) | ||||
|         { | ||||
|             const IR::Instruction& idxInst = inst.getChildInst(ir, 0); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().vec[0]); | ||||
|             unsigned idx = unsigned(idxInst.getImmVec().simd[0]); | ||||
|             return toSwizzle ? EmitColorRegUseRaw(idx) : EmitColorRegUseAlpha(idx); | ||||
|         } | ||||
|         else if (!name.compare("Lighting")) | ||||
| @ -247,7 +247,7 @@ std::string ProgrammableCommon::RecursiveTraceAlpha(const IR& ir, Diagnostics& d | ||||
|     case IR::OpType::LoadImm: | ||||
|     { | ||||
|         const atVec4f& vec = inst.m_loadImm.m_immVec; | ||||
|         return EmitVal(vec.vec[0]); | ||||
|         return EmitVal(vec.simd[0]); | ||||
|     } | ||||
|     case IR::OpType::Arithmetic: | ||||
|     { | ||||
|  | ||||
| @ -830,6 +830,8 @@ void PyOutStream::linkBackground(const char* target, const char* sceneName) | ||||
| 
 | ||||
| void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max) | ||||
| { | ||||
|     athena::simd_floats minf(min.simd); | ||||
|     athena::simd_floats maxf(max.simd); | ||||
|     format("bm = bmesh.new()\n" | ||||
|                "bm.verts.new((%f,%f,%f))\n" | ||||
|                "bm.verts.new((%f,%f,%f))\n" | ||||
| @ -852,14 +854,14 @@ void PyOutStream::AABBToBMesh(const atVec3f& min, const atVec3f& max) | ||||
|                "bm.edges.new((bm.verts[6], bm.verts[2]))\n" | ||||
|                "bm.edges.new((bm.verts[6], bm.verts[4]))\n" | ||||
|                "bm.edges.new((bm.verts[6], bm.verts[7]))\n", | ||||
|            min.vec[0], min.vec[1], min.vec[2], | ||||
|            max.vec[0], min.vec[1], min.vec[2], | ||||
|            min.vec[0], max.vec[1], min.vec[2], | ||||
|            max.vec[0], max.vec[1], min.vec[2], | ||||
|            min.vec[0], min.vec[1], max.vec[2], | ||||
|            max.vec[0], min.vec[1], max.vec[2], | ||||
|            min.vec[0], max.vec[1], max.vec[2], | ||||
|            max.vec[0], max.vec[1], max.vec[2]); | ||||
|            minf[0], minf[1], minf[2], | ||||
|            maxf[0], minf[1], minf[2], | ||||
|            minf[0], maxf[1], minf[2], | ||||
|            maxf[0], maxf[1], minf[2], | ||||
|            minf[0], minf[1], maxf[2], | ||||
|            maxf[0], minf[1], maxf[2], | ||||
|            minf[0], maxf[1], maxf[2], | ||||
|            maxf[0], maxf[1], maxf[2]); | ||||
| } | ||||
| 
 | ||||
| void PyOutStream::centerView() | ||||
| @ -2333,9 +2335,9 @@ DataStream::getBoneMatrices(std::string_view name) | ||||
|             { | ||||
|                 float val; | ||||
|                 m_parent->_readBuf(&val, 4); | ||||
|                 matOut[i].vec[j] = val; | ||||
|                 matOut[i].simd[j] = val; | ||||
|             } | ||||
|             reinterpret_cast<atVec4f&>(matOut[i]).vec[3] = 0.f; | ||||
|             reinterpret_cast<atVec4f&>(matOut[i]).simd[3] = 0.f; | ||||
|         } | ||||
| 
 | ||||
|         ret.emplace(std::make_pair(std::move(name), std::move(matOut))); | ||||
| @ -2355,8 +2357,8 @@ bool DataStream::renderPvs(std::string_view path, const atVec3f& location) | ||||
|                           m_parent->getBlendPath().getAbsolutePath().data()); | ||||
| 
 | ||||
|     char req[256]; | ||||
|     snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), | ||||
|              location.vec[0], location.vec[1], location.vec[2]); | ||||
|     athena::simd_floats f(location.simd); | ||||
|     snprintf(req, 256, "RENDERPVS %s %f %f %f", path.data(), f[0], f[1], f[2]); | ||||
|     m_parent->_writeStr(req); | ||||
| 
 | ||||
|     char readBuf[256]; | ||||
|  | ||||
| @ -11,18 +11,30 @@ namespace hecl::blender | ||||
| atVec3f MtxVecMul4RM(const Matrix4f& mtx, const Vector3f& vec) | ||||
| { | ||||
|     atVec3f res; | ||||
|     res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2] + mtx[0].vec[3]; | ||||
|     res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2] + mtx[1].vec[3]; | ||||
|     res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2] + mtx[2].vec[3]; | ||||
|     athena::simd_floats resf; | ||||
|     athena::simd_floats mtxf[3]; | ||||
|     for (int i = 0; i < 3; ++i) | ||||
|         mtx[i].simd.copy_to(mtxf[i]); | ||||
|     athena::simd_floats vecf(vec.val.simd); | ||||
|     resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2] + mtxf[0][3]; | ||||
|     resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2] + mtxf[1][3]; | ||||
|     resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2] + mtxf[2][3]; | ||||
|     res.simd.copy_from(resf); | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
| atVec3f MtxVecMul3RM(const Matrix4f& mtx, const Vector3f& vec) | ||||
| { | ||||
|     atVec3f res; | ||||
|     res.vec[0] = mtx[0].vec[0] * vec.val.vec[0] + mtx[0].vec[1] * vec.val.vec[1] + mtx[0].vec[2] * vec.val.vec[2]; | ||||
|     res.vec[1] = mtx[1].vec[0] * vec.val.vec[0] + mtx[1].vec[1] * vec.val.vec[1] + mtx[1].vec[2] * vec.val.vec[2]; | ||||
|     res.vec[2] = mtx[2].vec[0] * vec.val.vec[0] + mtx[2].vec[1] * vec.val.vec[1] + mtx[2].vec[2] * vec.val.vec[2]; | ||||
|     athena::simd_floats resf; | ||||
|     athena::simd_floats mtxf[3]; | ||||
|     for (int i = 0; i < 3; ++i) | ||||
|         mtx[i].simd.copy_to(mtxf[i]); | ||||
|     athena::simd_floats vecf(vec.val.simd); | ||||
|     resf[0] = mtxf[0][0] * vecf[0] + mtxf[0][1] * vecf[1] + mtxf[0][2] * vecf[2]; | ||||
|     resf[1] = mtxf[1][0] * vecf[0] + mtxf[1][1] * vecf[1] + mtxf[1][2] * vecf[2]; | ||||
|     resf[2] = mtxf[2][0] * vecf[0] + mtxf[2][1] * vecf[1] + mtxf[2][2] * vecf[2]; | ||||
|     res.simd.copy_from(resf); | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
| @ -113,15 +125,11 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd | ||||
|             vboW.writeVec3fLittle(preXfPos); | ||||
| 
 | ||||
|             atVec3f preXfNorm = MtxVecMul3RM(sceneXf, norm[v.iNorm]); | ||||
|             float mag = | ||||
|                 preXfNorm.vec[0] * preXfNorm.vec[0] + | ||||
|                 preXfNorm.vec[1] * preXfNorm.vec[1] + | ||||
|                 preXfNorm.vec[2] * preXfNorm.vec[2]; | ||||
|             athena::simd_floats f(preXfNorm.simd * preXfNorm.simd); | ||||
|             float mag = f[0] + f[1] + f[2]; | ||||
|             if (mag > FLT_EPSILON) | ||||
|                 mag = 1.f / std::sqrt(mag); | ||||
|             preXfNorm.vec[0] *= mag; | ||||
|             preXfNorm.vec[1] *= mag; | ||||
|             preXfNorm.vec[2] *= mag; | ||||
|             preXfNorm.simd *= mag; | ||||
|             vboW.writeVec3fLittle(preXfNorm); | ||||
|         } | ||||
|         else | ||||
| @ -133,9 +141,10 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd | ||||
|         for (size_t i=0 ; i<colorLayerCount ; ++i) | ||||
|         { | ||||
|             const Vector3f& c = color[v.iColor[i]]; | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[0] * 255)))); | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[1] * 255)))); | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(c.val.vec[2] * 255)))); | ||||
|             athena::simd_floats f(c.val.simd); | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(f[0] * 255)))); | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(f[1] * 255)))); | ||||
|             vboW.writeUByte(std::max(0, std::min(255, int(f[2] * 255)))); | ||||
|             vboW.writeUByte(255); | ||||
|         } | ||||
| 
 | ||||
| @ -158,7 +167,7 @@ HMDLBuffers Mesh::getHMDLBuffers(bool absoluteCoords, PoolSkinIndex& poolSkinInd | ||||
|                     for (const SkinBind& bind : binds) | ||||
|                         if (bind.boneIdx == *it) | ||||
|                         { | ||||
|                             vec.vec[j] = bind.weight; | ||||
|                             vec.simd[j] = bind.weight; | ||||
|                             break; | ||||
|                         } | ||||
|                     ++it; | ||||
|  | ||||
| @ -144,7 +144,9 @@ atVec4f CVar::toVec4f(bool* isValid) const | ||||
|         *isValid = true; | ||||
| 
 | ||||
|     atVec4f vec; | ||||
|     std::sscanf(m_value.c_str(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); | ||||
|     athena::simd_floats f; | ||||
|     std::sscanf(m_value.c_str(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]); | ||||
|     vec.simd.copy_from(f); | ||||
| 
 | ||||
|     return vec; | ||||
| } | ||||
| @ -247,7 +249,8 @@ bool CVar::fromVec4f(const atVec4f& val) | ||||
|     if (isReadOnly() && (com_developer && !com_developer->toBoolean())) | ||||
|         return false; | ||||
| 
 | ||||
|     m_value.assign(hecl::Format("%f %f %f %f", val.vec[0], val.vec[1], val.vec[2], val.vec[3])); | ||||
|     athena::simd_floats f(val.simd); | ||||
|     m_value.assign(hecl::Format("%f %f %f %f", f[0], f[1], f[2], f[3])); | ||||
|     m_flags |= EFlags::Modified; | ||||
|     return true; | ||||
| } | ||||
| @ -378,7 +381,9 @@ bool CVar::fromLiteralToType(std::string_view val, bool setDefault) | ||||
|     case EType::Vec4f: | ||||
|     { | ||||
|         atVec4f vec; | ||||
|         std::sscanf(val.data(), "%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); | ||||
|         athena::simd_floats f; | ||||
|         std::sscanf(val.data(), "%f %f %f %f", &f[0], &f[1], &f[2], &f[3]); | ||||
|         vec.simd.copy_from(f); | ||||
|         return fromVec4f(vec); | ||||
|     } | ||||
|     } | ||||
| @ -419,7 +424,9 @@ bool CVar::fromLiteralToType(std::wstring_view val, bool setDefault) | ||||
|     case EType::Vec4f: | ||||
|     { | ||||
|         atVec4f vec; | ||||
|         std::swscanf(val.data(), L"%f %f %f %f", &vec.vec[0], &vec.vec[1], &vec.vec[2], &vec.vec[3]); | ||||
|         athena::simd_floats f; | ||||
|         std::swscanf(val.data(), L"%f %f %f %f", &f[0], &f[1], &f[2], &f[3]); | ||||
|         vec.simd.copy_from(f); | ||||
|         return fromVec4f(vec); | ||||
|     } | ||||
|     } | ||||
|  | ||||
| @ -104,12 +104,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target) | ||||
|             atVec4f vec = {}; | ||||
|             auto it = n.children.cbegin(); | ||||
|             int i; | ||||
|             athena::simd_floats f; | ||||
|             for (i=0 ; i<3 ; ++i, ++it) | ||||
|             { | ||||
|                 if (it->kind != IRNode::Kind::Imm) | ||||
|                     break; | ||||
|                 vec.vec[i] = it->val; | ||||
|                 f[i] = it->val; | ||||
|             } | ||||
|             vec.simd.copy_from(f); | ||||
|             if (i == 3) | ||||
|             { | ||||
|                 m_instructions.emplace_back(OpType::LoadImm, target, n.loc); | ||||
| @ -123,12 +125,14 @@ int IR::addInstruction(const IRNode& n, IR::RegID target) | ||||
|             atVec4f vec = {}; | ||||
|             auto it = n.children.cbegin(); | ||||
|             int i; | ||||
|             athena::simd_floats f; | ||||
|             for (i=0 ; i<4 ; ++i, ++it) | ||||
|             { | ||||
|                 if (it->kind != IRNode::Kind::Imm) | ||||
|                     break; | ||||
|                 vec.vec[i] = it->val; | ||||
|                 f[i] = it->val; | ||||
|             } | ||||
|             vec.simd.copy_from(f); | ||||
|             if (i == 4) | ||||
|             { | ||||
|                 m_instructions.emplace_back(OpType::LoadImm, target, n.loc); | ||||
| @ -154,10 +158,7 @@ int IR::addInstruction(const IRNode& n, IR::RegID target) | ||||
|     { | ||||
|         m_instructions.emplace_back(OpType::LoadImm, target, n.loc); | ||||
|         Instruction::LoadImm& inst = m_instructions.back().m_loadImm; | ||||
|         inst.m_immVec.vec[0] = n.val; | ||||
|         inst.m_immVec.vec[1] = n.val; | ||||
|         inst.m_immVec.vec[2] = n.val; | ||||
|         inst.m_immVec.vec[3] = n.val; | ||||
|         inst.m_immVec.simd = athena::simd<float>(n.val); | ||||
|         return m_instructions.size() - 1; | ||||
|     } | ||||
|     case IRNode::Kind::Binop: | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user