mirror of https://github.com/AxioDL/zeus.git
Remove SSELegacy.cpp and add validateCPU()
This commit is contained in:
parent
4134568660
commit
b359ff96d0
|
@ -27,40 +27,8 @@ set(SOURCES
|
||||||
src/CAABox.cpp
|
src/CAABox.cpp
|
||||||
src/CEulerAngles.cpp)
|
src/CEulerAngles.cpp)
|
||||||
|
|
||||||
# SSELegacy.cpp is built through a hand-written compiler invocation instead of
# being listed as an ordinary target source, so that it escapes the effects of
# link-time optimization. On MSVC it is simply compiled as a regular source.
if(NOT MSVC)
  # Every source listed in SOURCES is compiled with SSE4.1/SSE4.2 enabled.
  set_source_files_properties(${SOURCES} PROPERTIES COMPILE_FLAGS "-msse4.1 -msse4.2")

  # Start from CMAKE_CXX_FLAGS with the thin-LTO switch removed. The input is
  # quoted so that an empty CMAKE_CXX_FLAGS still supplies string(REPLACE) with
  # its required input argument instead of aborting configuration.
  # NOTE(review): this only runs when CUSTOM_FLAGS is already set by an
  # enclosing scope -- confirm that is the intended guard.
  if(CUSTOM_FLAGS)
    string(REPLACE "-flto=thin" "" CUSTOM_FLAGS "${CMAKE_CXX_FLAGS}")
  endif()

  # Forward the macOS SDK root and deployment target when they are configured.
  if(CMAKE_OSX_SYSROOT)
    set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
  endif()
  if(CMAKE_OSX_DEPLOYMENT_TARGET AND NOT CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "")
    set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
  endif()

  # Flags applied to SSELegacy.cpp on every non-MSVC platform: SSE3 only.
  set(CUSTOM_FLAGS "${CUSTOM_FLAGS} -O3 -msse3 -std=c++14")

  set(SSE_LEGACY_OBJ ${CMAKE_CURRENT_BINARY_DIR}/SSELegacy.o)

  # Split the flag string into a list so each flag reaches the compiler as its
  # own argument.
  separate_arguments(CUSTOM_FLAGS UNIX_COMMAND "${CUSTOM_FLAGS}")

  add_custom_command(
    OUTPUT ${SSE_LEGACY_OBJ}
    COMMAND ${CMAKE_CXX_COMPILER}
    ARGS ${CUSTOM_FLAGS} -c ${CMAKE_CURRENT_SOURCE_DIR}/src/SSELegacy.cpp
         -o ${SSE_LEGACY_OBJ}
         -I ${CMAKE_CURRENT_SOURCE_DIR}/include
         -I ${ATHENA_INCLUDE_DIR}
    MAIN_DEPENDENCY src/SSELegacy.cpp)
else()
  set(SSE_LEGACY_OBJ src/SSELegacy.cpp)
endif()
|
|
||||||
|
|
||||||
add_library(zeus
|
add_library(zeus
|
||||||
${SOURCES}
|
${SOURCES}
|
||||||
${SSE_LEGACY_OBJ}
|
|
||||||
|
|
||||||
include/zeus/Math.hpp
|
include/zeus/Math.hpp
|
||||||
include/zeus/CQuaternion.hpp
|
include/zeus/CQuaternion.hpp
|
||||||
include/zeus/CMatrix3f.hpp
|
include/zeus/CMatrix3f.hpp
|
||||||
|
|
|
@ -48,6 +48,7 @@ struct CPUInfo
|
||||||
*/
|
*/
|
||||||
void detectCPU();
|
void detectCPU();
|
||||||
const CPUInfo& cpuFeatures();
|
const CPUInfo& cpuFeatures();
|
||||||
|
std::pair<bool, const CPUInfo&> validateCPU();
|
||||||
class CVector3f;
|
class CVector3f;
|
||||||
class CVector2f;
|
class CVector2f;
|
||||||
class CTransform;
|
class CTransform;
|
||||||
|
|
|
@ -33,11 +33,6 @@ CMatrix3f::CMatrix3f(const CQuaternion& quat)
|
||||||
void CMatrix3f::transpose()
|
void CMatrix3f::transpose()
|
||||||
{
|
{
|
||||||
#if __SSE__
|
#if __SSE__
|
||||||
if (!cpuFeatures().SSE41)
|
|
||||||
{
|
|
||||||
transposeSSE3();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
|
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
|
||||||
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
||||||
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
|
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
|
||||||
|
@ -66,8 +61,6 @@ void CMatrix3f::transpose()
|
||||||
CMatrix3f CMatrix3f::transposed() const
|
CMatrix3f CMatrix3f::transposed() const
|
||||||
{
|
{
|
||||||
#if __SSE__
|
#if __SSE__
|
||||||
if (!cpuFeatures().SSE41)
|
|
||||||
return transposedSSE3();
|
|
||||||
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
|
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
|
||||||
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
||||||
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
|
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
|
||||||
|
|
|
@ -9,8 +9,6 @@ CMatrix4f CMatrix4f::transposed() const
|
||||||
{
|
{
|
||||||
CMatrix4f ret;
|
CMatrix4f ret;
|
||||||
#if __SSE__
|
#if __SSE__
|
||||||
if (!cpuFeatures().SSE41)
|
|
||||||
return transposedSSE3();
|
|
||||||
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
||||||
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
|
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
|
||||||
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
|
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
|
||||||
|
|
|
@ -48,7 +48,7 @@ CQuaternion CQuaternion::operator*(const CQuaternion& q) const
|
||||||
|
|
||||||
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
|
CNUQuaternion CNUQuaternion::operator*(const CNUQuaternion& q) const
|
||||||
{
|
{
|
||||||
return CQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
|
return CNUQuaternion(w * q.w - CVector3f(x, y, z).dot({q.x, q.y, q.z}),
|
||||||
y * q.z - z * q.y + w * q.x + x * q.w,
|
y * q.z - z * q.y + w * q.x + x * q.w,
|
||||||
z * q.x - x * q.z + w * q.y + y * q.w,
|
z * q.x - x * q.z + w * q.y + y * q.w,
|
||||||
x * q.y - y * q.x + w * q.z + z * q.w);
|
x * q.y - y * q.x + w * q.z + z * q.w);
|
||||||
|
|
61
src/Math.cpp
61
src/Math.cpp
|
@ -12,7 +12,8 @@ namespace zeus
|
||||||
{
|
{
|
||||||
|
|
||||||
static bool isCPUInit = false;
|
static bool isCPUInit = false;
|
||||||
static CPUInfo g_cpuFeatures;
|
static CPUInfo g_cpuFeatures = {};
|
||||||
|
static CPUInfo g_missingFeatures = {};
|
||||||
|
|
||||||
void getCpuInfo(int level, int regs[4])
|
void getCpuInfo(int level, int regs[4])
|
||||||
{
|
{
|
||||||
|
@ -68,6 +69,64 @@ void detectCPU()
|
||||||
|
|
||||||
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
|
const CPUInfo& cpuFeatures() { detectCPU(); return g_cpuFeatures; }
|
||||||
|
|
||||||
|
std::pair<bool, const CPUInfo&> validateCPU()
|
||||||
|
{
|
||||||
|
detectCPU();
|
||||||
|
bool ret = true;
|
||||||
|
|
||||||
|
#if __SSE4A__
|
||||||
|
if (!g_cpuFeatures.SSE4a)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE4a = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSE4_2__
|
||||||
|
if (!g_cpuFeatures.SSE42)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE42 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSE4_1__
|
||||||
|
if (!g_cpuFeatures.SSE41)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE41 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSSE3__
|
||||||
|
if (!g_cpuFeatures.SSSE3)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSSE3 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSE3__
|
||||||
|
if (!g_cpuFeatures.SSE3)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE3 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSE2__
|
||||||
|
if (!g_cpuFeatures.SSE2)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE2 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if __SSE__
|
||||||
|
if (!g_cpuFeatures.SSE1)
|
||||||
|
{
|
||||||
|
*(bool*) &g_missingFeatures.SSE1 = true;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return {ret, g_missingFeatures};
|
||||||
|
}
|
||||||
|
|
||||||
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
|
CTransform lookAt(const CVector3f& pos, const CVector3f& lookPos, const CVector3f& up)
|
||||||
{
|
{
|
||||||
CVector3f vLook, vRight, vUp;
|
CVector3f vLook, vRight, vUp;
|
||||||
|
|
|
@ -1,99 +0,0 @@
|
||||||
#include "zeus/CMatrix3f.hpp"
|
|
||||||
#include "zeus/CMatrix4f.hpp"
|
|
||||||
|
|
||||||
namespace zeus
|
|
||||||
{
|
|
||||||
|
|
||||||
void CMatrix3f::transposeSSE3()
|
|
||||||
{
|
|
||||||
#if __SSE__
|
|
||||||
__m128 zero = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);
|
|
||||||
__m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
|
|
||||||
__m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, zero);
|
|
||||||
__m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
|
|
||||||
__m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, zero);
|
|
||||||
vec[0].mVec128 = _mm_movelh_ps(T0, T2);
|
|
||||||
vec[1].mVec128 = _mm_movehl_ps(T2, T0);
|
|
||||||
vec[2].mVec128 = _mm_movelh_ps(T1, T3);
|
|
||||||
#else
|
|
||||||
float tmp;
|
|
||||||
|
|
||||||
tmp = m[0][1];
|
|
||||||
m[0][1] = m[1][0];
|
|
||||||
m[1][0] = tmp;
|
|
||||||
|
|
||||||
tmp = m[0][2];
|
|
||||||
m[0][2] = m[2][0];
|
|
||||||
m[2][0] = tmp;
|
|
||||||
|
|
||||||
tmp = m[1][2];
|
|
||||||
m[1][2] = m[2][1];
|
|
||||||
m[2][1] = tmp;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Returns a transposed copy of this matrix, leaving this matrix untouched.
 *
 * With __SSE__ enabled, the result is built from the three row registers
 * interleaved with an all-zero register. Otherwise a copy is made and its
 * off-diagonal elements are filled from the mirrored positions of m.
 */
CMatrix3f CMatrix3f::transposedSSE3() const
{
#if __SSE__
    // x XOR x clears every bit, giving a zero register to pair with row 2.
    const __m128 zeroRow = _mm_xor_ps(vec[0].mVec128, vec[0].mVec128);

    const __m128 low01 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
    const __m128 high01 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
    const __m128 low2z = _mm_unpacklo_ps(vec[2].mVec128, zeroRow);
    const __m128 high2z = _mm_unpackhi_ps(vec[2].mVec128, zeroRow);

    const __m128 outRow0 = _mm_movelh_ps(low01, low2z);
    const __m128 outRow1 = _mm_movehl_ps(low2z, low01);
    const __m128 outRow2 = _mm_movelh_ps(high01, high2z);
    return CMatrix3f(outRow0, outRow1, outRow2);
#else
    CMatrix3f flipped(*this);

    // The copy already holds the diagonal; only mirrored pairs need writing.
    for (int row = 0; row < 3; ++row)
    {
        for (int col = row + 1; col < 3; ++col)
        {
            flipped.m[row][col] = m[col][row];
            flipped.m[col][row] = m[row][col];
        }
    }

    return flipped;
#endif
}
|
|
||||||
|
|
||||||
/**
 * Returns a transposed copy of this 4x4 matrix, leaving this matrix untouched.
 *
 * With __SSE__ enabled, the four row registers are interleaved pairwise and
 * recombined into the transposed rows. Otherwise every element m[row][col]
 * is copied to position [col][row] of the result.
 */
CMatrix4f CMatrix4f::transposedSSE3() const
{
    CMatrix4f flipped;
#if __SSE__
    const __m128 low01 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
    const __m128 high01 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
    const __m128 low23 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
    const __m128 high23 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);

    flipped.vec[0].mVec128 = _mm_movelh_ps(low01, low23);
    flipped.vec[1].mVec128 = _mm_movehl_ps(low23, low01);
    flipped.vec[2].mVec128 = _mm_movelh_ps(high01, high23);
    flipped.vec[3].mVec128 = _mm_movehl_ps(high23, high01);
#else
    // Walk the source row by row, writing each element to its mirrored slot.
    for (int row = 0; row < 4; ++row)
    {
        for (int col = 0; col < 4; ++col)
        {
            flipped.m[col][row] = m[row][col];
        }
    }
#endif
    return flipped;
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue