From 271df619e5b43d4479f766da7915628dd4d27d7a Mon Sep 17 00:00:00 2001
From: Phillip Stephens <antidote.crk@gmail.com>
Date: Wed, 6 May 2015 00:05:06 -0700
Subject: [PATCH] * Make a proper Matrix4 implementation (WIP) * Add CVector4f
 * Add CRectangle (for 2D AABBs)

---
 CMatrix3f.hpp    |   8 +-
 CMatrix4f.cpp    |   3 +
 CMatrix4f.hpp    | 179 ++++++++++++++++++++++++
 CProjection.cpp  |   8 --
 CProjection.hpp  | 114 +--------------
 CRectangle.cpp   |   1 +
 CRectangle.hpp   |  32 +++++
 CTransform.hpp   |  12 +-
 CVector2f.cpp    |   4 +-
 CVector2f.hpp    |  50 ++++++-
 CVector3d.cpp    | 108 ---------------
 CVector3d.hpp    |  45 ------
 CVector3f.hpp    |  21 +--
 CVector4f.cpp    |   2 +
 CVector4f.hpp    | 354 +++++++++++++++++++++++++++++++++++++++++++++++
 MathLib.hpp      |   4 +-
 MathLib.pri      |  13 +-
 TVectorUnion.hpp |  13 ++
 18 files changed, 672 insertions(+), 299 deletions(-)
 create mode 100644 CMatrix4f.cpp
 create mode 100644 CMatrix4f.hpp
 create mode 100644 CRectangle.cpp
 create mode 100644 CRectangle.hpp
 delete mode 100644 CVector3d.cpp
 delete mode 100644 CVector3d.hpp
 create mode 100644 CVector4f.cpp
 create mode 100644 CVector4f.hpp
 create mode 100644 TVectorUnion.hpp

diff --git a/CMatrix3f.hpp b/CMatrix3f.hpp
index e1d4e8d..7566864 100644
--- a/CMatrix3f.hpp
+++ b/CMatrix3f.hpp
@@ -38,13 +38,13 @@ public:
         m[1][1] = scaleVec[1];
         m[2][2] = scaleVec[2];
     }
-    CMatrix3f(const CVector3f& u, const CVector3f& m, const CVector3f& w)
-    {vec[0] = u; vec[1] = m; vec[2] = w;}
+    CMatrix3f(const CVector3f& r0, const CVector3f& r1, const CVector3f& r2)
+    {vec[0] = r0; vec[1] = r1; vec[2] = r2;}
     CMatrix3f(const CMatrix3f& other)
     {vec[0] = other.vec[0]; vec[1] = other.vec[1]; vec[2] = other.vec[2];}
 #if __SSE__
-    CMatrix3f(const __m128& u, const __m128& m, const __m128& w)
-    {vec[0].mVec128 = u; vec[1].mVec128 = m; vec[2].mVec128 = w;}
+    CMatrix3f(const __m128& r0, const __m128& r1, const __m128& r2)
+    {vec[0].mVec128 = r0; vec[1].mVec128 = r1; vec[2].mVec128 = r2;}
 #endif
     CMatrix3f(const CVector3f& axis, float angle);
     CMatrix3f(const CQuaternion& quat);
diff --git a/CMatrix4f.cpp b/CMatrix4f.cpp
new file mode 100644
index 0000000..ef78109
--- /dev/null
+++ b/CMatrix4f.cpp
@@ -0,0 +1,3 @@
+#include "CMatrix4f.hpp"
+
+const CMatrix4f CMatrix4f::skIdentityMatrix4f = CMatrix4f(); // default ctor (zero == false) builds the identity
diff --git a/CMatrix4f.hpp b/CMatrix4f.hpp
new file mode 100644
index 0000000..6de46ae
--- /dev/null
+++ b/CMatrix4f.hpp
@@ -0,0 +1,179 @@
+#ifndef CMATRIX4F
+#define CMATRIX4F
+#include "CMatrix3f.hpp"
+#include "CVector4f.hpp"
+#include "CVector3f.hpp"
+
+class ZE_ALIGN(16) CMatrix4f // 4x4 float matrix; the union below overlays float m[4][4] with CVector4f vec[4]
+{
+public:
+    static const CMatrix4f skIdentityMatrix4f;
+    ZE_DECLARE_ALIGNED_ALLOCATOR();
+    explicit CMatrix4f(bool zero = false) // identity by default, all zeros when zero == true
+    {
+        memset(m, 0, sizeof(m));
+        if (!zero)
+        {
+            m[0][0] = 1.0;
+            m[1][1] = 1.0;
+            m[2][2] = 1.0;
+            m[3][3] = 1.0;
+        }
+    }
+    CMatrix4f(float m00, float m01, float m02, float m03, // argument mRC is stored at m[C][R]
+              float m10, float m11, float m12, float m13,
+              float m20, float m21, float m22, float m23,
+              float m30, float m31, float m32, float m33)
+    {
+        m[0][0] = m00, m[1][0] = m01, m[2][0] = m02, m[3][0] = m03;
+        m[0][1] = m10, m[1][1] = m11, m[2][1] = m12, m[3][1] = m13;
+        m[0][2] = m20, m[1][2] = m21, m[2][2] = m22, m[3][2] = m23;
+        m[0][3] = m30, m[1][3] = m31, m[2][3] = m32, m[3][3] = m33;
+    }
+    CMatrix4f(const CVector3f& scaleVec) // diagonal matrix (scaleVec[0], scaleVec[1], scaleVec[2], 1)
+    {
+        memset(m, 0, sizeof(m));
+        m[0][0] = scaleVec[0];
+        m[1][1] = scaleVec[1];
+        m[2][2] = scaleVec[2];
+        m[3][3] = 1.0f;
+    }
+    CMatrix4f(const CVector4f& r0, const CVector4f& r1, const CVector4f& r2, const CVector4f& r3) // r0..r3 become vec[0]..vec[3]
+    {vec[0] = r0; vec[1] = r1; vec[2] = r2; vec[3] = r3;}
+    CMatrix4f(const CMatrix4f& other)
+    {vec[0] = other.vec[0]; vec[1] = other.vec[1]; vec[2] = other.vec[2]; vec[3] = other.vec[3];}
+#if __SSE__
+    CMatrix4f(const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3) // raw SSE registers become vec[0]..vec[3]
+    {vec[0].mVec128 = r0; vec[1].mVec128 = r1; vec[2].mVec128 = r2; vec[3].mVec128 = r3;}
+#endif
+    CMatrix4f(const CMatrix3f& other) // embeds the 3x3 matrix; the fourth component of vec[0..2] is 0 and vec[3] is (0, 0, 0, 1)
+    {
+        memset(m, 0, sizeof(m));
+        vec[0] = other.vec[0]; vec[0].w = 0.0f; // the CVector3f -> CVector4f conversion sets w = 1, so reset it
+        vec[1] = other.vec[1]; vec[1].w = 0.0f;
+        vec[2] = other.vec[2]; vec[2].w = 0.0f;
+        vec[3] = CVector4f(0, 0, 0, 1.0f);
+    }
+    inline CMatrix4f& operator=(const CMatrix4f& other)
+    {
+        vec[0] = other.vec[0];
+        vec[1] = other.vec[1];
+        vec[2] = other.vec[2];
+        vec[3] = other.vec[3];
+        return *this;
+    }
+    inline CVector4f operator*(const CVector4f& other) const // returns sum over i of vec[i] * other[i]
+    {
+#if __SSE__
+        TVectorUnion res;
+        res.mVec128 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(vec[0].mVec128, ze_splat_ps(other.mVec128, 0)),
+                                 _mm_mul_ps(vec[1].mVec128, ze_splat_ps(other.mVec128, 1))),
+                      _mm_add_ps(_mm_mul_ps(vec[2].mVec128, ze_splat_ps(other.mVec128, 2)),
+                                 _mm_mul_ps(vec[3].mVec128, ze_splat_ps(other.mVec128, 3))));
+
+        return CVector4f(res.mVec128);
+#else
+        return CVector4f( // four components, so the result must be a CVector4f
+                m[0][0] * other.v[0] + m[1][0] * other.v[1] + m[2][0] * other.v[2] + m[3][0] * other.v[3],
+                m[0][1] * other.v[0] + m[1][1] * other.v[1] + m[2][1] * other.v[2] + m[3][1] * other.v[3],
+                m[0][2] * other.v[0] + m[1][2] * other.v[1] + m[2][2] * other.v[2] + m[3][2] * other.v[3],
+                m[0][3] * other.v[0] + m[1][3] * other.v[1] + m[2][3] * other.v[2] + m[3][3] * other.v[3]);
+#endif
+    }
+
+    inline CVector4f& operator[](int i) // returns vec[i]; asserts 0 <= i < 4
+    {
+        assert(0 <= i && i < 4);
+        return vec[i];
+    }
+
+    inline const CVector4f& operator[](int i) const
+    {
+        assert(0 <= i && i < 4);
+        return vec[i];
+    }
+
+    inline CMatrix4f transposed() const // returns a transposed copy; const so it can be called through a const reference
+    {
+        CMatrix4f ret;
+#if __SSE__
+        __m128 T0 = _mm_unpacklo_ps(vec[0].mVec128, vec[1].mVec128);
+        __m128 T2 = _mm_unpacklo_ps(vec[2].mVec128, vec[3].mVec128);
+        __m128 T1 = _mm_unpackhi_ps(vec[0].mVec128, vec[1].mVec128);
+        __m128 T3 = _mm_unpackhi_ps(vec[2].mVec128, vec[3].mVec128);
+        ret.vec[0].mVec128 = _mm_movelh_ps(T0, T2);
+        ret.vec[1].mVec128 = _mm_movehl_ps(T2, T0);
+        ret.vec[2].mVec128 = _mm_movelh_ps(T1, T3);
+        ret.vec[3].mVec128 = _mm_movehl_ps(T3, T1);
+#else
+        ret.m[0][0] = m[0][0];
+        ret.m[1][0] = m[0][1];
+        ret.m[2][0] = m[0][2];
+        ret.m[3][0] = m[0][3];
+
+        ret.m[0][1] = m[1][0];
+        ret.m[1][1] = m[1][1];
+        ret.m[2][1] = m[1][2];
+        ret.m[3][1] = m[1][3];
+
+        ret.m[0][2] = m[2][0];
+        ret.m[1][2] = m[2][1];
+        ret.m[2][2] = m[2][2];
+        ret.m[3][2] = m[2][3];
+
+        ret.m[0][3] = m[3][0];
+        ret.m[1][3] = m[3][1];
+        ret.m[2][3] = m[3][2];
+        ret.m[3][3] = m[3][3];
+#endif
+        return ret;
+    }
+
+    union // two views of the same 16 floats
+    {
+        float m[4][4];
+        struct
+        {
+            CVector4f vec[4];
+        };
+    };
+};
+static inline CMatrix4f operator*(const CMatrix4f& lhs, const CMatrix4f& rhs) // ret.m[i][j] = sum over k of lhs.m[k][j] * rhs.m[i][k]
+{
+    CMatrix4f ret;
+#if __SSE__
+    unsigned i;
+    for (i=0 ; i<4 ; ++i) {
+        ret.vec[i].mVec128 =
+                _mm_add_ps(_mm_add_ps(_mm_add_ps(
+                                          _mm_mul_ps(lhs.vec[0].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(0, 0, 0, 0))),
+                                      _mm_mul_ps(lhs.vec[1].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(1, 1, 1, 1)))),
+                _mm_mul_ps(lhs.vec[2].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(2, 2, 2, 2)))),
+                _mm_mul_ps(lhs.vec[3].mVec128, _mm_shuffle_ps(rhs.vec[i].mVec128, rhs.vec[i].mVec128, _MM_SHUFFLE(3, 3, 3, 3))));
+    }
+#else
+    ret.m[0][0] = lhs.m[0][0]*rhs.m[0][0] + lhs.m[1][0]*rhs.m[0][1] + lhs.m[2][0]*rhs.m[0][2] + lhs.m[3][0]*rhs.m[0][3];
+    ret.m[1][0] = lhs.m[0][0]*rhs.m[1][0] + lhs.m[1][0]*rhs.m[1][1] + lhs.m[2][0]*rhs.m[1][2] + lhs.m[3][0]*rhs.m[1][3];
+    ret.m[2][0] = lhs.m[0][0]*rhs.m[2][0] + lhs.m[1][0]*rhs.m[2][1] + lhs.m[2][0]*rhs.m[2][2] + lhs.m[3][0]*rhs.m[2][3];
+    ret.m[3][0] = lhs.m[0][0]*rhs.m[3][0] + lhs.m[1][0]*rhs.m[3][1] + lhs.m[2][0]*rhs.m[3][2] + lhs.m[3][0]*rhs.m[3][3];
+
+    ret.m[0][1] = lhs.m[0][1]*rhs.m[0][0] + lhs.m[1][1]*rhs.m[0][1] + lhs.m[2][1]*rhs.m[0][2] + lhs.m[3][1]*rhs.m[0][3];
+    ret.m[1][1] = lhs.m[0][1]*rhs.m[1][0] + lhs.m[1][1]*rhs.m[1][1] + lhs.m[2][1]*rhs.m[1][2] + lhs.m[3][1]*rhs.m[1][3];
+    ret.m[2][1] = lhs.m[0][1]*rhs.m[2][0] + lhs.m[1][1]*rhs.m[2][1] + lhs.m[2][1]*rhs.m[2][2] + lhs.m[3][1]*rhs.m[2][3];
+    ret.m[3][1] = lhs.m[0][1]*rhs.m[3][0] + lhs.m[1][1]*rhs.m[3][1] + lhs.m[2][1]*rhs.m[3][2] + lhs.m[3][1]*rhs.m[3][3];
+
+    ret.m[0][2] = lhs.m[0][2]*rhs.m[0][0] + lhs.m[1][2]*rhs.m[0][1] + lhs.m[2][2]*rhs.m[0][2] + lhs.m[3][2]*rhs.m[0][3];
+    ret.m[1][2] = lhs.m[0][2]*rhs.m[1][0] + lhs.m[1][2]*rhs.m[1][1] + lhs.m[2][2]*rhs.m[1][2] + lhs.m[3][2]*rhs.m[1][3];
+    ret.m[2][2] = lhs.m[0][2]*rhs.m[2][0] + lhs.m[1][2]*rhs.m[2][1] + lhs.m[2][2]*rhs.m[2][2] + lhs.m[3][2]*rhs.m[2][3];
+    ret.m[3][2] = lhs.m[0][2]*rhs.m[3][0] + lhs.m[1][2]*rhs.m[3][1] + lhs.m[2][2]*rhs.m[3][2] + lhs.m[3][2]*rhs.m[3][3];
+
+    ret.m[0][3] = lhs.m[0][3]*rhs.m[0][0] + lhs.m[1][3]*rhs.m[0][1] + lhs.m[2][3]*rhs.m[0][2] + lhs.m[3][3]*rhs.m[0][3];
+    ret.m[1][3] = lhs.m[0][3]*rhs.m[1][0] + lhs.m[1][3]*rhs.m[1][1] + lhs.m[2][3]*rhs.m[1][2] + lhs.m[3][3]*rhs.m[1][3];
+    ret.m[2][3] = lhs.m[0][3]*rhs.m[2][0] + lhs.m[1][3]*rhs.m[2][1] + lhs.m[2][3]*rhs.m[2][2] + lhs.m[3][3]*rhs.m[2][3];
+    ret.m[3][3] = lhs.m[0][3]*rhs.m[3][0] + lhs.m[1][3]*rhs.m[3][1] + lhs.m[2][3]*rhs.m[3][2] + lhs.m[3][3]*rhs.m[3][3]; // third term uses rhs.m[3][2], matching the SSE path
+#endif
+    return ret;
+}
+
+#endif // CMATRIX4F
+
diff --git a/CProjection.cpp b/CProjection.cpp
index 6349289..42cde8d 100644
--- a/CProjection.cpp
+++ b/CProjection.cpp
@@ -1,14 +1,6 @@
 #include "CProjection.hpp"
 #include <math.h>
 
-const TMatrix4f kIdentityMtx4 =
-{{
-    {1.0, 0.0, 0.0, 0.0},
-    {0.0, 1.0, 0.0, 0.0},
-    {0.0, 0.0, 1.0, 0.0},
-    {0.0, 0.0, 0.0, 1.0}
-}};
-
 void CProjection::_updateCachedMatrix()
 {
     if (m_projType == PROJ_ORTHO)
diff --git a/CProjection.hpp b/CProjection.hpp
index 4c441ec..e29c05d 100644
--- a/CProjection.hpp
+++ b/CProjection.hpp
@@ -2,118 +2,12 @@
 #define CPROJECTION_HPP
 
 #include "Global.hpp"
+#include "CMatrix4f.hpp"
 #include <stdexcept>
 
 #define _USE_MATH_DEFINES 1
 #include <math.h>
 
-union TMatrix4f
-{
-    float m[4][4];
-#if __SSE__
-    __m128 mVec128[4];
-#endif
-    inline TMatrix4f transposed()
-    {
-        TMatrix4f ret;
-#if __SSE__
-        __m128 T0 = _mm_unpacklo_ps(mVec128[0], mVec128[1]);
-        __m128 T2 = _mm_unpacklo_ps(mVec128[2], mVec128[3]);
-        __m128 T1 = _mm_unpackhi_ps(mVec128[0], mVec128[1]);
-        __m128 T3 = _mm_unpackhi_ps(mVec128[2], mVec128[3]);
-        ret.mVec128[0] = _mm_movelh_ps(T0, T2);
-        ret.mVec128[1] = _mm_movehl_ps(T2, T0);
-        ret.mVec128[2] = _mm_movelh_ps(T1, T3);
-        ret.mVec128[3] = _mm_movehl_ps(T3, T1);
-#else
-        ret.m[0][0] = m[0][0];
-        ret.m[1][0] = m[0][1];
-        ret.m[2][0] = m[0][2];
-        ret.m[3][0] = m[0][3];
-        
-        ret.m[0][1] = m[1][0];
-        ret.m[1][1] = m[1][1];
-        ret.m[2][1] = m[1][2];
-        ret.m[3][1] = m[1][3];
-        
-        ret.m[0][2] = m[2][0];
-        ret.m[1][2] = m[2][1];
-        ret.m[2][2] = m[2][2];
-        ret.m[3][2] = m[2][3];
-        
-        ret.m[0][3] = m[3][0];
-        ret.m[1][3] = m[3][1];
-        ret.m[2][3] = m[3][2];
-        ret.m[3][3] = m[3][3];
-#endif
-        return ret;
-    }
-    inline TMatrix4f& operator=(const TMatrix4f& other)
-    {
-#if __SSE__
-        mVec128[0] = other.mVec128[0];
-        mVec128[1] = other.mVec128[1];
-        mVec128[2] = other.mVec128[2];
-        mVec128[3] = other.mVec128[3];
-#else
-        m[0][0] = other.m[0][0];
-        m[0][1] = other.m[0][1];
-        m[0][2] = other.m[0][2];
-        m[0][3] = other.m[0][3];
-        m[1][0] = other.m[1][0];
-        m[1][1] = other.m[1][1];
-        m[1][2] = other.m[1][2];
-        m[1][3] = other.m[1][3];
-        m[2][0] = other.m[2][0];
-        m[2][1] = other.m[2][1];
-        m[2][2] = other.m[2][2];
-        m[2][3] = other.m[2][3];
-        m[3][0] = other.m[3][0];
-        m[3][1] = other.m[3][1];
-        m[3][2] = other.m[3][2];
-        m[3][3] = other.m[3][3];
-#endif
-        return *this;
-    }
-};
-static inline TMatrix4f operator*(const TMatrix4f& lhs, const TMatrix4f& rhs)
-{
-    TMatrix4f ret;
-#if __SSE__
-    unsigned i;
-    for (i=0 ; i<4 ; ++i) {
-        ret.mVec128[i] =
-        _mm_add_ps(_mm_add_ps(_mm_add_ps(
-                   _mm_mul_ps(lhs.mVec128[0], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(0, 0, 0, 0))),
-                   _mm_mul_ps(lhs.mVec128[1], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(1, 1, 1, 1)))),
-                   _mm_mul_ps(lhs.mVec128[2], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(2, 2, 2, 2)))),
-                   _mm_mul_ps(lhs.mVec128[3], _mm_shuffle_ps(rhs.mVec128[i], rhs.mVec128[i], _MM_SHUFFLE(3, 3, 3, 3))));
-    }
-#else
-    ret.m[0][0] = lhs.m[0][0]*rhs.m[0][0] + lhs.m[1][0]*rhs.m[0][1] + lhs.m[2][0]*rhs.m[0][2] + lhs.m[3][0]*rhs.m[0][3];
-    ret.m[1][0] = lhs.m[0][0]*rhs.m[1][0] + lhs.m[1][0]*rhs.m[1][1] + lhs.m[2][0]*rhs.m[1][2] + lhs.m[3][0]*rhs.m[1][3];
-    ret.m[2][0] = lhs.m[0][0]*rhs.m[2][0] + lhs.m[1][0]*rhs.m[2][1] + lhs.m[2][0]*rhs.m[2][2] + lhs.m[3][0]*rhs.m[2][3];
-    ret.m[3][0] = lhs.m[0][0]*rhs.m[3][0] + lhs.m[1][0]*rhs.m[3][1] + lhs.m[2][0]*rhs.m[3][2] + lhs.m[3][0]*rhs.m[3][3];
-    
-    ret.m[0][1] = lhs.m[0][1]*rhs.m[0][0] + lhs.m[1][1]*rhs.m[0][1] + lhs.m[2][1]*rhs.m[0][2] + lhs.m[3][1]*rhs.m[0][3];
-    ret.m[1][1] = lhs.m[0][1]*rhs.m[1][0] + lhs.m[1][1]*rhs.m[1][1] + lhs.m[2][1]*rhs.m[1][2] + lhs.m[3][1]*rhs.m[1][3];
-    ret.m[2][1] = lhs.m[0][1]*rhs.m[2][0] + lhs.m[1][1]*rhs.m[2][1] + lhs.m[2][1]*rhs.m[2][2] + lhs.m[3][1]*rhs.m[2][3];
-    ret.m[3][1] = lhs.m[0][1]*rhs.m[3][0] + lhs.m[1][1]*rhs.m[3][1] + lhs.m[2][1]*rhs.m[3][2] + lhs.m[3][1]*rhs.m[3][3];
-    
-    ret.m[0][2] = lhs.m[0][2]*rhs.m[0][0] + lhs.m[1][2]*rhs.m[0][1] + lhs.m[2][2]*rhs.m[0][2] + lhs.m[3][2]*rhs.m[0][3];
-    ret.m[1][2] = lhs.m[0][2]*rhs.m[1][0] + lhs.m[1][2]*rhs.m[1][1] + lhs.m[2][2]*rhs.m[1][2] + lhs.m[3][2]*rhs.m[1][3];
-    ret.m[2][2] = lhs.m[0][2]*rhs.m[2][0] + lhs.m[1][2]*rhs.m[2][1] + lhs.m[2][2]*rhs.m[2][2] + lhs.m[3][2]*rhs.m[2][3];
-    ret.m[3][2] = lhs.m[0][2]*rhs.m[3][0] + lhs.m[1][2]*rhs.m[3][1] + lhs.m[2][2]*rhs.m[3][2] + lhs.m[3][2]*rhs.m[3][3];
-    
-    ret.m[0][3] = lhs.m[0][3]*rhs.m[0][0] + lhs.m[1][3]*rhs.m[0][1] + lhs.m[2][3]*rhs.m[0][2] + lhs.m[3][3]*rhs.m[0][3];
-    ret.m[1][3] = lhs.m[0][3]*rhs.m[1][0] + lhs.m[1][3]*rhs.m[1][1] + lhs.m[2][3]*rhs.m[1][2] + lhs.m[3][3]*rhs.m[1][3];
-    ret.m[2][3] = lhs.m[0][3]*rhs.m[2][0] + lhs.m[1][3]*rhs.m[2][1] + lhs.m[2][3]*rhs.m[2][2] + lhs.m[3][3]*rhs.m[2][3];
-    ret.m[3][3] = lhs.m[0][3]*rhs.m[3][0] + lhs.m[1][3]*rhs.m[3][1] + lhs.m[2][3]*rhs.m[2][2] + lhs.m[3][3]*rhs.m[3][3];
-#endif
-    return ret;
-}
-extern const TMatrix4f kIdentityMtx4;
-
 enum EProjType
 {
     PROJ_NONE = 0,
@@ -145,7 +39,7 @@ public:
     {
         m_projType = PROJ_ORTHO;
         m_ortho = SProjOrtho();
-        m_mtx = kIdentityMtx4;
+        m_mtx = CMatrix4f::skIdentityMatrix4f;
     }
     CProjection(const CProjection& other) {*this = other;}
     CProjection(const SProjOrtho& ortho) {setOrtho(ortho);}
@@ -181,7 +75,7 @@ public:
         return m_persp;
     }
     
-    inline const TMatrix4f& getCachedMatrix() const {return m_mtx;}
+    inline const CMatrix4f& getCachedMatrix() const {return m_mtx;}
     
 protected:
 
@@ -207,7 +101,7 @@ protected:
     };
 
     /* Cached projection matrix */
-    TMatrix4f m_mtx;
+    CMatrix4f m_mtx;
     
 };
 
diff --git a/CRectangle.cpp b/CRectangle.cpp
new file mode 100644
index 0000000..d9d1d5b
--- /dev/null
+++ b/CRectangle.cpp
@@ -0,0 +1 @@
+#include "CRectangle.hpp"
diff --git a/CRectangle.hpp b/CRectangle.hpp
new file mode 100644
index 0000000..66f7741
--- /dev/null
+++ b/CRectangle.hpp
@@ -0,0 +1,32 @@
+#ifndef CRECTANGLE_HPP
+#define CRECTANGLE_HPP
+#include "CVector2f.hpp"
+
+class CRectangle // axis-aligned 2D rectangle described by a corner `position` and an extent `size`
+{
+public:
+    CRectangle() {} // position and size are default-constructed
+
+    inline bool contains(const CVector2f& point) const // true when point lies inside the rectangle or on its border
+    {
+        if (point.x < position.x || point.x > position.x + size.x) // NOTE(review): assumes size.x >= 0
+            return false;
+        if (point.y < position.y || point.y > position.y + size.y) // NOTE(review): assumes size.y >= 0
+            return false;
+
+        return true;
+    }
+
+    inline bool intersects(const CRectangle& rect) const // rectangles that only touch along an edge count as intersecting
+    {
+        return !(     position.x > rect.position.x + rect.size.x ||
+                 rect.position.x >      position.x +      size.x ||
+                      position.y > rect.position.y + rect.size.y ||
+                 rect.position.y >      position.y +      size.y);
+    }
+
+    CVector2f position; // corner with the smallest x and y used by the tests above
+    CVector2f size;     // extent added to position to obtain the opposite corner
+};
+
+#endif // CRECTANGLE_HPP
diff --git a/CTransform.hpp b/CTransform.hpp
index 13987eb..cd2de4b 100644
--- a/CTransform.hpp
+++ b/CTransform.hpp
@@ -3,8 +3,8 @@
 
 #include "Global.hpp"
 #include "CMatrix3f.hpp"
+#include "CMatrix4f.hpp"
 #include "CVector3f.hpp"
-#include "CProjection.hpp"
 
 class ZE_ALIGN(16) CTransform
 {
@@ -27,13 +27,13 @@ public:
     inline CVector3f operator*(const CVector3f& other) const
     {return m_origin + m_basis * other;}
     
-    inline void toMatrix4f(TMatrix4f& mat) const
+    inline void toMatrix4f(CMatrix4f& mat) const
     {
 #if __SSE__
-        mat.mVec128[0] = m_basis[0].mVec128; mat.m[0][3] = 0.0f;
-        mat.mVec128[1] = m_basis[1].mVec128; mat.m[1][3] = 0.0f;
-        mat.mVec128[2] = m_basis[2].mVec128; mat.m[2][3] = 0.0f;
-        mat.mVec128[3] = m_origin.mVec128; mat.m[3][3] = 1.0f;
+        mat.vec[0].mVec128 = m_basis[0].mVec128; mat.m[0][3] = 0.0f;
+        mat.vec[1].mVec128 = m_basis[1].mVec128; mat.m[1][3] = 0.0f;
+        mat.vec[2].mVec128 = m_basis[2].mVec128; mat.m[2][3] = 0.0f;
+        mat.vec[3].mVec128 = m_origin.mVec128; mat.m[3][3] = 1.0f;
 #else
         mat.m[0][0] = m_basis[0][0]; mat.m[0][1] = m_basis[0][1]; mat.m[0][2] = m_basis[0][2]; mat.m[0][3] = 0.0f;
         mat.m[1][0] = m_basis[1][0]; mat.m[1][1] = m_basis[1][1]; mat.m[1][2] = m_basis[1][2]; mat.m[1][3] = 0.0f;
diff --git a/CVector2f.cpp b/CVector2f.cpp
index 486f42c..557a8aa 100644
--- a/CVector2f.cpp
+++ b/CVector2f.cpp
@@ -42,9 +42,7 @@ CVector2f CVector2f::slerp(const CVector2f& a, const CVector2f& b, float t)
         const double d = 1.0 / sin(theta);
         const double s0 = sin((1.0 - t) * theta);
 
-        ret.x = (float)(a.x * s0 + b.x * s1) * d;
-        ret.y = (float)(a.y * s0 + b.y * s1) * d;
-
+        ret = (a * s0 + b * s1) * d;
         return ret;
     }
     return a;
diff --git a/CVector2f.hpp b/CVector2f.hpp
index 3b1ba5d..47f961f 100644
--- a/CVector2f.hpp
+++ b/CVector2f.hpp
@@ -2,15 +2,16 @@
 #define CVECTOR2f_HPP
 
 #include "Global.hpp"
+#include "TVectorUnion.hpp"
+
 #include <Athena/IStreamReader.hpp>
 #include <math.h>
 #include <assert.h>
-#include "CVector3f.hpp"
 
 
 class ZE_ALIGN(16) CVector2f
 {
-public:
+    public:
     ZE_DECLARE_ALIGNED_ALLOCATOR();
 
     inline CVector2f() {zeroOut();}
@@ -31,6 +32,47 @@ public:
     {return (x == rhs.x && y == rhs.y);}
     inline bool operator !=(const CVector2f& rhs) const
     {return !(*this == rhs);}
+    inline bool operator <(const CVector2f& rhs) const // true when ANY component is less than rhs's, so a < b and b < a can both hold
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0); // only lanes 0 and 1 of the comparison mask are inspected
+#else
+        return (x < rhs.x || y < rhs.y);
+#endif
+    }
+    inline bool operator <=(const CVector2f& rhs) const // true when ANY component is less than or equal to rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0); // only lanes 0 and 1 of the comparison mask are inspected
+#else
+        return (x <= rhs.x || y <= rhs.y);
+#endif
+    }
+    inline bool operator >(const CVector2f& rhs) const // true when ANY component is greater than rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0); // only lanes 0 and 1 of the comparison mask are inspected
+#else
+        return (x > rhs.x || y > rhs.y);
+#endif
+    }
+    inline bool operator >=(const CVector2f& rhs) const // true when ANY component is greater than or equal to rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0); // only lanes 0 and 1 of the comparison mask are inspected
+#else
+        return (x >= rhs.x || y >= rhs.y);
+#endif
+    }
+
     inline CVector2f operator+(const CVector2f& rhs) const
     {
 #if __SSE__
@@ -225,6 +267,10 @@ public:
     }
     static CVector2f slerp(const CVector2f& a, const CVector2f& b, float t);
 
+    inline bool isNormalized(float thresh = 0.0001f) const // true when length() is within thresh of 1
+    {
+        return (fabsf(length() - 1.0f) < thresh); // compare against unit length, not merely against zero
+    }
 
     inline float& operator[](size_t idx) {return (&x)[idx];}
     inline const float& operator[](size_t idx) const {return (&x)[idx];}
diff --git a/CVector3d.cpp b/CVector3d.cpp
deleted file mode 100644
index 516055c..0000000
--- a/CVector3d.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include "CVector3d.hpp"
-#include <memory.h>
-#include <cmath>
-
-CVector3d::CVector3d()
-{
-    memset(&v, 0, sizeof(v));
-}
-
-CVector3d::CVector3d(double x, double y, double z)
-    : x(x),
-      y(y),
-      z(z)
-{
-}
-
-CVector3d::CVector3d(Athena::io::IStreamReader& input)
-{
-    x = input.readDouble();
-    y = input.readDouble();
-    z = input.readDouble();
-}
-
-CVector3d::~CVector3d()
-{
-}
-
-bool CVector3d::operator ==(const CVector3d& rhs)
-{
-    return (x == rhs.x && y == rhs.y && z == rhs.z);
-}
-
-CVector3d CVector3d::operator+(const CVector3d& rhs)
-{
-    return CVector3d(x + rhs.x, y + rhs.y, z + rhs.z);
-}
-
-CVector3d CVector3d::operator-(const CVector3d& rhs)
-{
-    return CVector3d(x - rhs.x, y - rhs.y, z - rhs.z);
-}
-
-CVector3d CVector3d::operator-() const
-{
-    return CVector3d(-x, -y, -z);
-}
-
-CVector3d CVector3d::operator*(const CVector3d& rhs)
-{
-    return CVector3d(x * rhs.x, y * rhs.y, z * rhs.z);
-}
-
-CVector3d CVector3d::operator/(const CVector3d& rhs)
-{
-    return CVector3d(x / rhs.x, y / rhs.y, z / rhs.z);
-}
-
-CVector3d CVector3d::operator+(double val)
-{
-    return CVector3d(x + val, y + val, z + val);
-}
-
-CVector3d CVector3d::operator-(double val)
-{
-    return CVector3d(x - val, y - val, z - val);
-}
-
-CVector3d CVector3d::operator*(double val)
-{
-    return CVector3d(x * val, y * val, z * val);
-}
-
-CVector3d CVector3d::operator/(double val)
-{
-    return CVector3d(x / val, y / val, z / val);
-}
-
-CVector3d CVector3d::normalized()
-{
-    CVector3d ret;
-    double mag = magnitude();
-
-    ret.x = x/mag;
-    ret.y = y/mag;
-    ret.z = z/mag;
-
-    return ret;
-}
-
-CVector3d CVector3d::cross(const CVector3d& rhs) const
-{
-    return CVector3d(y * rhs.z - z * rhs.y, z * rhs.x - x * rhs.z, x * rhs.y - y * rhs.x);
-}
-
-double CVector3d::dot(const CVector3d& rhs) const
-{
-    return (x * rhs.x) + (y * rhs.y) + (z * rhs.z);
-}
-
-double CVector3d::magnitude() const
-{
-    return sqrt(x*x + y*y + z*z);
-}
-
-CVector3f CVector3d::asVector3f()
-{
-    return CVector3f((float)x, (float)y, (float)z);
-}
diff --git a/CVector3d.hpp b/CVector3d.hpp
deleted file mode 100644
index e125e28..0000000
--- a/CVector3d.hpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef CVECTOR3D_HPP
-#define CVECTOR3D_HPP
-
-#include <Athena/IStreamReader.hpp>
-
-#include "CVector3f.hpp"
-
-class ZE_ALIGN(16) CVector3d
-{
-public:
-    ZE_DECLARE_ALIGNED_ALLOCATOR();
-    
-    CVector3d();
-    CVector3d(const CVector3f& vec3);
-    CVector3d(double x, double y, double z);
-    CVector3d(Athena::io::IStreamReader& input);
-    ~CVector3d();
-
-    bool operator ==(const CVector3d& rhs);
-    CVector3d operator+(const CVector3d& rhs);
-    CVector3d operator-(const CVector3d& rhs);
-    CVector3d operator-() const;
-    CVector3d operator*(const CVector3d& rhs);
-    CVector3d operator/(const CVector3d& rhs);
-    CVector3d operator+(double val);
-    CVector3d operator-(double val);
-    CVector3d operator*(double val);
-    CVector3d operator/(double val);
-    CVector3d normalized();
-    CVector3d cross(const CVector3d& rhs) const;
-    double    dot(const CVector3d& rhs) const;
-    double    magnitude() const;
-
-    CVector3f asVector3f();
-    union
-    {
-        struct
-        {
-            double x, y, z;
-        };
-        __m128 v;
-    };
-};
-
-#endif // CVECTOR3D_HPP
diff --git a/CVector3f.hpp b/CVector3f.hpp
index e1d9df7..ab23e3e 100644
--- a/CVector3f.hpp
+++ b/CVector3f.hpp
@@ -2,18 +2,12 @@
 #define CVECTOR3F_HPP
 
 #include "Global.hpp"
+#include "CVector2f.hpp"
+#include "TVectorUnion.hpp"
 #include <Athena/IStreamReader.hpp>
 #include <math.h>
 #include <assert.h>
 
-typedef union
-{
-    float v[4];
-#if __SSE__
-    __m128 mVec128;
-#endif
-} TVectorUnion;
-
 class ZE_ALIGN(16) CVector3f
 {
 public:
@@ -32,6 +26,13 @@ public:
         z = input.readFloat();
         v[3] = 0.0f;
     }
+    CVector3f(const CVector2f& other) // copies x and y from a 2D vector; z and the padding lane v[3] are set to 0
+    {
+        x = other.x;
+        y = other.y;
+        z = 0.0;
+        v[3] = 0.0f;
+    }
 
     inline bool operator ==(const CVector3f& rhs) const
     {return (x == rhs.x && y == rhs.y && z == rhs.z);}
@@ -231,6 +232,10 @@ public:
     }
     static CVector3f slerp(const CVector3f& a, const CVector3f& b, float t);
 
+    inline bool isNormalized(float thresh = 0.0001f) const // true when length() is within thresh of 1
+    {
+        return (fabsf(length() - 1.0f) < thresh); // compare against unit length, not merely against zero
+    }
 
     inline float& operator[](size_t idx) {return (&x)[idx];}
     inline const float& operator[](size_t idx) const {return (&x)[idx];}
diff --git a/CVector4f.cpp b/CVector4f.cpp
new file mode 100644
index 0000000..afaccbe
--- /dev/null
+++ b/CVector4f.cpp
@@ -0,0 +1,2 @@
+#include "CVector4f.hpp"
+
diff --git a/CVector4f.hpp b/CVector4f.hpp
new file mode 100644
index 0000000..934d76b
--- /dev/null
+++ b/CVector4f.hpp
@@ -0,0 +1,354 @@
+#ifndef CVECTOR4F_HPP
+#define CVECTOR4F_HPP
+
+#include "Global.hpp"
+#include "TVectorUnion.hpp"
+#include "CVector3f.hpp"
+#include <Athena/IStreamReader.hpp>
+#include <math.h>
+#include <float.h>
+#include <assert.h>
+
+class ZE_ALIGN(16) CVector4f
+{
+    public:
+    ZE_DECLARE_ALIGNED_ALLOCATOR();
+
+    inline CVector4f() {zeroOut();} // default-constructs to (0, 0, 0, 0)
+#if __SSE__
+    CVector4f(const __m128& mVec128) : mVec128(mVec128) {} // wraps a raw SSE register; only compiled when __SSE__ is set
+#endif
+    CVector4f(float xyzw) {splat(xyzw);} // non-explicit: a float converts implicitly to a vector via splat()
+    CVector4f(float x, float y, float z, float w) {v[0] = x; v[1] = y; v[2] = z; v[3] = w;} // component-wise constructor
+    CVector4f(Athena::io::IStreamReader& input) // reads four consecutive floats from the stream in x, y, z, w order
+    {
+        x = input.readFloat();
+        y = input.readFloat();
+        z = input.readFloat();
+        w = input.readFloat();
+    }
+
+    CVector4f(const CVector3f& other) // non-explicit conversion: copies x, y, z and forces w to 1.0f
+    {
+        x = other.x;
+        y = other.y;
+        z = other.z;
+        w = 1.0f; // callers that need w == 0 must reset it after converting
+    }
+
+    inline bool operator ==(const CVector4f& rhs) const // true when all four components compare equal
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpeq_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 && vec.v[1] != 0 && vec.v[2] != 0 && vec.v[3] != 0); // NOTE(review): reads the lane masks as floats; confirm this survives -ffast-math
+#else
+        return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
+#endif
+    }
+    inline bool operator !=(const CVector4f& rhs) const // true when at least one component differs, i.e. !(*this == rhs)
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpneq_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0); // any differing lane makes the vectors unequal
+#else
+        return !(*this == rhs);
+#endif
+    }
+    inline bool operator <(const CVector4f& rhs) const // true when ANY component is less than rhs's, so a < b and b < a can both hold
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmplt_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
+#else
+        return (x < rhs.x || y < rhs.y || z < rhs.z || w < rhs.w);
+#endif
+    }
+    inline bool operator <=(const CVector4f& rhs) const // true when ANY component is less than or equal to rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmple_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
+#else
+        return (x <= rhs.x || y <= rhs.y || z <= rhs.z || w <= rhs.w);
+#endif
+    }
+    inline bool operator >(const CVector4f& rhs) const // true when ANY component is greater than rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpgt_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
+#else
+        return (x > rhs.x || y > rhs.y || z > rhs.z || w > rhs.w);
+#endif
+    }
+    inline bool operator >=(const CVector4f& rhs) const // true when ANY component is greater than or equal to rhs's
+    {
+#if __SSE__
+        TVectorUnion vec;
+        vec.mVec128 = _mm_cmpge_ps(mVec128, rhs.mVec128);
+        return (vec.v[0] != 0 || vec.v[1] != 0 || vec.v[2] != 0 || vec.v[3] != 0);
+#else
+        return (x >= rhs.x || y >= rhs.y || z >= rhs.z || w >= rhs.w);
+#endif
+    }
+    inline CVector4f operator+(const CVector4f& rhs) const // component-wise sum
+    {
+#if __SSE__
+        return CVector4f(_mm_add_ps(mVec128, rhs.mVec128));
+#else
+        return CVector4f(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
+#endif
+    }
+    inline CVector4f operator-(const CVector4f& rhs) const // component-wise difference
+    {
+#if __SSE__
+        return CVector4f(_mm_sub_ps(mVec128, rhs.mVec128));
+#else
+        return CVector4f(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
+#endif
+    }
+    inline CVector4f operator-() const // negates every component
+    {
+#if __SSE__
+        return CVector4f(_mm_sub_ps(_mm_xor_ps(mVec128, mVec128), mVec128)); // (v xor v) yields zero, then 0 - v
+#else
+        return CVector4f(-x, -y, -z, -w);
+#endif
+    }
+    inline CVector4f operator*(const CVector4f& rhs) const // component-wise product (not a dot product)
+    {
+#if __SSE__
+        return CVector4f(_mm_mul_ps(mVec128, rhs.mVec128));
+#else
+        return CVector4f(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
+#endif
+    }
+    inline CVector4f operator/(const CVector4f& rhs) const // component-wise quotient; no check for zero components in rhs
+    {
+#if __SSE__
+        return CVector4f(_mm_div_ps(mVec128, rhs.mVec128));
+#else
+        return CVector4f(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
+#endif
+    }
+    inline CVector4f operator+(float val) const // adds val to every component
+    {
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CVector4f(_mm_add_ps(mVec128, splat.mVec128));
+#else
+        return CVector4f(x + val, y + val, z + val, w + val);
+#endif
+    }
+    inline CVector4f operator-(float val) const // subtracts val from every component
+    {
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CVector4f(_mm_sub_ps(mVec128, splat.mVec128));
+#else
+        return CVector4f(x - val, y - val, z - val, w - val);
+#endif
+    }
+    inline CVector4f operator*(float val) const // multiplies every component by val
+    {
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CVector4f(_mm_mul_ps(mVec128, splat.mVec128));
+#else
+        return CVector4f(x * val, y * val, z * val, w * val);
+#endif
+    }
+    inline CVector4f operator/(float val) const // divides every component by val; no check for val == 0
+    {
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CVector4f(_mm_div_ps(mVec128, splat.mVec128));
+#else
+        return CVector4f(x / val, y / val, z / val, w / val);
+#endif
+    }
+    inline const CVector4f& operator +=(const CVector4f& rhs) // in-place component-wise add; returns a const reference to *this
+    {
+#if __SSE__
+        mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
+#else
+        x += rhs.x; y += rhs.y; z += rhs.z; w += rhs.w;
+#endif
+        return *this;
+    }
+    inline const CVector4f& operator -=(const CVector4f& rhs) // in-place component-wise subtract; returns a const reference to *this
+    {
+#if __SSE__
+        mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
+#else
+        x -= rhs.x; y -= rhs.y; z -= rhs.z; w -= rhs.w;
+#endif
+        return *this;
+    }
+    inline const CVector4f& operator *=(const CVector4f& rhs) // in-place component-wise multiply; returns a const reference to *this
+    {
+#if __SSE__
+        mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
+#else
+        x *= rhs.x; y *= rhs.y; z *= rhs.z; w *= rhs.w;
+#endif
+        return *this;
+    }
+    inline const CVector4f& operator /=(const CVector4f& rhs)
+    { // In-place component-wise quotient; rhs components are not checked for zero.
+#if __SSE__
+        this->mVec128 = _mm_div_ps(this->mVec128, rhs.mVec128);
+#else
+        for (size_t i = 0; i < 4; ++i) v[i] /= rhs.v[i];
+#endif
+        return *this;
+    }
+    inline void normalize()
+    { // Scales this vector in place by the reciprocal of length().
+        const float len = length();
+        assert(len != 0.0); // a zero-length vector cannot be normalized
+        const float invLen = 1.0 / len;
+        *this *= invLen;
+    }
+    inline CVector4f normalized() const
+    { // Returns a copy scaled by the reciprocal of length(); this vector is untouched.
+        const float len = length();
+        assert(len != 0.0); // a zero-length vector cannot be normalized
+        const float invLen = 1.0 / len;
+        return *this * invLen;
+    }
+
+    inline float dot(const CVector4f& rhs) const
+    { // Four-component inner product: x, y, z and w all contribute.
+#if __SSE4_1__
+        TVectorUnion dp; // mask 0xF1: multiply all four lanes, write the sum to lane 0
+        dp.mVec128 = _mm_dp_ps(mVec128, rhs.mVec128, 0xF1);
+        return dp.v[0];
+#elif __SSE__
+        TVectorUnion prod; // lane-wise products, summed on the scalar side
+        prod.mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
+        return prod.v[0] + prod.v[1] + prod.v[2] + prod.v[3];
+#else
+        return (v[0] * rhs.v[0]) + (v[1] * rhs.v[1]) + (v[2] * rhs.v[2]) + (v[3] * rhs.v[3]);
+#endif
+    }
+    inline float lengthSquared() const
+    { // Squared magnitude over x, y, z and w; all three build paths return the same sum.
+#if __SSE4_1__
+        TVectorUnion result; // mask 0xF1: multiply all four lanes (w included), sum into lane 0
+        result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
+        return result.v[0];
+#elif __SSE__
+        TVectorUnion result;
+        result.mVec128 = _mm_mul_ps(mVec128, mVec128);
+        return result.v[0] + result.v[1] + result.v[2] + result.v[3]; // w counts, as in the scalar path
+#else
+        return x*x + y*y + z*z + w*w;
+#endif
+    }
+    inline float length() const
+    { // Magnitude of the vector: the square root of lengthSquared().
+        return sqrtf(this->lengthSquared());
+    }
+
+    inline void zeroOut()
+    { // Resets all four components to zero.
+#if __SSE__
+        mVec128 = _mm_setzero_ps();
+#else
+        x = y = z = w = 0.0f;
+#endif
+    }
+
+    inline void splat(float xyzw)
+    { // Assigns xyzw to all four components of this vector.
+#if __SSE__
+        TVectorUnion splat = {{xyzw, xyzw, xyzw, xyzw}};
+        mVec128 = splat.mVec128;
+#else
+        v[0] = xyzw; v[1] = xyzw; v[2] = xyzw; v[3] = xyzw; // the parameter is xyzw, not xyz
+#endif
+    }
+
+    static inline CVector4f lerp(const CVector4f& a, const CVector4f& b, float t)
+    { // Linear blend: yields a at t == 0 and b at t == 1; t is not clamped.
+        return a + ((b - a) * t);
+    }
+    static inline CVector4f nlerp(const CVector4f& a, const CVector4f& b, float t)
+    { // lerp() followed by normalized(), so the blended vector must not be zero-length.
+        return CVector4f::lerp(a, b, t).normalized();
+    }
+
+    inline bool isNormalized(float thresh = 0.0001f) const
+    { // True when lengthSquared() lies within thresh of 1, i.e. the vector is unit length.
+        return (fabsf(1.0f - lengthSquared()) < thresh);
+    }
+
+    inline float& operator[](size_t idx) {return v[idx];} // mutable access by index (0..3, unchecked)
+    inline const float& operator[](size_t idx) const {return v[idx];} // read-only access by index (0..3, unchecked)
+
+
+    union // anonymous: the members below all share the same storage
+    {
+        struct
+        {
+            float x, y, z, w; // named components
+        };
+        float v[4]; // the same four floats, addressable by index
+#if __SSE__
+        __m128 mVec128; // SSE register view, present only in SSE builds
+#endif
+    };
+
+    static const CVector4f skOne; // declaration only; values presumably all 1 - confirm at the definition
+    static const CVector4f skNegOne; // declaration only; values presumably all -1 - confirm at the definition
+    static const CVector4f skZero; // declaration only; values presumably all 0 - confirm at the definition
+};
+
+
+static inline CVector4f operator+(float lhs, const CVector4f& rhs)
+{ // Scalar-on-the-left sum: each component becomes lhs + component.
+#if __SSE__
+    const __m128 scalar = _mm_set1_ps(lhs); // lhs replicated into all four lanes
+    return CVector4f(_mm_add_ps(scalar, rhs.mVec128));
+#else
+    return CVector4f(lhs + rhs.v[0], lhs + rhs.v[1], lhs + rhs.v[2], lhs + rhs.v[3]);
+#endif
+}
+
+static inline CVector4f operator-(float lhs, const CVector4f& rhs)
+{ // Scalar-on-the-left difference: each component becomes lhs - component.
+#if __SSE__
+    const __m128 scalar = _mm_set1_ps(lhs); // lhs replicated into all four lanes
+    return CVector4f(_mm_sub_ps(scalar, rhs.mVec128));
+#else
+    return CVector4f(lhs - rhs.v[0], lhs - rhs.v[1], lhs - rhs.v[2], lhs - rhs.v[3]);
+#endif
+}
+
+static inline CVector4f operator*(float lhs, const CVector4f& rhs)
+{ // Scalar-on-the-left product: each component becomes lhs * component.
+#if __SSE__
+    const __m128 scalar = _mm_set1_ps(lhs); // lhs replicated into all four lanes
+    return CVector4f(_mm_mul_ps(scalar, rhs.mVec128));
+#else
+    return CVector4f(lhs * rhs.v[0], lhs * rhs.v[1], lhs * rhs.v[2], lhs * rhs.v[3]);
+#endif
+}
+
+static inline CVector4f operator/(float lhs, const CVector4f& rhs)
+{ // Scalar-on-the-left quotient: each component becomes lhs / component (no zero check).
+#if __SSE__
+    const __m128 scalar = _mm_set1_ps(lhs); // lhs replicated into all four lanes
+    return CVector4f(_mm_div_ps(scalar, rhs.mVec128));
+#else
+    return CVector4f(lhs / rhs.v[0], lhs / rhs.v[1], lhs / rhs.v[2], lhs / rhs.v[3]);
+#endif
+}
+
+#endif // CVECTOR4F_HPP
diff --git a/MathLib.hpp b/MathLib.hpp
index 248b0ab..28b0031 100644
--- a/MathLib.hpp
+++ b/MathLib.hpp
@@ -3,12 +3,14 @@
 
 #include "CAxisAngle.hpp"
 #include "CMatrix3f.hpp"
+#include "CMatrix4f.hpp"
 #include "CProjection.hpp"
 #include "CTransform.hpp"
 #include "CQuaternion.hpp"
 #include "CVector2f.hpp"
-#include "CVector3d.hpp"
 #include "CVector3f.hpp"
+#include "CVector4f.hpp"
+#include "CRectangle.hpp"
 #include "CPlane.hpp"
 #include "CColor.hpp"
 #include "Global.hpp"
diff --git a/MathLib.pri b/MathLib.pri
index 501112a..1db0cee 100644
--- a/MathLib.pri
+++ b/MathLib.pri
@@ -1,17 +1,18 @@
 SOURCES += \
     $$PWD/CVector3f.cpp \
-    $$PWD/CVector3d.cpp \
     $$PWD/Math.cpp \
     $$PWD/CQuaternion.cpp \
     $$PWD/CMatrix3f.cpp \
     $$PWD/CProjection.cpp \
     $$PWD/CPlane.cpp \
     $$PWD/CTransform.cpp \
-    $$PWD/CVector2f.cpp
+    $$PWD/CVector2f.cpp \
+    $$PWD/CRectangle.cpp \
+    $$PWD/CVector4f.cpp \
+    $$PWD/CMatrix4f.cpp
 
 HEADERS += \
     $$PWD/CVector3f.hpp \
-    $$PWD/CVector3d.hpp \
     $$PWD/Math.hpp \
     $$PWD/CQuaternion.hpp \
     $$PWD/CMatrix3f.hpp \
@@ -22,6 +23,10 @@ HEADERS += \
     $$PWD/CColor.hpp \
     $$PWD/Global.hpp \
     $$PWD/MathLib.hpp \
-    $$PWD/CVector2f.hpp
+    $$PWD/CVector2f.hpp \
+    $$PWD/CRectangle.hpp \
+    $$PWD/CMatrix4f.hpp \
+    $$PWD/TVectorUnion.hpp \
+    $$PWD/CVector4f.hpp
 
 INCLUDEPATH += $$PWD
diff --git a/TVectorUnion.hpp b/TVectorUnion.hpp
new file mode 100644
index 0000000..3cdee5a
--- /dev/null
+++ b/TVectorUnion.hpp
@@ -0,0 +1,13 @@
+#ifndef TVECTORUNION
+#define TVECTORUNION
+#if __SSE__
+#include <xmmintrin.h> // declares __m128, so this header compiles on its own
+#endif
+typedef union // four floats, overlaid with one __m128 in SSE builds
+{
+    float v[4];
+#if __SSE__
+    __m128 mVec128;
+#endif
+} TVectorUnion;
+#endif // TVECTORUNION