diff --git a/include/CColor.hpp b/include/CColor.hpp
index dae15dc..8d98c06 100644
--- a/include/CColor.hpp
+++ b/include/CColor.hpp
@@ -1,23 +1,29 @@
 #ifndef CCOLOR_HPP
 #define CCOLOR_HPP
 
-#include "Global.hpp"
-#include "Math.hpp"
-#if ZE_ATHENA_TYPES
-#include <Athena/IStreamReader.hpp>
-#endif
+#include "MathLib.hpp"
+#include <iostream>
 
 #if BYTE_ORDER == __ORDER_LITTLE_ENDIAN__
-#define COLOR(rgba) (unsigned long)( ( (rgba) & 0x000000FF ) << 24 | ( (rgba) & 0x0000FF00 ) <<  8 \
+#define COLOR(rgba) ( ( (rgba) & 0x000000FF ) << 24 | ( (rgba) & 0x0000FF00 ) <<  8 \
                     | ( (rgba) & 0x00FF0000 ) >>  8 | ( (rgba) & 0xFF000000 ) >> 24 )
 #else
-#define COLOR(rgba) (unsigned long)rgba
+#define COLOR(rgba) (rgba) /* parenthesised so expression arguments expand safely */
 #endif
 
 namespace Zeus
 {
-typedef unsigned char Comp8;
-typedef unsigned long Comp32;
+typedef union
+{
+    struct
+    {
+        unsigned char r, g, b, a;
+    };
+    unsigned int rgba;
+} RGBA32;
+
+typedef uint8_t Comp8;
+typedef uint32_t Comp32;
 
 class alignas(16) CColor
 {
@@ -34,126 +40,176 @@ public:
     static const CColor skYellow;
     static const CColor skWhite;
 
+#if __SSE__
+    CColor(const __m128& mVec128) : mVec128(mVec128) {}
+#endif
 
-    CColor() : rgba(~0u) {}
+    CColor() : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
     CColor(float rgb, float a = 1.0) { splat(rgb, a); }
-    CColor(float r, float g, float b, float a = 1.0f)
-        : r(Comp8(255*r)),
-          g(Comp8(255*g)),
-          b(Comp8(255*b)),
-          a(Comp8(255*a))
-    {}
-
+    CColor(float r, float g, float b, float a = 1.0f) {v[0] = r; v[1] = g; v[2] = b; v[3] = a; }
 #if ZE_ATHENA_TYPES
     CColor(Athena::io::IStreamReader& reader) {readRGBA(reader);}
 #endif
+
     CColor(Comp32 rgba) { fromRGBA32(rgba); }
-    CColor(const Comp8 rgba[4]) : r(rgba[0]), g(rgba[1]), b(rgba[2]), a(rgba[3]) {}
-    CColor(Comp8 r, Comp8 g, Comp8 b, Comp8 a = 255)
-        : r(r),  g(g),  b(b), a(a) {}
+    CColor(const Comp8* rgba) { fromRGBA8(rgba[0], rgba[1], rgba[2], rgba[3]); }
 
 #if ZE_ATHENA_TYPES
     inline void readRGBA(Athena::io::IStreamReader& reader)
     {
-        r = Comp8(255 * reader.readFloat());
-        g = Comp8(255 * reader.readFloat());
-        b = Comp8(255 * reader.readFloat());
-        a = Comp8(255 * reader.readFloat());
+        r = reader.readFloat();
+        g = reader.readFloat();
+        b = reader.readFloat();
+        a = reader.readFloat();
     }
     inline void readBGRA(Athena::io::IStreamReader& reader)
     {
-        b = Comp8(255 * reader.readFloat());
-        g = Comp8(255 * reader.readFloat());
-        r = Comp8(255 * reader.readFloat());
-        a = Comp8(255 * reader.readFloat());
+        b = reader.readFloat();
+        g = reader.readFloat();
+        r = reader.readFloat();
+        a = reader.readFloat();
     }
 #endif
 
     inline bool operator==(const CColor& rhs) const
-    { return (rgba == rhs.rgba); }
+    { return (r == rhs.r && g == rhs.g && b == rhs.b && a == rhs.a); }
     inline bool operator!=(const CColor& rhs) const
     { return !(*this == rhs); }
     inline CColor operator+(const CColor& rhs) const
     {
-        CColor ret; ret.r = r + rhs.r; ret.g = g + rhs.g; ret.b = b + rhs.b; ret.a = a + rhs.a;
-        return ret;
+#if __SSE__
+        return CColor(_mm_add_ps(mVec128, rhs.mVec128));
+#else
+        return CColor(r + rhs.r, g + rhs.g, b + rhs.b, a + rhs.a);
+#endif
     }
     inline CColor operator-(const CColor& rhs) const
     {
-        CColor ret; ret.r = r - rhs.r; ret.g = g - rhs.g; ret.b = b - rhs.b; ret.a = a - rhs.a;
-        return ret;
+#if __SSE__
+        return CColor(_mm_sub_ps(mVec128, rhs.mVec128));
+#else
+        return CColor(r - rhs.r, g - rhs.g, b - rhs.b, a - rhs.a);
+#endif
     }
-
     inline CColor operator*(const CColor& rhs) const
     {
-        CColor ret; ret.r = r * rhs.r; ret.g = g * rhs.g; ret.b = b * rhs.b; ret.a = a * rhs.a;
-        return ret;
+#if __SSE__
+        return CColor(_mm_mul_ps(mVec128, rhs.mVec128));
+#else
+        return CColor(r * rhs.r, g * rhs.g, b * rhs.b, a * rhs.a);
+#endif
     }
     inline CColor operator/(const CColor& rhs) const
     {
-        CColor ret; ret.r = r / rhs.r; ret.g = g / rhs.g; ret.b = b / rhs.b; ret.a = a / rhs.a;
-        return ret;
+#if __SSE__
+        return CColor(_mm_div_ps(mVec128, rhs.mVec128));
+#else
+        return CColor(r / rhs.r, g / rhs.g, b / rhs.b, a / rhs.a);
+#endif
     }
     inline CColor operator+(float val) const
     {
-        CColor ret;
-        ret.r = r + Comp8(255 * val); ret.g = g + Comp8(255 * val); ret.b = b + Comp8(255 * val); ret.a = a + Comp8(255 * val);
-        return ret;
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CColor(_mm_add_ps(mVec128, splat.mVec128));
+#else
+        return CColor(r + val, g + val, b + val, a + val);
+#endif
     }
     inline CColor operator-(float val) const
     {
-        CColor ret;
-        ret.r = r - Comp8(255 * val); ret.g = g - Comp8(255 * val); ret.b = b - Comp8(255 * val); ret.a = a - Comp8(255 * val);
-        return ret;
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CColor(_mm_sub_ps(mVec128, splat.mVec128));
+#else
+        return CColor(r - val, g - val, b - val, a - val);
+#endif
     }
     inline CColor operator*(float val) const
     {
-        CColor ret;
-        ret.r = r * Comp8(255 * val); ret.g = g * Comp8(255 * val); ret.b = b * Comp8(255 * val); ret.a = a * Comp8(255 * val);
-        return ret;
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CColor(_mm_mul_ps(mVec128, splat.mVec128));
+#else
+        return CColor(r * val, g * val, b * val, a * val);
+#endif
     }
     inline CColor operator/(float val) const
     {
-        CColor ret;
-        ret.r = r / Comp8(255 * val); ret.g = g / Comp8(255 * val); ret.b = b / Comp8(255 * val); ret.a = a / Comp8(255 * val);
-        return ret;
+#if __SSE__
+        TVectorUnion splat = {{val, val, val, val}};
+        return CColor(_mm_div_ps(mVec128, splat.mVec128));
+#else
+        return CColor(r / val, g / val, b / val, a / val);
+#endif
     }
     inline const CColor& operator+=(const CColor& rhs)
     {
-        r += rhs.r; g += rhs.g; b += rhs.b;
-        a += rhs.a; return *this;
+#if __SSE__
+        mVec128 = _mm_add_ps(mVec128, rhs.mVec128);
+#else
+        r += rhs.r; g += rhs.g; b += rhs.b; a += rhs.a;
+#endif
+        return *this;
     }
     inline const CColor& operator-=(const CColor& rhs)
     {
+#if __SSE__
+        mVec128 = _mm_sub_ps(mVec128, rhs.mVec128);
+#else
         r -= rhs.r; g -= rhs.g; b -= rhs.b; a -= rhs.a;
+#endif
         return *this;
     }
     inline const CColor& operator *=(const CColor& rhs)
     {
+#if __SSE__
+        mVec128 = _mm_mul_ps(mVec128, rhs.mVec128);
+#else
         r *= rhs.r; g *= rhs.g; b *= rhs.b; a *= rhs.a;
+#endif
         return *this;
     }
     inline const CColor& operator /=(const CColor& rhs)
     {
+#if __SSE__
+        mVec128 = _mm_div_ps(mVec128, rhs.mVec128);
+#else
         r /= rhs.r; g /= rhs.g; b /= rhs.b; a /= rhs.a;
+#endif
         return *this;
     }
     inline void normalize()
     {
         float mag = magnitude();
+        assert(mag != 0.0);
+        mag = 1.0 / mag;
         *this *= mag;
     }
     inline CColor normalized()
     {
         float mag = magnitude();
+        assert(mag != 0.0);
+        mag = 1.0 / mag;
         return *this * mag;
     }
     inline float magSquared() const
-    { return ((r * r + g * g + b * b + a * a) / 255.f); }
-
+    {
+#if __SSE4_1__
+        TVectorUnion result;
+        result.mVec128 = _mm_dp_ps(mVec128, mVec128, 0xF1);
+        return result.v[0];
+#elif __SSE__
+        TVectorUnion result;
+        result.mVec128 = _mm_mul_ps(mVec128, mVec128);
+        return result.v[0] + result.v[1] + result.v[2] + result.v[3];
+#else
+        return r * r + g * g + b * b + a * a;
+#endif
+    }
     inline float magnitude() const
     {
-        return Math::sqrtF(magSquared());
+        return sqrtf(magSquared());
     }
     static inline CColor lerp(const CColor& a, const CColor& b, float t)
     {
@@ -163,29 +219,43 @@ public:
     {
         return lerp(a, b, t).normalized();
     }
-
-    inline Comp8& operator[](size_t idx) {return (&r)[idx];}
-    inline const Comp8& operator[](size_t idx) const { return (&r)[idx]; }
+    inline float& operator[](size_t idx) { return v[idx]; } // v[] shares the union with r, g, b, a
+    inline const float& operator[](size_t idx) const { return v[idx]; } // idx must be in 0..3 (not checked)
     inline void splat(float rgb, float a)
     {
-        r = Comp8(255 * rgb);
-        g = Comp8(255 * rgb);
-        b = Comp8(255 * rgb);
-        this->a = Comp8(255 * a);
+#if __SSE__
+        TVectorUnion splat = {{rgb, rgb, rgb, a}};
+        mVec128 = splat.mVec128;
+#else
+        v[0] = rgb; v[1] = rgb; v[2] = rgb; v[3] = a;
+#endif
     }
 
     union
     {
-        struct { Comp8 r, g, b, a; };
-        Comp32 rgba;
+        struct
+        {
+            float r, g, b, a;
+        };
+        float v[4];
+#if __SSE__
+        __m128 mVec128;
+#endif
     };
 
     void fromRGBA8(unsigned char r, unsigned char g, unsigned char b, unsigned char a)
     {
-        this->r = r;
-        this->g = g;
-        this->b = b;
-        this->a = a;
+        this->r = r / 255.f;
+        this->g = g / 255.f;
+        this->b = b / 255.f;
+        this->a = a / 255.f;
+    }
+
+    void fromRGBA32(unsigned int rgba)
+    {
+        RGBA32 tmp; // automatic storage: a function-local static would be shared scratch state raced on by concurrent callers
+        tmp.rgba = COLOR(rgba); // COLOR() byte-swaps in its little-endian branch before the bytes are read back through the union
+        fromRGBA8(tmp.r, tmp.g, tmp.b, tmp.a); // scales each 8-bit channel by 1/255 into the float members
     }
 
     /**
@@ -210,51 +280,47 @@ public:
     void fromHSL(float h, float s, float l, float _a = 1.0);
 
     void toHSL(float& h, float& s, float& l);
-
-    void fromRGBA32(unsigned int rgba)
-    { this->rgba = COLOR(rgba); }
 };
 
 static inline CColor operator+(float lhs, const CColor& rhs)
 {
-    CColor ret;
-    ret.r = Comp8(255 * lhs) + rhs.r;
-    ret.g = Comp8(255 * lhs) + rhs.g;
-    ret.b = Comp8(255 * lhs) + rhs.b;
-    ret.a = Comp8(255 * lhs) + rhs.a;
-    return ret;
+#if __SSE__
+    TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
+    return CColor(_mm_add_ps(splat.mVec128, rhs.mVec128));
+#else
+    return CColor(lhs + rhs.r, lhs + rhs.g, lhs + rhs.b, lhs + rhs.a);
+#endif
 }
 
 static inline CColor operator-(float lhs, const CColor& rhs)
 {
-    CColor ret;
-    ret.r = Comp8(255 * lhs) - rhs.r;
-    ret.g = Comp8(255 * lhs) - rhs.g;
-    ret.b = Comp8(255 * lhs) - rhs.b;
-    ret.a = Comp8(255 * lhs) - rhs.a;
-    return ret;
+#if __SSE__
+    TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
+    return CColor(_mm_sub_ps(splat.mVec128, rhs.mVec128));
+#else
+    return CColor(lhs - rhs.r, lhs - rhs.g, lhs - rhs.b, lhs - rhs.a);
+#endif
 }
 
 static inline CColor operator*(float lhs, const CColor& rhs)
 {
-    CColor ret;
-    ret.r = Comp8(255 * lhs) * rhs.r;
-    ret.g = Comp8(255 * lhs) * rhs.g;
-    ret.b = Comp8(255 * lhs) * rhs.b;
-    ret.a = Comp8(255 * lhs) * rhs.a;
-    return ret;
+#if __SSE__
+    TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
+    return CColor(_mm_mul_ps(splat.mVec128, rhs.mVec128));
+#else
+    return CColor(lhs * rhs.r, lhs * rhs.g, lhs * rhs.b, lhs * rhs.a);
+#endif
 }
 
 static inline CColor operator/(float lhs, const CColor& rhs)
 {
-    CColor ret;
-    ret.r = Comp8(255 * lhs) / rhs.r;
-    ret.g = Comp8(255 * lhs) / rhs.g;
-    ret.b = Comp8(255 * lhs) / rhs.b;
-    ret.a = Comp8(255 * lhs) / rhs.a;
-    return ret;
+#if __SSE__
+    TVectorUnion splat = {{lhs, lhs, lhs, lhs}};
+    return CColor(_mm_div_ps(splat.mVec128, rhs.mVec128));
+#else
+    return CColor(lhs / rhs.r, lhs / rhs.g, lhs / rhs.b, lhs / rhs.a);
+#endif
 }
 
 }
-
 #endif // CCOLOR_HPP
diff --git a/src/CColor.cpp b/src/CColor.cpp
index c1f12c3..997d0ad 100644
--- a/src/CColor.cpp
+++ b/src/CColor.cpp
@@ -46,20 +46,16 @@ void CColor::fromHSV(float h, float s, float v, float _a)
     case 5: _r = v, _g = p, _b = q; break;
     }
 
-    r = _r * 255;
-    g = _g * 255;
-    b = _b * 255;
-    a = _a * 255;
+    r = _r;
+    g = _g;
+    b = _b;
+    a = _a;
 }
 
 void CColor::toHSV(float &h, float &s, float &v) const
 {
-    float rf = r/255.f;
-    float gf = g/255.f;
-    float bf = b/255.f;
-
-    float min = Math::min(rf, Math::min(gf, bf));
-    float max = Math::max(rf, Math::max(gf, bf));
+    float min = Math::min(r, Math::min(g, b));
+    float max = Math::max(r, Math::max(g, b));
     v = max;
 
     float delta = max - min;
@@ -69,45 +65,35 @@ void CColor::toHSV(float &h, float &s, float &v) const
         h = 0;
     else
     {
-        if (max == rf)
-            h = (gf - bf) / delta + (gf < bf ? 6 : 0);
-        else if (max == gf)
-            h = (bf - rf) / delta + 2;
-        else if (max == bf)
-            h = (rf - gf) / delta + 4;
+        if (max == r)
+            h = (g - b) / delta + (g < b ? 6 : 0);
+        else if (max == g)
+            h = (b - r) / delta + 2;
+        else if (max == b)
+            h = (r - g) / delta + 4;
         h /= 6;
     }
 }
 
 void CColor::fromHSL(float h, float s, float l, float _a)
 {
-    float _r, _g, _b;
-
     if (s == 0.0f)
         r = g = b = l;
     else
     {
-        const float q = l < 0.5f ? l * (1.f + s) : l + s - 1.f * s;
+        const float q = l < 0.5f ? l * (1.f + s) : l + s - l * s; // was "- 1.f * s", which cancels s and leaves q == l
         const float p = 2 * l - q;
-        _r = hueToRgb(p, q, h + 1.f/3);
-        _g = hueToRgb(p, q, h);
-        _b = hueToRgb(p, q, h - 1.f/3);
+        r = hueToRgb(p, q, h + 1.f/3);
+        g = hueToRgb(p, q, h);
+        b = hueToRgb(p, q, h - 1.f/3);
     }
-
-
-    r = _r * 255.f;
-    g = _g * 255.f;
-    b = _b * 255.f;
-    a = _a * 255.f;
+    a = _a;
 }
 
 void CColor::toHSL(float &h, float &s, float &l)
 {
-    const float rf = r / 255.f;
-    const float gf = g / 255.f;
-    const float bf = b / 255.f;
-    const float min = Math::min(rf, Math::min(gf, bf));
-    const float max = Math::max(rf, Math::max(gf, bf));
+    const float min = Math::min(r, Math::min(g, b));
+    const float max = Math::max(r, Math::max(g, b));
     const float d = max - min;
 
     if (max == min)
@@ -115,12 +101,12 @@ void CColor::toHSL(float &h, float &s, float &l)
     else
     {
         s = l > 0.5f ? d / (2.f - max - min) : d / (max + min);
-        if (max == rf)
-            h = (gf - bf) / d + (gf < bf ? 6.f : 0.f);
-        else if (max == gf)
-            h = (bf - rf) / d + 2.f;
-        else if (max == bf)
-            h = (rf - gf) / d + 4.f;
+        if (max == r)
+            h = (g - b) / d + (g < b ? 6.f : 0.f);
+        else if (max == g)
+            h = (b - r) / d + 2.f;
+        else if (max == b)
+            h = (r - g) / d + 4.f;
 
         h /= 6;
     }