Initial AudioMatrixSSE

2025-12-12 22:56:22 +00:00 · 2016-05-21 22:37:16 -10:00
parent 9078a16642
commit d1eb3a6b3b
4 changed files with 567 additions and 38 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,7 +183,8 @@ add_library(boo
            lib/inputdev/DeviceSignature.cpp include/boo/inputdev/DeviceSignature.hpp
            lib/inputdev/IHIDDevice.hpp
            lib/audiodev/AudioMatrix.hpp
-            lib/audiodev/AudioMatrix.cpp
+            #lib/audiodev/AudioMatrix.cpp
+            lib/audiodev/AudioMatrixSSE.cpp
            lib/audiodev/AudioVoiceEngine.hpp
            lib/audiodev/AudioVoiceEngine.cpp
            lib/audiodev/AudioVoice.hpp
--- a/lib/audiodev/AudioMatrix.cpp
+++ b/lib/audiodev/AudioMatrix.cpp
@@ -9,17 +9,17 @@ void AudioMatrixMono::setDefaultMatrixCoefficients(AudioChannelSet acSet)
 {
    m_curSlewFrame = 0;
    m_slewFrames = 0;
-    memset(m_coefs, 0, sizeof(m_coefs));
+    memset(&m_coefs, 0, sizeof(m_coefs));
    switch (acSet)
    {
    case AudioChannelSet::Stereo:
    case AudioChannelSet::Quad:
-        m_coefs[int(AudioChannel::FrontLeft)] = 1.0;
-        m_coefs[int(AudioChannel::FrontRight)] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontLeft)] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontRight)] = 1.0;
        break;
    case AudioChannelSet::Surround51:
    case AudioChannelSet::Surround71:
-        m_coefs[int(AudioChannel::FrontCenter)] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontCenter)] = 1.0;
        break;
    default: break;
    }
@@ -41,7 +41,7 @@ int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = Clamp16(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
+                    *dataOut = Clamp16(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
                    ++dataOut;
                }
            }
@@ -55,7 +55,7 @@ int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = Clamp16(*dataOut + *dataIn * m_coefs[int(ch)]);
+                    *dataOut = Clamp16(*dataOut + *dataIn * m_coefs.v[int(ch)]);
                    ++dataOut;
                }
            }
@@ -80,7 +80,7 @@ int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = Clamp32(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
+                    *dataOut = Clamp32(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
                    ++dataOut;
                }
            }
@@ -94,7 +94,7 @@ int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = Clamp32(*dataOut + *dataIn * m_coefs[int(ch)]);
+                    *dataOut = Clamp32(*dataOut + *dataIn * m_coefs.v[int(ch)]);
                    ++dataOut;
                }
            }
@@ -119,7 +119,7 @@ float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = ClampFlt(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
+                    *dataOut = ClampFlt(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
                    ++dataOut;
                }
            }
@@ -133,7 +133,7 @@ float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
-                    *dataOut = ClampFlt(*dataOut + *dataIn * m_coefs[int(ch)]);
+                    *dataOut = ClampFlt(*dataOut + *dataIn * m_coefs.v[int(ch)]);
                    ++dataOut;
                }
            }
@@ -146,18 +146,18 @@ void AudioMatrixStereo::setDefaultMatrixCoefficients(AudioChannelSet acSet)
 {
    m_curSlewFrame = 0;
    m_slewFrames = 0;
-    memset(m_coefs, 0, sizeof(m_coefs));
+    memset(&m_coefs, 0, sizeof(m_coefs));
    switch (acSet)
    {
    case AudioChannelSet::Stereo:
    case AudioChannelSet::Quad:
-        m_coefs[int(AudioChannel::FrontLeft)][0] = 1.0;
-        m_coefs[int(AudioChannel::FrontRight)][1] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.0;
        break;
    case AudioChannelSet::Surround51:
    case AudioChannelSet::Surround71:
-        m_coefs[int(AudioChannel::FrontLeft)][0] = 1.0;
-        m_coefs[int(AudioChannel::FrontRight)][1] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.0;
+        m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.0;
        break;
    default: break;
    }
@@ -180,8 +180,8 @@ int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp16(*dataOut +
-                                       *dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
-                                       *dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
+                                       *dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                       *dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
@@ -196,8 +196,8 @@ int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp16(*dataOut +
-                                       dataIn[0] * m_coefs[int(ch)][0] +
-                                       dataIn[1] * m_coefs[int(ch)][1]);
+                                       dataIn[0] * m_coefs.v[int(ch)][0] +
+                                       dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
@@ -223,8 +223,8 @@ int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp32(*dataOut +
-                                       *dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
-                                       *dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
+                                       *dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                       *dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
@@ -239,8 +239,8 @@ int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp32(*dataOut +
-                                       dataIn[0] * m_coefs[int(ch)][0] +
-                                       dataIn[1] * m_coefs[int(ch)][1]);
+                                       dataIn[0] * m_coefs.v[int(ch)][0] +
+                                       dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
@@ -266,8 +266,8 @@ float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& inf
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = ClampFlt(*dataOut +
-                                        *dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
-                                        *dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
+                                        *dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                        *dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
@@ -282,8 +282,8 @@ float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& inf
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = ClampFlt(*dataOut +
-                                        dataIn[0] * m_coefs[int(ch)][0] +
-                                        dataIn[1] * m_coefs[int(ch)][1]);
+                                        dataIn[0] * m_coefs.v[int(ch)][0] +
+                                        dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
--- a/lib/audiodev/AudioMatrix.hpp
+++ b/lib/audiodev/AudioMatrix.hpp
@@ -6,6 +6,10 @@
 #include <stdint.h>
 #include <limits.h>

+#if __SSE__
+#include <xmmintrin.h>
+#endif
+
 namespace boo
 {
 struct AudioVoiceEngineMixInfo;
@@ -39,8 +43,16 @@ static inline float ClampFlt(float in)

 class AudioMatrixMono
 {
-    float m_coefs[8] = {};
-    float m_oldCoefs[8] = {};
+    // Eight float coefficients. The mixers index v with int(AudioChannel);
+    // on SSE builds q and d overlay the same storage as packed vectors.
+    // v is declared first, so `= {}` zero-initializes through it.
+    union Coefs
+    {
+        float v[8];
+#if __SSE__
+        __m128 q[2];
+        __m64 d[4];
+#endif
+    };
+    Coefs m_coefs = {};
+    Coefs m_oldCoefs = {};
    size_t m_slewFrames = 0;
    size_t m_curSlewFrame = 0;
 public:
@@ -51,11 +63,18 @@ public:
    {
        m_slewFrames = slewFrames;
        m_curSlewFrame = 0;
+#if __SSE__
+        m_oldCoefs.q[0] = m_coefs.q[0];
+        m_oldCoefs.q[1] = m_coefs.q[1];
+        m_coefs.q[0] = _mm_loadu_ps(coefs);
+        m_coefs.q[1] = _mm_loadu_ps(&coefs[4]);
+#else
        for (int i=0 ; i<8 ; ++i)
        {
-            m_oldCoefs[i] = m_coefs[i];
-            m_coefs[i] = coefs[i];
+            m_oldCoefs.v[i] = m_coefs.v[i];
+            m_coefs.v[i] = coefs[i];
        }
+#endif
    }

    int16_t* mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
@@ -68,8 +87,16 @@ public:

 class AudioMatrixStereo
 {
-    float m_coefs[8][2] = {};
-    float m_oldCoefs[8][2] = {};
+    // Eight rows of two float coefficients. The mixers read v[int(channel)][0]
+    // against dataIn[0] and v[int(channel)][1] against dataIn[1]; on SSE builds
+    // q and d overlay the same storage as packed vectors.
+    // v is declared first, so `= {}` zero-initializes through it.
+    union Coefs
+    {
+        float v[8][2];
+#if __SSE__
+        __m128 q[4];
+        __m64 d[8];
+#endif
+    };
+    Coefs m_coefs = {};
+    Coefs m_oldCoefs = {};
    size_t m_slewFrames = 0;
    size_t m_curSlewFrame = 0;
 public:
@@ -80,13 +107,24 @@ public:
    {
        m_slewFrames = slewFrames;
        m_curSlewFrame = 0;
+#if __SSE__
+        m_oldCoefs.q[0] = m_coefs.q[0];
+        m_oldCoefs.q[1] = m_coefs.q[1];
+        m_oldCoefs.q[2] = m_coefs.q[2];
+        m_oldCoefs.q[3] = m_coefs.q[3];
+        m_coefs.q[0] = _mm_loadu_ps(coefs[0]);
+        m_coefs.q[1] = _mm_loadu_ps(coefs[2]);
+        m_coefs.q[2] = _mm_loadu_ps(coefs[4]);
+        m_coefs.q[3] = _mm_loadu_ps(coefs[6]);
+#else
        for (int i=0 ; i<8 ; ++i)
        {
-            m_oldCoefs[i][0] = m_coefs[i][0];
-            m_oldCoefs[i][1] = m_coefs[i][1];
-            m_coefs[i][0] = coefs[i][0];
-            m_coefs[i][1] = coefs[i][1];
+            m_oldCoefs.v[i][0] = m_coefs.v[i][0];
+            m_oldCoefs.v[i][1] = m_coefs.v[i][1];
+            /* coefs is the caller's plain float array, not a Coefs union, so it has no .v member */
+            m_coefs.v[i][0] = coefs[i][0];
+            m_coefs.v[i][1] = coefs[i][1];
        }
+#endif
    }

    int16_t* mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
--- a/lib/audiodev/AudioMatrixSSE.cpp
+++ b/lib/audiodev/AudioMatrixSSE.cpp
@@ -0,0 +1,490 @@
+#include "AudioMatrix.hpp"
+#include "AudioVoiceEngine.hpp"
+#include <string.h>
+
+#include <immintrin.h>
+
+namespace boo
+{
+
+/* Four packed floats, addressable per element (v) or, on SSE builds, as one
+ * 128-bit vector (q) or two 64-bit halves (d). */
+union TVectorUnion
+{
+    float v[4];
+#if __SSE__
+    __m128 q;
+    __m64 d[2];
+#endif
+};
+
+/* Clamp bounds with one value replicated into all four lanes.
+ * Float literals are used because brace-initializing a float from an integer
+ * constant it cannot hold exactly (INT32_MAX) is a narrowing conversion. */
+static constexpr TVectorUnion ZeroVec = {};
+static constexpr TVectorUnion Min16Vec = {-32768.f, -32768.f, -32768.f, -32768.f};
+static constexpr TVectorUnion Max16Vec = {32767.f, 32767.f, 32767.f, 32767.f};
+static constexpr TVectorUnion Min32Vec = {-2147483648.f, -2147483648.f, -2147483648.f, -2147483648.f};
+/* 2147483520 is the largest float below 2^31. float(INT32_MAX) rounds up to 2^31,
+ * which _mm_cvttps_epi32 converts to 0x80000000, so clamping to it would wrap
+ * positive overflow around to INT32_MIN. */
+static constexpr TVectorUnion Max32Vec = {2147483520.f, 2147483520.f, 2147483520.f, 2147483520.f};
+static constexpr TVectorUnion MinFltVec = {-1.f, -1.f, -1.f, -1.f};
+static constexpr TVectorUnion MaxFltVec = {1.f, 1.f, 1.f, 1.f};
+
+/* Cancel any slew in progress, clear all eight coefficients, then route the
+ * mono input at unity gain: to FrontLeft and FrontRight for Stereo/Quad, to
+ * FrontCenter for Surround51/Surround71. Other channel sets stay silent. */
+void AudioMatrixMono::setDefaultMatrixCoefficients(AudioChannelSet acSet)
+{
+    m_curSlewFrame = 0;
+    m_slewFrames = 0;
+    /* _mm_setzero_ps() clears the vectors without first reading their old contents */
+    m_coefs.q[0] = _mm_setzero_ps();
+    m_coefs.q[1] = _mm_setzero_ps();
+    switch (acSet)
+    {
+    case AudioChannelSet::Stereo:
+    case AudioChannelSet::Quad:
+        m_coefs.v[int(AudioChannel::FrontLeft)] = 1.f;
+        m_coefs.v[int(AudioChannel::FrontRight)] = 1.f;
+        break;
+    case AudioChannelSet::Surround51:
+    case AudioChannelSet::Surround71:
+        m_coefs.v[int(AudioChannel::FrontCenter)] = 1.f;
+        break;
+    default: break;
+    }
+}
+
+/* Accumulate mono 16-bit input into an interleaved 16-bit output buffer.
+ * Each input sample is scaled by the coefficient of every mapped channel and
+ * added to the existing output value through Clamp16; channels mapped to
+ * Unknown occupy no output slot. While a slew is active the coefficient is a
+ * linear blend of the old and new values, advanced once per input sample.
+ * Returns the output position just past the last value written. */
+int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
+                                            const int16_t* dataIn, int16_t* dataOut, size_t samples)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    for (size_t s=0 ; s<samples ; ++s, ++dataIn)
+    {
+        const bool slewing = m_slewFrames && m_curSlewFrame < m_slewFrames;
+        if (!slewing)
+        {
+            /* steady state: the new coefficients apply directly */
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                const AudioChannel ch = chmap.m_channels[c];
+                if (ch == AudioChannel::Unknown)
+                    continue;
+                *dataOut = Clamp16(*dataOut + *dataIn * m_coefs.v[int(ch)]);
+                ++dataOut;
+            }
+            continue;
+        }
+
+        /* blend weights: t for the new coefficients, omt for the old ones */
+        const double t = m_curSlewFrame / double(m_slewFrames);
+        const double omt = 1.0 - t;
+        for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+        {
+            const AudioChannel ch = chmap.m_channels[c];
+            if (ch == AudioChannel::Unknown)
+                continue;
+            const double gain = m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt;
+            *dataOut = Clamp16(*dataOut + *dataIn * gain);
+            ++dataOut;
+        }
+        ++m_curSlewFrame;
+    }
+    return dataOut;
+}
+
+/* out[0..3] += samp * coefs, clamped to [lo, hi]; all four lanes are loaded and stored */
+static inline void MixMono32x4(int32_t* dataOut, __m128 coefs, __m128 samp, __m128 lo, __m128 hi)
+{
+    __m128i* out = reinterpret_cast<__m128i*>(dataOut);
+    const __m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs, samp));
+    _mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, lo), hi)));
+}
+
+/* out[0..1] += samp * coefs, clamped to [lo, hi]; only 64 bits are loaded and stored */
+static inline void MixMono32x2(int32_t* dataOut, __m128 coefs, __m128 samp, __m128 lo, __m128 hi)
+{
+    __m128i* out = reinterpret_cast<__m128i*>(dataOut);
+    const __m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadl_epi64(out)), _mm_mul_ps(coefs, samp));
+    _mm_storel_epi64(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, lo), hi)));
+}
+
+/* Accumulate mono 32-bit input into an interleaved 32-bit output buffer.
+ * Each input sample is scaled by the coefficient of every mapped channel and
+ * added to the existing output value; channels mapped to Unknown occupy no
+ * output slot. While a slew is active the coefficients are a linear blend of
+ * the old and new values, advanced once per input sample.
+ *
+ * The SSE path is taken only for 2/4/6/8 channels whose map sends slot c to
+ * coefficient index c, because the vectors hold coefficients in index order.
+ * Any other layout goes through the per-channel scalar loop.
+ * Returns the output position just past the last value written. */
+int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
+                                            const int32_t* dataIn, int32_t* dataOut, size_t samples)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    const unsigned chanCount = chmap.m_channelCount;
+
+    bool vectorOk = chanCount == 2 || chanCount == 4 || chanCount == 6 || chanCount == 8;
+    for (unsigned c=0 ; vectorOk && c<chanCount ; ++c)
+        vectorOk = int(chmap.m_channels[c]) == int(c);
+
+    /* 2147483520 is the largest float below 2^31; clamping to float(INT32_MAX) == 2^31
+     * would make _mm_cvttps_epi32 return 0x80000000 and wrap to INT32_MIN */
+    const __m128 lo = _mm_set1_ps(-2147483648.f);
+    const __m128 hi = _mm_set1_ps(2147483520.f);
+
+    for (size_t s=0 ; s<samples ; ++s, ++dataIn)
+    {
+        /* coefficients in effect for this input sample */
+        Coefs cur = m_coefs;
+        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
+        {
+            const float t = m_curSlewFrame / float(m_slewFrames);
+            const __m128 tv = _mm_set1_ps(t);
+            const __m128 omtv = _mm_set1_ps(1.f - t);
+            cur.q[0] = _mm_add_ps(_mm_mul_ps(m_coefs.q[0], tv), _mm_mul_ps(m_oldCoefs.q[0], omtv));
+            cur.q[1] = _mm_add_ps(_mm_mul_ps(m_coefs.q[1], tv), _mm_mul_ps(m_oldCoefs.q[1], omtv));
+            ++m_curSlewFrame;
+        }
+
+        if (vectorOk)
+        {
+            /* the input is mono: the same sample feeds every output lane */
+            /* NOTE(review): accumulating through float keeps only 24 significant bits */
+            const __m128 samp = _mm_set1_ps(float(*dataIn));
+            switch (chanCount)
+            {
+            case 2:
+                MixMono32x2(dataOut, cur.q[0], samp, lo, hi);
+                break;
+            case 4:
+                MixMono32x4(dataOut, cur.q[0], samp, lo, hi);
+                break;
+            case 6:
+                MixMono32x4(dataOut, cur.q[0], samp, lo, hi);
+                MixMono32x2(dataOut + 4, cur.q[1], samp, lo, hi);
+                break;
+            default: /* 8 */
+                MixMono32x4(dataOut, cur.q[0], samp, lo, hi);
+                MixMono32x4(dataOut + 4, cur.q[1], samp, lo, hi);
+                break;
+            }
+            dataOut += chanCount;
+        }
+        else
+        {
+            for (unsigned c=0 ; c<chanCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    *dataOut = Clamp32(*dataOut + *dataIn * cur.v[int(ch)]);
+                    ++dataOut;
+                }
+            }
+        }
+    }
+    return dataOut;
+}
+
+/* Accumulate mono float input into an interleaved float output buffer.
+ * Each input sample is scaled by the coefficient of every mapped channel and
+ * added to the existing output value through ClampFlt; channels mapped to
+ * Unknown occupy no output slot. While a slew is active the coefficient is a
+ * linear blend of the old and new values, advanced once per input sample.
+ * Returns the output position just past the last value written. */
+float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
+                                          const float* dataIn, float* dataOut, size_t samples)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    for (size_t s=0 ; s<samples ; ++s, ++dataIn)
+    {
+        const bool slewing = m_slewFrames && m_curSlewFrame < m_slewFrames;
+        if (!slewing)
+        {
+            /* steady state: the new coefficients apply directly */
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                const AudioChannel ch = chmap.m_channels[c];
+                if (ch == AudioChannel::Unknown)
+                    continue;
+                *dataOut = ClampFlt(*dataOut + *dataIn * m_coefs.v[int(ch)]);
+                ++dataOut;
+            }
+            continue;
+        }
+
+        /* blend weights: t for the new coefficients, omt for the old ones */
+        const double t = m_curSlewFrame / double(m_slewFrames);
+        const double omt = 1.0 - t;
+        for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+        {
+            const AudioChannel ch = chmap.m_channels[c];
+            if (ch == AudioChannel::Unknown)
+                continue;
+            const double gain = m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt;
+            *dataOut = ClampFlt(*dataOut + *dataIn * gain);
+            ++dataOut;
+        }
+        ++m_curSlewFrame;
+    }
+    return dataOut;
+}
+
+/* Cancel any slew in progress, clear all coefficients, then route the left
+ * input to FrontLeft and the right input to FrontRight at unity gain for the
+ * Stereo, Quad, Surround51 and Surround71 sets. Other channel sets stay silent. */
+void AudioMatrixStereo::setDefaultMatrixCoefficients(AudioChannelSet acSet)
+{
+    m_curSlewFrame = 0;
+    m_slewFrames = 0;
+    /* _mm_setzero_ps() clears the vectors without first reading their old contents */
+    m_coefs.q[0] = _mm_setzero_ps();
+    m_coefs.q[1] = _mm_setzero_ps();
+    m_coefs.q[2] = _mm_setzero_ps();
+    m_coefs.q[3] = _mm_setzero_ps();
+    switch (acSet)
+    {
+    case AudioChannelSet::Stereo:
+    case AudioChannelSet::Quad:
+    case AudioChannelSet::Surround51:
+    case AudioChannelSet::Surround71:
+        m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.f;
+        m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.f;
+        break;
+    default: break;
+    }
+}
+
+/* Accumulate interleaved stereo 16-bit input into an interleaved 16-bit output
+ * buffer. For every mapped channel the left input (dataIn[0]) is scaled by
+ * coefficient column 0 and the right input (dataIn[1]) by column 1; the sum is
+ * added to the existing output value through Clamp16. Channels mapped to
+ * Unknown occupy no output slot. While a slew is active the coefficients are a
+ * linear blend of the old and new values, advanced once per input frame.
+ * Returns the output position just past the last value written. */
+int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
+                                                const int16_t* dataIn, int16_t* dataOut, size_t frames)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    for (size_t f=0 ; f<frames ; ++f, dataIn += 2)
+    {
+        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
+        {
+            double t = m_curSlewFrame / double(m_slewFrames);
+            double omt = 1.0 - t;
+
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    /* column 1 belongs to the right input, exactly as in the steady-state branch */
+                    *dataOut = Clamp16(*dataOut +
+                                       dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                       dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
+                    ++dataOut;
+                }
+            }
+
+            ++m_curSlewFrame;
+        }
+        else
+        {
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    *dataOut = Clamp16(*dataOut +
+                                       dataIn[0] * m_coefs.v[int(ch)][0] +
+                                       dataIn[1] * m_coefs.v[int(ch)][1]);
+                    ++dataOut;
+                }
+            }
+        }
+    }
+    return dataOut;
+}
+
+/* Accumulate interleaved stereo 32-bit input into an interleaved 32-bit output
+ * buffer. For every mapped channel the left input (dataIn[0]) is scaled by
+ * coefficient column 0 and the right input (dataIn[1]) by column 1; the sum is
+ * added to the existing output value through Clamp32. Channels mapped to
+ * Unknown occupy no output slot. While a slew is active the coefficients are a
+ * linear blend of the old and new values, advanced once per input frame.
+ * Returns the output position just past the last value written. */
+int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
+                                                const int32_t* dataIn, int32_t* dataOut, size_t frames)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    for (size_t f=0 ; f<frames ; ++f, dataIn += 2)
+    {
+        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
+        {
+            double t = m_curSlewFrame / double(m_slewFrames);
+            double omt = 1.0 - t;
+
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    /* column 1 belongs to the right input, exactly as in the steady-state branch */
+                    *dataOut = Clamp32(*dataOut +
+                                       dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                       dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
+                    ++dataOut;
+                }
+            }
+
+            ++m_curSlewFrame;
+        }
+        else
+        {
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    *dataOut = Clamp32(*dataOut +
+                                       dataIn[0] * m_coefs.v[int(ch)][0] +
+                                       dataIn[1] * m_coefs.v[int(ch)][1]);
+                    ++dataOut;
+                }
+            }
+        }
+    }
+    return dataOut;
+}
+
+/* Accumulate interleaved stereo float input into an interleaved float output
+ * buffer. For every mapped channel the left input (dataIn[0]) is scaled by
+ * coefficient column 0 and the right input (dataIn[1]) by column 1; the sum is
+ * added to the existing output value through ClampFlt. Channels mapped to
+ * Unknown occupy no output slot. While a slew is active the coefficients are a
+ * linear blend of the old and new values, advanced once per input frame.
+ * Returns the output position just past the last value written. */
+float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
+                                              const float* dataIn, float* dataOut, size_t frames)
+{
+    const ChannelMap& chmap = info.m_channelMap;
+    for (size_t f=0 ; f<frames ; ++f, dataIn += 2)
+    {
+        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
+        {
+            double t = m_curSlewFrame / double(m_slewFrames);
+            double omt = 1.0 - t;
+
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    /* column 1 belongs to the right input, exactly as in the steady-state branch */
+                    *dataOut = ClampFlt(*dataOut +
+                                        dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
+                                        dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
+                    ++dataOut;
+                }
+            }
+
+            ++m_curSlewFrame;
+        }
+        else
+        {
+            for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
+            {
+                AudioChannel ch = chmap.m_channels[c];
+                if (ch != AudioChannel::Unknown)
+                {
+                    *dataOut = ClampFlt(*dataOut +
+                                        dataIn[0] * m_coefs.v[int(ch)][0] +
+                                        dataIn[1] * m_coefs.v[int(ch)][1]);
+                    ++dataOut;
+                }
+            }
+        }
+    }
+    return dataOut;
+}
+
+}
+