Initial AudioMatrixSSE

This commit is contained in:
Jack Andersen 2016-05-21 22:37:16 -10:00
parent 9078a16642
commit d1eb3a6b3b
4 changed files with 567 additions and 38 deletions

View File

@ -183,7 +183,8 @@ add_library(boo
lib/inputdev/DeviceSignature.cpp include/boo/inputdev/DeviceSignature.hpp
lib/inputdev/IHIDDevice.hpp
lib/audiodev/AudioMatrix.hpp
lib/audiodev/AudioMatrix.cpp
#lib/audiodev/AudioMatrix.cpp
lib/audiodev/AudioMatrixSSE.cpp
lib/audiodev/AudioVoiceEngine.hpp
lib/audiodev/AudioVoiceEngine.cpp
lib/audiodev/AudioVoice.hpp

View File

@ -9,17 +9,17 @@ void AudioMatrixMono::setDefaultMatrixCoefficients(AudioChannelSet acSet)
{
m_curSlewFrame = 0;
m_slewFrames = 0;
memset(m_coefs, 0, sizeof(m_coefs));
memset(&m_coefs, 0, sizeof(m_coefs));
switch (acSet)
{
case AudioChannelSet::Stereo:
case AudioChannelSet::Quad:
m_coefs[int(AudioChannel::FrontLeft)] = 1.0;
m_coefs[int(AudioChannel::FrontRight)] = 1.0;
m_coefs.v[int(AudioChannel::FrontLeft)] = 1.0;
m_coefs.v[int(AudioChannel::FrontRight)] = 1.0;
break;
case AudioChannelSet::Surround51:
case AudioChannelSet::Surround71:
m_coefs[int(AudioChannel::FrontCenter)] = 1.0;
m_coefs.v[int(AudioChannel::FrontCenter)] = 1.0;
break;
default: break;
}
@ -41,7 +41,7 @@ int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
*dataOut = Clamp16(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
@ -55,7 +55,7 @@ int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut + *dataIn * m_coefs[int(ch)]);
*dataOut = Clamp16(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
@ -80,7 +80,7 @@ int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
*dataOut = Clamp32(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
@ -94,7 +94,7 @@ int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut + *dataIn * m_coefs[int(ch)]);
*dataOut = Clamp32(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
@ -119,7 +119,7 @@ float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut + *dataIn * (m_coefs[int(ch)] * t + m_oldCoefs[int(ch)] * omt));
*dataOut = ClampFlt(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
@ -133,7 +133,7 @@ float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut + *dataIn * m_coefs[int(ch)]);
*dataOut = ClampFlt(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
@ -146,18 +146,18 @@ void AudioMatrixStereo::setDefaultMatrixCoefficients(AudioChannelSet acSet)
{
m_curSlewFrame = 0;
m_slewFrames = 0;
memset(m_coefs, 0, sizeof(m_coefs));
memset(&m_coefs, 0, sizeof(m_coefs));
switch (acSet)
{
case AudioChannelSet::Stereo:
case AudioChannelSet::Quad:
m_coefs[int(AudioChannel::FrontLeft)][0] = 1.0;
m_coefs[int(AudioChannel::FrontRight)][1] = 1.0;
m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.0;
m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.0;
break;
case AudioChannelSet::Surround51:
case AudioChannelSet::Surround71:
m_coefs[int(AudioChannel::FrontLeft)][0] = 1.0;
m_coefs[int(AudioChannel::FrontRight)][1] = 1.0;
m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.0;
m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.0;
break;
default: break;
}
@ -180,8 +180,8 @@ int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut +
*dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
*dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
*dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
*dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
++dataOut;
}
}
@ -196,8 +196,8 @@ int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut +
dataIn[0] * m_coefs[int(ch)][0] +
dataIn[1] * m_coefs[int(ch)][1]);
dataIn[0] * m_coefs.v[int(ch)][0] +
dataIn[1] * m_coefs.v[int(ch)][1]);
++dataOut;
}
}
@ -223,8 +223,8 @@ int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut +
*dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
*dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
*dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
*dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
++dataOut;
}
}
@ -239,8 +239,8 @@ int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& i
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut +
dataIn[0] * m_coefs[int(ch)][0] +
dataIn[1] * m_coefs[int(ch)][1]);
dataIn[0] * m_coefs.v[int(ch)][0] +
dataIn[1] * m_coefs.v[int(ch)][1]);
++dataOut;
}
}
@ -266,8 +266,8 @@ float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& inf
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut +
*dataIn * (m_coefs[int(ch)][0] * t + m_oldCoefs[int(ch)][0] * omt) +
*dataIn * (m_coefs[int(ch)][1] * t + m_oldCoefs[int(ch)][1] * omt));
*dataIn * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
*dataIn * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
++dataOut;
}
}
@ -282,8 +282,8 @@ float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& inf
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut +
dataIn[0] * m_coefs[int(ch)][0] +
dataIn[1] * m_coefs[int(ch)][1]);
dataIn[0] * m_coefs.v[int(ch)][0] +
dataIn[1] * m_coefs.v[int(ch)][1]);
++dataOut;
}
}

View File

@ -6,6 +6,10 @@
#include <stdint.h>
#include <limits.h>
#if __SSE__
#include <xmmintrin.h>
#endif
namespace boo
{
struct AudioVoiceEngineMixInfo;
@ -39,8 +43,16 @@ static inline float ClampFlt(float in)
class AudioMatrixMono
{
float m_coefs[8] = {};
float m_oldCoefs[8] = {};
union Coefs
{
float v[8];
#if __SSE__
__m128 q[2];
__m64 d[4];
#endif
};
Coefs m_coefs = {};
Coefs m_oldCoefs = {};
size_t m_slewFrames = 0;
size_t m_curSlewFrame = 0;
public:
@ -51,11 +63,18 @@ public:
{
m_slewFrames = slewFrames;
m_curSlewFrame = 0;
#if __SSE__
m_oldCoefs.q[0] = m_coefs.q[0];
m_oldCoefs.q[1] = m_coefs.q[1];
m_coefs.q[0] = _mm_loadu_ps(coefs);
m_coefs.q[1] = _mm_loadu_ps(&coefs[4]);
#else
for (int i=0 ; i<8 ; ++i)
{
m_oldCoefs[i] = m_coefs[i];
m_coefs[i] = coefs[i];
m_oldCoefs.v[i] = m_coefs.v[i];
m_coefs.v[i] = coefs[i];
}
#endif
}
int16_t* mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
@ -68,8 +87,16 @@ public:
class AudioMatrixStereo
{
float m_coefs[8][2] = {};
float m_oldCoefs[8][2] = {};
union Coefs
{
float v[8][2];
#if __SSE__
__m128 q[4];
__m64 d[8];
#endif
};
Coefs m_coefs = {};
Coefs m_oldCoefs = {};
size_t m_slewFrames = 0;
size_t m_curSlewFrame = 0;
public:
@ -80,13 +107,24 @@ public:
{
m_slewFrames = slewFrames;
m_curSlewFrame = 0;
#if __SSE__
m_oldCoefs.q[0] = m_coefs.q[0];
m_oldCoefs.q[1] = m_coefs.q[1];
m_oldCoefs.q[2] = m_coefs.q[2];
m_oldCoefs.q[3] = m_coefs.q[3];
m_coefs.q[0] = _mm_loadu_ps(coefs[0]);
m_coefs.q[1] = _mm_loadu_ps(coefs[2]);
m_coefs.q[2] = _mm_loadu_ps(coefs[4]);
m_coefs.q[3] = _mm_loadu_ps(coefs[6]);
#else
for (int i=0 ; i<8 ; ++i)
{
m_oldCoefs[i][0] = m_coefs[i][0];
m_oldCoefs[i][1] = m_coefs[i][1];
m_coefs[i][0] = coefs[i][0];
m_coefs[i][1] = coefs[i][1];
m_oldCoefs.v[i][0] = m_coefs.v[i][0];
m_oldCoefs.v[i][1] = m_coefs.v[i][1];
m_coefs.v[i][0] = coefs.v[i][0];
m_coefs.v[i][1] = coefs.v[i][1];
}
#endif
}
int16_t* mixStereoSampleData(const AudioVoiceEngineMixInfo& info,

View File

@ -0,0 +1,490 @@
#include "AudioMatrix.hpp"
#include "AudioVoiceEngine.hpp"
#include <string.h>
#include <immintrin.h>
namespace boo
{
/**
 * Four packed floats, addressable lane-by-lane through `v` or, when SSE is
 * available, as one 128-bit register (`q`) or two 64-bit halves (`d`).
 */
union TVectorUnion
{
    float v[4];
#if __SSE__
    __m128 q;
    __m64 d[2];
#endif
};

/* All-zero vector. */
static constexpr TVectorUnion ZeroVec = {};

/* Clamp bounds for 16-bit integer mixing (both exactly representable as float). */
static constexpr TVectorUnion Min16Vec = {{-32768.f, -32768.f, -32768.f, -32768.f}};
static constexpr TVectorUnion Max16Vec = {{32767.f, 32767.f, 32767.f, 32767.f}};

/* Clamp bounds for 32-bit integer mixing.
 * INT32_MAX has no exact float representation; converting it rounds up to
 * 2^31, which is outside the int32 range, so a value clamped to it cannot be
 * converted back to int32 correctly. The upper bound is therefore the
 * largest float strictly below 2^31 (2^31 - 128). INT32_MIN (-2^31) is exact. */
static constexpr TVectorUnion Min32Vec = {{-2147483648.f, -2147483648.f, -2147483648.f, -2147483648.f}};
static constexpr TVectorUnion Max32Vec = {{2147483520.f, 2147483520.f, 2147483520.f, 2147483520.f}};

/* Clamp bounds for normalized floating-point mixing. */
static constexpr TVectorUnion MinFltVec = {{-1.f, -1.f, -1.f, -1.f}};
static constexpr TVectorUnion MaxFltVec = {{1.f, 1.f, 1.f, 1.f}};
/**
 * Reset the mono matrix to its default routing for a channel set.
 *
 * Cancels any slew in progress and zeroes all eight current coefficients,
 * then applies unity gain to FrontLeft and FrontRight (Stereo, Quad) or to
 * FrontCenter (Surround51, Surround71). Any other channel set leaves every
 * coefficient at zero. The previous-coefficient set is not modified.
 */
void AudioMatrixMono::setDefaultMatrixCoefficients(AudioChannelSet acSet)
{
    m_slewFrames = 0;
    m_curSlewFrame = 0;

    /* Clear both four-float halves of the coefficient vector. */
    for (__m128& quad : m_coefs.q)
        quad = _mm_setzero_ps();

    const bool frontPair = acSet == AudioChannelSet::Stereo ||
                           acSet == AudioChannelSet::Quad;
    const bool centerOnly = acSet == AudioChannelSet::Surround51 ||
                            acSet == AudioChannelSet::Surround71;

    if (frontPair)
    {
        m_coefs.v[int(AudioChannel::FrontLeft)] = 1.f;
        m_coefs.v[int(AudioChannel::FrontRight)] = 1.f;
    }
    else if (centerOnly)
    {
        m_coefs.v[int(AudioChannel::FrontCenter)] = 1.f;
    }
}
int16_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
const int16_t* dataIn, int16_t* dataOut, size_t samples)
{
const ChannelMap& chmap = info.m_channelMap;
for (size_t s=0 ; s<samples ; ++s, ++dataIn)
{
if (m_slewFrames && m_curSlewFrame < m_slewFrames)
{
double t = m_curSlewFrame / double(m_slewFrames);
double omt = 1.0 - t;
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
++m_curSlewFrame;
}
else
{
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp16(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
}
}
return dataOut;
}
int32_t* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
const int32_t* dataIn, int32_t* dataOut, size_t samples)
{
const ChannelMap& chmap = info.m_channelMap;
for (size_t s=0 ; s<samples ; ++s, ++dataIn)
{
if (m_slewFrames && m_curSlewFrame < m_slewFrames)
{
float t = m_curSlewFrame / float(m_slewFrames);
float omt = 1.f - t;
switch (chmap.m_channelCount)
{
case 2:
{
++m_curSlewFrame;
float t2 = m_curSlewFrame / float(m_slewFrames);
float omt2 = 1.f - t2;
TVectorUnion coefs, samps;
coefs.q = _mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m_coefs.q[0], m_coefs.q[0], _MM_SHUFFLE(1, 0, 1, 0)),
_mm_set_ps(t, t, t2, t2)),
_mm_mul_ps(_mm_shuffle_ps(m_oldCoefs.q[0], m_oldCoefs.q[0], _MM_SHUFFLE(1, 0, 1, 0)),
_mm_set_ps(omt, omt, omt2, omt2)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
samps.q = _mm_shuffle_ps(samps.q, samps.q, _MM_SHUFFLE(1, 0, 1, 0));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
++s;
++dataIn;
break;
}
case 4:
{
TVectorUnion coefs, samps;
coefs.q = _mm_add_ps(_mm_mul_ps(m_coefs.q[0], _mm_set1_ps(t)),
_mm_mul_ps(m_oldCoefs.q[0], _mm_set1_ps(omt)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
break;
}
case 6:
{
TVectorUnion coefs, samps;
coefs.q = _mm_add_ps(_mm_mul_ps(m_coefs.q[0], _mm_set1_ps(t)),
_mm_mul_ps(m_oldCoefs.q[0], _mm_set1_ps(omt)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
coefs.q = _mm_add_ps(_mm_mul_ps(m_coefs.q[1], _mm_set1_ps(t)),
_mm_mul_ps(m_oldCoefs.q[1], _mm_set1_ps(omt)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
out = reinterpret_cast<__m128i*>(dataOut);
__m128i loadOut = _mm_loadu_si128(out);
pre = _mm_add_ps(_mm_cvtepi32_ps(loadOut), _mm_mul_ps(coefs.q, samps.q));
_mm_storel_epi64(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 2;
break;
}
case 8:
{
TVectorUnion coefs, samps;
coefs.q = _mm_add_ps(_mm_mul_ps(m_coefs.q[0], _mm_set1_ps(t)),
_mm_mul_ps(m_oldCoefs.q[0], _mm_set1_ps(omt)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
coefs.q = _mm_add_ps(_mm_mul_ps(m_coefs.q[1], _mm_set1_ps(t)),
_mm_mul_ps(m_oldCoefs.q[1], _mm_set1_ps(omt)));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
out = reinterpret_cast<__m128i*>(dataOut);
pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
break;
}
default:
{
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
break;
}
}
++m_curSlewFrame;
}
else
{
switch (chmap.m_channelCount)
{
case 2:
{
TVectorUnion coefs, samps;
coefs.q = _mm_shuffle_ps(m_coefs.q[0], m_coefs.q[0], _MM_SHUFFLE(1, 0, 1, 0));
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
samps.q = _mm_shuffle_ps(samps.q, samps.q, _MM_SHUFFLE(1, 0, 1, 0));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128i huh2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(out));
__m128 huh3 = _mm_cvtepi32_ps(huh2);
__m128 pre = _mm_add_ps(huh3, _mm_mul_ps(coefs.q, samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
++s;
++dataIn;
break;
}
case 4:
{
TVectorUnion samps;
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(m_coefs.q[0], samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
break;
}
case 6:
{
TVectorUnion samps;
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(m_coefs.q[0], samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
out = reinterpret_cast<__m128i*>(dataOut);
__m128i loadOut = _mm_loadu_si128(out);
pre = _mm_add_ps(_mm_cvtepi32_ps(loadOut), _mm_mul_ps(m_coefs.q[1], samps.q));
_mm_storel_epi64(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 2;
break;
}
case 8:
{
TVectorUnion samps;
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
__m128i* out = reinterpret_cast<__m128i*>(dataOut);
__m128 pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(m_coefs.q[0], samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
samps.q = _mm_cvtepi32_ps(_mm_loadu_si128(reinterpret_cast<const __m128i*>(dataIn)));
out = reinterpret_cast<__m128i*>(dataOut);
pre = _mm_add_ps(_mm_cvtepi32_ps(_mm_loadu_si128(out)), _mm_mul_ps(m_coefs.q[1], samps.q));
_mm_storeu_si128(out, _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(pre, Min32Vec.q), Max32Vec.q)));
dataOut += 4;
break;
}
default:
{
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = Clamp32(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
break;
}
}
}
}
return dataOut;
}
float* AudioMatrixMono::mixMonoSampleData(const AudioVoiceEngineMixInfo& info,
const float* dataIn, float* dataOut, size_t samples)
{
const ChannelMap& chmap = info.m_channelMap;
for (size_t s=0 ; s<samples ; ++s, ++dataIn)
{
if (m_slewFrames && m_curSlewFrame < m_slewFrames)
{
double t = m_curSlewFrame / double(m_slewFrames);
double omt = 1.0 - t;
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut + *dataIn * (m_coefs.v[int(ch)] * t + m_oldCoefs.v[int(ch)] * omt));
++dataOut;
}
}
++m_curSlewFrame;
}
else
{
for (unsigned c=0 ; c<chmap.m_channelCount ; ++c)
{
AudioChannel ch = chmap.m_channels[c];
if (ch != AudioChannel::Unknown)
{
*dataOut = ClampFlt(*dataOut + *dataIn * m_coefs.v[int(ch)]);
++dataOut;
}
}
}
}
return dataOut;
}
/**
 * Reset the stereo matrix to its default routing for a channel set.
 *
 * Cancels any slew in progress and zeroes all sixteen current coefficients.
 * For Stereo, Quad, Surround51 and Surround71 the first input column is then
 * routed at unity gain to FrontLeft and the second to FrontRight; any other
 * channel set leaves every coefficient at zero. The previous-coefficient set
 * is not modified.
 */
void AudioMatrixStereo::setDefaultMatrixCoefficients(AudioChannelSet acSet)
{
    m_slewFrames = 0;
    m_curSlewFrame = 0;

    /* Clear all four four-float quarters of the coefficient matrix. */
    for (__m128& quad : m_coefs.q)
        quad = _mm_setzero_ps();

    switch (acSet)
    {
    case AudioChannelSet::Stereo:
    case AudioChannelSet::Quad:
    case AudioChannelSet::Surround51:
    case AudioChannelSet::Surround71:
        m_coefs.v[int(AudioChannel::FrontLeft)][0] = 1.f;
        m_coefs.v[int(AudioChannel::FrontRight)][1] = 1.f;
        break;
    default:
        break;
    }
}
/**
 * Mix interleaved two-sample 16-bit frames into an interleaved 16-bit output
 * buffer.
 *
 * For every input frame, each channel of the mix info's channel map that is
 * not AudioChannel::Unknown receives
 * `dataIn[0] * gain[ch][0] + dataIn[1] * gain[ch][1]` added to its existing
 * output value (clamped with Clamp16); Unknown channels are skipped without
 * advancing the output pointer. While a slew is active each gain is the
 * linear blend of the previous and current coefficients, and the slew
 * position advances by one per frame.
 *
 * @param info    Mix info supplying the output channel map.
 * @param dataIn  Input frames, two samples each.
 * @param dataOut Interleaved output, accumulated in place.
 * @param frames  Number of input frames to mix.
 * @return Output pointer advanced past the last value written.
 */
int16_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
                                                const int16_t* dataIn, int16_t* dataOut, size_t frames)
{
    const ChannelMap& chmap = info.m_channelMap;
    for (size_t f = 0; f < frames; ++f, dataIn += 2)
    {
        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
        {
            double t = m_curSlewFrame / double(m_slewFrames);
            double omt = 1.0 - t;
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    /* Each input sample pairs with its own coefficient column,
                     * exactly as in the steady-state branch below. */
                    *dataOut = Clamp16(*dataOut +
                                       dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
                                       dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
            ++m_curSlewFrame;
        }
        else
        {
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp16(*dataOut +
                                       dataIn[0] * m_coefs.v[int(ch)][0] +
                                       dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
        }
    }
    return dataOut;
}
/**
 * Mix interleaved two-sample 32-bit frames into an interleaved 32-bit output
 * buffer.
 *
 * For every input frame, each channel of the mix info's channel map that is
 * not AudioChannel::Unknown receives
 * `dataIn[0] * gain[ch][0] + dataIn[1] * gain[ch][1]` added to its existing
 * output value (clamped with Clamp32); Unknown channels are skipped without
 * advancing the output pointer. While a slew is active each gain is the
 * linear blend of the previous and current coefficients, and the slew
 * position advances by one per frame.
 *
 * @param info    Mix info supplying the output channel map.
 * @param dataIn  Input frames, two samples each.
 * @param dataOut Interleaved output, accumulated in place.
 * @param frames  Number of input frames to mix.
 * @return Output pointer advanced past the last value written.
 */
int32_t* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
                                                const int32_t* dataIn, int32_t* dataOut, size_t frames)
{
    const ChannelMap& chmap = info.m_channelMap;
    for (size_t f = 0; f < frames; ++f, dataIn += 2)
    {
        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
        {
            double t = m_curSlewFrame / double(m_slewFrames);
            double omt = 1.0 - t;
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    /* Each input sample pairs with its own coefficient column,
                     * exactly as in the steady-state branch below. */
                    *dataOut = Clamp32(*dataOut +
                                       dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
                                       dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
            ++m_curSlewFrame;
        }
        else
        {
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = Clamp32(*dataOut +
                                       dataIn[0] * m_coefs.v[int(ch)][0] +
                                       dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
        }
    }
    return dataOut;
}
/**
 * Mix interleaved two-sample float frames into an interleaved float output
 * buffer.
 *
 * For every input frame, each channel of the mix info's channel map that is
 * not AudioChannel::Unknown receives
 * `dataIn[0] * gain[ch][0] + dataIn[1] * gain[ch][1]` added to its existing
 * output value (clamped with ClampFlt); Unknown channels are skipped without
 * advancing the output pointer. While a slew is active each gain is the
 * linear blend of the previous and current coefficients, and the slew
 * position advances by one per frame.
 *
 * @param info    Mix info supplying the output channel map.
 * @param dataIn  Input frames, two samples each.
 * @param dataOut Interleaved output, accumulated in place.
 * @param frames  Number of input frames to mix.
 * @return Output pointer advanced past the last value written.
 */
float* AudioMatrixStereo::mixStereoSampleData(const AudioVoiceEngineMixInfo& info,
                                              const float* dataIn, float* dataOut, size_t frames)
{
    const ChannelMap& chmap = info.m_channelMap;
    for (size_t f = 0; f < frames; ++f, dataIn += 2)
    {
        if (m_slewFrames && m_curSlewFrame < m_slewFrames)
        {
            double t = m_curSlewFrame / double(m_slewFrames);
            double omt = 1.0 - t;
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    /* Each input sample pairs with its own coefficient column,
                     * exactly as in the steady-state branch below. */
                    *dataOut = ClampFlt(*dataOut +
                                        dataIn[0] * (m_coefs.v[int(ch)][0] * t + m_oldCoefs.v[int(ch)][0] * omt) +
                                        dataIn[1] * (m_coefs.v[int(ch)][1] * t + m_oldCoefs.v[int(ch)][1] * omt));
                    ++dataOut;
                }
            }
            ++m_curSlewFrame;
        }
        else
        {
            for (unsigned c = 0; c < chmap.m_channelCount; ++c)
            {
                AudioChannel ch = chmap.m_channels[c];
                if (ch != AudioChannel::Unknown)
                {
                    *dataOut = ClampFlt(*dataOut +
                                        dataIn[0] * m_coefs.v[int(ch)][0] +
                                        dataIn[1] * m_coefs.v[int(ch)][1]);
                    ++dataOut;
                }
            }
        }
    }
    return dataOut;
}
}