Huge quality improvement with soxr by priming with 5ms of silence

This commit is contained in:
Jack Andersen 2016-06-16 20:01:36 -10:00
parent bb7728129e
commit 9724488da3
3 changed files with 104 additions and 19 deletions

View File

@ -83,7 +83,8 @@ void AudioVoiceMono::_resetSampleRate(double sampleRate)
soxr_delete(m_src);
double rateOut = m_parent.mixInfo().m_sampleRate;
soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, m_parent.mixInfo().m_sampleFormat);
soxr_datatype_t formatOut = m_parent.mixInfo().m_sampleFormat;
soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, formatOut);
soxr_quality_spec_t qSpec = soxr_quality_spec(SOXR_20_BITQ, m_dynamicRate ? SOXR_VR : 0);
soxr_error_t err;
@ -102,6 +103,37 @@ void AudioVoiceMono::_resetSampleRate(double sampleRate)
soxr_set_input_fn(m_src, soxr_input_fn_t(SRCCallback), this, 0);
_setPitchRatio(m_pitchRatio, false);
m_resetSampleRate = false;
m_silentOut = true;
switch (formatOut)
{
case SOXR_INT16_I:
{
std::vector<int16_t>& scratch16 = m_root.m_scratch16;
if (scratch16.size() < m_root.m_5msFrames)
scratch16.resize(m_root.m_5msFrames);
soxr_output(m_src, scratch16.data(), m_root.m_5msFrames);
break;
}
case SOXR_INT32_I:
{
std::vector<int32_t>& scratch32 = m_root.m_scratch32;
if (scratch32.size() < m_root.m_5msFrames)
scratch32.resize(m_root.m_5msFrames);
soxr_output(m_src, scratch32.data(), m_root.m_5msFrames);
break;
}
case SOXR_FLOAT32_I:
{
std::vector<float>& scratchFlt = m_root.m_scratchFlt;
if (scratchFlt.size() < m_root.m_5msFrames)
scratchFlt.resize(m_root.m_5msFrames);
soxr_output(m_src, scratchFlt.data(), m_root.m_5msFrames);
break;
}
default: break;
}
m_silentOut = false;
}
size_t AudioVoiceMono::SRCCallback(AudioVoiceMono* ctx, int16_t** data, size_t frames)
@ -110,7 +142,13 @@ size_t AudioVoiceMono::SRCCallback(AudioVoiceMono* ctx, int16_t** data, size_t f
if (scratchIn.size() < frames)
scratchIn.resize(frames);
*data = scratchIn.data();
return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data());
if (ctx->m_silentOut)
{
memset(*data, 0, frames * 2);
return frames;
}
else
return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data());
}
size_t AudioVoiceMono::pumpAndMix(const AudioVoiceEngineMixInfo& mixInfo,
@ -234,7 +272,8 @@ void AudioVoiceStereo::_resetSampleRate(double sampleRate)
soxr_delete(m_src);
double rateOut = m_parent.mixInfo().m_sampleRate;
soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, m_parent.mixInfo().m_sampleFormat);
soxr_datatype_t formatOut = m_parent.mixInfo().m_sampleFormat;
soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, formatOut);
soxr_quality_spec_t qSpec = soxr_quality_spec(SOXR_20_BITQ, m_dynamicRate ? SOXR_VR : 0);
soxr_error_t err;
@ -253,6 +292,37 @@ void AudioVoiceStereo::_resetSampleRate(double sampleRate)
soxr_set_input_fn(m_src, soxr_input_fn_t(SRCCallback), this, 0);
_setPitchRatio(m_pitchRatio, false);
m_resetSampleRate = false;
m_silentOut = true;
switch (formatOut)
{
case SOXR_INT16_I:
{
std::vector<int16_t>& scratch16 = m_root.m_scratch16;
if (scratch16.size() < m_root.m_5msFrames * 2)
scratch16.resize(m_root.m_5msFrames * 2);
soxr_output(m_src, scratch16.data(), m_root.m_5msFrames);
break;
}
case SOXR_INT32_I:
{
std::vector<int32_t>& scratch32 = m_root.m_scratch32;
if (scratch32.size() < m_root.m_5msFrames * 2)
scratch32.resize(m_root.m_5msFrames * 2);
soxr_output(m_src, scratch32.data(), m_root.m_5msFrames);
break;
}
case SOXR_FLOAT32_I:
{
std::vector<float>& scratchFlt = m_root.m_scratchFlt;
if (scratchFlt.size() < m_root.m_5msFrames * 2)
scratchFlt.resize(m_root.m_5msFrames * 2);
soxr_output(m_src, scratchFlt.data(), m_root.m_5msFrames);
break;
}
default: break;
}
m_silentOut = false;
}
size_t AudioVoiceStereo::SRCCallback(AudioVoiceStereo* ctx, int16_t** data, size_t frames)
@ -262,7 +332,13 @@ size_t AudioVoiceStereo::SRCCallback(AudioVoiceStereo* ctx, int16_t** data, size
if (scratchIn.size() < samples)
scratchIn.resize(samples);
*data = scratchIn.data();
return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data());
if (ctx->m_silentOut)
{
memset(*data, 0, samples * 2);
return frames;
}
else
return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data());
}
size_t AudioVoiceStereo::pumpAndMix(const AudioVoiceEngineMixInfo& mixInfo,

View File

@ -81,6 +81,7 @@ class AudioVoiceMono : public AudioVoice
{
AudioMatrixMono m_matrix;
AudioMatrixMono m_subMatrix;
bool m_silentOut = false;
void _resetSampleRate(double sampleRate);
static size_t SRCCallback(AudioVoiceMono* ctx,
@ -104,6 +105,7 @@ class AudioVoiceStereo : public AudioVoice
{
AudioMatrixStereo m_matrix;
AudioMatrixStereo m_subMatrix;
bool m_silentOut = false;
void _resetSampleRate(double sampleRate);
static size_t SRCCallback(AudioVoiceStereo* ctx,

View File

@ -28,6 +28,9 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine
ComPtr<IAudioClient> m_audClient;
ComPtr<IAudioRenderClient> m_renderClient;
size_t m_curBufFrame = 0;
std::vector<float> m_5msBuffer;
struct NotificationClient : public IMMNotificationClient
{
WASAPIAudioVoiceEngine& m_parent;
@ -229,6 +232,8 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine
}
m_mixInfo.m_sampleRate = pwfx->Format.nSamplesPerSec;
m_5msFrames = (m_mixInfo.m_sampleRate * 5 / 500 + 1) / 2;
m_curBufFrame = m_5msFrames;
m_5msBuffer.resize(m_5msFrames * chMapOut.m_channelCount);
if (pwfx->Format.wFormatTag == WAVE_FORMAT_PCM ||
(pwfx->Format.wFormatTag == WAVE_FORMAT_EXTENSIBLE && pwfx->SubFormat == KSDATAFORMAT_SUBTYPE_PCM))
@ -373,24 +378,26 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine
continue;
}
DWORD flags = 0;
switch (m_mixInfo.m_sampleFormat)
for (size_t f=0 ; f<frames ;)
{
case SOXR_INT16_I:
_pumpAndMixVoices(frames, reinterpret_cast<int16_t*>(bufOut));
break;
case SOXR_INT32_I:
_pumpAndMixVoices(frames, reinterpret_cast<int32_t*>(bufOut));
break;
case SOXR_FLOAT32_I:
_pumpAndMixVoices(frames, reinterpret_cast<float*>(bufOut));
break;
default:
flags = AUDCLNT_BUFFERFLAGS_SILENT;
break;
if (m_curBufFrame == m_5msFrames)
{
_pumpAndMixVoices(m_5msFrames, m_5msBuffer.data());
m_curBufFrame = 0;
}
size_t remRenderFrames = std::min(frames - f, m_5msFrames - m_curBufFrame);
if (remRenderFrames)
{
memmove(reinterpret_cast<float*>(bufOut) + m_mixInfo.m_channelMap.m_channelCount * f,
&m_5msBuffer[m_curBufFrame * m_mixInfo.m_channelMap.m_channelCount],
remRenderFrames * m_mixInfo.m_channelMap.m_channelCount * sizeof(float));
m_curBufFrame += remRenderFrames;
f += remRenderFrames;
}
}
res = m_renderClient->ReleaseBuffer(frames, flags);
res = m_renderClient->ReleaseBuffer(frames, 0);
if (FAILED(res))
{
m_rebuild = true;