From 9724488da31f78b03b79d35957094bd766778d20 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Thu, 16 Jun 2016 20:01:36 -1000 Subject: [PATCH] Huge quality improvement with soxr by priming with 5ms of silence --- lib/audiodev/AudioVoice.cpp | 84 +++++++++++++++++++++++++++++++++++-- lib/audiodev/AudioVoice.hpp | 2 + lib/audiodev/WASAPI.cpp | 37 +++++++++------- 3 files changed, 104 insertions(+), 19 deletions(-) diff --git a/lib/audiodev/AudioVoice.cpp b/lib/audiodev/AudioVoice.cpp index 1b875fd..a3b976c 100644 --- a/lib/audiodev/AudioVoice.cpp +++ b/lib/audiodev/AudioVoice.cpp @@ -83,7 +83,8 @@ void AudioVoiceMono::_resetSampleRate(double sampleRate) soxr_delete(m_src); double rateOut = m_parent.mixInfo().m_sampleRate; - soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, m_parent.mixInfo().m_sampleFormat); + soxr_datatype_t formatOut = m_parent.mixInfo().m_sampleFormat; + soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, formatOut); soxr_quality_spec_t qSpec = soxr_quality_spec(SOXR_20_BITQ, m_dynamicRate ?
SOXR_VR : 0); soxr_error_t err; @@ -102,6 +103,37 @@ void AudioVoiceMono::_resetSampleRate(double sampleRate) soxr_set_input_fn(m_src, soxr_input_fn_t(SRCCallback), this, 0); _setPitchRatio(m_pitchRatio, false); m_resetSampleRate = false; + + m_silentOut = true; + switch (formatOut) + { + case SOXR_INT16_I: + { + std::vector& scratch16 = m_root.m_scratch16; + if (scratch16.size() < m_root.m_5msFrames) + scratch16.resize(m_root.m_5msFrames); + soxr_output(m_src, scratch16.data(), m_root.m_5msFrames); + break; + } + case SOXR_INT32_I: + { + std::vector& scratch32 = m_root.m_scratch32; + if (scratch32.size() < m_root.m_5msFrames) + scratch32.resize(m_root.m_5msFrames); + soxr_output(m_src, scratch32.data(), m_root.m_5msFrames); + break; + } + case SOXR_FLOAT32_I: + { + std::vector& scratchFlt = m_root.m_scratchFlt; + if (scratchFlt.size() < m_root.m_5msFrames) + scratchFlt.resize(m_root.m_5msFrames); + soxr_output(m_src, scratchFlt.data(), m_root.m_5msFrames); + break; + } + default: break; + } + m_silentOut = false; } size_t AudioVoiceMono::SRCCallback(AudioVoiceMono* ctx, int16_t** data, size_t frames) @@ -110,7 +142,13 @@ size_t AudioVoiceMono::SRCCallback(AudioVoiceMono* ctx, int16_t** data, size_t f if (scratchIn.size() < frames) scratchIn.resize(frames); *data = scratchIn.data(); - return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data()); + if (ctx->m_silentOut) + { + memset(*data, 0, frames * 2); + return frames; + } + else + return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data()); } size_t AudioVoiceMono::pumpAndMix(const AudioVoiceEngineMixInfo& mixInfo, @@ -234,7 +272,8 @@ void AudioVoiceStereo::_resetSampleRate(double sampleRate) soxr_delete(m_src); double rateOut = m_parent.mixInfo().m_sampleRate; - soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, m_parent.mixInfo().m_sampleFormat); + soxr_datatype_t formatOut = m_parent.mixInfo().m_sampleFormat; + soxr_io_spec_t ioSpec = soxr_io_spec(SOXR_INT16_I, formatOut); soxr_quality_spec_t qSpec = 
soxr_quality_spec(SOXR_20_BITQ, m_dynamicRate ? SOXR_VR : 0); soxr_error_t err; @@ -253,6 +292,37 @@ void AudioVoiceStereo::_resetSampleRate(double sampleRate) soxr_set_input_fn(m_src, soxr_input_fn_t(SRCCallback), this, 0); _setPitchRatio(m_pitchRatio, false); m_resetSampleRate = false; + + m_silentOut = true; + switch (formatOut) + { + case SOXR_INT16_I: + { + std::vector& scratch16 = m_root.m_scratch16; + if (scratch16.size() < m_root.m_5msFrames * 2) + scratch16.resize(m_root.m_5msFrames * 2); + soxr_output(m_src, scratch16.data(), m_root.m_5msFrames); + break; + } + case SOXR_INT32_I: + { + std::vector& scratch32 = m_root.m_scratch32; + if (scratch32.size() < m_root.m_5msFrames * 2) + scratch32.resize(m_root.m_5msFrames * 2); + soxr_output(m_src, scratch32.data(), m_root.m_5msFrames); + break; + } + case SOXR_FLOAT32_I: + { + std::vector& scratchFlt = m_root.m_scratchFlt; + if (scratchFlt.size() < m_root.m_5msFrames * 2) + scratchFlt.resize(m_root.m_5msFrames * 2); + soxr_output(m_src, scratchFlt.data(), m_root.m_5msFrames); + break; + } + default: break; + } + m_silentOut = false; } size_t AudioVoiceStereo::SRCCallback(AudioVoiceStereo* ctx, int16_t** data, size_t frames) @@ -262,7 +332,13 @@ size_t AudioVoiceStereo::SRCCallback(AudioVoiceStereo* ctx, int16_t** data, size if (scratchIn.size() < samples) scratchIn.resize(samples); *data = scratchIn.data(); - return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data()); + if (ctx->m_silentOut) + { + memset(*data, 0, samples * 2); + return frames; + } + else + return ctx->m_cb->supplyAudio(*ctx, frames, scratchIn.data()); } size_t AudioVoiceStereo::pumpAndMix(const AudioVoiceEngineMixInfo& mixInfo, diff --git a/lib/audiodev/AudioVoice.hpp b/lib/audiodev/AudioVoice.hpp index 82e1232..16a5f8b 100644 --- a/lib/audiodev/AudioVoice.hpp +++ b/lib/audiodev/AudioVoice.hpp @@ -81,6 +81,7 @@ class AudioVoiceMono : public AudioVoice { AudioMatrixMono m_matrix; AudioMatrixMono m_subMatrix; + bool m_silentOut = false; void 
_resetSampleRate(double sampleRate); static size_t SRCCallback(AudioVoiceMono* ctx, @@ -104,6 +105,7 @@ class AudioVoiceStereo : public AudioVoice { AudioMatrixStereo m_matrix; AudioMatrixStereo m_subMatrix; + bool m_silentOut = false; void _resetSampleRate(double sampleRate); static size_t SRCCallback(AudioVoiceStereo* ctx, diff --git a/lib/audiodev/WASAPI.cpp b/lib/audiodev/WASAPI.cpp index 3f9e79e..2b9a319 100644 --- a/lib/audiodev/WASAPI.cpp +++ b/lib/audiodev/WASAPI.cpp @@ -28,6 +28,9 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine ComPtr m_audClient; ComPtr m_renderClient; + size_t m_curBufFrame = 0; + std::vector m_5msBuffer; + struct NotificationClient : public IMMNotificationClient { WASAPIAudioVoiceEngine& m_parent; @@ -229,6 +232,8 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine } m_mixInfo.m_sampleRate = pwfx->Format.nSamplesPerSec; m_5msFrames = (m_mixInfo.m_sampleRate * 5 / 500 + 1) / 2; + m_curBufFrame = m_5msFrames; + m_5msBuffer.resize(m_5msFrames * chMapOut.m_channelCount); if (pwfx->Format.wFormatTag == WAVE_FORMAT_PCM || (pwfx->Format.wFormatTag == WAVE_FORMAT_EXTENSIBLE && pwfx->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)) @@ -373,24 +378,26 @@ struct WASAPIAudioVoiceEngine : BaseAudioVoiceEngine continue; } - DWORD flags = 0; - switch (m_mixInfo.m_sampleFormat) + for (size_t f=0 ; f(bufOut)); - break; - case SOXR_INT32_I: - _pumpAndMixVoices(frames, reinterpret_cast(bufOut)); - break; - case SOXR_FLOAT32_I: - _pumpAndMixVoices(frames, reinterpret_cast(bufOut)); - break; - default: - flags = AUDCLNT_BUFFERFLAGS_SILENT; - break; + if (m_curBufFrame == m_5msFrames) + { + _pumpAndMixVoices(m_5msFrames, m_5msBuffer.data()); + m_curBufFrame = 0; + } + + size_t remRenderFrames = std::min(frames - f, m_5msFrames - m_curBufFrame); + if (remRenderFrames) + { + memmove(reinterpret_cast(bufOut) + m_mixInfo.m_channelMap.m_channelCount * f, + &m_5msBuffer[m_curBufFrame * m_mixInfo.m_channelMap.m_channelCount], + remRenderFrames * 
m_mixInfo.m_channelMap.m_channelCount * sizeof(float)); + m_curBufFrame += remRenderFrames; + f += remRenderFrames; + } } - res = m_renderClient->ReleaseBuffer(frames, flags); + res = m_renderClient->ReleaseBuffer(frames, 0); if (FAILED(res)) { m_rebuild = true;