#include "amuse/DSPCodec.hpp"
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#if __SWITCH__
#include "switch_math.hpp"
#endif

#undef min
#undef max

#pragma mark Decoder

/* Sign-extension table: maps a raw 4-bit nibble to its two's-complement value */
static const int32_t NibbleToInt[16] = {0,1,2,3,4,5,6,7,-8,-7,-6,-5,-4,-3,-2,-1};

namespace
{

/* Samples carried by one frame (1 header byte followed by 7 bytes of nibbles) */
const unsigned SamplesPerFrame = 14;

/* Per-frame decode context shared by every DSPDecompressFrame* variant.
 *
 * Unpacks the header byte once and then produces samples one at a time,
 * advancing the caller's two-sample prediction history as it goes.
 *
 * NOTE(review): the per-sample loop bound and nibble fetch in this section
 * were restored from damaged text; confirm the nibble order (even sample =
 * high nibble) and the exclusive `lastSample` bound against a known-good
 * copy of this file. */
class FrameDecoder
{
    const uint8_t* m_in;
    int32_t m_factor1;
    int32_t m_factor2;
    unsigned m_scaleExp;
    int16_t* m_prev1;
    int16_t* m_prev2;

public:
    FrameDecoder(const uint8_t* in, const int16_t coefs[8][2],
                 int16_t* prev1, int16_t* prev2)
    : m_in(in), m_scaleExp(in[0] & 0xf), m_prev1(prev1), m_prev2(prev2)
    {
        /* The coefficient table only has 8 rows; keep 3 bits so a malformed
         * header byte cannot index past the end of `coefs` */
        const unsigned cIdx = (in[0] >> 4) & 0x7;
        m_factor1 = coefs[cIdx][0];
        m_factor2 = coefs[cIdx][1];
    }

    /* Decode sample `s` of the frame, update *prev1 / *prev2, return the sample */
    int16_t decode(unsigned s) const
    {
        const uint8_t packed = m_in[s / 2 + 1];
        const int32_t nibble = (s & 1) ? NibbleToInt[packed & 0xf]
                                       : NibbleToInt[(packed >> 4) & 0xf];

        /* Multiply instead of left-shifting: the nibble may be negative and
         * shifting a negative value left is undefined before C++20.
         * Worst case is 8 * 2^15 * 2^11 = 2^29, which fits in int32_t. */
        int32_t sampleData = nibble * (int32_t(1) << m_scaleExp);
        sampleData *= 2048;
        sampleData += 1024; /* rounding term for the >>11 below */
        sampleData += m_factor1 * static_cast<int32_t>(*m_prev1) +
                      m_factor2 * static_cast<int32_t>(*m_prev2);
        sampleData >>= 11;
        sampleData = DSPSampClamp(sampleData);

        const int16_t sample = static_cast<int16_t>(sampleData);
        *m_prev2 = *m_prev1;
        *m_prev1 = sample;
        return sample;
    }
};

}

/* Decode one frame into contiguous mono output.
 *
 * out        - receives one int16 per decoded sample, starting at out[0]
 * in         - frame data: header byte followed by packed nibbles
 * coefs      - 8 coefficient pairs selected by the header byte
 * prev1/2    - prediction history (most recent / one before); updated in place
 * lastSample - decoding stops before this sample index (and before 14)
 *
 * Returns the number of samples decoded. */
unsigned DSPDecompressFrame(int16_t* out, const uint8_t* in,
                            const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                            unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=0 ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        out[s] = frame.decode(s);
        ++ret;
    }
    return ret;
}

/* Same as DSPDecompressFrame, but writes every other output slot
 * (out[0], out[2], ...), leaving the interleaved slots untouched.
 *
 * Returns the number of samples decoded. */
unsigned DSPDecompressFrameStereoStride(int16_t* out, const uint8_t* in,
                                        const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                                        unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=0 ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        out[s*2] = frame.decode(s);
        ++ret;
    }
    return ret;
}

/* Same as DSPDecompressFrame, but writes each decoded sample twice
 * (out[2s] and out[2s+1]).
 *
 * Returns the number of samples decoded (not the number of slots written). */
unsigned DSPDecompressFrameStereoDupe(int16_t* out, const uint8_t* in,
                                      const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                                      unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=0 ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        const int16_t sample = frame.decode(s);
        out[s*2] = sample;
        out[s*2+1] = sample;
        ++ret;
    }
    return ret;
}

/* Decode the sample range starting at `firstSample` of one frame.
 *
 * Output is packed from out[0] regardless of `firstSample`. The samples
 * before `firstSample` are not decoded, so *prev1 / *prev2 must already hold
 * the history that precedes `firstSample`.
 *
 * Returns the number of samples decoded. */
unsigned DSPDecompressFrameRanged(int16_t* out, const uint8_t* in,
                                  const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                                  unsigned firstSample, unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=firstSample ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        *out++ = frame.decode(s);
        ++ret;
    }
    return ret;
}

/* Run the decoder over one frame purely to advance *prev1 / *prev2;
 * no samples are stored.
 *
 * Returns the number of samples stepped over. */
unsigned DSPDecompressFrameStateOnly(const uint8_t* in,
                                     const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                                     unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=0 ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        frame.decode(s);
        ++ret;
    }
    return ret;
}

/* Ranged counterpart of DSPDecompressFrameStateOnly: advances
 * *prev1 / *prev2 across the sample range starting at `firstSample`
 * without storing any output.
 *
 * Returns the number of samples stepped over. */
unsigned DSPDecompressFrameRangedStateOnly(const uint8_t* in,
                                           const int16_t coefs[8][2], int16_t* prev1, int16_t* prev2,
                                           unsigned firstSample, unsigned lastSample)
{
    const FrameDecoder frame(in, coefs, prev1, prev2);
    unsigned ret = 0;
    for (unsigned s=firstSample ; s<SamplesPerFrame && s<lastSample ; ++s)
    {
        frame.decode(s);
        ++ret;
    }
    return ret;
}

#pragma mark Encoder

/* Reference:
 *
https://code.google.com/p/brawltools/source/browse/trunk/BrawlLib/Wii/Audio/AudioConverter.cs */ /* Temporal Vector * A contiguous history of 3 samples starting with * 'current' and going 2 backwards */ typedef double tvec[3]; static inline void InnerProductMerge(tvec vecOut, short pcmBuf[14]) { for (int i=0 ; i<=2 ; i++) { vecOut[i] = 0.0f; for (int x=0 ; x<14 ; x++) vecOut[i] -= pcmBuf[x-i] * pcmBuf[x]; } } static inline void OuterProductMerge(tvec mtxOut[3], short pcmBuf[14]) { for (int x=1 ; x<=2 ; x++) for (int y=1 ; y<=2 ; y++) { mtxOut[x][y] = 0.0; for (int z=0 ; z<14 ; z++) mtxOut[x][y] += pcmBuf[z-x] * pcmBuf[z-y]; } } static bool AnalyzeRanges(tvec mtx[3], int* vecIdxsOut) { double recips[3]; double val, tmp, min, max; /* Get greatest distance from zero */ for (int x=1 ; x<=2 ; x++) { val = std::max(fabs(mtx[x][1]), fabs(mtx[x][2])); if (val < DBL_EPSILON) return true; recips[x] = 1.0 / val; } int maxIndex = 0; for (int i=1 ; i<=2 ; i++) { for (int x=1 ; x= val) { val = tmp; maxIndex = x; } } if (maxIndex != i) { for (int y=1 ; y<=2 ; y++) { tmp = mtx[maxIndex][y]; mtx[maxIndex][y] = mtx[i][y]; mtx[i][y] = tmp; } recips[maxIndex] = recips[i]; } vecIdxsOut[i] = maxIndex; if (mtx[i][i] == 0.0) return true; if (i != 2) { tmp = 1.0 / mtx[i][i]; for (int x=i+1 ; x<=2 ; x++) mtx[x][i] *= tmp; } } /* Get range */ min = 1.0e10; max = 0.0; for (int i=1 ; i<=2 ; i++) { tmp = fabs(mtx[i][i]); if (tmp < min) min = tmp; if (tmp > max) max = tmp; } if (min / max < 1.0e-10) return true; return false; } static void BidirectionalFilter(tvec mtx[3], int* vecIdxs, tvec vecOut) { double tmp; for (int i=1, x=0 ; i<=2 ; i++) { int index = vecIdxs[i]; tmp = vecOut[index]; vecOut[index] = vecOut[i]; if (x != 0) for (int y=x ; y<=i-1 ; y++) tmp -= vecOut[y] * mtx[i][y]; else if (tmp != 0.0) x = i; vecOut[i] = tmp; } for (int i=2 ; i>0 ; i--) { tmp = vecOut[i]; for (int y=i+1 ; y<=2 ; y++) tmp -= vecOut[y] * mtx[i][y]; vecOut[i] = tmp / mtx[i][i]; } vecOut[0] = 1.0; } static bool 
QuadraticMerge(tvec inOutVec) { double v0, v1, v2 = inOutVec[2]; double tmp = 1.0 - (v2 * v2); if (tmp == 0.0) return true; v0 = (inOutVec[0] - (v2 * v2)) / tmp; v1 = (inOutVec[1] - (inOutVec[1] * v2)) / tmp; inOutVec[0] = v0; inOutVec[1] = v1; return fabs(v1) > 1.0; } static void FinishRecord(tvec in, tvec out) { for (int z=1 ; z<=2 ; z++) { if (in[z] >= 1.0) in[z] = 0.9999999999; else if (in[z] <= -1.0) in[z] = -0.9999999999; } out[0] = 1.0; out[1] = (in[2] * in[1]) + in[1]; out[2] = in[2]; } static void MatrixFilter(tvec src, tvec dst) { tvec mtx[3]; mtx[2][0] = 1.0; for (int i=1 ; i<=2 ; i++) mtx[2][i] = -src[i]; for (int i=2 ; i>0 ; i--) { double val = 1.0 - (mtx[i][i] * mtx[i][i]); for (int y = 1; y <= i; y++) mtx[i-1][y] = ((mtx[i][i] * mtx[i][y]) + mtx[i][y]) / val; } dst[0] = 1.0; for (int i=1 ; i<=2 ; i++) { dst[i] = 0.0; for (int y=1 ; y<=i ; y++) dst[i] += mtx[i][y] * dst[i-y]; } } static void MergeFinishRecord(tvec src, tvec dst) { tvec tmp; double val = src[0]; dst[0] = 1.0; for (int i=1 ; i<=2 ; i++) { double v2 = 0.0; for (int y=1 ; y 0.0) dst[i] = -(v2 + src[i]) / val; else dst[i] = 0.0; tmp[i] = dst[i]; for (int y=1 ; y 0) for (int y=0 ; y<=2 ; y++) bufferList[i][y] /= buffer1[i]; for (int i=0 ; i0 ;) { if (x > 0x3800) /* Full 1024-block frame */ { frameSamples = 0x3800; x -= 0x3800; } else /* Partial frame */ { /* Zero lingering block samples */ frameSamples = x; for (int z=0 ; z<14 && z+frameSamples<0x3800 ; z++) blockBuffer[frameSamples+z] = 0; x = 0; } /* Copy (potentially non-frame-aligned PCM samples into aligned buffer) */ memcpy(blockBuffer, source, frameSamples * sizeof(short)); source += frameSamples; for (int i=0 ; i 10.0) { OuterProductMerge(mtx, pcmHistBuffer[1]); if (!AnalyzeRanges(mtx, vecIdxs)) { BidirectionalFilter(mtx, vecIdxs, vec1); if (!QuadraticMerge(vec1)) { FinishRecord(vec1, records[recordCount]); recordCount++; } } } } } vec1[0] = 1.0; vec1[1] = 0.0; vec1[2] = 0.0; for (int z=0 ; z 0.0) coefsOut[z][0] = (d > 32767.0) ? 
(short)32767 : (short)lround(d); else coefsOut[z][0] = (d < -32768.0) ? (short)-32768 : (short)lround(d); d = -vecBest[z][2] * 2048.0; if (d > 0.0) coefsOut[z][1] = (d > 32767.0) ? (short)32767 : (short)lround(d); else coefsOut[z][1] = (d < -32768.0) ? (short)-32768 : (short)lround(d); } /* Free memory */ free(records); free(blockBuffer); } /* Make sure source includes the yn values (16 samples total) */ void DSPEncodeFrame(short pcmInOut[16], int sampleCount, unsigned char adpcmOut[8], const short coefsIn[8][2]) { int inSamples[8][16]; int outSamples[8][14]; int bestIndex = 0; int scale[8]; double distAccum[8]; /* Iterate through each coef set, finding the set with the smallest error */ for (int i=0 ; i<8 ; i++) { int v1, v2, v3; int distance, index; /* Set yn values */ inSamples[i][0] = pcmInOut[0]; inSamples[i][1] = pcmInOut[1]; /* Round and clamp samples for this coef set */ distance = 0; for (int s=0 ; s= 32767) ? 32767 : (v2 <= -32768) ? -32768 : v2; /* Compare distance */ if (abs(v3) > abs(distance)) distance = v3; } /* Set initial scale */ for (scale[i]=0; (scale[i]<=12) && ((distance>7) || (distance<-8)); scale[i]++, distance/=2) {} scale[i] = (scale[i] <= 1) ? -1 : scale[i] - 2; do { scale[i]++; distAccum[i] = 0; index = 0; for (int s=0 ; s 0) ? (int)((double)v2 / (1 << scale[i]) + 0.4999999f) : (int)((double)v2 / (1 << scale[i]) - 0.4999999f); /* Clamp sample and set index */ if (v3 < -8) { if (index < (v3 = -8 - v3)) index = v3; v3 = -8; } else if (v3 > 7) { if (index < (v3 -= 7)) index = v3; v3 = 7; } /* Store result */ outSamples[i][s] = v3; /* Round and expand */ v1 = (v1 + ((v3 * (1 << scale[i])) << 11) + 1024) >> 11; /* Clamp and store */ inSamples[i][s + 2] = v2 = (v1 >= 32767) ? 32767 : (v1 <= -32768) ? 
-32768 : v1; /* Accumulate distance */ v3 = pcmInOut[s + 2] - v2; distAccum[i] += v3 * (double)v3; } for (int x=index+8 ; x>256 ; x>>=1) if (++scale[i] >= 12) scale[i] = 11; } while ((scale[i] < 12) && (index > 1)); } double min = DBL_MAX; for (int i = 0; i < 8; i++) { if (distAccum[i] < min) { min = distAccum[i]; bestIndex = i; } } /* Write converted samples */ for (int s=0 ; s