diff --git a/app/src/main/cpp/radio.cpp b/app/src/main/cpp/radio.cpp index 667a41c..5bbe574 100644 --- a/app/src/main/cpp/radio.cpp +++ b/app/src/main/cpp/radio.cpp @@ -55,7 +55,7 @@ struct alignas(16) BiquadBank { // Optimized bulk processing for a single channel inline void processBlock(float* __restrict__ data, int count) { - if (!hasActiveBands()) return; + if (!this -> hasActiveBands()) return; for (int i = 0; i < count; i++) { float x = data[i]; @@ -103,6 +103,7 @@ struct alignas(16) BassFilter { alignas(16) float a0 = 1.2f, a1 = 1.2f, a2 = 1.2f, b1 = 0.0f, b2 = 0.0f; alignas(16) float z1 = 0.0f, z2 = 0.0f; bool active = false; + BiquadBank myBank; inline float process(float x) { if (!active) return x; @@ -116,18 +117,23 @@ struct alignas(16) BassFilter { inline void processNEON(float* __restrict__ data, int count) { #if defined(__ARM_NEON) - if (!active || count < 4) { for(int i=0;i1.2f) y=1.2f; - else if(y<-1.2f) y=-1.2f; - data[i] = y; + if (!active) return; + int i = 0; + for (; i <= count-4; i+=4) { + float32x4_t x = vld1q_f32(data + i); + for(int b=0;b envelope; - envelope = attackMode - ? attackCoef * envelope + (1.0f - attackCoef) * absInput - : releaseCoef * envelope + (1.0f - releaseCoef) * absInput; - - // Soft-knee compression - if (envelope > threshold) { - float gainReduction = threshold + (envelope - threshold) / ratio; - buffer[i] *= (gainReduction / (envelope + 1e-9f)); + const int blockSize = 32; + for(int b=0;bmaxVal) maxVal = absInput; } + bool attackMode = maxVal > envelope; + envelope = attackMode ? attackCoef*envelope + (1-attackCoef)*maxVal + : releaseCoef*envelope + (1-releaseCoef)*maxVal; + float gain = (envelope>threshold)? (threshold + (envelope-threshold)/ratio)/(envelope+1e-9f) : 1.0f; + for(int i=0;i(count)); - if (rms > 0.001f) { - float target = gTargetRMS / rms; - // Smooth gain transition (exponential moving average) - gCurrentGain = gCurrentGain * 0.99f + target * 0.01f; - gCurrentGain = fminf(gCurrentGain, 2.0f); - - // NEON vectorized gain application -#if defined(__ARM_NEON) - float32x4_t gVec = vdupq_n_f32(gCurrentGain); - int j = 0; - for (; j <= count - 4; j += 4) { - float32x4_t v = vld1q_f32(buffer + j); - vst1q_f32(buffer + j, vmulq_f32(v, gVec)); + float32x4_t sumVec = vdupq_n_f32(0.0f); + int j=0; + for(; j<=sz-4; j+=4){ + float32x4_t v = vld1q_f32(buffer + i + j); + sumVec = vmlaq_f32(sumVec, v, v); } + float32x2_t lo = vget_low_f32(sumVec); + float32x2_t hi = vget_high_f32(sumVec); + sumSq = vget_lane_f32(lo,0) + vget_lane_f32(lo,1) + vget_lane_f32(hi,0) + vget_lane_f32(hi,1); + for(; j(sz)); + if(rms > 0.001f){ + float target = gTargetRMS / rms; + gCurrentGain = gCurrentGain*0.99f + target*0.01f; + if(gCurrentGain > 2.0f) gCurrentGain = 2.0f; + +#if defined(__ARM_NEON) + float32x4_t gVec = vdupq_n_f32(gCurrentGain); + int j=0; + for(; j<=sz-4; j+=4){ + float32x4_t v = vld1q_f32(buffer + i + j); + vst1q_f32(buffer + i + j, vmulq_f32(v, gVec)); + } + for(; j