Files
Radio/app/src/main/cpp/radio.cpp
T

520 lines
18 KiB
C++

#include <jni.h>
#include <vector>
#include <cmath>
#include <complex>
#include <array>
#if defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
// =============================================================================
// OPTIMIZED CONFIGURATION
// =============================================================================
// Use L1/L2 cache-optimized block size (typical L1: 32KB, L2: 256KB)
static constexpr int FFT_SIZE = 512;
static constexpr int NUM_EQ_BANDS = 10;
// Pre-compute constants at compile time
static constexpr float INV_32768 = 1.0f / 32768.0f;
static constexpr float SQRT_2_INV = 0.70710678f; // 1/sqrt(2)
// Denormal protection - use single scalar instead of adding per-sample
static constexpr float DENORMAL_OFFSET = 1e-18f;
// EQ frequencies - static const for compile-time access
static constexpr std::array<float, NUM_EQ_BANDS> EQ_FREQUENCIES = {
31.25f, 62.5f, 125.0f, 250.0f, 500.0f,
1000.0f, 2000.0f, 4000.0f, 8000.0f, 16000.0f
};
// =============================================================================
// OPTIMIZED DSP CLASSES - Structure of Arrays (SoA) for cache efficiency
// =============================================================================
struct alignas(16) BiquadBank {
// Coefficients (SoA - better for SIMD loads)
alignas(16) std::array<float, NUM_EQ_BANDS> a0{}, a1{}, a2{}, b1{}, b2{};
// State variables
alignas(16) std::array<float, NUM_EQ_BANDS> z1{}, z2{};
// Active flags (packed into bitmask for branch-free processing)
uint16_t activeMask = 0;
// Pre-check if any EQ band is active - branch free
[[nodiscard]] inline bool hasActiveBands() const { return activeMask != 0; }
inline void setBandActive(int band, bool active) {
if (active) activeMask |= (1 << band);
else activeMask &= ~(1 << band);
}
// Optimized bulk processing for a single channel
inline void processBlock(float* __restrict__ data, int count) {
if (!this -> hasActiveBands()) return;
for (int i = 0; i < count; i++) {
float x = data[i];
// Process all bands (compiler will optimize for activeMask)
#pragma GCC unroll 10
for (int b = 0; b < NUM_EQ_BANDS; b++) {
if (activeMask & (1 << b)) {
float y = x * a0[b] + z1[b];
z1[b] = x * a1[b] + z2[b] - b1[b] * y + DENORMAL_OFFSET;
z2[b] = x * a2[b] - b2[b] * y;
x = y;
}
}
data[i] = x;
}
}
void setPeakingEQ(int band, float sr, float f, float g, float bw) {
if (band < 0 || band >= NUM_EQ_BANDS) return;
const bool active = std::abs(g) > 0.1f;
setBandActive(band, active);
if (!active) return;
const float A = powf(10.0f, g / 40.0f);
const float w = 2.0f * static_cast<float>(M_PI) * f / sr;
const float alpha = sinf(w) * sinhf(logf(2.0f) / 2.0f * bw * w / sinf(w));
const float c = cosf(w);
const float a0_raw = 1.0f + alpha / A;
const float invA0 = 1.0f / a0_raw;
a0[band] = (1.0f + alpha * A) * invA0;
a1[band] = (-2.0f * c) * invA0;
a2[band] = (1.0f - alpha * A) * invA0;
b1[band] = (-2.0f * c) * invA0;
b2[band] = (1.0f - alpha / A) * invA0;
}
};
// =============================================================================
// BASS BOOST
// =============================================================================
struct alignas(16) BassFilter {
alignas(16) float a0 = 1.2f, a1 = 1.2f, a2 = 1.2f, b1 = 0.0f, b2 = 0.0f;
alignas(16) float z1 = 0.0f, z2 = 0.0f;
bool active = false;
BiquadBank myBank;
inline float process(float x) {
if (!active) return x;
float y = x * a0 + z1;
z1 = x * a1 + z2 - b1 * y + DENORMAL_OFFSET;
z2 = x * a2 - b2 * y;
if(y>1.2f) y=1.2f;
else if(y<-1.2f) y=-1.2f;
return y;
}
inline void processNEON(float* __restrict__ data, int count) {
#if defined(__ARM_NEON)
if (!active) return;
int i = 0;
for (; i <= count-4; i+=4) {
float32x4_t x = vld1q_f32(data + i);
for(int b=0;b<NUM_EQ_BANDS;b++){
if(active){
float32x4_t y = vmlaq_n_f32(vdupq_n_f32(z1), x, a0);
float32x4_t z1n = vmlaq_n_f32(vdupq_n_f32(z2), x, a1) - vmulq_n_f32(y, b1) + vdupq_n_f32(DENORMAL_OFFSET);
float32x4_t z2n = vmlaq_n_f32(vdupq_n_f32(0.0f), x, a2) - vmulq_n_f32(y, b2);
z1 = vgetq_lane_f32(z1n,3);
z2 = vgetq_lane_f32(z2n,3);
x = y;
}
}
vst1q_f32(data+i, x);
}
for(;i<count;i++) myBank.processBlock(data+i,1); // Rest scalar
#else
for(int i=0;i<count;i++) data[i]=process(data[i]);
#endif
}
void setLowShelf(float sr,float f,float g,float q){
active=std::abs(g)>0.01f;
if(!active) return;
float A=powf(10.0f,g/40.0f);
float w=2.0f*static_cast<float>(M_PI)*f/sr;
float alpha=sinf(w)/2.0f*sqrtf((A+1.0f/A)*(1.0f/q-1.0f)+2.0f);
float c=cosf(w),sqrtA=sqrtf(A);
float a0_raw=(A+1.0f)+(A-1.0f)*c+2.0f*sqrtA*alpha;
float invA0=1.0f/a0_raw;
a0=A*((A+1.0f)-(A-1.0f)*c+2.0f*sqrtA*alpha)*invA0;
a1=2.0f*A*((A-1.0f)-(A+1.0f)*c)*invA0;
a2=A*((A+1.0f)-(A-1.0f)*c-2.0f*sqrtA*alpha)*invA0;
b1=-2.0f*((A-1.0f)+(A+1.0f)*c)*invA0;
b2=((A+1.0f)+(A-1.0f)*c-2.0f*sqrtA*alpha)*invA0;
}
};
// =============================================================================
// LOCK-FREE REVERB - Fixed-size circular buffers (no heap allocation)
// =============================================================================
template<int SIZE>
struct CircularBuffer {
alignas(16) std::array<float, SIZE> data = {};
int pos = 0;
[[nodiscard]] inline float read() const { return data[pos]; }
inline void write(float v) { data[pos] = v; }
inline void advance() { pos = (pos + 1) % SIZE; }
};
class ReverbOptimized {
// Classic Schroeder: 4 parallel comb filters + 2 series allpass
// Fixed buffer sizes for lock-free operation
std::array<CircularBuffer<1116>, 4> combs;
std::array<CircularBuffer<556>, 2> allpasses;
std::array<float, 4> combFeedback = {0.841f, 0.815f, 0.796f, 0.771f};
float mix = 0.0f;
public:
ReverbOptimized() = default;
inline void setMix(float m) { mix = m; }
// Branch-free processing with inline inlining
inline float process(float x) {
if (mix < 0.01f) return x;
// Parallel comb filters (unrolled for ARM NEON)
float out = 0.0f;
#pragma GCC unroll 4
for (int i = 0; i < 4; i++) {
float delayed = combs[i].read();
out += delayed;
combs[i].write(x + delayed * combFeedback[i] + DENORMAL_OFFSET);
combs[i].advance();
}
out *= 0.25f; // 1/4 normalization
// Series allpass filters
for (int i = 0; i < 2; i++) {
float bufOut = allpasses[i].read();
float xOut = -0.5f * out + bufOut;
allpasses[i].write(out + 0.5f * bufOut);
allpasses[i].advance();
out = xOut;
}
return x * (1.0f - mix) + out * mix;
}
// NEON-optimized block processing
inline void processBlock(float* __restrict__ left, float* __restrict__ right, int count) {
if (mix < 0.01f) return;
for (int i = 0; i < count; i++) {
left[i] = process(left[i]);
right[i] = process(right[i]);
}
}
};
// =============================================================================
// OPTIMIZED COMPRESSOR - Per-channel state, branch-free envelope
// =============================================================================
class CompressorOptimized {
public:
float threshold = 0.3f;
float ratio = 4.0f;
float attack = 0.01f;
float release = 0.2f;
float sampleRate = 44100.0f;
private:
// Per-channel envelope state
float envelopeL = 0.0f;
float envelopeR = 0.0f;
// Pre-computed coefficients
float attackCoef = 0.0f;
float releaseCoef = 0.0f;
bool coefficientsValid = false;
public:
inline void updateCoefficients() {
if (coefficientsValid) return;
attackCoef = expf(-1.0f / (attack * sampleRate));
releaseCoef = expf(-1.0f / (release * sampleRate));
coefficientsValid = true;
}
inline void processBlock(float* __restrict__ buffer, int count, float& envelope) {
updateCoefficients();
const int blockSize = 32;
for(int b=0;b<count;b+=blockSize){
int sz = (b+blockSize<count)? blockSize : count-b;
float maxVal = 0.0f;
for(int i=0;i<sz;i++){
float absInput = fabsf(buffer[b+i]);
if(absInput>maxVal) maxVal = absInput;
}
bool attackMode = maxVal > envelope;
envelope = attackMode ? attackCoef*envelope + (1-attackCoef)*maxVal
: releaseCoef*envelope + (1-releaseCoef)*maxVal;
float gain = (envelope>threshold)? (threshold + (envelope-threshold)/ratio)/(envelope+1e-9f) : 1.0f;
for(int i=0;i<sz;i++) buffer[b+i]*=gain;
}
}
inline void process(float* __restrict__ left, float* __restrict__ right, int count) {
processBlock(left, count, envelopeL);
processBlock(right, count, envelopeR);
}
};
// =============================================================================
// GLOBAL ENGINE - SoA layout for cache efficiency
// =============================================================================
CompressorOptimized gCompressor;
ReverbOptimized gReverbL, gReverbR;
BiquadBank gEqL, gEqR;
BassFilter gBassL, gBassR;
// Global state flags
bool gDrcEnabled = false;
bool gEqEnabled = false; // Derived from gEqL.hasActiveBands()
bool gBassBoostEnabled = false;
float gStereoWidth = 1.0f;
float gTargetRMS = 0.20f;
float gCurrentGain = 1.0f;
// Pre-allocated buffers - fixed size to avoid heap allocation in real-time
alignas(16) std::array<float, 4096> gLeftBuf;
alignas(16) std::array<float, 4096> gRightBuf;
alignas(16) std::array<float, 256> gFFTData;
alignas(16) std::array<std::complex<float>, FFT_SIZE> gFFTWork;
// Fast FFT - iterative Cooley-Tukey
inline void fastFFT(std::complex<float>* __restrict__ data, int n) {
// Bit-reversal permutation (iterative, cache-friendly)
for (int i = 1, j = 0; i < n; i++) {
int bit = n >> 1;
for (; j & bit; bit >>= 1) j ^= bit;
j ^= bit;
if (i < j) std::swap(data[i], data[j]);
}
// Cooley-Tukey stages
for (int len = 2; len <= n; len <<= 1) {
float ang = -2.0f * static_cast<float>(M_PI) / static_cast<float>(len);
// Pre-compute wlen - critical for performance
std::complex<float> wlen(cosf(ang), sinf(ang));
for (int i = 0; i < n; i += len) {
std::complex<float> w(1.0f);
for (int j = 0; j < len / 2; j++) {
std::complex<float> u = data[i + j];
std::complex<float> v = data[i + j + len / 2] * w;
data[i + j] = u + v;
data[i + j + len / 2] = u - v;
w *= wlen;
}
}
}
}
// =============================================================================
// HIGH-PERFORMANCE AUDIO PROCESSING
// =============================================================================
// Fast soft clipping with polynomial approximation
inline float fastSoftClip(float x) {
// Branchless clipping using min/max
float ax = fabsf(x);
float sign = x > 0 ? 1.0f : -1.0f;
if (ax > 1.0f) return sign;
return x * (1.4f - 0.4f * x * x);
}
inline void applyAutoGain(float* buffer, int count){
int block = 128;
for(int i=0; i<count; i+=block){
int sz = (i+block<count) ? block : count-i;
float sumSq = 0.0f;
#if defined(__ARM_NEON)
float32x4_t sumVec = vdupq_n_f32(0.0f);
int j=0;
for(; j<=sz-4; j+=4){
float32x4_t v = vld1q_f32(buffer + i + j);
sumVec = vmlaq_f32(sumVec, v, v);
}
float32x2_t lo = vget_low_f32(sumVec);
float32x2_t hi = vget_high_f32(sumVec);
sumSq = vget_lane_f32(lo,0) + vget_lane_f32(lo,1) + vget_lane_f32(hi,0) + vget_lane_f32(hi,1);
for(; j<sz; j++) sumSq += buffer[i+j]*buffer[i+j];
#else
for(int j=0; j<sz; j++) sumSq += buffer[i+j]*buffer[i+j];
#endif
float rms = sqrtf(sumSq / static_cast<float>(sz));
if(rms > 0.001f){
float target = gTargetRMS / rms;
gCurrentGain = gCurrentGain*0.99f + target*0.01f;
if(gCurrentGain > 2.0f) gCurrentGain = 2.0f;
#if defined(__ARM_NEON)
float32x4_t gVec = vdupq_n_f32(gCurrentGain);
int j=0;
for(; j<=sz-4; j+=4){
float32x4_t v = vld1q_f32(buffer + i + j);
vst1q_f32(buffer + i + j, vmulq_f32(v, gVec));
}
for(; j<sz; j++) buffer[i+j] *= gCurrentGain;
#else
for(int j=0; j<sz; j++) buffer[i+j] *= gCurrentGain;
#endif
}
}
}
// Main processing function - heavily optimized
extern "C" {
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_setDrcEnabled(JNIEnv*, jobject, jboolean e) {
gDrcEnabled = e;
}
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_setReverbMix(JNIEnv*, jobject, jfloat m) {
gReverbL.setMix(m);
gReverbR.setMix(m);
}
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_setEqBand(JNIEnv*, jobject, jint b, jfloat g) {
if (b >= 0 && b < NUM_EQ_BANDS) {
gEqL.setPeakingEQ(b, 44100.0f, EQ_FREQUENCIES[b], g, 1.0f);
gEqR.setPeakingEQ(b, 44100.0f, EQ_FREQUENCIES[b], g, 1.0f);
}
gEqEnabled = gEqL.hasActiveBands();
}
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_setBassBoost(JNIEnv*, jobject, jfloat g) {
if (g > 0.01f) {
gBassL.setLowShelf(44100.0f, 150.0f, g, SQRT_2_INV);
gBassR.setLowShelf(44100.0f, 150.0f, g, SQRT_2_INV);
gBassBoostEnabled = true;
} else {
gBassBoostEnabled = false;
}
}
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_setStereoWidth(JNIEnv*, jobject, jfloat w) {
gStereoWidth = fmaxf(0.0f, fminf(w, 2.0f));
}
JNIEXPORT jfloatArray JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_getFftData(JNIEnv* env, jobject) {
jfloatArray arr = env->NewFloatArray(256);
env->SetFloatArrayRegion(arr, 0, 256, gFFTData.data());
return arr;
}
JNIEXPORT void JNICALL Java_com_michatec_radio_helpers_NativeAudioProcessor_processAudioDirect(JNIEnv* env, jobject, jobject byteBuffer, jint size) {
auto* buffer = static_cast<jshort*>(env->GetDirectBufferAddress(byteBuffer));
if (!buffer) return;
int numFrames = (size / 2) / 2;
if (numFrames > 4096) numFrames = 4096; // Clamp to buffer size
// =========================================================================
// STAGE 1: Convert to Float (NEON optimized, interleaved stereo)
// =========================================================================
int i = 0;
#if defined(__ARM_NEON)
float32x4_t invScale = vdupq_n_f32(INV_32768);
for (; i <= numFrames - 4; i += 4) {
// Load interleaved 16-bit stereo, deinterleave to two floats
int16x4x2_t raw = vld2_s16(buffer + i * 2);
// Expand to 32-bit, convert to float, scale
float32x4_t left = vmulq_f32(vcvtq_f32_s32(vmovl_s16(raw.val[0])), invScale);
float32x4_t right = vmulq_f32(vcvtq_f32_s32(vmovl_s16(raw.val[1])), invScale);
vst1q_f32(gLeftBuf.data() + i, left);
vst1q_f32(gRightBuf.data() + i, right);
}
#endif
// Scalar tail
for (; i < numFrames; i++) {
gLeftBuf[i] = static_cast<float>(buffer[i * 2]) * INV_32768;
gRightBuf[i] = static_cast<float>(buffer[i * 2 + 1]) * INV_32768;
}
// =========================================================================
// STAGE 2: DSP Chain (EQ -> Bass -> Reverb -> Stereo Width)
// =========================================================================
// EQ processing (branch-free based on active mask)
if (gEqEnabled) {
gEqL.processBlock(gLeftBuf.data(), numFrames);
gEqR.processBlock(gRightBuf.data(), numFrames);
}
// Bass boost
if (gBassBoostEnabled) {
gBassL.processNEON(gLeftBuf.data(), numFrames);
gBassR.processNEON(gRightBuf.data(), numFrames);
}
// Reverb
gReverbL.processBlock(gLeftBuf.data(), gRightBuf.data(), numFrames);
// Stereo width processing (branch-free)
if (gStereoWidth != 1.0f) {
float halfWidth = gStereoWidth * 0.5f;
for (int j = 0; j < numFrames; j++) {
float mid = (gLeftBuf[j] + gRightBuf[j]) * 0.5f;
float side = (gLeftBuf[j] - gRightBuf[j]) * halfWidth;
gLeftBuf[j] = mid + side;
gRightBuf[j] = mid - side;
}
}
// =========================================================================
// STAGE 3: Dynamic Control (AutoGain -> Compressor)
// =========================================================================
applyAutoGain(gLeftBuf.data(), numFrames);
applyAutoGain(gRightBuf.data(), numFrames);
if (gDrcEnabled) {
gCompressor.process(gLeftBuf.data(), gRightBuf.data(), numFrames);
}
// =========================================================================
// STAGE 4: FFT Analysis (downsampled for visualization)
// =========================================================================
// Zero-pad for FFT (use first 256 samples only)
for (int k = 0; k < FFT_SIZE; k++) {
gFFTWork[k] = (k < 256) ? std::complex<float>(gLeftBuf[k], 0.0f) : std::complex<float>(0.0f, 0.0f);
}
fastFFT(gFFTWork.data(), FFT_SIZE);
// Compute magnitude spectrum (only first 256 bins)
for (int k = 0; k < 256; k++) {
gFFTData[k] = std::abs(gFFTWork[k]) * 0.05f;
}
// =========================================================================
// STAGE 5: Convert back to 16-bit with soft clipping
// =========================================================================
for (int k = 0; k < numFrames; k++) {
buffer[k * 2] = static_cast<jshort>(fastSoftClip(gLeftBuf[k]) * 32767.0f);
buffer[k * 2 + 1] = static_cast<jshort>(fastSoftClip(gRightBuf[k]) * 32767.0f);
}
}
} // extern "C"