code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c

   1 /* aec.cpp
   2  *
   3  * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
   4  * All Rights Reserved.
   5  *
   6  * Acoustic Echo Cancellation NLMS-pw algorithm
   7  *
   8  * Version 0.3 filter created with www.dsptutor.freeuk.com
   9  * Version 0.3.1 Allow change of stability parameter delta
  10  * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
  11  */
  12
  13 #ifndef _GNU_SOURCE
  14 #define _GNU_SOURCE
  15 #endif
  16
  17 #include <math.h>
  18 #include <string.h>
  19 #include <stdint.h>
  20
  21 #include <pulse/xmalloc.h>
  22
  23 #include "adrian-aec.h"
  24
  25 #ifndef DISABLE_ORC
  26 #include "adrian-aec-orc-gen.h"
  27 #endif
  28
  29 #ifdef __SSE__
  30 #include <xmmintrin.h>
  31 #endif
  32
  33 /* Vector Dot Product */
  34 static REAL dotp(REAL a[], REAL b[])
  35 {
  36   REAL sum0 = 0.0f, sum1 = 0.0f;
  37   int j;
  38
  39   for (j = 0; j < NLMS_LEN; j += 2) {
  40     // optimize: partial loop unrolling
  41     sum0 += a[j] * b[j];
  42     sum1 += a[j + 1] * b[j + 1];
  43   }
  44   return sum0 + sum1;
  45 }
  46
  47 static REAL dotp_sse(REAL a[], REAL b[])
  48 {
  49 #ifdef __SSE__
  50   /* This is taken from speex's inner product implementation */
  51   int j;
  52   REAL sum;
  53   __m128 acc = _mm_setzero_ps();
  54
  55   for (j=0;j<NLMS_LEN;j+=8)
  56   {
  57     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
  58     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
  59   }
  60   acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
  61   acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
  62   _mm_store_ss(&sum, acc);
  63
  64   return sum;
  65 #else
  66   return dotp(a, b);
  67 #endif
  68 }
  69
  70
  71 AEC* AEC_init(int RATE, int have_vector)
  72 {
  73   AEC *a = pa_xnew0(AEC, 1);
  74   a->j = NLMS_EXT;
  75   AEC_setambient(a, NoiseFloor);
  76   a->dfast = a->dslow = M75dB_PCM;
  77   a->xfast = a->xslow = M80dB_PCM;
  78   a->gain = 1.0f;
  79   a->Fx = IIR1_init(2000.0f/RATE);
  80   a->Fe = IIR1_init(2000.0f/RATE);
  81   a->cutoff = FIR_HP_300Hz_init();
  82   a->acMic = IIR_HP_init();
  83   a->acSpk = IIR_HP_init();
  84
  85   a->aes_y2 = M0dB;
  86
  87   a->fdwdisplay = -1;
  88
  89   if (have_vector) {
  90       /* Get a 16-byte aligned location */
  91       a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
  92       a->dotp = dotp_sse;
  93   } else {
  94       /* We don't care about alignment, just use the array as-is */
  95       a->w = a->w_arr;
  96       a->dotp = dotp;
  97   }
  98
  99   return a;
 100 }
 101
 102 void AEC_done(AEC *a) {
 103     pa_assert(a);
 104
 105     pa_xfree(a->Fx);
 106     pa_xfree(a->Fe);
 107     pa_xfree(a->acMic);
 108     pa_xfree(a->acSpk);
 109     pa_xfree(a->cutoff);
 110     pa_xfree(a);
 111 }
 112
 113 // Adrian soft decision DTD
 114 // (Dual Average Near-End to Far-End signal Ratio DTD)
 115 // This algorithm uses exponential smoothing with differnt
 116 // ageing parameters to get fast and slow near-end and far-end
 117 // signal averages. The ratio of NFRs term
 118 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
 119 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
 120 // mapped to 1.0 with a limited linear function.
 121 static float AEC_dtd(AEC *a, REAL d, REAL x)
 122 {
 123   float ratio, stepsize;
 124
 125   // fast near-end and far-end average
 126   a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
 127   a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
 128
 129   // slow near-end and far-end average
 130   a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
 131   a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
 132
 133   if (a->xfast < M70dB_PCM) {
 134     return 0.0f;   // no Spk signal
 135   }
 136
 137   if (a->dfast < M70dB_PCM) {
 138     return 0.0f;   // no Mic signal
 139   }
 140
 141   // ratio of NFRs
 142   ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
 143
 144   // Linear interpolation with clamping at the limits
 145   if (ratio < STEPX1)
 146     stepsize = STEPY1;
 147   else if (ratio > STEPX2)
 148     stepsize = STEPY2;
 149   else
 150     stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1);
 151
 152   return stepsize;
 153 }
 154
 155
 156 static void AEC_leaky(AEC *a)
 157 // The xfast signal is used to charge the hangover timer to Thold.
 158 // When hangover expires (no Spk signal for some time) the vector w
 159 // is erased. This is my implementation of Leaky NLMS.
 160 {
 161   if (a->xfast >= M70dB_PCM) {
 162     // vector w is valid for hangover Thold time
 163     a->hangover = Thold;
 164   } else {
 165     if (a->hangover > 1) {
 166       --(a->hangover);
 167     } else if (1 == a->hangover) {
 168       --(a->hangover);
 169       // My Leaky NLMS is to erase vector w when hangover expires
 170       memset(a->w_arr, 0, sizeof(a->w_arr));
 171     }
 172   }
 173 }
 174
 175
 176 #if 0
 177 void AEC::openwdisplay() {
 178   // open TCP connection to program wdisplay.tcl
 179   fdwdisplay = socket_async("127.0.0.1", 50999);
 180 };
 181 #endif
 182
 183
 184 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
 185 {
 186   REAL e;
 187   REAL ef;
 188   a->x[a->j] = x_;
 189   a->xf[a->j] = IIR1_highpass(a->Fx, x_);     // pre-whitening of x
 190
 191   // calculate error value
 192   // (mic signal - estimated mic signal from spk signal)
 193   e = d;
 194   if (a->hangover > 0) {
 195     e -= a->dotp(a->w, a->x + a->j);
 196   }
 197   ef = IIR1_highpass(a->Fe, e);     // pre-whitening of e
 198
 199   // optimize: iterative dotp(xf, xf)
 200   a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
 201
 202   if (stepsize > 0.0f) {
 203     // calculate variable step size
 204     REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
 205
 206 #ifdef DISABLE_ORC
 207     // update tap weights (filter learning)
 208     int i;
 209     for (i = 0; i < NLMS_LEN; i += 2) {
 210       // optimize: partial loop unrolling
 211       a->w[i] += mikro_ef * a->xf[i + a->j];
 212       a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
 213     }
 214 #else
 215     update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
 216 #endif
 217   }
 218
 219   if (--(a->j) < 0) {
 220     // optimize: decrease number of memory copies
 221     a->j = NLMS_EXT;
 222     memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
 223     memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
 224   }
 225
 226   // Saturation
 227   if (e > MAXPCM) {
 228     return MAXPCM;
 229   } else if (e < -MAXPCM) {
 230     return -MAXPCM;
 231   } else {
 232     return e;
 233   }
 234 }
 235
 236
 237 int AEC_doAEC(AEC *a, int d_, int x_)
 238 {
 239   REAL d = (REAL) d_;
 240   REAL x = (REAL) x_;
 241
 242   // Mic Highpass Filter - to remove DC
 243   d = IIR_HP_highpass(a->acMic, d);
 244
 245   // Mic Highpass Filter - cut-off below 300Hz
 246   d = FIR_HP_300Hz_highpass(a->cutoff, d);
 247
 248   // Amplify, for e.g. Soundcards with -6dB max. volume
 249   d *= a->gain;
 250
 251   // Spk Highpass Filter - to remove DC
 252   x = IIR_HP_highpass(a->acSpk, x);
 253
 254   // Double Talk Detector
 255   a->stepsize = AEC_dtd(a, d, x);
 256
 257   // Leaky (ageing of vector w)
 258   AEC_leaky(a);
 259
 260   // Acoustic Echo Cancellation
 261   d = AEC_nlms_pw(a, d, x, a->stepsize);
 262
 263 #if 0
 264   if (fdwdisplay >= 0) {
 265     if (++dumpcnt >= (WIDEB*RATE/10)) {
 266       // wdisplay creates 10 dumps per seconds = large CPU load!
 267       dumpcnt = 0;
 268       write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
 269       // we don't check return value. This is not production quality!!!
 270       memset(ws, 0, sizeof(ws));
 271     } else {
 272       int i;
 273       for (i = 0; i < DUMP_LEN; i += 2) {
 274         // optimize: partial loop unrolling
 275         ws[i] += w[i];
 276         ws[i + 1] += w[i + 1];
 277       }
 278     }
 279   }
 280 #endif
 281
 282   return (int) d;
 283 }