code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c

   1 /* aec.cpp
   2  *
   3  * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
   4  * All Rights Reserved.
   5  *
   6  * Acoustic Echo Cancellation NLMS-pw algorithm
   7  *
   8  * Version 0.3 filter created with www.dsptutor.freeuk.com
   9  * Version 0.3.1 Allow change of stability parameter delta
  10  * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
  11  */
  12
  13 #include <math.h>
  14 #include <string.h>
  15
  16 #include <pulse/xmalloc.h>
  17
  18 #include "adrian-aec.h"
  19
  20 #ifdef __SSE__
  21 #include <xmmintrin.h>
  22 #endif
  23
  24 /* Vector Dot Product */
  25 static REAL dotp(REAL a[], REAL b[])
  26 {
  27   REAL sum0 = 0.0, sum1 = 0.0;
  28   int j;
  29
  30   for (j = 0; j < NLMS_LEN; j += 2) {
  31     // optimize: partial loop unrolling
  32     sum0 += a[j] * b[j];
  33     sum1 += a[j + 1] * b[j + 1];
  34   }
  35   return sum0 + sum1;
  36 }
  37
  38 static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
  39 static REAL dotp_sse(REAL a[], REAL b[])
  40 {
  41 #ifdef __SSE__
  42   /* This is taken from speex's inner product implementation */
  43   int j;
  44   REAL sum;
  45   __m128 acc = _mm_setzero_ps();
  46
  47   for (j=0;j<NLMS_LEN;j+=8)
  48   {
  49     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
  50     acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
  51   }
  52   acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
  53   acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
  54   _mm_store_ss(&sum, acc);
  55
  56   return sum;
  57 #else
  58   return dotp(a, b);
  59 #endif
  60 }
  61
  62
  63 AEC* AEC_init(int RATE, int have_vector)
  64 {
  65   AEC *a = pa_xnew(AEC, 1);
  66   a->hangover = 0;
  67   memset(a->x, 0, sizeof(a->x));
  68   memset(a->xf, 0, sizeof(a->xf));
  69   memset(a->w, 0, sizeof(a->w));
  70   a->j = NLMS_EXT;
  71   a->delta = 0.0f;
  72   AEC_setambient(a, NoiseFloor);
  73   a->dfast = a->dslow = M75dB_PCM;
  74   a->xfast = a->xslow = M80dB_PCM;
  75   a->gain = 1.0f;
  76   a->Fx = IIR1_init(2000.0f/RATE);
  77   a->Fe = IIR1_init(2000.0f/RATE);
  78   a->cutoff = FIR_HP_300Hz_init();
  79   a->acMic = IIR_HP_init();
  80   a->acSpk = IIR_HP_init();
  81
  82   a->aes_y2 = M0dB;
  83
  84   a->fdwdisplay = -1;
  85   a->dumpcnt = 0;
  86   memset(a->ws, 0, sizeof(a->ws));
  87
  88   if (have_vector)
  89       a->dotp = dotp_sse;
  90   else
  91       a->dotp = dotp;
  92
  93   return a;
  94 }
  95
  96 // Adrian soft decision DTD
  97 // (Dual Average Near-End to Far-End signal Ratio DTD)
  98 // This algorithm uses exponential smoothing with differnt
  99 // ageing parameters to get fast and slow near-end and far-end
 100 // signal averages. The ratio of NFRs term
 101 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
 102 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
 103 // mapped to 1.0 with a limited linear function.
 104 static float AEC_dtd(AEC *a, REAL d, REAL x)
 105 {
 106   float stepsize;
 107   float ratio, M;
 108
 109   // fast near-end and far-end average
 110   a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
 111   a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
 112
 113   // slow near-end and far-end average
 114   a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
 115   a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
 116
 117   if (a->xfast < M70dB_PCM) {
 118     return 0.0;   // no Spk signal
 119   }
 120
 121   if (a->dfast < M70dB_PCM) {
 122     return 0.0;   // no Mic signal
 123   }
 124
 125   // ratio of NFRs
 126   ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
 127
 128   // begrenzte lineare Kennlinie
 129   M = (STEPY2 - STEPY1) / (STEPX2 - STEPX1);
 130   if (ratio < STEPX1) {
 131     stepsize = STEPY1;
 132   } else if (ratio > STEPX2) {
 133     stepsize = STEPY2;
 134   } else {
 135     // Punktrichtungsform einer Geraden
 136     stepsize = M * (ratio - STEPX1) + STEPY1;
 137   }
 138
 139   return stepsize;
 140 }
 141
 142
 143 static void AEC_leaky(AEC *a)
 144 // The xfast signal is used to charge the hangover timer to Thold.
 145 // When hangover expires (no Spk signal for some time) the vector w
 146 // is erased. This is my implementation of Leaky NLMS.
 147 {
 148   if (a->xfast >= M70dB_PCM) {
 149     // vector w is valid for hangover Thold time
 150     a->hangover = Thold;
 151   } else {
 152     if (a->hangover > 1) {
 153       --(a->hangover);
 154     } else if (1 == a->hangover) {
 155       --(a->hangover);
 156       // My Leaky NLMS is to erase vector w when hangover expires
 157       memset(a->w, 0, sizeof(a->w));
 158     }
 159   }
 160 }
 161
 162
 163 #if 0
 164 void AEC::openwdisplay() {
 165   // open TCP connection to program wdisplay.tcl
 166   fdwdisplay = socket_async("127.0.0.1", 50999);
 167 };
 168 #endif
 169
 170
 171 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
 172 {
 173   REAL e;
 174   REAL ef;
 175   a->x[a->j] = x_;
 176   a->xf[a->j] = IIR1_highpass(a->Fx, x_);     // pre-whitening of x
 177
 178   // calculate error value
 179   // (mic signal - estimated mic signal from spk signal)
 180   e = d;
 181   if (a->hangover > 0) {
 182     e -= a->dotp(a->w, a->x + a->j);
 183   }
 184   ef = IIR1_highpass(a->Fe, e);     // pre-whitening of e
 185
 186   // optimize: iterative dotp(xf, xf)
 187   a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
 188
 189   if (stepsize > 0.0) {
 190     // calculate variable step size
 191     REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
 192
 193     // update tap weights (filter learning)
 194     int i;
 195     for (i = 0; i < NLMS_LEN; i += 2) {
 196       // optimize: partial loop unrolling
 197       a->w[i] += mikro_ef * a->xf[i + a->j];
 198       a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
 199     }
 200   }
 201
 202   if (--(a->j) < 0) {
 203     // optimize: decrease number of memory copies
 204     a->j = NLMS_EXT;
 205     memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
 206     memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
 207   }
 208
 209   // Saturation
 210   if (e > MAXPCM) {
 211     return MAXPCM;
 212   } else if (e < -MAXPCM) {
 213     return -MAXPCM;
 214   } else {
 215     return e;
 216   }
 217 }
 218
 219
 220 int AEC_doAEC(AEC *a, int d_, int x_)
 221 {
 222   REAL d = (REAL) d_;
 223   REAL x = (REAL) x_;
 224
 225   // Mic Highpass Filter - to remove DC
 226   d = IIR_HP_highpass(a->acMic, d);
 227
 228   // Mic Highpass Filter - cut-off below 300Hz
 229   d = FIR_HP_300Hz_highpass(a->cutoff, d);
 230
 231   // Amplify, for e.g. Soundcards with -6dB max. volume
 232   d *= a->gain;
 233
 234   // Spk Highpass Filter - to remove DC
 235   x = IIR_HP_highpass(a->acSpk, x);
 236
 237   // Double Talk Detector
 238   a->stepsize = AEC_dtd(a, d, x);
 239
 240   // Leaky (ageing of vector w)
 241   AEC_leaky(a);
 242
 243   // Acoustic Echo Cancellation
 244   d = AEC_nlms_pw(a, d, x, a->stepsize);
 245
 246 #if 0
 247   if (fdwdisplay >= 0) {
 248     if (++dumpcnt >= (WIDEB*RATE/10)) {
 249       // wdisplay creates 10 dumps per seconds = large CPU load!
 250       dumpcnt = 0;
 251       write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
 252       // we don't check return value. This is not production quality!!!
 253       memset(ws, 0, sizeof(ws));
 254     } else {
 255       int i;
 256       for (i = 0; i < DUMP_LEN; i += 2) {
 257         // optimize: partial loop unrolling
 258         ws[i] += w[i];
 259         ws[i + 1] += w[i + 1];
 260       }
 261     }
 262   }
 263 #endif
 264
 265   return (int) d;
 266 }