]> code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
bluetooth: Don't mark device valid before it has an adapter
[pulseaudio] / src / modules / echo-cancel / adrian-aec.c
1 /* aec.cpp
2 *
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
4 * All Rights Reserved.
5 *
6 * Acoustic Echo Cancellation NLMS-pw algorithm
7 *
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16
17 #include <math.h>
18 #include <string.h>
19 #include <stdint.h>
20
21 #include <pulse/xmalloc.h>
22
23 #include "adrian-aec.h"
24
25 #ifndef DISABLE_ORC
26 #include "adrian-aec-orc-gen.h"
27 #endif
28
29 #ifdef __SSE__
30 #include <xmmintrin.h>
31 #endif
32
33 /* Vector Dot Product */
34 static REAL dotp(REAL a[], REAL b[])
35 {
36 REAL sum0 = 0.0f, sum1 = 0.0f;
37 int j;
38
39 for (j = 0; j < NLMS_LEN; j += 2) {
40 // optimize: partial loop unrolling
41 sum0 += a[j] * b[j];
42 sum1 += a[j + 1] * b[j + 1];
43 }
44 return sum0 + sum1;
45 }
46
47 static REAL dotp_sse(REAL a[], REAL b[])
48 {
49 #ifdef __SSE__
50 /* This is taken from speex's inner product implementation */
51 int j;
52 REAL sum;
53 __m128 acc = _mm_setzero_ps();
54
55 for (j=0;j<NLMS_LEN;j+=8)
56 {
57 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
58 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
59 }
60 acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
61 acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
62 _mm_store_ss(&sum, acc);
63
64 return sum;
65 #else
66 return dotp(a, b);
67 #endif
68 }
69
70
71 AEC* AEC_init(int RATE, int have_vector)
72 {
73 AEC *a = pa_xnew0(AEC, 1);
74 a->j = NLMS_EXT;
75 AEC_setambient(a, NoiseFloor);
76 a->dfast = a->dslow = M75dB_PCM;
77 a->xfast = a->xslow = M80dB_PCM;
78 a->gain = 1.0f;
79 a->Fx = IIR1_init(2000.0f/RATE);
80 a->Fe = IIR1_init(2000.0f/RATE);
81 a->cutoff = FIR_HP_300Hz_init();
82 a->acMic = IIR_HP_init();
83 a->acSpk = IIR_HP_init();
84
85 a->aes_y2 = M0dB;
86
87 a->fdwdisplay = -1;
88
89 if (have_vector) {
90 /* Get a 16-byte aligned location */
91 a->w = (REAL *) (((uintptr_t) a->w_arr) - (((uintptr_t) a->w_arr) % 16) + 16);
92 a->dotp = dotp_sse;
93 } else {
94 /* We don't care about alignment, just use the array as-is */
95 a->w = a->w_arr;
96 a->dotp = dotp;
97 }
98
99 return a;
100 }
101
102 void AEC_done(AEC *a) {
103 pa_assert(a);
104
105 pa_xfree(a->Fx);
106 pa_xfree(a->Fe);
107 pa_xfree(a->acMic);
108 pa_xfree(a->acSpk);
109 pa_xfree(a->cutoff);
110 pa_xfree(a);
111 }
112
113 // Adrian soft decision DTD
114 // (Dual Average Near-End to Far-End signal Ratio DTD)
115 // This algorithm uses exponential smoothing with differnt
116 // ageing parameters to get fast and slow near-end and far-end
117 // signal averages. The ratio of NFRs term
118 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
119 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
120 // mapped to 1.0 with a limited linear function.
121 static float AEC_dtd(AEC *a, REAL d, REAL x)
122 {
123 float ratio, stepsize;
124
125 // fast near-end and far-end average
126 a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
127 a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
128
129 // slow near-end and far-end average
130 a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
131 a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
132
133 if (a->xfast < M70dB_PCM) {
134 return 0.0f; // no Spk signal
135 }
136
137 if (a->dfast < M70dB_PCM) {
138 return 0.0f; // no Mic signal
139 }
140
141 // ratio of NFRs
142 ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
143
144 // Linear interpolation with clamping at the limits
145 if (ratio < STEPX1)
146 stepsize = STEPY1;
147 else if (ratio > STEPX2)
148 stepsize = STEPY2;
149 else
150 stepsize = STEPY1 + (STEPY2 - STEPY1) * (ratio - STEPX1) / (STEPX2 - STEPX1);
151
152 return stepsize;
153 }
154
155
156 static void AEC_leaky(AEC *a)
157 // The xfast signal is used to charge the hangover timer to Thold.
158 // When hangover expires (no Spk signal for some time) the vector w
159 // is erased. This is my implementation of Leaky NLMS.
160 {
161 if (a->xfast >= M70dB_PCM) {
162 // vector w is valid for hangover Thold time
163 a->hangover = Thold;
164 } else {
165 if (a->hangover > 1) {
166 --(a->hangover);
167 } else if (1 == a->hangover) {
168 --(a->hangover);
169 // My Leaky NLMS is to erase vector w when hangover expires
170 memset(a->w_arr, 0, sizeof(a->w_arr));
171 }
172 }
173 }
174
175
176 #if 0
177 void AEC::openwdisplay() {
178 // open TCP connection to program wdisplay.tcl
179 fdwdisplay = socket_async("127.0.0.1", 50999);
180 };
181 #endif
182
183
184 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
185 {
186 REAL e;
187 REAL ef;
188 a->x[a->j] = x_;
189 a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x
190
191 // calculate error value
192 // (mic signal - estimated mic signal from spk signal)
193 e = d;
194 if (a->hangover > 0) {
195 e -= a->dotp(a->w, a->x + a->j);
196 }
197 ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
198
199 // optimize: iterative dotp(xf, xf)
200 a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
201
202 if (stepsize > 0.0f) {
203 // calculate variable step size
204 REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
205
206 #ifdef DISABLE_ORC
207 // update tap weights (filter learning)
208 int i;
209 for (i = 0; i < NLMS_LEN; i += 2) {
210 // optimize: partial loop unrolling
211 a->w[i] += mikro_ef * a->xf[i + a->j];
212 a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
213 }
214 #else
215 update_tap_weights(a->w, &a->xf[a->j], mikro_ef, NLMS_LEN);
216 #endif
217 }
218
219 if (--(a->j) < 0) {
220 // optimize: decrease number of memory copies
221 a->j = NLMS_EXT;
222 memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
223 memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
224 }
225
226 // Saturation
227 if (e > MAXPCM) {
228 return MAXPCM;
229 } else if (e < -MAXPCM) {
230 return -MAXPCM;
231 } else {
232 return e;
233 }
234 }
235
236
237 int AEC_doAEC(AEC *a, int d_, int x_)
238 {
239 REAL d = (REAL) d_;
240 REAL x = (REAL) x_;
241
242 // Mic Highpass Filter - to remove DC
243 d = IIR_HP_highpass(a->acMic, d);
244
245 // Mic Highpass Filter - cut-off below 300Hz
246 d = FIR_HP_300Hz_highpass(a->cutoff, d);
247
248 // Amplify, for e.g. Soundcards with -6dB max. volume
249 d *= a->gain;
250
251 // Spk Highpass Filter - to remove DC
252 x = IIR_HP_highpass(a->acSpk, x);
253
254 // Double Talk Detector
255 a->stepsize = AEC_dtd(a, d, x);
256
257 // Leaky (ageing of vector w)
258 AEC_leaky(a);
259
260 // Acoustic Echo Cancellation
261 d = AEC_nlms_pw(a, d, x, a->stepsize);
262
263 #if 0
264 if (fdwdisplay >= 0) {
265 if (++dumpcnt >= (WIDEB*RATE/10)) {
266 // wdisplay creates 10 dumps per seconds = large CPU load!
267 dumpcnt = 0;
268 write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
269 // we don't check return value. This is not production quality!!!
270 memset(ws, 0, sizeof(ws));
271 } else {
272 int i;
273 for (i = 0; i < DUMP_LEN; i += 2) {
274 // optimize: partial loop unrolling
275 ws[i] += w[i];
276 ws[i + 1] += w[i + 1];
277 }
278 }
279 }
280 #endif
281
282 return (int) d;
283 }