]> code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
39c2d638660a830e7daf0d1ad7fed14cb837d29b
[pulseaudio] / src / modules / echo-cancel / adrian-aec.c
1 /* aec.cpp
2 *
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
4 * All Rights Reserved.
5 *
6 * Acoustic Echo Cancellation NLMS-pw algorithm
7 *
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
11 */
12
13 #include <math.h>
14 #include <string.h>
15
16 #include <pulse/xmalloc.h>
17
18 #include "adrian-aec.h"
19
20 #ifdef __SSE__
21 #include <xmmintrin.h>
22 #endif
23
24 /* Vector Dot Product */
25 static REAL dotp(REAL a[], REAL b[])
26 {
27 REAL sum0 = 0.0, sum1 = 0.0;
28 int j;
29
30 for (j = 0; j < NLMS_LEN; j += 2) {
31 // optimize: partial loop unrolling
32 sum0 += a[j] * b[j];
33 sum1 += a[j + 1] * b[j + 1];
34 }
35 return sum0 + sum1;
36 }
37
38 static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
39 static REAL dotp_sse(REAL a[], REAL b[])
40 {
41 #ifdef __SSE__
42 /* This is taken from speex's inner product implementation */
43 int j;
44 REAL sum;
45 __m128 acc = _mm_setzero_ps();
46
47 for (j=0;j<NLMS_LEN;j+=8)
48 {
49 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
50 acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
51 }
52 acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
53 acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
54 _mm_store_ss(&sum, acc);
55
56 return sum;
57 #else
58 return dotp(a, b);
59 #endif
60 }
61
62
63 AEC* AEC_init(int RATE, int have_vector)
64 {
65 AEC *a = pa_xnew(AEC, 1);
66 a->hangover = 0;
67 memset(a->x, 0, sizeof(a->x));
68 memset(a->xf, 0, sizeof(a->xf));
69 memset(a->w, 0, sizeof(a->w));
70 a->j = NLMS_EXT;
71 a->delta = 0.0f;
72 AEC_setambient(a, NoiseFloor);
73 a->dfast = a->dslow = M75dB_PCM;
74 a->xfast = a->xslow = M80dB_PCM;
75 a->gain = 1.0f;
76 a->Fx = IIR1_init(2000.0f/RATE);
77 a->Fe = IIR1_init(2000.0f/RATE);
78 a->cutoff = FIR_HP_300Hz_init();
79 a->acMic = IIR_HP_init();
80 a->acSpk = IIR_HP_init();
81
82 a->aes_y2 = M0dB;
83
84 a->fdwdisplay = -1;
85 a->dumpcnt = 0;
86 memset(a->ws, 0, sizeof(a->ws));
87
88 if (have_vector)
89 a->dotp = dotp_sse;
90 else
91 a->dotp = dotp;
92
93 return a;
94 }
95
96 // Adrian soft decision DTD
97 // (Dual Average Near-End to Far-End signal Ratio DTD)
98 // This algorithm uses exponential smoothing with differnt
99 // ageing parameters to get fast and slow near-end and far-end
100 // signal averages. The ratio of NFRs term
101 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
102 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
103 // mapped to 1.0 with a limited linear function.
104 static float AEC_dtd(AEC *a, REAL d, REAL x)
105 {
106 float stepsize;
107 float ratio, M;
108
109 // fast near-end and far-end average
110 a->dfast += ALPHAFAST * (fabsf(d) - a->dfast);
111 a->xfast += ALPHAFAST * (fabsf(x) - a->xfast);
112
113 // slow near-end and far-end average
114 a->dslow += ALPHASLOW * (fabsf(d) - a->dslow);
115 a->xslow += ALPHASLOW * (fabsf(x) - a->xslow);
116
117 if (a->xfast < M70dB_PCM) {
118 return 0.0; // no Spk signal
119 }
120
121 if (a->dfast < M70dB_PCM) {
122 return 0.0; // no Mic signal
123 }
124
125 // ratio of NFRs
126 ratio = (a->dfast * a->xslow) / (a->dslow * a->xfast);
127
128 // begrenzte lineare Kennlinie
129 M = (STEPY2 - STEPY1) / (STEPX2 - STEPX1);
130 if (ratio < STEPX1) {
131 stepsize = STEPY1;
132 } else if (ratio > STEPX2) {
133 stepsize = STEPY2;
134 } else {
135 // Punktrichtungsform einer Geraden
136 stepsize = M * (ratio - STEPX1) + STEPY1;
137 }
138
139 return stepsize;
140 }
141
142
143 static void AEC_leaky(AEC *a)
144 // The xfast signal is used to charge the hangover timer to Thold.
145 // When hangover expires (no Spk signal for some time) the vector w
146 // is erased. This is my implementation of Leaky NLMS.
147 {
148 if (a->xfast >= M70dB_PCM) {
149 // vector w is valid for hangover Thold time
150 a->hangover = Thold;
151 } else {
152 if (a->hangover > 1) {
153 --(a->hangover);
154 } else if (1 == a->hangover) {
155 --(a->hangover);
156 // My Leaky NLMS is to erase vector w when hangover expires
157 memset(a->w, 0, sizeof(a->w));
158 }
159 }
160 }
161
162
163 #if 0
164 void AEC::openwdisplay() {
165 // open TCP connection to program wdisplay.tcl
166 fdwdisplay = socket_async("127.0.0.1", 50999);
167 };
168 #endif
169
170
171 static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
172 {
173 REAL e;
174 REAL ef;
175 a->x[a->j] = x_;
176 a->xf[a->j] = IIR1_highpass(a->Fx, x_); // pre-whitening of x
177
178 // calculate error value
179 // (mic signal - estimated mic signal from spk signal)
180 e = d;
181 if (a->hangover > 0) {
182 e -= a->dotp(a->w, a->x + a->j);
183 }
184 ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
185
186 // optimize: iterative dotp(xf, xf)
187 a->dotp_xf_xf += (a->xf[a->j] * a->xf[a->j] - a->xf[a->j + NLMS_LEN - 1] * a->xf[a->j + NLMS_LEN - 1]);
188
189 if (stepsize > 0.0) {
190 // calculate variable step size
191 REAL mikro_ef = stepsize * ef / a->dotp_xf_xf;
192
193 // update tap weights (filter learning)
194 int i;
195 for (i = 0; i < NLMS_LEN; i += 2) {
196 // optimize: partial loop unrolling
197 a->w[i] += mikro_ef * a->xf[i + a->j];
198 a->w[i + 1] += mikro_ef * a->xf[i + a->j + 1];
199 }
200 }
201
202 if (--(a->j) < 0) {
203 // optimize: decrease number of memory copies
204 a->j = NLMS_EXT;
205 memmove(a->x + a->j + 1, a->x, (NLMS_LEN - 1) * sizeof(REAL));
206 memmove(a->xf + a->j + 1, a->xf, (NLMS_LEN - 1) * sizeof(REAL));
207 }
208
209 // Saturation
210 if (e > MAXPCM) {
211 return MAXPCM;
212 } else if (e < -MAXPCM) {
213 return -MAXPCM;
214 } else {
215 return e;
216 }
217 }
218
219
220 int AEC_doAEC(AEC *a, int d_, int x_)
221 {
222 REAL d = (REAL) d_;
223 REAL x = (REAL) x_;
224
225 // Mic Highpass Filter - to remove DC
226 d = IIR_HP_highpass(a->acMic, d);
227
228 // Mic Highpass Filter - cut-off below 300Hz
229 d = FIR_HP_300Hz_highpass(a->cutoff, d);
230
231 // Amplify, for e.g. Soundcards with -6dB max. volume
232 d *= a->gain;
233
234 // Spk Highpass Filter - to remove DC
235 x = IIR_HP_highpass(a->acSpk, x);
236
237 // Double Talk Detector
238 a->stepsize = AEC_dtd(a, d, x);
239
240 // Leaky (ageing of vector w)
241 AEC_leaky(a);
242
243 // Acoustic Echo Cancellation
244 d = AEC_nlms_pw(a, d, x, a->stepsize);
245
246 #if 0
247 if (fdwdisplay >= 0) {
248 if (++dumpcnt >= (WIDEB*RATE/10)) {
249 // wdisplay creates 10 dumps per seconds = large CPU load!
250 dumpcnt = 0;
251 write(fdwdisplay, ws, DUMP_LEN*sizeof(float));
252 // we don't check return value. This is not production quality!!!
253 memset(ws, 0, sizeof(ws));
254 } else {
255 int i;
256 for (i = 0; i < DUMP_LEN; i += 2) {
257 // optimize: partial loop unrolling
258 ws[i] += w[i];
259 ws[i + 1] += w[i + 1];
260 }
261 }
262 }
263 #endif
264
265 return (int) d;
266 }