]>
code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
6 * Acoustic Echo Cancellation NLMS-pw algorithm
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
21 #include <pulse/xmalloc.h>
23 #include "adrian-aec.h"
26 #include "adrian-aec-orc-gen.h"
30 #include <xmmintrin.h>
33 /* Vector Dot Product */
34 static REAL
dotp(REAL a
[], REAL b
[])
36 REAL sum0
= 0.0f
, sum1
= 0.0f
;
39 for (j
= 0; j
< NLMS_LEN
; j
+= 2) {
40 // optimize: partial loop unrolling
42 sum1
+= a
[j
+ 1] * b
[j
+ 1];
47 static REAL
dotp_sse(REAL a
[], REAL b
[])
50 /* This is taken from speex's inner product implementation */
53 __m128 acc
= _mm_setzero_ps();
55 for (j
=0;j
<NLMS_LEN
;j
+=8)
57 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
), _mm_loadu_ps(b
+j
)));
58 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
+4), _mm_loadu_ps(b
+j
+4)));
60 acc
= _mm_add_ps(acc
, _mm_movehl_ps(acc
, acc
));
61 acc
= _mm_add_ss(acc
, _mm_shuffle_ps(acc
, acc
, 0x55));
62 _mm_store_ss(&sum
, acc
);
71 AEC
* AEC_init(int RATE
, int have_vector
)
73 AEC
*a
= pa_xnew0(AEC
, 1);
75 AEC_setambient(a
, NoiseFloor
);
76 a
->dfast
= a
->dslow
= M75dB_PCM
;
77 a
->xfast
= a
->xslow
= M80dB_PCM
;
79 a
->Fx
= IIR1_init(2000.0f
/RATE
);
80 a
->Fe
= IIR1_init(2000.0f
/RATE
);
81 a
->cutoff
= FIR_HP_300Hz_init();
82 a
->acMic
= IIR_HP_init();
83 a
->acSpk
= IIR_HP_init();
90 /* Get a 16-byte aligned location */
91 a
->w
= (REAL
*) (((uintptr_t) a
->w_arr
) - (((uintptr_t) a
->w_arr
) % 16) + 16);
94 /* We don't care about alignment, just use the array as-is */
102 void AEC_done(AEC
*a
) {
113 // Adrian soft decision DTD
114 // (Dual Average Near-End to Far-End signal Ratio DTD)
115 // This algorithm uses exponential smoothing with differnt
116 // ageing parameters to get fast and slow near-end and far-end
117 // signal averages. The ratio of NFRs term
118 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
119 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
120 // mapped to 1.0 with a limited linear function.
121 static float AEC_dtd(AEC
*a
, REAL d
, REAL x
)
123 float ratio
, stepsize
;
125 // fast near-end and far-end average
126 a
->dfast
+= ALPHAFAST
* (fabsf(d
) - a
->dfast
);
127 a
->xfast
+= ALPHAFAST
* (fabsf(x
) - a
->xfast
);
129 // slow near-end and far-end average
130 a
->dslow
+= ALPHASLOW
* (fabsf(d
) - a
->dslow
);
131 a
->xslow
+= ALPHASLOW
* (fabsf(x
) - a
->xslow
);
133 if (a
->xfast
< M70dB_PCM
) {
134 return 0.0f
; // no Spk signal
137 if (a
->dfast
< M70dB_PCM
) {
138 return 0.0f
; // no Mic signal
142 ratio
= (a
->dfast
* a
->xslow
) / (a
->dslow
* a
->xfast
);
144 // Linear interpolation with clamping at the limits
147 else if (ratio
> STEPX2
)
150 stepsize
= STEPY1
+ (STEPY2
- STEPY1
) * (ratio
- STEPX1
) / (STEPX2
- STEPX1
);
156 static void AEC_leaky(AEC
*a
)
157 // The xfast signal is used to charge the hangover timer to Thold.
158 // When hangover expires (no Spk signal for some time) the vector w
159 // is erased. This is my implementation of Leaky NLMS.
161 if (a
->xfast
>= M70dB_PCM
) {
162 // vector w is valid for hangover Thold time
165 if (a
->hangover
> 1) {
167 } else if (1 == a
->hangover
) {
169 // My Leaky NLMS is to erase vector w when hangover expires
170 memset(a
->w_arr
, 0, sizeof(a
->w_arr
));
177 void AEC::openwdisplay() {
178 // open TCP connection to program wdisplay.tcl
179 fdwdisplay
= socket_async("127.0.0.1", 50999);
184 static REAL
AEC_nlms_pw(AEC
*a
, REAL d
, REAL x_
, float stepsize
)
189 a
->xf
[a
->j
] = IIR1_highpass(a
->Fx
, x_
); // pre-whitening of x
191 // calculate error value
192 // (mic signal - estimated mic signal from spk signal)
194 if (a
->hangover
> 0) {
195 e
-= a
->dotp(a
->w
, a
->x
+ a
->j
);
197 ef
= IIR1_highpass(a
->Fe
, e
); // pre-whitening of e
199 // optimize: iterative dotp(xf, xf)
200 a
->dotp_xf_xf
+= (a
->xf
[a
->j
] * a
->xf
[a
->j
] - a
->xf
[a
->j
+ NLMS_LEN
- 1] * a
->xf
[a
->j
+ NLMS_LEN
- 1]);
202 if (stepsize
> 0.0f
) {
203 // calculate variable step size
204 REAL mikro_ef
= stepsize
* ef
/ a
->dotp_xf_xf
;
207 // update tap weights (filter learning)
209 for (i
= 0; i
< NLMS_LEN
; i
+= 2) {
210 // optimize: partial loop unrolling
211 a
->w
[i
] += mikro_ef
* a
->xf
[i
+ a
->j
];
212 a
->w
[i
+ 1] += mikro_ef
* a
->xf
[i
+ a
->j
+ 1];
215 update_tap_weights(a
->w
, &a
->xf
[a
->j
], mikro_ef
, NLMS_LEN
);
220 // optimize: decrease number of memory copies
222 memmove(a
->x
+ a
->j
+ 1, a
->x
, (NLMS_LEN
- 1) * sizeof(REAL
));
223 memmove(a
->xf
+ a
->j
+ 1, a
->xf
, (NLMS_LEN
- 1) * sizeof(REAL
));
229 } else if (e
< -MAXPCM
) {
237 int AEC_doAEC(AEC
*a
, int d_
, int x_
)
242 // Mic Highpass Filter - to remove DC
243 d
= IIR_HP_highpass(a
->acMic
, d
);
245 // Mic Highpass Filter - cut-off below 300Hz
246 d
= FIR_HP_300Hz_highpass(a
->cutoff
, d
);
248 // Amplify, for e.g. Soundcards with -6dB max. volume
251 // Spk Highpass Filter - to remove DC
252 x
= IIR_HP_highpass(a
->acSpk
, x
);
254 // Double Talk Detector
255 a
->stepsize
= AEC_dtd(a
, d
, x
);
257 // Leaky (ageing of vector w)
260 // Acoustic Echo Cancellation
261 d
= AEC_nlms_pw(a
, d
, x
, a
->stepsize
);
264 if (fdwdisplay
>= 0) {
265 if (++dumpcnt
>= (WIDEB
*RATE
/10)) {
266 // wdisplay creates 10 dumps per seconds = large CPU load!
268 write(fdwdisplay
, ws
, DUMP_LEN
*sizeof(float));
269 // we don't check return value. This is not production quality!!!
270 memset(ws
, 0, sizeof(ws
));
273 for (i
= 0; i
< DUMP_LEN
; i
+= 2) {
274 // optimize: partial loop unrolling
276 ws
[i
+ 1] += w
[i
+ 1];