]>
code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
6 * Acoustic Echo Cancellation NLMS-pw algorithm
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
21 #include <pulse/xmalloc.h>
23 #include "adrian-aec.h"
26 #include "adrian-aec-orc-gen.h"
30 #include <xmmintrin.h>
33 /* Vector Dot Product */
34 static REAL
dotp(REAL a
[], REAL b
[])
36 REAL sum0
= 0.0, sum1
= 0.0;
39 for (j
= 0; j
< NLMS_LEN
; j
+= 2) {
40 // optimize: partial loop unrolling
42 sum1
+= a
[j
+ 1] * b
[j
+ 1];
47 static REAL
dotp_sse(REAL a
[], REAL b
[])
50 /* This is taken from speex's inner product implementation */
53 __m128 acc
= _mm_setzero_ps();
55 for (j
=0;j
<NLMS_LEN
;j
+=8)
57 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
), _mm_loadu_ps(b
+j
)));
58 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
+4), _mm_loadu_ps(b
+j
+4)));
60 acc
= _mm_add_ps(acc
, _mm_movehl_ps(acc
, acc
));
61 acc
= _mm_add_ss(acc
, _mm_shuffle_ps(acc
, acc
, 0x55));
62 _mm_store_ss(&sum
, acc
);
71 AEC
* AEC_init(int RATE
, int have_vector
)
73 AEC
*a
= pa_xnew(AEC
, 1);
75 memset(a
->x
, 0, sizeof(a
->x
));
76 memset(a
->xf
, 0, sizeof(a
->xf
));
77 memset(a
->w_arr
, 0, sizeof(a
->w_arr
));
80 AEC_setambient(a
, NoiseFloor
);
81 a
->dfast
= a
->dslow
= M75dB_PCM
;
82 a
->xfast
= a
->xslow
= M80dB_PCM
;
84 a
->Fx
= IIR1_init(2000.0f
/RATE
);
85 a
->Fe
= IIR1_init(2000.0f
/RATE
);
86 a
->cutoff
= FIR_HP_300Hz_init();
87 a
->acMic
= IIR_HP_init();
88 a
->acSpk
= IIR_HP_init();
94 memset(a
->ws
, 0, sizeof(a
->ws
));
97 /* Get a 16-byte aligned location */
98 a
->w
= (REAL
*) (((uintptr_t) a
->w_arr
) + (((uintptr_t) a
->w_arr
) % 16));
101 /* We don't care about alignment, just use the array as-is */
109 // Adrian soft decision DTD
110 // (Dual Average Near-End to Far-End signal Ratio DTD)
111 // This algorithm uses exponential smoothing with differnt
112 // ageing parameters to get fast and slow near-end and far-end
113 // signal averages. The ratio of NFRs term
114 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
115 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
116 // mapped to 1.0 with a limited linear function.
117 static float AEC_dtd(AEC
*a
, REAL d
, REAL x
)
119 float ratio
, stepsize
;
121 // fast near-end and far-end average
122 a
->dfast
+= ALPHAFAST
* (fabsf(d
) - a
->dfast
);
123 a
->xfast
+= ALPHAFAST
* (fabsf(x
) - a
->xfast
);
125 // slow near-end and far-end average
126 a
->dslow
+= ALPHASLOW
* (fabsf(d
) - a
->dslow
);
127 a
->xslow
+= ALPHASLOW
* (fabsf(x
) - a
->xslow
);
129 if (a
->xfast
< M70dB_PCM
) {
130 return 0.0; // no Spk signal
133 if (a
->dfast
< M70dB_PCM
) {
134 return 0.0; // no Mic signal
138 ratio
= (a
->dfast
* a
->xslow
) / (a
->dslow
* a
->xfast
);
140 // Linear interpolation with clamping at the limits
143 else if (ratio
> STEPX2
)
146 stepsize
= STEPY1
+ (STEPY2
- STEPY1
) * (ratio
- STEPX1
) / (STEPX2
- STEPX1
);
152 static void AEC_leaky(AEC
*a
)
153 // The xfast signal is used to charge the hangover timer to Thold.
154 // When hangover expires (no Spk signal for some time) the vector w
155 // is erased. This is my implementation of Leaky NLMS.
157 if (a
->xfast
>= M70dB_PCM
) {
158 // vector w is valid for hangover Thold time
161 if (a
->hangover
> 1) {
163 } else if (1 == a
->hangover
) {
165 // My Leaky NLMS is to erase vector w when hangover expires
166 memset(a
->w
, 0, sizeof(a
->w
));
173 void AEC::openwdisplay() {
174 // open TCP connection to program wdisplay.tcl
175 fdwdisplay
= socket_async("127.0.0.1", 50999);
180 static REAL
AEC_nlms_pw(AEC
*a
, REAL d
, REAL x_
, float stepsize
)
185 a
->xf
[a
->j
] = IIR1_highpass(a
->Fx
, x_
); // pre-whitening of x
187 // calculate error value
188 // (mic signal - estimated mic signal from spk signal)
190 if (a
->hangover
> 0) {
191 e
-= a
->dotp(a
->w
, a
->x
+ a
->j
);
193 ef
= IIR1_highpass(a
->Fe
, e
); // pre-whitening of e
195 // optimize: iterative dotp(xf, xf)
196 a
->dotp_xf_xf
+= (a
->xf
[a
->j
] * a
->xf
[a
->j
] - a
->xf
[a
->j
+ NLMS_LEN
- 1] * a
->xf
[a
->j
+ NLMS_LEN
- 1]);
198 if (stepsize
> 0.0) {
199 // calculate variable step size
200 REAL mikro_ef
= stepsize
* ef
/ a
->dotp_xf_xf
;
203 // update tap weights (filter learning)
205 for (i
= 0; i
< NLMS_LEN
; i
+= 2) {
206 // optimize: partial loop unrolling
207 a
->w
[i
] += mikro_ef
* a
->xf
[i
+ a
->j
];
208 a
->w
[i
+ 1] += mikro_ef
* a
->xf
[i
+ a
->j
+ 1];
211 update_tap_weights(a
->w
, &a
->xf
[a
->j
], mikro_ef
, NLMS_LEN
);
216 // optimize: decrease number of memory copies
218 memmove(a
->x
+ a
->j
+ 1, a
->x
, (NLMS_LEN
- 1) * sizeof(REAL
));
219 memmove(a
->xf
+ a
->j
+ 1, a
->xf
, (NLMS_LEN
- 1) * sizeof(REAL
));
225 } else if (e
< -MAXPCM
) {
233 int AEC_doAEC(AEC
*a
, int d_
, int x_
)
238 // Mic Highpass Filter - to remove DC
239 d
= IIR_HP_highpass(a
->acMic
, d
);
241 // Mic Highpass Filter - cut-off below 300Hz
242 d
= FIR_HP_300Hz_highpass(a
->cutoff
, d
);
244 // Amplify, for e.g. Soundcards with -6dB max. volume
247 // Spk Highpass Filter - to remove DC
248 x
= IIR_HP_highpass(a
->acSpk
, x
);
250 // Double Talk Detector
251 a
->stepsize
= AEC_dtd(a
, d
, x
);
253 // Leaky (ageing of vector w)
256 // Acoustic Echo Cancellation
257 d
= AEC_nlms_pw(a
, d
, x
, a
->stepsize
);
260 if (fdwdisplay
>= 0) {
261 if (++dumpcnt
>= (WIDEB
*RATE
/10)) {
262 // wdisplay creates 10 dumps per seconds = large CPU load!
264 write(fdwdisplay
, ws
, DUMP_LEN
*sizeof(float));
265 // we don't check return value. This is not production quality!!!
266 memset(ws
, 0, sizeof(ws
));
269 for (i
= 0; i
< DUMP_LEN
; i
+= 2) {
270 // optimize: partial loop unrolling
272 ws
[i
+ 1] += w
[i
+ 1];