]>
code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
6 * Acoustic Echo Cancellation NLMS-pw algorithm
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
16 #include <pulse/xmalloc.h>
18 #include "adrian-aec.h"
21 #include "adrian-aec-orc-gen.h"
25 #include <xmmintrin.h>
28 /* Vector Dot Product */
29 static REAL
dotp(REAL a
[], REAL b
[])
31 REAL sum0
= 0.0, sum1
= 0.0;
34 for (j
= 0; j
< NLMS_LEN
; j
+= 2) {
35 // optimize: partial loop unrolling
37 sum1
+= a
[j
+ 1] * b
[j
+ 1];
42 static REAL
dotp_sse(REAL a
[], REAL b
[]) __attribute__((noinline
));
43 static REAL
dotp_sse(REAL a
[], REAL b
[])
46 /* This is taken from speex's inner product implementation */
49 __m128 acc
= _mm_setzero_ps();
51 for (j
=0;j
<NLMS_LEN
;j
+=8)
53 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
), _mm_loadu_ps(b
+j
)));
54 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
+4), _mm_loadu_ps(b
+j
+4)));
56 acc
= _mm_add_ps(acc
, _mm_movehl_ps(acc
, acc
));
57 acc
= _mm_add_ss(acc
, _mm_shuffle_ps(acc
, acc
, 0x55));
58 _mm_store_ss(&sum
, acc
);
67 AEC
* AEC_init(int RATE
, int have_vector
)
69 AEC
*a
= pa_xnew(AEC
, 1);
71 memset(a
->x
, 0, sizeof(a
->x
));
72 memset(a
->xf
, 0, sizeof(a
->xf
));
73 memset(a
->w
, 0, sizeof(a
->w
));
76 AEC_setambient(a
, NoiseFloor
);
77 a
->dfast
= a
->dslow
= M75dB_PCM
;
78 a
->xfast
= a
->xslow
= M80dB_PCM
;
80 a
->Fx
= IIR1_init(2000.0f
/RATE
);
81 a
->Fe
= IIR1_init(2000.0f
/RATE
);
82 a
->cutoff
= FIR_HP_300Hz_init();
83 a
->acMic
= IIR_HP_init();
84 a
->acSpk
= IIR_HP_init();
90 memset(a
->ws
, 0, sizeof(a
->ws
));
100 // Adrian soft decision DTD
101 // (Dual Average Near-End to Far-End signal Ratio DTD)
102 // This algorithm uses exponential smoothing with differnt
103 // ageing parameters to get fast and slow near-end and far-end
104 // signal averages. The ratio of NFRs term
105 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
106 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
107 // mapped to 1.0 with a limited linear function.
108 static float AEC_dtd(AEC
*a
, REAL d
, REAL x
)
110 float ratio
, stepsize
;
112 // fast near-end and far-end average
113 a
->dfast
+= ALPHAFAST
* (fabsf(d
) - a
->dfast
);
114 a
->xfast
+= ALPHAFAST
* (fabsf(x
) - a
->xfast
);
116 // slow near-end and far-end average
117 a
->dslow
+= ALPHASLOW
* (fabsf(d
) - a
->dslow
);
118 a
->xslow
+= ALPHASLOW
* (fabsf(x
) - a
->xslow
);
120 if (a
->xfast
< M70dB_PCM
) {
121 return 0.0; // no Spk signal
124 if (a
->dfast
< M70dB_PCM
) {
125 return 0.0; // no Mic signal
129 ratio
= (a
->dfast
* a
->xslow
) / (a
->dslow
* a
->xfast
);
131 // Linear interpolation with clamping at the limits
134 else if (ratio
> STEPX2
)
137 stepsize
= STEPY1
+ (STEPY2
- STEPY1
) * (ratio
- STEPX1
) / (STEPX2
- STEPX1
);
143 static void AEC_leaky(AEC
*a
)
144 // The xfast signal is used to charge the hangover timer to Thold.
145 // When hangover expires (no Spk signal for some time) the vector w
146 // is erased. This is my implementation of Leaky NLMS.
148 if (a
->xfast
>= M70dB_PCM
) {
149 // vector w is valid for hangover Thold time
152 if (a
->hangover
> 1) {
154 } else if (1 == a
->hangover
) {
156 // My Leaky NLMS is to erase vector w when hangover expires
157 memset(a
->w
, 0, sizeof(a
->w
));
164 void AEC::openwdisplay() {
165 // open TCP connection to program wdisplay.tcl
166 fdwdisplay
= socket_async("127.0.0.1", 50999);
171 static REAL
AEC_nlms_pw(AEC
*a
, REAL d
, REAL x_
, float stepsize
)
176 a
->xf
[a
->j
] = IIR1_highpass(a
->Fx
, x_
); // pre-whitening of x
178 // calculate error value
179 // (mic signal - estimated mic signal from spk signal)
181 if (a
->hangover
> 0) {
182 e
-= a
->dotp(a
->w
, a
->x
+ a
->j
);
184 ef
= IIR1_highpass(a
->Fe
, e
); // pre-whitening of e
186 // optimize: iterative dotp(xf, xf)
187 a
->dotp_xf_xf
+= (a
->xf
[a
->j
] * a
->xf
[a
->j
] - a
->xf
[a
->j
+ NLMS_LEN
- 1] * a
->xf
[a
->j
+ NLMS_LEN
- 1]);
189 if (stepsize
> 0.0) {
190 // calculate variable step size
191 REAL mikro_ef
= stepsize
* ef
/ a
->dotp_xf_xf
;
194 // update tap weights (filter learning)
196 for (i
= 0; i
< NLMS_LEN
; i
+= 2) {
197 // optimize: partial loop unrolling
198 a
->w
[i
] += mikro_ef
* a
->xf
[i
+ a
->j
];
199 a
->w
[i
+ 1] += mikro_ef
* a
->xf
[i
+ a
->j
+ 1];
202 update_tap_weights(a
->w
, &a
->xf
[a
->j
], mikro_ef
, NLMS_LEN
);
207 // optimize: decrease number of memory copies
209 memmove(a
->x
+ a
->j
+ 1, a
->x
, (NLMS_LEN
- 1) * sizeof(REAL
));
210 memmove(a
->xf
+ a
->j
+ 1, a
->xf
, (NLMS_LEN
- 1) * sizeof(REAL
));
216 } else if (e
< -MAXPCM
) {
224 int AEC_doAEC(AEC
*a
, int d_
, int x_
)
229 // Mic Highpass Filter - to remove DC
230 d
= IIR_HP_highpass(a
->acMic
, d
);
232 // Mic Highpass Filter - cut-off below 300Hz
233 d
= FIR_HP_300Hz_highpass(a
->cutoff
, d
);
235 // Amplify, for e.g. Soundcards with -6dB max. volume
238 // Spk Highpass Filter - to remove DC
239 x
= IIR_HP_highpass(a
->acSpk
, x
);
241 // Double Talk Detector
242 a
->stepsize
= AEC_dtd(a
, d
, x
);
244 // Leaky (ageing of vector w)
247 // Acoustic Echo Cancellation
248 d
= AEC_nlms_pw(a
, d
, x
, a
->stepsize
);
251 if (fdwdisplay
>= 0) {
252 if (++dumpcnt
>= (WIDEB
*RATE
/10)) {
253 // wdisplay creates 10 dumps per seconds = large CPU load!
255 write(fdwdisplay
, ws
, DUMP_LEN
*sizeof(float));
256 // we don't check return value. This is not production quality!!!
257 memset(ws
, 0, sizeof(ws
));
260 for (i
= 0; i
< DUMP_LEN
; i
+= 2) {
261 // optimize: partial loop unrolling
263 ws
[i
+ 1] += w
[i
+ 1];