]>
code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
39c2d638660a830e7daf0d1ad7fed14cb837d29b
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
6 * Acoustic Echo Cancellation NLMS-pw algorithm
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
16 #include <pulse/xmalloc.h>
18 #include "adrian-aec.h"
21 #include <xmmintrin.h>
24 /* Vector Dot Product */
25 static REAL
dotp(REAL a
[], REAL b
[])
27 REAL sum0
= 0.0, sum1
= 0.0;
30 for (j
= 0; j
< NLMS_LEN
; j
+= 2) {
31 // optimize: partial loop unrolling
33 sum1
+= a
[j
+ 1] * b
[j
+ 1];
38 static REAL
dotp_sse(REAL a
[], REAL b
[]) __attribute__((noinline
));
39 static REAL
dotp_sse(REAL a
[], REAL b
[])
42 /* This is taken from speex's inner product implementation */
45 __m128 acc
= _mm_setzero_ps();
47 for (j
=0;j
<NLMS_LEN
;j
+=8)
49 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
), _mm_loadu_ps(b
+j
)));
50 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
+4), _mm_loadu_ps(b
+j
+4)));
52 acc
= _mm_add_ps(acc
, _mm_movehl_ps(acc
, acc
));
53 acc
= _mm_add_ss(acc
, _mm_shuffle_ps(acc
, acc
, 0x55));
54 _mm_store_ss(&sum
, acc
);
63 AEC
* AEC_init(int RATE
, int have_vector
)
65 AEC
*a
= pa_xnew(AEC
, 1);
67 memset(a
->x
, 0, sizeof(a
->x
));
68 memset(a
->xf
, 0, sizeof(a
->xf
));
69 memset(a
->w
, 0, sizeof(a
->w
));
72 AEC_setambient(a
, NoiseFloor
);
73 a
->dfast
= a
->dslow
= M75dB_PCM
;
74 a
->xfast
= a
->xslow
= M80dB_PCM
;
76 a
->Fx
= IIR1_init(2000.0f
/RATE
);
77 a
->Fe
= IIR1_init(2000.0f
/RATE
);
78 a
->cutoff
= FIR_HP_300Hz_init();
79 a
->acMic
= IIR_HP_init();
80 a
->acSpk
= IIR_HP_init();
86 memset(a
->ws
, 0, sizeof(a
->ws
));
96 // Adrian soft decision DTD
97 // (Dual Average Near-End to Far-End signal Ratio DTD)
98 // This algorithm uses exponential smoothing with differnt
99 // ageing parameters to get fast and slow near-end and far-end
100 // signal averages. The ratio of NFRs term
101 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
102 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
103 // mapped to 1.0 with a limited linear function.
104 static float AEC_dtd(AEC
*a
, REAL d
, REAL x
)
109 // fast near-end and far-end average
110 a
->dfast
+= ALPHAFAST
* (fabsf(d
) - a
->dfast
);
111 a
->xfast
+= ALPHAFAST
* (fabsf(x
) - a
->xfast
);
113 // slow near-end and far-end average
114 a
->dslow
+= ALPHASLOW
* (fabsf(d
) - a
->dslow
);
115 a
->xslow
+= ALPHASLOW
* (fabsf(x
) - a
->xslow
);
117 if (a
->xfast
< M70dB_PCM
) {
118 return 0.0; // no Spk signal
121 if (a
->dfast
< M70dB_PCM
) {
122 return 0.0; // no Mic signal
126 ratio
= (a
->dfast
* a
->xslow
) / (a
->dslow
* a
->xfast
);
128 // begrenzte lineare Kennlinie
129 M
= (STEPY2
- STEPY1
) / (STEPX2
- STEPX1
);
130 if (ratio
< STEPX1
) {
132 } else if (ratio
> STEPX2
) {
135 // Punktrichtungsform einer Geraden
136 stepsize
= M
* (ratio
- STEPX1
) + STEPY1
;
143 static void AEC_leaky(AEC
*a
)
144 // The xfast signal is used to charge the hangover timer to Thold.
145 // When hangover expires (no Spk signal for some time) the vector w
146 // is erased. This is my implementation of Leaky NLMS.
148 if (a
->xfast
>= M70dB_PCM
) {
149 // vector w is valid for hangover Thold time
152 if (a
->hangover
> 1) {
154 } else if (1 == a
->hangover
) {
156 // My Leaky NLMS is to erase vector w when hangover expires
157 memset(a
->w
, 0, sizeof(a
->w
));
164 void AEC::openwdisplay() {
165 // open TCP connection to program wdisplay.tcl
166 fdwdisplay
= socket_async("127.0.0.1", 50999);
171 static REAL
AEC_nlms_pw(AEC
*a
, REAL d
, REAL x_
, float stepsize
)
176 a
->xf
[a
->j
] = IIR1_highpass(a
->Fx
, x_
); // pre-whitening of x
178 // calculate error value
179 // (mic signal - estimated mic signal from spk signal)
181 if (a
->hangover
> 0) {
182 e
-= a
->dotp(a
->w
, a
->x
+ a
->j
);
184 ef
= IIR1_highpass(a
->Fe
, e
); // pre-whitening of e
186 // optimize: iterative dotp(xf, xf)
187 a
->dotp_xf_xf
+= (a
->xf
[a
->j
] * a
->xf
[a
->j
] - a
->xf
[a
->j
+ NLMS_LEN
- 1] * a
->xf
[a
->j
+ NLMS_LEN
- 1]);
189 if (stepsize
> 0.0) {
190 // calculate variable step size
191 REAL mikro_ef
= stepsize
* ef
/ a
->dotp_xf_xf
;
193 // update tap weights (filter learning)
195 for (i
= 0; i
< NLMS_LEN
; i
+= 2) {
196 // optimize: partial loop unrolling
197 a
->w
[i
] += mikro_ef
* a
->xf
[i
+ a
->j
];
198 a
->w
[i
+ 1] += mikro_ef
* a
->xf
[i
+ a
->j
+ 1];
203 // optimize: decrease number of memory copies
205 memmove(a
->x
+ a
->j
+ 1, a
->x
, (NLMS_LEN
- 1) * sizeof(REAL
));
206 memmove(a
->xf
+ a
->j
+ 1, a
->xf
, (NLMS_LEN
- 1) * sizeof(REAL
));
212 } else if (e
< -MAXPCM
) {
220 int AEC_doAEC(AEC
*a
, int d_
, int x_
)
225 // Mic Highpass Filter - to remove DC
226 d
= IIR_HP_highpass(a
->acMic
, d
);
228 // Mic Highpass Filter - cut-off below 300Hz
229 d
= FIR_HP_300Hz_highpass(a
->cutoff
, d
);
231 // Amplify, for e.g. Soundcards with -6dB max. volume
234 // Spk Highpass Filter - to remove DC
235 x
= IIR_HP_highpass(a
->acSpk
, x
);
237 // Double Talk Detector
238 a
->stepsize
= AEC_dtd(a
, d
, x
);
240 // Leaky (ageing of vector w)
243 // Acoustic Echo Cancellation
244 d
= AEC_nlms_pw(a
, d
, x
, a
->stepsize
);
247 if (fdwdisplay
>= 0) {
248 if (++dumpcnt
>= (WIDEB
*RATE
/10)) {
249 // wdisplay creates 10 dumps per seconds = large CPU load!
251 write(fdwdisplay
, ws
, DUMP_LEN
*sizeof(float));
252 // we don't check return value. This is not production quality!!!
253 memset(ws
, 0, sizeof(ws
));
256 for (i
= 0; i
< DUMP_LEN
; i
+= 2) {
257 // optimize: partial loop unrolling
259 ws
[i
+ 1] += w
[i
+ 1];