]>
code.delx.au - pulseaudio/blob - src/modules/echo-cancel/adrian-aec.c
3 * Copyright (C) DFS Deutsche Flugsicherung (2004, 2005).
6 * Acoustic Echo Cancellation NLMS-pw algorithm
8 * Version 0.3 filter created with www.dsptutor.freeuk.com
9 * Version 0.3.1 Allow change of stability parameter delta
10 * Version 0.4 Leaky Normalized LMS - pre whitening algorithm
16 #include <pulse/xmalloc.h>
18 #include "adrian-aec.h"
21 #include "adrian-aec-orc-gen.h"
25 #include <xmmintrin.h>
28 /* Vector Dot Product */
29 static REAL
dotp(REAL a
[], REAL b
[])
31 REAL sum0
= 0.0, sum1
= 0.0;
34 for (j
= 0; j
< NLMS_LEN
; j
+= 2) {
35 // optimize: partial loop unrolling
37 sum1
+= a
[j
+ 1] * b
[j
+ 1];
42 static REAL
dotp_sse(REAL a
[], REAL b
[]) __attribute__((noinline
));
43 static REAL
dotp_sse(REAL a
[], REAL b
[])
46 /* This is taken from speex's inner product implementation */
49 __m128 acc
= _mm_setzero_ps();
51 for (j
=0;j
<NLMS_LEN
;j
+=8)
53 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
), _mm_loadu_ps(b
+j
)));
54 acc
= _mm_add_ps(acc
, _mm_mul_ps(_mm_load_ps(a
+j
+4), _mm_loadu_ps(b
+j
+4)));
56 acc
= _mm_add_ps(acc
, _mm_movehl_ps(acc
, acc
));
57 acc
= _mm_add_ss(acc
, _mm_shuffle_ps(acc
, acc
, 0x55));
58 _mm_store_ss(&sum
, acc
);
67 AEC
* AEC_init(int RATE
, int have_vector
)
69 AEC
*a
= pa_xnew(AEC
, 1);
71 memset(a
->x
, 0, sizeof(a
->x
));
72 memset(a
->xf
, 0, sizeof(a
->xf
));
73 memset(a
->w
, 0, sizeof(a
->w
));
76 AEC_setambient(a
, NoiseFloor
);
77 a
->dfast
= a
->dslow
= M75dB_PCM
;
78 a
->xfast
= a
->xslow
= M80dB_PCM
;
80 a
->Fx
= IIR1_init(2000.0f
/RATE
);
81 a
->Fe
= IIR1_init(2000.0f
/RATE
);
82 a
->cutoff
= FIR_HP_300Hz_init();
83 a
->acMic
= IIR_HP_init();
84 a
->acSpk
= IIR_HP_init();
90 memset(a
->ws
, 0, sizeof(a
->ws
));
100 // Adrian soft decision DTD
101 // (Dual Average Near-End to Far-End signal Ratio DTD)
102 // This algorithm uses exponential smoothing with differnt
103 // ageing parameters to get fast and slow near-end and far-end
104 // signal averages. The ratio of NFRs term
105 // (dfast / xfast) / (dslow / xslow) is used to compute the stepsize
106 // A ratio value of 2.5 is mapped to stepsize 0, a ratio of 0 is
107 // mapped to 1.0 with a limited linear function.
108 static float AEC_dtd(AEC
*a
, REAL d
, REAL x
)
113 // fast near-end and far-end average
114 a
->dfast
+= ALPHAFAST
* (fabsf(d
) - a
->dfast
);
115 a
->xfast
+= ALPHAFAST
* (fabsf(x
) - a
->xfast
);
117 // slow near-end and far-end average
118 a
->dslow
+= ALPHASLOW
* (fabsf(d
) - a
->dslow
);
119 a
->xslow
+= ALPHASLOW
* (fabsf(x
) - a
->xslow
);
121 if (a
->xfast
< M70dB_PCM
) {
122 return 0.0; // no Spk signal
125 if (a
->dfast
< M70dB_PCM
) {
126 return 0.0; // no Mic signal
130 ratio
= (a
->dfast
* a
->xslow
) / (a
->dslow
* a
->xfast
);
132 // begrenzte lineare Kennlinie
133 M
= (STEPY2
- STEPY1
) / (STEPX2
- STEPX1
);
134 if (ratio
< STEPX1
) {
136 } else if (ratio
> STEPX2
) {
139 // Punktrichtungsform einer Geraden
140 stepsize
= M
* (ratio
- STEPX1
) + STEPY1
;
147 static void AEC_leaky(AEC
*a
)
148 // The xfast signal is used to charge the hangover timer to Thold.
149 // When hangover expires (no Spk signal for some time) the vector w
150 // is erased. This is my implementation of Leaky NLMS.
152 if (a
->xfast
>= M70dB_PCM
) {
153 // vector w is valid for hangover Thold time
156 if (a
->hangover
> 1) {
158 } else if (1 == a
->hangover
) {
160 // My Leaky NLMS is to erase vector w when hangover expires
161 memset(a
->w
, 0, sizeof(a
->w
));
168 void AEC::openwdisplay() {
169 // open TCP connection to program wdisplay.tcl
170 fdwdisplay
= socket_async("127.0.0.1", 50999);
175 static REAL
AEC_nlms_pw(AEC
*a
, REAL d
, REAL x_
, float stepsize
)
180 a
->xf
[a
->j
] = IIR1_highpass(a
->Fx
, x_
); // pre-whitening of x
182 // calculate error value
183 // (mic signal - estimated mic signal from spk signal)
185 if (a
->hangover
> 0) {
186 e
-= a
->dotp(a
->w
, a
->x
+ a
->j
);
188 ef
= IIR1_highpass(a
->Fe
, e
); // pre-whitening of e
190 // optimize: iterative dotp(xf, xf)
191 a
->dotp_xf_xf
+= (a
->xf
[a
->j
] * a
->xf
[a
->j
] - a
->xf
[a
->j
+ NLMS_LEN
- 1] * a
->xf
[a
->j
+ NLMS_LEN
- 1]);
193 if (stepsize
> 0.0) {
194 // calculate variable step size
195 REAL mikro_ef
= stepsize
* ef
/ a
->dotp_xf_xf
;
198 // update tap weights (filter learning)
200 for (i
= 0; i
< NLMS_LEN
; i
+= 2) {
201 // optimize: partial loop unrolling
202 a
->w
[i
] += mikro_ef
* a
->xf
[i
+ a
->j
];
203 a
->w
[i
+ 1] += mikro_ef
* a
->xf
[i
+ a
->j
+ 1];
206 update_tap_weights(a
->w
, &a
->xf
[a
->j
], mikro_ef
, NLMS_LEN
);
211 // optimize: decrease number of memory copies
213 memmove(a
->x
+ a
->j
+ 1, a
->x
, (NLMS_LEN
- 1) * sizeof(REAL
));
214 memmove(a
->xf
+ a
->j
+ 1, a
->xf
, (NLMS_LEN
- 1) * sizeof(REAL
));
220 } else if (e
< -MAXPCM
) {
228 int AEC_doAEC(AEC
*a
, int d_
, int x_
)
233 // Mic Highpass Filter - to remove DC
234 d
= IIR_HP_highpass(a
->acMic
, d
);
236 // Mic Highpass Filter - cut-off below 300Hz
237 d
= FIR_HP_300Hz_highpass(a
->cutoff
, d
);
239 // Amplify, for e.g. Soundcards with -6dB max. volume
242 // Spk Highpass Filter - to remove DC
243 x
= IIR_HP_highpass(a
->acSpk
, x
);
245 // Double Talk Detector
246 a
->stepsize
= AEC_dtd(a
, d
, x
);
248 // Leaky (ageing of vector w)
251 // Acoustic Echo Cancellation
252 d
= AEC_nlms_pw(a
, d
, x
, a
->stepsize
);
255 if (fdwdisplay
>= 0) {
256 if (++dumpcnt
>= (WIDEB
*RATE
/10)) {
257 // wdisplay creates 10 dumps per seconds = large CPU load!
259 write(fdwdisplay
, ws
, DUMP_LEN
*sizeof(float));
260 // we don't check return value. This is not production quality!!!
261 memset(ws
, 0, sizeof(ws
));
264 for (i
= 0; i
< DUMP_LEN
; i
+= 2) {
265 // optimize: partial loop unrolling
267 ws
[i
+ 1] += w
[i
+ 1];