From c0e0e7ea8c836a0c2695d658ca2abbb166101bb3 Mon Sep 17 00:00:00 2001
From: Peter Meerwald
Date: Wed, 16 Apr 2014 11:58:11 +0200
Subject: [PATCH] remap: Split remapping functions into s16 and float implementation

The sample format is known when the remap structure is initialized, no runtime decision needed.

Signed-off-by: Peter Meerwald
---
 src/pulsecore/remap.c     | 188 +++++++++++++++++++--------------------
 src/pulsecore/remap_mmx.c |  55 +++++------
 src/pulsecore/remap_sse.c |  55 +++++------
 3 files changed, 150 insertions(+), 148 deletions(-)

diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
index 136e31db..4108fdd6 100644
--- a/src/pulsecore/remap.c
+++ b/src/pulsecore/remap.c
@@ -33,130 +33,107 @@
 
 #include "remap.h"
 
-static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+static void remap_mono_to_stereo_s16ne_c(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
     unsigned i;
 
-    switch (m->format) {
-        case PA_SAMPLE_FLOAT32NE:
-        {
-            float *d, *s;
-
-            d = (float *) dst;
-            s = (float *) src;
-
-            for (i = n >> 2; i; i--) {
-                d[0] = d[1] = s[0];
-                d[2] = d[3] = s[1];
-                d[4] = d[5] = s[2];
-                d[6] = d[7] = s[3];
-                s += 4;
-                d += 8;
-            }
-            for (i = n & 3; i; i--) {
-                d[0] = d[1] = s[0];
-                s++;
-                d += 2;
-            }
-            break;
-        }
-        case PA_SAMPLE_S16NE:
-        {
-            int16_t *d, *s;
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = src[0];
+        dst[2] = dst[3] = src[1];
+        dst[4] = dst[5] = src[2];
+        dst[6] = dst[7] = src[3];
+        src += 4;
+        dst += 8;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = src[0];
+        src++;
+        dst += 2;
+    }
+}
 
-            d = (int16_t *) dst;
-            s = (int16_t *) src;
+static void remap_mono_to_stereo_float32ne_c(pa_remap_t *m, float *dst, const float *src, unsigned n) {
+    unsigned i;
 
-            for (i = n >> 2; i; i--) {
-                d[0] = d[1] = s[0];
-                d[2] = d[3] = s[1];
-                d[4] = d[5] = s[2];
-                d[6] = d[7] = s[3];
-                s += 4;
-                d += 8;
-            }
-            for (i = n & 3; i; i--) {
-                d[0] = d[1] = s[0];
-                s++;
-                d += 2;
-            }
-            break;
-        }
-        default:
-            pa_assert_not_reached();
+    for (i = n >> 2; i; i--) {
+        dst[0] = dst[1] = src[0];
+        dst[2] = dst[3] = src[1];
+        dst[4] = dst[5] = src[2];
+        dst[6] = dst[7] = src[3];
+        src += 4;
+        dst += 8;
+    }
+    for (i = n & 3; i; i--) {
+        dst[0] = dst[1] = src[0];
+        src++;
+        dst += 2;
     }
 }
 
-static void remap_channels_matrix_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+static void remap_channels_matrix_s16ne_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
     unsigned oc, ic, i;
     unsigned n_ic, n_oc;
 
     n_ic = m->i_ss.channels;
     n_oc = m->o_ss.channels;
 
-    switch (m->format) {
-        case PA_SAMPLE_FLOAT32NE:
-        {
-            float *d, *s;
+    memset(dst, 0, n * sizeof(int16_t) * n_oc);
 
-            memset(dst, 0, n * sizeof(float) * n_oc);
+    for (oc = 0; oc < n_oc; oc++) {
 
-            for (oc = 0; oc < n_oc; oc++) {
-
-                for (ic = 0; ic < n_ic; ic++) {
-                    float vol;
+        for (ic = 0; ic < n_ic; ic++) {
+            int16_t *d, *s;
+            int32_t vol;
 
-                    vol = m->map_table_f[oc][ic];
+            vol = m->map_table_i[oc][ic];
 
-                    if (vol <= 0.0)
-                        continue;
+            if (vol <= 0)
+                continue;
 
-                    d = (float *)dst + oc;
-                    s = (float *)src + ic;
+            d = (int16_t *)dst + oc;
+            s = (int16_t *)src + ic;
 
-                    if (vol >= 1.0) {
-                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
-                            *d += *s;
-                    } else {
-                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
-                            *d += *s * vol;
-                    }
-                }
+            if (vol >= 0x10000) {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += *s;
+            } else {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += (int16_t) (((int32_t)*s * vol) >> 16);
             }
-
-            break;
         }
-        case PA_SAMPLE_S16NE:
-        {
-            int16_t *d, *s;
+    }
+}
+
+static void remap_channels_matrix_float32ne_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned oc, ic, i;
+    unsigned n_ic, n_oc;
+
+    n_ic = m->i_ss.channels;
+    n_oc = m->o_ss.channels;
 
-            memset(dst, 0, n * sizeof(int16_t) * n_oc);
+    memset(dst, 0, n * sizeof(float) * n_oc);
 
-            for (oc = 0; oc < n_oc; oc++) {
+    for (oc = 0; oc < n_oc; oc++) {
 
-                for (ic = 0; ic < n_ic; ic++) {
-                    int32_t vol;
+        for (ic = 0; ic < n_ic; ic++) {
+            float *d, *s;
+            float vol;
 
-                    vol = m->map_table_i[oc][ic];
+            vol = m->map_table_f[oc][ic];
 
-                    if (vol <= 0)
-                        continue;
+            if (vol <= 0.0)
+                continue;
 
-                    d = (int16_t *)dst + oc;
-                    s = (int16_t *)src + ic;
+            d = (float *)dst + oc;
+            s = (float *)src + ic;
 
-                    if (vol >= 0x10000) {
-                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
-                            *d += *s;
-                    } else {
-                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
-                            *d += (int16_t) (((int32_t)*s * vol) >> 16);
-                    }
-                }
+            if (vol >= 1.0) {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += *s;
+            } else {
+                for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                    *d += *s * vol;
             }
-            break;
         }
-        default:
-            pa_assert_not_reached();
     }
 }
 
@@ -170,11 +147,30 @@ static void init_remap_c(pa_remap_t *m) {
     /* find some common channel remappings, fall back to full matrix operation. */
     if (n_ic == 1 && n_oc == 2 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
-        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_c;
+
         pa_log_info("Using mono to stereo remapping");
+        switch (m->format) {
+            case PA_SAMPLE_S16NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_c;
+                break;
+            case PA_SAMPLE_FLOAT32NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_c;
+                break;
+            default:
+                pa_assert_not_reached();
+        }
     } else {
-        m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_c;
         pa_log_info("Using generic matrix remapping");
+        switch (m->format) {
+            case PA_SAMPLE_S16NE:
+                m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_s16ne_c;
+                break;
+            case PA_SAMPLE_FLOAT32NE:
+                m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_float32ne_c;
+                break;
+            default:
+                pa_assert_not_reached();
+        }
     }
 }
 
diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c
index 3d49045a..ef8c9611 100644
--- a/src/pulsecore/remap_mmx.c
+++ b/src/pulsecore/remap_mmx.c
@@ -102,33 +102,26 @@
         " emms \n\t"
 
 #if defined (__i386__) || defined (__amd64__)
-static void remap_mono_to_stereo_mmx(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+static void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, void *dst, const void *src, unsigned n) {
     pa_reg_x86 temp, temp2;
 
-    switch (m->format) {
-        case PA_SAMPLE_FLOAT32NE:
-        {
-            __asm__ __volatile__ (
-                MONO_TO_STEREO(dq,3,7) /* do doubles to quads */
-                : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
-                : "r" ((pa_reg_x86)n)
-                : "cc"
-            );
-            break;
-        }
-        case PA_SAMPLE_S16NE:
-        {
-            __asm__ __volatile__ (
-                MONO_TO_STEREO(wd,4,15) /* do words to doubles */
-                : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
-                : "r" ((pa_reg_x86)n)
-                : "cc"
-            );
-            break;
-        }
-        default:
-            pa_assert_not_reached();
-    }
+    __asm__ __volatile__ (
+        MONO_TO_STEREO(wd,4,15) /* do words to doubles */
+        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
+        : "r" ((pa_reg_x86)n)
+        : "cc"
+    );
+}
+
+static void remap_mono_to_stereo_float32ne_mmx(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    pa_reg_x86 temp, temp2;
+
+    __asm__ __volatile__ (
+        MONO_TO_STEREO(dq,3,7) /* do doubles to quads */
+        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
+        : "r" ((pa_reg_x86)n)
+        : "cc"
+    );
 }
 
 /* set the function that will execute the remapping based on the matrices */
@@ -141,8 +134,18 @@ static void init_remap_mmx(pa_remap_t *m) {
     /* find some common channel remappings, fall back to full matrix operation. */
     if (n_ic == 1 && n_oc == 2 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
-        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_mmx;
+
         pa_log_info("Using MMX mono to stereo remapping");
+        switch (m->format) {
+            case PA_SAMPLE_S16NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx;
+                break;
+            case PA_SAMPLE_FLOAT32NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_mmx;
+                break;
+            default:
+                pa_assert_not_reached();
+        }
     }
 }
 #endif /* defined (__i386__) || defined (__amd64__) */
diff --git a/src/pulsecore/remap_sse.c b/src/pulsecore/remap_sse.c
index be6d3b09..3d283302 100644
--- a/src/pulsecore/remap_sse.c
+++ b/src/pulsecore/remap_sse.c
@@ -101,33 +101,26 @@
         "4: \n\t"
 
 #if defined (__i386__) || defined (__amd64__)
-static void remap_mono_to_stereo_sse2(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+static void remap_mono_to_stereo_s16ne_sse2(pa_remap_t *m, void *dst, const void *src, unsigned n) {
     pa_reg_x86 temp, temp2;
 
-    switch (m->format) {
-        case PA_SAMPLE_FLOAT32NE:
-        {
-            __asm__ __volatile__ (
-                MONO_TO_STEREO(dq, 4, 15) /* do doubles to quads */
-                : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
-                : "r" ((pa_reg_x86)n)
-                : "cc"
-            );
-            break;
-        }
-        case PA_SAMPLE_S16NE:
-        {
-            __asm__ __volatile__ (
-                MONO_TO_STEREO(wd, 5, 31) /* do words to doubles */
-                : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
-                : "r" ((pa_reg_x86)n)
-                : "cc"
-            );
-            break;
-        }
-        default:
-            pa_assert_not_reached();
-    }
+    __asm__ __volatile__ (
+        MONO_TO_STEREO(wd, 5, 31) /* do words to doubles */
+        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
+        : "r" ((pa_reg_x86)n)
+        : "cc"
+    );
+}
+
+static void remap_mono_to_stereo_float32ne_sse2(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    pa_reg_x86 temp, temp2;
+
+    __asm__ __volatile__ (
+        MONO_TO_STEREO(dq, 4, 15) /* do doubles to quads */
+        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
+        : "r" ((pa_reg_x86)n)
+        : "cc"
+    );
 }
 
 /* set the function that will execute the remapping based on the matrices */
@@ -140,8 +133,18 @@ static void init_remap_sse2(pa_remap_t *m) {
     /* find some common channel remappings, fall back to full matrix operation. */
     if (n_ic == 1 && n_oc == 2 &&
             m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
-        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_sse2;
+
         pa_log_info("Using SSE2 mono to stereo remapping");
+        switch (m->format) {
+            case PA_SAMPLE_S16NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_sse2;
+                break;
+            case PA_SAMPLE_FLOAT32NE:
+                m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_float32ne_sse2;
+                break;
+            default:
+                pa_assert_not_reached();
+        }
     }
 }
 #endif /* defined (__i386__) || defined (__amd64__) */
-- 
2.39.2