X-Git-Url: https://code.delx.au/pulseaudio/blobdiff_plain/118466638aa651eac0d20513d348ddcc446bb5e6..33a88fbfdee773b1473cb5339540d79809363bdc:/src/pulsecore/svolume_mmx.c diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index a011789c..421156ea 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -31,11 +31,11 @@ #include #include #include +#include #include "cpu-x86.h" #include "sample-util.h" -#include "endianmacros.h" #if defined (__i386__) || defined (__amd64__) /* in s: 2 int16_t samples @@ -95,14 +95,13 @@ " por %%mm4, "#s1" \n\t" /* .. | l h | */ \ " por %%mm5, "#s2" \n\t" -static void -pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ +static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (4U, channels); + /* Channels must be at least 4, and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + channels = channels == 3 ? 6 : PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -155,20 +154,19 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi "6: \n\t" " emms \n\t" - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) : "rm" ((pa_reg_x86)channels) : "cc" ); } -static void -pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ +static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (4U, channels); + /* Channels must be at least 4, and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + channels = channels == 3 ? 6 : PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -231,7 +229,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi "6: \n\t" " emms \n\t" - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) : "rm" ((pa_reg_x86)channels) : "cc" ); @@ -241,11 +239,11 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi #ifdef RUN_TEST #define CHANNELS 2 -#define SAMPLES 1021 +#define SAMPLES 1022 #define TIMES 1000 #define PADDING 16 -static void run_test (void) { +static void run_test(void) { int16_t samples[SAMPLES]; int16_t samples_ref[SAMPLES]; int16_t samples_orig[SAMPLES]; @@ -254,43 +252,43 @@ static void run_test (void) { pa_do_volume_func_t func; pa_usec_t start, stop; - func = pa_get_volume_func (PA_SAMPLE_S16NE); + func = pa_get_volume_func(PA_SAMPLE_S16NE); - printf ("checking MMX %zd\n", sizeof (samples)); + printf("checking MMX %zd\n", sizeof(samples)); - pa_random (samples, sizeof (samples)); + pa_random(samples, sizeof(samples)); /* for (i = 0; i < SAMPLES; i++) samples[i] = -1; */ - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof (samples)); + memcpy(samples_ref, samples, sizeof(samples)); + memcpy(samples_orig, samples, sizeof(samples)); for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; + volumes[i] = PA_CLAMP_VOLUME(rand() >> 1); /* volumes[i] = 0x0000ffff; */ for (padding = 0; padding < PADDING; padding++, i++) volumes[i] = volumes[padding]; - func (samples_ref, volumes, CHANNELS, sizeof (samples)); - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + func(samples_ref, volumes, CHANNELS, sizeof(samples)); + pa_volume_s16ne_mmx(samples, volumes, CHANNELS, sizeof(samples)); for (i = 0; i < SAMPLES; i++) { if (samples[i] != samples_ref[i]) { - printf ("%d: %04x != %04x (%04x * %08x)\n", i, samples[i], samples_ref[i], + printf("%d: %04x != %04x (%04x * %08x)\n", i, samples[i], samples_ref[i], samples_orig[i], volumes[i % CHANNELS]); } } start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { - memcpy (samples, samples_orig, sizeof (samples)); - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + memcpy(samples, samples_orig, sizeof(samples)); + pa_volume_s16ne_mmx(samples, volumes, CHANNELS, sizeof(samples)); } stop = pa_rtclock_now(); pa_log_info("MMX: %llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { - memcpy (samples_ref, samples_orig, sizeof (samples)); - func (samples_ref, volumes, CHANNELS, sizeof (samples)); + memcpy(samples_ref, samples_orig, sizeof(samples)); + func(samples_ref, volumes, CHANNELS, sizeof(samples)); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); @@ -302,18 +300,18 @@ static void run_test (void) { #endif /* defined (__i386__) || defined (__amd64__) */ -void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { +void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags) { #if defined (__i386__) || defined (__amd64__) #ifdef RUN_TEST - run_test (); + run_test(); #endif - if (flags & PA_CPU_X86_MMX) { + if ((flags & PA_CPU_X86_MMX) && (flags & PA_CPU_X86_CMOV)) { pa_log_info("Initialising MMX optimized functions."); - pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); - pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); + pa_set_volume_func(PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); + pa_set_volume_func(PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); } #endif /* defined (__i386__) || defined (__amd64__) */ }