X-Git-Url: https://code.delx.au/pulseaudio/blobdiff_plain/c5dca7cf2b0423f152c6e81a503cc9639b08f09f..b51d28bbbf0b544789be7fe6b3d2a148cf0a500a:/src/pulsecore/svolume_mmx.c diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index ae2cf053..d68a1059 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -92,7 +92,7 @@ " por %%mm4, "#s1" \n\t" /* .. | l h | */ \ " por %%mm5, "#s2" \n\t" -static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { +static void pa_volume_s16ne_mmx(int16_t *samples, const int32_t *volumes, unsigned channels, unsigned length) { pa_reg_x86 channel, temp; /* Channels must be at least 4, and always a multiple of the original number. @@ -107,7 +107,7 @@ static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned cha " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" - " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movd (%q1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ " movw (%0), %w4 \n\t" /* .. | p0 | */ " movd %4, %%mm1 \n\t" VOLUME_32x16 (%%mm1, %%mm0) @@ -122,7 +122,7 @@ static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned cha " je 4f \n\t" "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq (%q1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ VOLUME_32x16 (%%mm1, %%mm0) " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ @@ -135,8 +135,8 @@ static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned cha " je 6f \n\t" "5: \n\t" /* do samples in groups of 4 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movq (%q1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%q1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ VOLUME_32x16 (%%mm1, %%mm0) @@ -153,7 +153,7 @@ static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned cha : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) #if defined (__i386__) - : "m" ((pa_reg_x86)channels) + : "m" (channels) #else : "r" ((pa_reg_x86)channels) #endif @@ -161,7 +161,7 @@ static void pa_volume_s16ne_mmx(int16_t *samples, int32_t *volumes, unsigned cha ); } -static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { +static void pa_volume_s16re_mmx(int16_t *samples, const int32_t *volumes, unsigned channels, unsigned length) { pa_reg_x86 channel, temp; /* Channels must be at least 4, and always a multiple of the original number. @@ -180,7 +180,7 @@ static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned cha " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" - " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movd (%q1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ " movw (%0), %w4 \n\t" /* .. | p0 | */ " rorw $8, %w4 \n\t" " movd %4, %%mm1 \n\t" @@ -197,7 +197,7 @@ static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned cha " je 4f \n\t" "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq (%q1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ SWAP_16 (%%mm1) VOLUME_32x16 (%%mm1, %%mm0) @@ -212,8 +212,8 @@ static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned cha " je 6f \n\t" "5: \n\t" /* do samples in groups of 4 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movq (%q1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%q1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ SWAP_16_2 (%%mm1, %%mm3) @@ -232,7 +232,7 @@ static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned cha : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) #if defined (__i386__) - : "m" ((pa_reg_x86)channels) + : "m" (channels) #else : "r" ((pa_reg_x86)channels) #endif @@ -240,100 +240,10 @@ static void pa_volume_s16re_mmx(int16_t *samples, int32_t *volumes, unsigned cha ); } -#undef RUN_TEST - -#ifdef RUN_TEST -#define CHANNELS 2 -#define SAMPLES 1022 -#define TIMES 1000 -#define TIMES2 100 -#define PADDING 16 - -static void run_test(void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + PADDING]; - int i, j, padding; - pa_do_volume_func_t func; - pa_usec_t start, stop; - int k; - pa_usec_t min = INT_MAX, max = 0; - double s1 = 0, s2 = 0; - - func = pa_get_volume_func(PA_SAMPLE_S16NE); - - printf("checking MMX %zd\n", sizeof(samples)); - - pa_random(samples, sizeof(samples)); - /* for (i = 0; i < SAMPLES; i++) - samples[i] = -1; */ - memcpy(samples_ref, samples, sizeof(samples)); - memcpy(samples_orig, samples, sizeof(samples)); - - for (i = 0; i < CHANNELS; i++) - volumes[i] = PA_CLAMP_VOLUME(rand() >> 1); - /* volumes[i] = 0x0000ffff; */ - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; - - func(samples_ref, volumes, CHANNELS, sizeof(samples)); - pa_volume_s16ne_mmx(samples, volumes, CHANNELS, sizeof(samples)); - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf("%d: %04x != %04x (%04x * %08x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } - } - - for (k = 0; k < TIMES2; k++) { - start = pa_rtclock_now(); - for (j = 0; j < TIMES; j++) { - memcpy(samples, samples_orig, sizeof(samples)); - pa_volume_s16ne_mmx(samples, volumes, CHANNELS, sizeof(samples)); - } - stop = pa_rtclock_now(); - - if (min > (stop - start)) min = stop - start; - if (max < (stop - start)) max = stop - start; - s1 += stop - start; - s2 += (stop - start) * (stop - start); - } - pa_log_info("MMX: %llu usec (min = %llu, max = %llu, stddev = %g).", (long long unsigned int)s1, - (long long unsigned int)min, (long long unsigned int)max, sqrt(TIMES2 * s2 - s1 * s1) / TIMES2); - - min = INT_MAX; max = 0; - s1 = s2 = 0; - for (k = 0; k < TIMES2; k++) { - start = pa_rtclock_now(); - for (j = 0; j < TIMES; j++) { - memcpy(samples_ref, samples_orig, sizeof(samples)); - func(samples_ref, volumes, CHANNELS, sizeof(samples)); - } - stop = pa_rtclock_now(); - - if (min > (stop - start)) min = stop - start; - if (max < (stop - start)) max = stop - start; - s1 += stop - start; - s2 += (stop - start) * (stop - start); - } - pa_log_info("ref: %llu usec (min = %llu, max = %llu, stddev = %g).", (long long unsigned int)s1, - (long long unsigned int)min, (long long unsigned int)max, sqrt(TIMES2 * s2 - s1 * s1) / TIMES2); - - pa_assert_se(memcmp(samples_ref, samples, sizeof(samples)) == 0); -} -#endif - #endif /* defined (__i386__) || defined (__amd64__) */ - void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags) { #if defined (__i386__) || defined (__amd64__) - -#ifdef RUN_TEST - run_test(); -#endif - if ((flags & PA_CPU_X86_MMX) && (flags & PA_CPU_X86_CMOV)) { pa_log_info("Initialising MMX optimized volume functions.");