#endif
#include <pulse/timeval.h>
+#include <pulse/rtclock.h>
+
#include <pulsecore/random.h>
#include <pulsecore/macro.h>
#include <pulsecore/g711.h>
" punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \
" pcmpgtw "#v", %%mm4 \n\t" /* .. | 0 | s(vl) | */ \
" pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \
- " movq %%mm6, %%mm5 \n\t" /* .. | ffff | 0 | */ \
- " pand "#v", %%mm5 \n\t" /* .. | vh | 0 | */ \
- " por %%mm5, %%mm4 \n\t" /* .. | vh | (p0) | */ \
- " pmulhw "#s", "#v" \n\t" /* .. | 0 | vl*p0 | */ \
- " paddw %%mm4, "#v" \n\t" /* .. | vh | vl*p0 | vh + sign correct */ \
- " pslld $16, "#s" \n\t" /* .. | p0 | 0 | */ \
- " por %%mm7, "#s" \n\t" /* .. | p0 | 1 | */ \
- " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
+ " movq "#s", %%mm5 \n\t" \
+ " pmulhw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \
+ " paddw %%mm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \
+ " pslld $16, "#s" \n\t" /* .. | vl*p0 | 0 | */ \
+ " psrld $16, "#v" \n\t" /* .. | 0 | vh | */ \
+ " psrad $16, "#s" \n\t" /* .. | vl*p0 | sign extend */ \
+ " pmaddwd %%mm5, "#v" \n\t" /* .. | p0 * vh | */ \
+ " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \
" packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */
/* approximately advances %3 = (%3 + a) % b. This function requires that
__asm__ __volatile__ (
" xor %3, %3 \n\t"
" sar $1, %2 \n\t" /* length /= sizeof (int16_t) */
- " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */
- " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */
- " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */
- " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */
" test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t"
" emms \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
+ : "rm" ((pa_reg_x86)channels)
: "cc"
);
}
" emms \n\t"
: "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp)
- : "r" ((pa_reg_x86)channels)
+ : "rm" ((pa_reg_x86)channels)
: "cc"
);
}
printf ("checking MMX %zd\n", sizeof (samples));
pa_random (samples, sizeof (samples));
+ /* for (i = 0; i < SAMPLES; i++)
+ samples[i] = -1; */
memcpy (samples_ref, samples, sizeof (samples));
memcpy (samples_orig, samples, sizeof (samples));
for (i = 0; i < CHANNELS; i++)
volumes[i] = rand() >> 1;
+ /* volumes[i] = 0x0000ffff; */
for (padding = 0; padding < PADDING; padding++, i++)
volumes[i] = volumes[padding];
pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
for (i = 0; i < SAMPLES; i++) {
if (samples[i] != samples_ref[i]) {
- printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+ printf ("%d: %04x != %04x (%04x * %08x)\n", i, samples[i], samples_ref[i],
samples_orig[i], volumes[i % CHANNELS]);
}
}
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+ pa_assert_se(memcmp(samples_ref, samples, sizeof(samples)) == 0);
}
#endif
run_test ();
#endif
- if (flags & PA_CPU_X86_MMX) {
+ if ((flags & PA_CPU_X86_MMX) && (flags & PA_CPU_X86_CMOV)) {
pa_log_info("Initialising MMX optimized functions.");
pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx);