22 #include "../SDL_internal.h" 30 #define HAVE_NEON_INTRINSICS 1 34 #define HAVE_SSE2_INTRINSICS 1 37 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS 38 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 39 #elif __MACOSX__ && HAVE_SSE2_INTRINSICS 40 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 41 #elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS 42 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 43 #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS 44 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 48 #ifndef NEED_SCALAR_CONVERTER_FALLBACKS 49 #define NEED_SCALAR_CONVERTER_FALLBACKS 1 65 #define DIVBY128 0.0078125f 66 #define DIVBY32768 0.000030517578125f 67 #define DIVBY8388607 0.00000011920930376163766f 70 #if NEED_SCALAR_CONVERTER_FALLBACKS 80 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
99 for (i = cvt->
len_cvt; i; --i, --src, --dst) {
100 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
151 float *
dst = (
float *) cvt->
buf;
168 const float *
src = (
const float *) cvt->
buf;
175 const float sample = *
src;
176 if (sample >= 1.0
f) {
178 }
else if (sample <= -1.0
f) {
194 const float *
src = (
const float *) cvt->
buf;
201 const float sample = *
src;
202 if (sample >= 1.0
f) {
204 }
else if (sample <= -1.0
f) {
207 *
dst = (
Uint8)((sample + 1.0
f) * 127.0f);
220 const float *
src = (
const float *) cvt->
buf;
227 const float sample = *
src;
228 if (sample >= 1.0
f) {
230 }
else if (sample <= -1.0
f) {
246 const float *
src = (
const float *) cvt->
buf;
253 const float sample = *
src;
254 if (sample >= 1.0
f) {
256 }
else if (sample <= -1.0
f) {
272 const float *
src = (
const float *) cvt->
buf;
279 const float sample = *
src;
280 if (sample >= 1.0
f) {
282 }
else if (sample <= -1.0
f) {
296 #if HAVE_SSE2_INTRINSICS 307 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
311 src -= 15; dst -= 15;
312 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
315 if ((((
size_t) src) & 15) == 0) {
317 const __m128i *mmsrc = (
const __m128i *) src;
318 const __m128i
zero = _mm_setzero_si128();
319 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
321 const __m128i bytes = _mm_load_si128(mmsrc);
323 const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
325 const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
327 const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
328 const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
329 const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
330 const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
332 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
333 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
334 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
335 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
336 i -= 16; mmsrc--; dst -= 16;
339 src = (
const Sint8 *) mmsrc;
342 src += 15; dst += 15;
366 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
367 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
370 src -= 15; dst -= 15;
371 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
374 if ((((
size_t) src) & 15) == 0) {
376 const __m128i *mmsrc = (
const __m128i *) src;
377 const __m128i
zero = _mm_setzero_si128();
378 const __m128 divby128 = _mm_set1_ps(
DIVBY128);
379 const __m128 minus1 = _mm_set1_ps(-1.0
f);
381 const __m128i bytes = _mm_load_si128(mmsrc);
383 const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
385 const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
388 const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
389 const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
390 const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
391 const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
393 _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
394 _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
395 _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
396 _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
397 i -= 16; mmsrc--; dst -= 16;
400 src = (
const Uint8 *) mmsrc;
403 src += 15; dst += 15;
407 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
432 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
435 if ((((
size_t) src) & 15) == 0) {
437 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
439 const __m128i ints = _mm_load_si128((__m128i
const *) src);
441 const __m128i
a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
443 const __m128i
b = _mm_srai_epi32(ints, 16);
445 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
446 _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
447 i -= 8; src -= 8; dst -= 8;
480 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
483 if ((((
size_t) src) & 15) == 0) {
485 const __m128 divby32768 = _mm_set1_ps(
DIVBY32768);
486 const __m128 minus1 = _mm_set1_ps(1.0
f);
488 const __m128i ints = _mm_load_si128((__m128i
const *) src);
490 const __m128i
a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
492 const __m128i
b = _mm_srli_epi32(ints, 16);
494 _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
495 _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
496 i -= 8; src -= 8; dst -= 8;
518 float *
dst = (
float *) cvt->
buf;
528 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
529 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
534 const __m128i *mmsrc = (
const __m128i *) src;
537 _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
538 i -= 4; mmsrc++; dst += 4;
540 src = (
const Sint32 *) mmsrc;
557 const float *
src = (
const float *) cvt->
buf;
565 const float sample = *
src;
566 if (sample >= 1.0
f) {
568 }
else if (sample <= -1.0
f) {
571 *dst = (
Sint8)(sample * 127.0
f);
575 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
578 if ((((
size_t) src) & 15) == 0) {
580 const __m128
one = _mm_set1_ps(1.0
f);
581 const __m128 negone = _mm_set1_ps(-1.0
f);
582 const __m128 mulby127 = _mm_set1_ps(127.0
f);
583 __m128i *mmdst = (__m128i *) dst;
585 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127));
586 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127));
587 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127));
588 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127));
589 _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
590 i -= 16; src += 16; mmdst++;
592 dst = (
Sint8 *) mmdst;
597 const float sample = *
src;
598 if (sample >= 1.0
f) {
600 }
else if (sample <= -1.0
f) {
603 *dst = (
Sint8)(sample * 127.0
f);
617 const float *
src = (
const float *) cvt->
buf;
625 const float sample = *
src;
626 if (sample >= 1.0
f) {
628 }
else if (sample <= -1.0
f) {
631 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
635 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
638 if ((((
size_t) src) & 15) == 0) {
640 const __m128
one = _mm_set1_ps(1.0
f);
641 const __m128 negone = _mm_set1_ps(-1.0
f);
642 const __m128 mulby127 = _mm_set1_ps(127.0
f);
643 __m128i *mmdst = (__m128i *) dst;
645 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127));
646 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127));
647 const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127));
648 const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127));
649 _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));
650 i -= 16; src += 16; mmdst++;
652 dst = (
Uint8 *) mmdst;
657 const float sample = *
src;
658 if (sample >= 1.0
f) {
660 }
else if (sample <= -1.0
f) {
663 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
677 const float *
src = (
const float *) cvt->
buf;
685 const float sample = *
src;
686 if (sample >= 1.0
f) {
688 }
else if (sample <= -1.0
f) {
691 *dst = (
Sint16)(sample * 32767.0
f);
695 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
698 if ((((
size_t) src) & 15) == 0) {
700 const __m128
one = _mm_set1_ps(1.0
f);
701 const __m128 negone = _mm_set1_ps(-1.0
f);
702 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
703 __m128i *mmdst = (__m128i *) dst;
705 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));
706 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));
707 _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));
708 i -= 8; src += 8; mmdst++;
715 const float sample = *
src;
716 if (sample >= 1.0
f) {
718 }
else if (sample <= -1.0
f) {
721 *dst = (
Sint16)(sample * 32767.0
f);
735 const float *
src = (
const float *) cvt->
buf;
743 const float sample = *
src;
744 if (sample >= 1.0
f) {
746 }
else if (sample <= -1.0
f) {
749 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
753 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
756 if ((((
size_t) src) & 15) == 0) {
765 const __m128 mulby32767 = _mm_set1_ps(32767.0
f);
766 const __m128i topbit = _mm_set1_epi16(-32768);
767 const __m128
one = _mm_set1_ps(1.0
f);
768 const __m128 negone = _mm_set1_ps(-1.0
f);
769 __m128i *mmdst = (__m128i *) dst;
771 const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));
772 const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));
773 _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));
774 i -= 8; src += 8; mmdst++;
781 const float sample = *
src;
782 if (sample >= 1.0
f) {
784 }
else if (sample <= -1.0
f) {
787 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
801 const float *
src = (
const float *) cvt->
buf;
809 const float sample = *
src;
810 if (sample >= 1.0
f) {
812 }
else if (sample <= -1.0
f) {
813 *dst = (
Sint32) -2147483648LL;
815 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
819 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
820 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
824 const __m128
one = _mm_set1_ps(1.0
f);
825 const __m128 negone = _mm_set1_ps(-1.0
f);
826 const __m128 mulby8388607 = _mm_set1_ps(8388607.0
f);
827 __m128i *mmdst = (__m128i *) dst;
829 _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8));
830 i -= 4; src += 4; mmdst++;
837 const float sample = *
src;
838 if (sample >= 1.0
f) {
840 }
else if (sample <= -1.0
f) {
841 *dst = (
Sint32) -2147483648LL;
843 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
855 #if HAVE_NEON_INTRINSICS 866 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
870 src -= 15; dst -= 15;
871 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
874 if ((((
size_t) src) & 15) == 0) {
877 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
879 const int8x16_t bytes = vld1q_s8(mmsrc);
880 const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));
881 const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));
883 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
884 vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
885 vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
886 vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
887 i -= 16; mmsrc -= 16; dst -= 16;
890 src = (
const Sint8 *) mmsrc;
893 src += 15; dst += 15;
917 for (i = cvt->
len_cvt; i && (((
size_t) (dst-15)) & 15); --i, --src, --dst) {
918 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
921 src -= 15; dst -= 15;
922 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
925 if ((((
size_t) src) & 15) == 0) {
928 const float32x4_t divby128 = vdupq_n_f32(
DIVBY128);
929 const float32x4_t
one = vdupq_n_f32(1.0
f);
931 const uint8x16_t bytes = vld1q_u8(mmsrc);
932 const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));
933 const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));
935 vst1q_f32(dst, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128, one));
936 vst1q_f32(dst+4, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128, one));
937 vst1q_f32(dst+8, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128, one));
938 vst1q_f32(dst+12, vmlsq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128, one));
939 i -= 16; mmsrc -= 16; dst -= 16;
942 src = (
const Uint8 *) mmsrc;
945 src += 15; dst += 15;
949 *dst = (((float) *src) *
DIVBY128) - 1.0
f;
974 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
977 if ((((
size_t) src) & 15) == 0) {
979 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
981 const int16x8_t ints = vld1q_s16((
int16_t const *) src);
983 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
984 vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
985 i -= 8; src -= 8; dst -= 8;
1018 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1021 if ((((
size_t) src) & 15) == 0) {
1023 const float32x4_t divby32768 = vdupq_n_f32(
DIVBY32768);
1024 const float32x4_t
one = vdupq_n_f32(1.0
f);
1026 const uint16x8_t uints = vld1q_u16((
uint16_t const *) src);
1028 vst1q_f32(dst, vmlsq_f32(one, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
1029 vst1q_f32(dst+4, vmlsq_f32(one, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
1030 i -= 8; src -= 8; dst -= 8;
1052 float *
dst = (
float *) cvt->
buf;
1062 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1063 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
1067 const float32x4_t divby8388607 = vdupq_n_f32(
DIVBY8388607);
1071 vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
1072 i -= 4; mmsrc += 4; dst += 4;
1074 src = (
const Sint32 *) mmsrc;
1091 const float *
src = (
const float *) cvt->
buf;
1099 const float sample = *
src;
1100 if (sample >= 1.0
f) {
1102 }
else if (sample <= -1.0
f) {
1105 *dst = (
Sint8)(sample * 127.0
f);
1109 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1112 if ((((
size_t) src) & 15) == 0) {
1114 const float32x4_t
one = vdupq_n_f32(1.0
f);
1115 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1116 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1119 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127));
1120 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127));
1121 const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127));
1122 const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127));
1123 const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1124 const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4)));
1125 vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));
1126 i -= 16; src += 16; mmdst += 16;
1128 dst = (
Sint8 *) mmdst;
1133 const float sample = *
src;
1134 if (sample >= 1.0
f) {
1136 }
else if (sample <= -1.0
f) {
1139 *dst = (
Sint8)(sample * 127.0
f);
1153 const float *
src = (
const float *) cvt->
buf;
1161 const float sample = *
src;
1162 if (sample >= 1.0
f) {
1164 }
else if (sample <= -1.0
f) {
1167 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
1171 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1174 if ((((
size_t) src) & 15) == 0) {
1176 const float32x4_t
one = vdupq_n_f32(1.0
f);
1177 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1178 const float32x4_t mulby127 = vdupq_n_f32(127.0
f);
1181 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127));
1182 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127));
1183 const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127));
1184 const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127));
1185 const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1186 const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4)));
1187 vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));
1188 i -= 16; src += 16; mmdst += 16;
1191 dst = (
Uint8 *) mmdst;
1196 const float sample = *
src;
1197 if (sample >= 1.0
f) {
1199 }
else if (sample <= -1.0
f) {
1202 *dst = (
Uint8)((sample + 1.0
f) * 127.0f);
1216 const float *
src = (
const float *) cvt->
buf;
1224 const float sample = *
src;
1225 if (sample >= 1.0
f) {
1227 }
else if (sample <= -1.0
f) {
1230 *dst = (
Sint16)(sample * 32767.0
f);
1234 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1237 if ((((
size_t) src) & 15) == 0) {
1239 const float32x4_t
one = vdupq_n_f32(1.0
f);
1240 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1241 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1244 const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767));
1245 const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767));
1246 vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));
1247 i -= 8; src += 8; mmdst += 8;
1254 const float sample = *
src;
1255 if (sample >= 1.0
f) {
1257 }
else if (sample <= -1.0
f) {
1260 *dst = (
Sint16)(sample * 32767.0
f);
1274 const float *
src = (
const float *) cvt->
buf;
1282 const float sample = *
src;
1283 if (sample >= 1.0
f) {
1285 }
else if (sample <= -1.0
f) {
1288 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
1292 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1295 if ((((
size_t) src) & 15) == 0) {
1297 const float32x4_t
one = vdupq_n_f32(1.0
f);
1298 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1299 const float32x4_t mulby32767 = vdupq_n_f32(32767.0
f);
1302 const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767));
1303 const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767));
1304 vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));
1305 i -= 8; src += 8; mmdst += 8;
1312 const float sample = *
src;
1313 if (sample >= 1.0
f) {
1315 }
else if (sample <= -1.0
f) {
1318 *dst = (
Uint16)((sample + 1.0
f) * 32767.0f);
1332 const float *
src = (
const float *) cvt->
buf;
1340 const float sample = *
src;
1341 if (sample >= 1.0
f) {
1343 }
else if (sample <= -1.0
f) {
1346 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
1350 SDL_assert(!i || ((((
size_t) dst) & 15) == 0));
1351 SDL_assert(!i || ((((
size_t) src) & 15) == 0));
1355 const float32x4_t
one = vdupq_n_f32(1.0
f);
1356 const float32x4_t negone = vdupq_n_f32(-1.0
f);
1357 const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0
f);
1360 vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));
1361 i -= 4; src += 4; mmdst += 4;
1368 const float sample = *
src;
1369 if (sample >= 1.0
f) {
1371 }
else if (sample <= -1.0
f) {
1374 *dst = ((
Sint32)(sample * 8388607.0
f)) << 8;
1391 if (converters_chosen) {
1395 #define SET_CONVERTER_FUNCS(fntype) \ 1396 SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \ 1397 SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \ 1398 SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \ 1399 SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \ 1400 SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \ 1401 SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \ 1402 SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \ 1403 SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \ 1404 SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \ 1405 SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \ 1406 converters_chosen = SDL_TRUE 1408 #if HAVE_SSE2_INTRINSICS 1415 #if HAVE_NEON_INTRINSICS 1422 #if NEED_SCALAR_CONVERTER_FALLBACKS 1426 #undef SET_CONVERTER_FUNCS #define LOG_DEBUG_CONVERT(from, to)
static void SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_F32_to_U16
SDL_AudioFilter SDL_Convert_F32_to_S16
void SDL_ChooseAudioConverters(void)
SDL_AudioFilter SDL_Convert_U8_to_F32
static void SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
Uint16 SDL_AudioFormat
Audio format flags.
SDL_AudioFilter SDL_Convert_F32_to_U8
A structure to hold a set of audio conversion filters and buffers.
static void SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S16_to_F32
GLint GLint GLsizei GLsizei GLsizei GLint GLenum format
SDL_AudioFilter filters[SDL_AUDIOCVT_MAX_FILTERS+1]
static void SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
static void SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
void(* SDL_AudioFilter)(struct SDL_AudioCVT *cvt, SDL_AudioFormat format)
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
#define SDL_assert(condition)
static void SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_S8_to_F32
SDL_AudioFilter SDL_Convert_F32_to_S32
SDL_AudioFilter SDL_Convert_F32_to_S8
static void SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT *cvt, SDL_AudioFormat format)
SDL_AudioFilter SDL_Convert_U16_to_F32
GLboolean GLboolean GLboolean GLboolean a
SDL_AudioFilter SDL_Convert_S32_to_F32
GLboolean GLboolean GLboolean b
#define SET_CONVERTER_FUNCS(fntype)