8#include <private/qsimd_p.h>
730#if defined(__SSSE3__)
// A ShuffleMask is 16 byte indices: entry k names the source byte that should
// end up at output position k.
731using ShuffleMask =
uchar[16];
// Table of three masks, one per element width. The table is declared 16-byte
// aligned, and each row is exactly 16 bytes, so every row starts on a 16-byte
// boundary.
732alignas(16)
static const ShuffleMask shuffleMasks[3] = {
// Row 0: exchanges the two bytes of every 16-bit element.
734 {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
// Row 1: reverses the four bytes of every 32-bit element.
736 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
// Row 2: reverses the eight bytes of every 64-bit element.
738 {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}
// Vectorized byte-shuffle loop: reads from src, permutes bytes with the mask
// behind shuffleMaskPtr, and writes to dst, advancing the offset i in three
// stages (32 bytes via one 256-bit vector, 32 bytes via two 128-bit vectors,
// then at most one further 16-byte vector).
// NOTE(review): the start of the signature, the declaration/initialization of
// i, the closing braces and the return statement are not visible in this
// excerpt; neither is any preprocessor guard around the 256-bit stage.
742 const __m128i *shuffleMaskPtr)
noexcept
// Aligned 16-byte load, so shuffleMaskPtr must point at 16-byte-aligned data.
745 const __m128i shuffleMask = _mm_load_si128(shuffleMaskPtr);
// Build a 256-bit mask holding the same 16-byte mask in both 128-bit halves.
748 const __m256i shuffleMask256 = _mm256_inserti128_si256(_mm256_castsi128_si256(shuffleMask), shuffleMask, 1);
// Stage 1: one 256-bit vector (32 bytes) per iteration while a full vector
// still fits in the remaining input. Loads and stores are unaligned.
749 for ( ;
i +
sizeof(__m256i) <= bytes;
i +=
sizeof(__m256i)) {
750 __m256i
data = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(
src +
i));
751 data = _mm256_shuffle_epi8(
data, shuffleMask256);
752 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(
dst +
i),
data);
// Stage 2: two 128-bit vectors (32 bytes) per iteration. The "+ 1" is applied
// after the cast, so it addresses the next 16-byte vector, not the next byte.
755 for ( ;
i + 2 *
sizeof(__m128i) <= bytes;
i += 2 *
sizeof(__m128i)) {
756 __m128i data1 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i));
757 __m128i data2 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i) + 1);
758 data1 = _mm_shuffle_epi8(data1, shuffleMask);
759 data2 = _mm_shuffle_epi8(data2, shuffleMask);
760 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i), data1);
761 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i) + 1, data2);
// Stage 3: at most one more 128-bit vector if 16 or more bytes remain.
765 if (
i +
sizeof(__m128i) <= bytes) {
766 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i));
767 data = _mm_shuffle_epi8(
data, shuffleMask);
768 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i),
data);
769 i +=
sizeof(__m128i);
// View the first row of the mask table as a pointer to a 128-bit vector.
// NOTE(review): as shown, this points at row 0; any adjustment that selects
// the row matching sizeof(T) is not visible in this excerpt — confirm.
778 auto shuffleMaskPtr =
reinterpret_cast<const __m128i *
>(shuffleMasks[0]);
// Run the vectorized loop; its return value is used as the starting offset
// for the loop below.
780 size_t i = sseSwapLoop(
src, bytes,
dst, shuffleMaskPtr);
// Tail loop: steps through the remaining input one T at a time. The second
// counter _i caps the tail at sizeof(__m128i) bytes' worth of elements.
// NOTE(review): the loop body and the function's return are not visible here.
783 for (
size_t _i = 0;
i < bytes && _i <
sizeof(__m128i);
i +=
sizeof(T), _i +=
sizeof(T))
// Alternative branch of the enclosing preprocessor conditional, selected when
// __SSE2__ is defined and the earlier condition(s) were not.
789#elif defined(__SSE2__)
// Start of a template declaration parameterized on T.
// NOTE(review): the declarator that follows `static` is not visible in this
// excerpt.
790template <
typename T>
static
// Explicit specialization of simdSwapLoop for quint16: processes `bytes` bytes
// from src into dst using 128-bit integer vectors.
// NOTE(review): the braces, the declaration/initialization of i, the calls
// that apply swapEndian, the tail-loop body and the return statement are not
// visible in this excerpt.
798template <>
size_t simdSwapLoop<quint16>(
const uchar *
src,
size_t bytes,
uchar *
dst)
noexcept
// In-place byte swap of each 16-bit lane: shifting right by 8 moves the high
// byte down, shifting left by 8 moves the low byte up. The two results occupy
// disjoint bits, so XOR-ing them merges them exactly as an OR would.
800 auto swapEndian = [](__m128i &
data) {
801 __m128i lows = _mm_srli_epi16(
data, 8);
802 __m128i highs = _mm_slli_epi16(
data, 8);
803 data = _mm_xor_si128(lows, highs);
// Main loop: two 128-bit vectors (32 bytes) per iteration, unaligned loads and
// stores. The "+ 1" is applied after the cast and so addresses the next
// 16-byte vector.
// NOTE(review): the statements between the loads and the stores (presumably
// the swapEndian calls on data1/data2) are not visible here — confirm.
807 for ( ;
i + 2 *
sizeof(__m128i) <= bytes;
i += 2 *
sizeof(__m128i)) {
808 __m128i data1 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i));
809 __m128i data2 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i) + 1);
812 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i), data1);
813 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i) + 1, data2);
// At most one more 128-bit vector if 16 or more bytes remain.
// NOTE(review): the transform between this load and store is likewise not
// visible in this excerpt.
816 if (
i +
sizeof(__m128i) <= bytes) {
817 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src +
i));
819 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst +
i),
data);
820 i +=
sizeof(__m128i);
// Tail loop: advances one quint16 at a time; _i caps the tail at
// sizeof(__m128i) bytes. The loop body is not visible in this excerpt.
824 for (
size_t _i = 0 ;
i < bytes && _i <
sizeof(__m128i);
i +=
sizeof(
quint16), _i +=
sizeof(
quint16))
// Hand the buffer to simdSwapLoop<T> first; the value it returns becomes the
// starting offset for the loop below.
852 size_t i = simdSwapLoop<T>(
src,
n,
dst);
// Loop from that offset up to n in steps of sizeof(T), so i and n are both
// measured in bytes here.
// NOTE(review): the loop body and the function's return are not visible in
// this excerpt.
854 for (;
i <
n;
i +=
sizeof(T))
// NOTE(review): the three return statements below appear to belong to three
// separate functions whose headers are not visible in this excerpt.
// Each shift multiplies n by the size of the element type named in the
// template argument (2, 4 and 8), which presumably converts an element count
// into a byte count — confirm against the enclosing signatures.

// 16-bit elements: n << 1 is n * 2.
865 return bswapLoop<quint16>(
src,
n << 1,
dst);
// 32-bit elements: n << 2 is n * 4.
874 return bswapLoop<quint32>(
src,
n << 2,
dst);
// 64-bit elements: n << 3 is n * 8.
883 return bswapLoop<quint64>(
src,
n << 3,
dst);
Combined button and popup list for selecting options.
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
void * qbswap< 8 >(const void *source, qsizetype n, void *dest) noexcept
void * qbswap< 4 >(const void *source, qsizetype n, void *dest) noexcept
static Q_ALWAYS_INLINE size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
static Q_ALWAYS_INLINE void * bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
void * qbswap< 2 >(const void *source, qsizetype n, void *dest) noexcept
constexpr T qbswap(T source)
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLsizei GLsizei GLchar * source