5#include <private/qdrawhelper_x86_p.h>
7#if defined(QT_COMPILER_SUPPORTS_SSSE3)
9#include <private/qdrawingprimitive_sse2_p.h>
// BLENDING_LOOP(palignrOffset, length)
//
// Vector loop used when the source pointer is not 16-byte aligned. The macro
// expands inside a scope that must already declare: x, src, dst,
// minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded, alphaMask, nullVector,
// alphaShuffleMask, one, colorMask and half.
//
// Per iteration (x advances by 4, i.e. one 16-byte vector, assuming 32-bit
// elements):
//  - an aligned 16-byte load fetches the chunk starting at
//    src[x - minusOffsetToAlignSrcOn16Bytes + 4]; it is combined with the chunk
//    loaded on the previous iteration through
//    _mm_alignr_epi8(last, prev, palignrOffset) to rebuild the unaligned
//    vector that starts at src[x];
//  - if every 32-bit lane has all alphaMask bits set, the source vector is
//    stored to dst[x] unchanged;
//  - otherwise, unless every lane's masked alpha is zero, the stored result is
//    src + BYTE_MUL_SSE2(dst, one - alpha), with alpha extracted by
//    alphaShuffleMask (BYTE_MUL_SSE2 is defined elsewhere; presumably a
//    per-channel multiply normalised by 255 -- confirm);
//  - if every lane's masked alpha is zero, dst[x] is not written at all;
//  - the freshly loaded chunk becomes srcVectorPrevLoaded for the next round.
//
// dst[x] is read and written with _mm_load_si128/_mm_store_si128, so &dst[x]
// has to be 16-byte aligned when the loop runs.
// The bound "length-7" (rather than length-3) accounts for the look-ahead load
// at "+ 4" above; presumably the remaining elements are handled by a scalar
// epilogue outside this macro -- confirm against the caller.
16#define BLENDING_LOOP(palignrOffset, length)\
17 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \
18 const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
19 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
20 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
21 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
22 _mm_store_si128((__m128i *)&dst[x], srcVector); \
23 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
24 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
25 alphaChannel = _mm_sub_epi16(one, alphaChannel); \
26 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
27 __m128i destMultipliedByOneMinusAlpha; \
28 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
29 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
30 _mm_store_si128((__m128i *)&dst[x], result); \
32 srcVectorPrevLoaded = srcVectorLastLoaded;\
49 __m128i nullVector, __m128i half, __m128i one, __m128i colorMask, __m128i alphaMask)
// Distance of &src[x] past the previous 16-byte boundary, expressed in 4-byte
// units: the address is shifted right by 2 (divide by 4) and reduced modulo 4,
// giving a value in 0..3.
58 const int minusOffsetToAlignSrcOn16Bytes = (
reinterpret_cast<quintptr>(&(
src[
x])) >> 2) & 0x3;
// Offset 0: &src[x] is already 16-byte aligned, so the source can be read
// with plain aligned loads.
60 if (!minusOffsetToAlignSrcOn16Bytes) {
// Control mask for _mm_shuffle_epi8 (arguments are listed from byte 15 down
// to byte 0): bytes 3, 7, 11 and 15 of the input -- the top byte of each
// 32-bit lane, the same byte alphaMask selects -- are each copied into the low
// byte of two adjacent 16-bit lanes; the 0xff selectors zero the high byte of
// every 16-bit lane.
63 const __m128i alphaShuffleMask = _mm_set_epi8(
char(0xff),15,
char(0xff),15,
char(0xff),11,
char(0xff),11,
char(0xff),7,
char(0xff),7,
char(0xff),3,
char(0xff),3);
65 const __m128i srcVector = _mm_load_si128((
const __m128i *)&
src[
x]);
66 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
// Every lane has all alphaMask bits set: the source simply replaces the
// destination.
67 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
68 _mm_store_si128((__m128i *)&
dst[
x], srcVector);
69 }
// Mixed alpha: skipped entirely when every lane's masked alpha is zero,
// otherwise compute src + dst * (one - alpha).
else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
70 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask);
71 alphaChannel = _mm_sub_epi16(one, alphaChannel);
72 const __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
73 __m128i destMultipliedByOneMinusAlpha;
// BYTE_MUL_SSE2 is defined elsewhere; presumably a per-channel multiply of
// dstVector by alphaChannel normalised by 255 -- confirm.
74 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half);
75 const __m128i
result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
// Non-zero offset: prime the pipeline with the aligned 16-byte chunk that
// contains src[x] (the address is rounded down by the offset computed above).
81 __m128i srcVectorPrevLoaded = _mm_load_si128((
const __m128i *)&
src[
x - minusOffsetToAlignSrcOn16Bytes]);
// Same offset converted from 4-byte units to bytes (4, 8 or 12).
82 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;
// Same alpha-broadcast shuffle control as in the aligned path above.
84 const __m128i alphaShuffleMask = _mm_set_epi8(
char(0xff),15,
char(0xff),15,
char(0xff),11,
char(0xff),11,
char(0xff),7,
char(0xff),7,
char(0xff),3,
char(0xff),3);
// NOTE(review): the case labels are not visible here; presumably each case
// expands BLENDING_LOOP with a literal shift, since _mm_alignr_epi8 needs a
// compile-time constant -- confirm.
85 switch (palignrOffset) {
// Row-by-row blend entry point. Behaviour depends on const_alpha:
//  - const_alpha == 256: each of the h rows is blended with
//    BLEND_SOURCE_OVER_ARGB32_SSSE3 (per-pixel alpha only);
//  - any other non-zero const_alpha: the value is rescaled and each row is
//    blended with BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2;
//  - const_alpha == 0: neither branch runs.
// NOTE(review): the remaining parameters and the per-row setup of dst/src are
// not visible in this excerpt; dbpl/sbpl are presumably the destination and
// source strides in bytes -- confirm.
101void qt_blend_argb32_on_argb32_ssse3(
uchar *destPixels,
int dbpl,
102 const uchar *srcPixels,
int sbpl,
108 if (const_alpha == 256) {
// Constants shared by every row: alphaMask selects the top byte of each
// 32-bit lane, colorMask selects bytes 0 and 2 of each lane, half and one
// hold 0x80 and 0xff in every 16-bit lane.
109 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
110 const __m128i nullVector = _mm_setzero_si128();
111 const __m128i half = _mm_set1_epi16(0x80);
112 const __m128i one = _mm_set1_epi16(0xff);
113 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
115 for (
int y = 0;
y <
h; ++
y) {
116 BLEND_SOURCE_OVER_ARGB32_SSSE3(
dst,
src,
w, nullVector, half, one, colorMask, alphaMask);
120 }
else if (const_alpha != 0) {
// Scale const_alpha by 255/256 (integer arithmetic), i.e. from a scale whose
// maximum is 256 to one whose maximum is 255.
124 const_alpha = (const_alpha * 255) >> 8;
125 const __m128i nullVector = _mm_setzero_si128();
126 const __m128i half = _mm_set1_epi16(0x80);
127 const __m128i one = _mm_set1_epi16(0xff);
128 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
// The rescaled constant alpha replicated into every 16-bit lane.
129 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
130 for (
int y = 0;
y <
h; ++
y) {
131 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(
dst,
src,
w, nullVector, half, one, colorMask, constAlphaVector)
// Place the 32-bit value v in the lowest lane of a vector; only its bytes
// 0..2 are referenced by the shuffle below.
160 __m128i
m = _mm_cvtsi32_si128(
v);
// Byte selectors: the repeating output pattern is byte 2, byte 1, byte 0 of v.
163 constexpr uchar x = 2,
y = 1,
z = 0;
// 17 selectors (16 + 1) so that a second 16-byte control vector can be read
// starting at element 1; the array is aligned so the first read can use an
// aligned load.
164 alignas(__m128i)
static const uchar
165 shuffleMask[16 + 1] = {
x,
y,
z,
x,
y,
z,
x,
y,
z,
x,
y,
z,
x,
y,
z,
x,
y };
// mval1: pattern bytes 0..15, starting on x.
167 __m128i mval1 = _mm_shuffle_epi8(
m, _mm_load_si128(
reinterpret_cast<const __m128i *
>(shuffleMask)));
// mval2: pattern bytes 16..31. Because 16 % 3 == 1 this chunk starts on y,
// which is exactly what the control vector read at shuffleMask + 1 produces.
168 __m128i mval2 = _mm_shuffle_epi8(
m, _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(shuffleMask + 1)));
// mval3: pattern bytes 32..47 (starts on z, 32 % 3 == 2), obtained by taking
// bytes 2..17 of the mval2:mval1 concatenation.
169 __m128i mval3 = _mm_alignr_epi8(mval2, mval1, 2);
// Main loop: 16 destination elements per iteration; the stores below write
// 48 bytes, so dest elements are presumably 3 bytes wide -- confirm.
171 for ( ; dest + 16 <=
end; dest += 16) {
// NOTE(review): the next two groups both store mval1 and mval2 to the first
// 32 bytes; presumably they are alternative branches of a preprocessor
// conditional that is not visible here (one 256-bit store vs. two 128-bit
// stores) -- confirm.
174 __m256 mval12 = _mm256_castps128_ps256(_mm_castsi128_ps(mval1));
175 mval12 = _mm256_insertf128_ps(mval12, _mm_castsi128_ps(mval2), 1);
176 _mm256_storeu_ps(
reinterpret_cast<float *
>(dest), mval12);
178 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(dest) + 0, mval1);
179 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(dest) + 1, mval2);
// Final 16 bytes of the 48-byte group.
181 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(dest) + 2, mval3);
// Tail handling (the guarding conditions are not visible here): write
// 24 bytes as a 16-byte store of mval1 plus the low 8 bytes of mval2.
198 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
ptr) + 0, mval1);
199 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
ptr) + 1, mval2);
// Write the next 16 bytes of the pattern from its start ...
208 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
ptr) , mval1);
209 }
// ... or only 8 bytes when at least 8 (but, given the branch above,
// presumably fewer than 16) remain.
else if (
left >= 8) {
211 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
ptr), mval1);
// Overlapping store of the 8 bytes that end exactly at ptr_end. The low
// 8 bytes of mval2 are y,z,x,y,z,x,y,z, so the written data finishes on the
// last byte of a 3-byte group; this presumably relies on the already written
// bytes before ptr_end being in phase with that pattern -- confirm.
217 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(ptr_end - 8), mval2);
// Shuffle controls that exchange the first and third byte of every 3-byte
// group inside a 48-byte span held in three 16-byte registers.
// Groups that straddle a register boundary (span bytes 15..17 and 30..32)
// cannot be swapped by a per-register shuffle, so the masks leave those
// positions in place: mask1 keeps byte 15, mask2 keeps bytes 0, 1, 14 and 15,
// mask3 keeps byte 0. They are fixed up with scalar swaps after the stores.
225 const static __m128i shuffleMask1 = _mm_setr_epi8(2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 15);
226 const static __m128i shuffleMask2 = _mm_setr_epi8(0, 1, 4, 3, 2, 7, 6, 5, 10, 9, 8, 13, 12, 11, 14, 15);
227 const static __m128i shuffleMask3 = _mm_setr_epi8(0, 3, 2, 1, 6, 5, 4, 9, 8, 7, 12, 11, 10, 15, 14, 13);
// Vector loop: i advances by 16 per iteration while 48 bytes are processed,
// so count is presumably a number of 3-byte groups -- confirm. The pointer
// advance for src/dst is not visible in this excerpt.
229 for (;
i + 15 <
count;
i += 16) {
// Unaligned loads of the three 16-byte chunks of the current 48-byte span
// (offsets 16 and 32 assume src is a byte pointer).
230 __m128i
s1 = _mm_loadu_si128((
const __m128i *)
src);
231 __m128i
s2 = _mm_loadu_si128((
const __m128i *)(
src + 16));
232 __m128i
s3 = _mm_loadu_si128((
const __m128i *)(
src + 32));
// Swap within each register.
233 s1 = _mm_shuffle_epi8(
s1, shuffleMask1);
234 s2 = _mm_shuffle_epi8(
s2, shuffleMask2);
235 s3 = _mm_shuffle_epi8(
s3, shuffleMask3);
236 _mm_storeu_si128((__m128i *)
dst,
s1);
237 _mm_storeu_si128((__m128i *)(
dst + 16),
s2);
238 _mm_storeu_si128((__m128i *)(
dst + 32),
s3);
// Finish the two groups that cross a 16-byte boundary: bytes 15..17 and
// 30..32 still hold their original order after the shuffles above.
241 std::swap(
dst[15],
dst[17]);
242 std::swap(
dst[30],
dst[32]);
// Scalar form of the same operation on a single 3-byte group; presumably part
// of a tail loop whose surrounding lines are not visible here -- confirm.
258 std::swap(
dst[0],
dst[2]);
Combined button and popup list for selecting options.
static void blend_pixel(quint32 &dst, const quint32 src)
#define Q_DECL_VECTORCALL
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len)
static ControlElement< T > * ptr(QWidget *widget)
GLsizei const GLfloat * v
[13]
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat z
GLint GLint GLint GLint GLint x
[0]
GLfloat GLfloat GLfloat w
[0]
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
GLenum GLuint GLenum GLsizei length
GLenum GLenum GLsizei count
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLfloat GLfloat GLfloat GLfloat h
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length)
#define SIMD_EPILOGUE(i, length, max)