/* Retrieved from the "QNX 8.QNX8 LLVM/Clang compiler suite" Subversion
 * repository web viewer; revision 14, author pmbaty. */
/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2
 *
3
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
 * See https://llvm.org/LICENSE.txt for license information.
5
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
 *
7
 *===-----------------------------------------------------------------------===
8
 */
9
 
10
#ifndef __TMMINTRIN_H
11
#define __TMMINTRIN_H
12
 
13
#if !defined(__i386__) && !defined(__x86_64__)
14
#error "This header is only meant to be used on x86 and x64 architecture"
15
#endif
16
 
17
#include <pmmintrin.h>
18
 
19
/* Define the default attributes for the functions in this file. */
20
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
21
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
22
 
23
/// Computes the absolute value of each of the packed 8-bit signed
24
///    integers in the source operand and stores the 8-bit unsigned integer
25
///    results in the destination.
26
///
27
/// \headerfile <x86intrin.h>
28
///
29
/// This intrinsic corresponds to the \c PABSB instruction.
30
///
31
/// \param __a
32
///    A 64-bit vector of [8 x i8].
33
/// \returns A 64-bit integer vector containing the absolute values of the
34
///    elements in the operand.
35
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
36
_mm_abs_pi8(__m64 __a)
37
{
38
    return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
39
}
40
 
41
/// Computes the absolute value of each of the packed 8-bit signed
42
///    integers in the source operand and stores the 8-bit unsigned integer
43
///    results in the destination.
44
///
45
/// \headerfile <x86intrin.h>
46
///
47
/// This intrinsic corresponds to the \c VPABSB instruction.
48
///
49
/// \param __a
50
///    A 128-bit vector of [16 x i8].
51
/// \returns A 128-bit integer vector containing the absolute values of the
52
///    elements in the operand.
53
static __inline__ __m128i __DEFAULT_FN_ATTRS
54
_mm_abs_epi8(__m128i __a)
55
{
56
    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
57
}
58
 
59
/// Computes the absolute value of each of the packed 16-bit signed
60
///    integers in the source operand and stores the 16-bit unsigned integer
61
///    results in the destination.
62
///
63
/// \headerfile <x86intrin.h>
64
///
65
/// This intrinsic corresponds to the \c PABSW instruction.
66
///
67
/// \param __a
68
///    A 64-bit vector of [4 x i16].
69
/// \returns A 64-bit integer vector containing the absolute values of the
70
///    elements in the operand.
71
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
72
_mm_abs_pi16(__m64 __a)
73
{
74
    return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
75
}
76
 
77
/// Computes the absolute value of each of the packed 16-bit signed
78
///    integers in the source operand and stores the 16-bit unsigned integer
79
///    results in the destination.
80
///
81
/// \headerfile <x86intrin.h>
82
///
83
/// This intrinsic corresponds to the \c VPABSW instruction.
84
///
85
/// \param __a
86
///    A 128-bit vector of [8 x i16].
87
/// \returns A 128-bit integer vector containing the absolute values of the
88
///    elements in the operand.
89
static __inline__ __m128i __DEFAULT_FN_ATTRS
90
_mm_abs_epi16(__m128i __a)
91
{
92
    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
93
}
94
 
95
/// Computes the absolute value of each of the packed 32-bit signed
96
///    integers in the source operand and stores the 32-bit unsigned integer
97
///    results in the destination.
98
///
99
/// \headerfile <x86intrin.h>
100
///
101
/// This intrinsic corresponds to the \c PABSD instruction.
102
///
103
/// \param __a
104
///    A 64-bit vector of [2 x i32].
105
/// \returns A 64-bit integer vector containing the absolute values of the
106
///    elements in the operand.
107
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
108
_mm_abs_pi32(__m64 __a)
109
{
110
    return (__m64)__builtin_ia32_pabsd((__v2si)__a);
111
}
112
 
113
/// Computes the absolute value of each of the packed 32-bit signed
114
///    integers in the source operand and stores the 32-bit unsigned integer
115
///    results in the destination.
116
///
117
/// \headerfile <x86intrin.h>
118
///
119
/// This intrinsic corresponds to the \c VPABSD instruction.
120
///
121
/// \param __a
122
///    A 128-bit vector of [4 x i32].
123
/// \returns A 128-bit integer vector containing the absolute values of the
124
///    elements in the operand.
125
static __inline__ __m128i __DEFAULT_FN_ATTRS
126
_mm_abs_epi32(__m128i __a)
127
{
128
    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
129
}
130
 
131
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

/// Horizontally adds the adjacent pairs of values contained in 2 packed
178
///    128-bit vectors of [8 x i16].
179
///
180
/// \headerfile <x86intrin.h>
181
///
182
/// This intrinsic corresponds to the \c VPHADDW instruction.
183
///
184
/// \param __a
185
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
186
///    horizontal sums of the values are stored in the lower bits of the
187
///    destination.
188
/// \param __b
189
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
190
///    horizontal sums of the values are stored in the upper bits of the
191
///    destination.
192
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
193
///    both operands.
194
static __inline__ __m128i __DEFAULT_FN_ATTRS
195
_mm_hadd_epi16(__m128i __a, __m128i __b)
196
{
197
    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
198
}
199
 
200
/// Horizontally adds the adjacent pairs of values contained in 2 packed
201
///    128-bit vectors of [4 x i32].
202
///
203
/// \headerfile <x86intrin.h>
204
///
205
/// This intrinsic corresponds to the \c VPHADDD instruction.
206
///
207
/// \param __a
208
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
209
///    horizontal sums of the values are stored in the lower bits of the
210
///    destination.
211
/// \param __b
212
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
213
///    horizontal sums of the values are stored in the upper bits of the
214
///    destination.
215
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
216
///    both operands.
217
static __inline__ __m128i __DEFAULT_FN_ATTRS
218
_mm_hadd_epi32(__m128i __a, __m128i __b)
219
{
220
    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
221
}
222
 
223
/// Horizontally adds the adjacent pairs of values contained in 2 packed
224
///    64-bit vectors of [4 x i16].
225
///
226
/// \headerfile <x86intrin.h>
227
///
228
/// This intrinsic corresponds to the \c PHADDW instruction.
229
///
230
/// \param __a
231
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
232
///    horizontal sums of the values are stored in the lower bits of the
233
///    destination.
234
/// \param __b
235
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
236
///    horizontal sums of the values are stored in the upper bits of the
237
///    destination.
238
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
239
///    operands.
240
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
241
_mm_hadd_pi16(__m64 __a, __m64 __b)
242
{
243
    return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
244
}
245
 
246
/// Horizontally adds the adjacent pairs of values contained in 2 packed
247
///    64-bit vectors of [2 x i32].
248
///
249
/// \headerfile <x86intrin.h>
250
///
251
/// This intrinsic corresponds to the \c PHADDD instruction.
252
///
253
/// \param __a
254
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
255
///    horizontal sums of the values are stored in the lower bits of the
256
///    destination.
257
/// \param __b
258
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
259
///    horizontal sums of the values are stored in the upper bits of the
260
///    destination.
261
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
262
///    operands.
263
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
264
_mm_hadd_pi32(__m64 __a, __m64 __b)
265
{
266
    return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
267
}
268
 
269
/// Horizontally adds the adjacent pairs of values contained in 2 packed
270
///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
271
///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
272
///    0x8000.
273
///
274
/// \headerfile <x86intrin.h>
275
///
276
/// This intrinsic corresponds to the \c VPHADDSW instruction.
277
///
278
/// \param __a
279
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
280
///    horizontal sums of the values are stored in the lower bits of the
281
///    destination.
282
/// \param __b
283
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
284
///    horizontal sums of the values are stored in the upper bits of the
285
///    destination.
286
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
287
///    sums of both operands.
288
static __inline__ __m128i __DEFAULT_FN_ATTRS
289
_mm_hadds_epi16(__m128i __a, __m128i __b)
290
{
291
    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
292
}
293
 
294
/// Horizontally adds the adjacent pairs of values contained in 2 packed
295
///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
296
///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
297
///    0x8000.
298
///
299
/// \headerfile <x86intrin.h>
300
///
301
/// This intrinsic corresponds to the \c PHADDSW instruction.
302
///
303
/// \param __a
304
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
305
///    horizontal sums of the values are stored in the lower bits of the
306
///    destination.
307
/// \param __b
308
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
309
///    horizontal sums of the values are stored in the upper bits of the
310
///    destination.
311
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
312
///    sums of both operands.
313
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
314
_mm_hadds_pi16(__m64 __a, __m64 __b)
315
{
316
    return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
317
}
318
 
319
/// Horizontally subtracts the adjacent pairs of values contained in 2
320
///    packed 128-bit vectors of [8 x i16].
321
///
322
/// \headerfile <x86intrin.h>
323
///
324
/// This intrinsic corresponds to the \c VPHSUBW instruction.
325
///
326
/// \param __a
327
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
328
///    horizontal differences between the values are stored in the lower bits of
329
///    the destination.
330
/// \param __b
331
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
332
///    horizontal differences between the values are stored in the upper bits of
333
///    the destination.
334
/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
335
///    of both operands.
336
static __inline__ __m128i __DEFAULT_FN_ATTRS
337
_mm_hsub_epi16(__m128i __a, __m128i __b)
338
{
339
    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
340
}
341
 
342
/// Horizontally subtracts the adjacent pairs of values contained in 2
343
///    packed 128-bit vectors of [4 x i32].
344
///
345
/// \headerfile <x86intrin.h>
346
///
347
/// This intrinsic corresponds to the \c VPHSUBD instruction.
348
///
349
/// \param __a
350
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
351
///    horizontal differences between the values are stored in the lower bits of
352
///    the destination.
353
/// \param __b
354
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
355
///    horizontal differences between the values are stored in the upper bits of
356
///    the destination.
357
/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
358
///    of both operands.
359
static __inline__ __m128i __DEFAULT_FN_ATTRS
360
_mm_hsub_epi32(__m128i __a, __m128i __b)
361
{
362
    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
363
}
364
 
365
/// Horizontally subtracts the adjacent pairs of values contained in 2
366
///    packed 64-bit vectors of [4 x i16].
367
///
368
/// \headerfile <x86intrin.h>
369
///
370
/// This intrinsic corresponds to the \c PHSUBW instruction.
371
///
372
/// \param __a
373
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
374
///    horizontal differences between the values are stored in the lower bits of
375
///    the destination.
376
/// \param __b
377
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
378
///    horizontal differences between the values are stored in the upper bits of
379
///    the destination.
380
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
381
///    of both operands.
382
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
383
_mm_hsub_pi16(__m64 __a, __m64 __b)
384
{
385
    return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
386
}
387
 
388
/// Horizontally subtracts the adjacent pairs of values contained in 2
389
///    packed 64-bit vectors of [2 x i32].
390
///
391
/// \headerfile <x86intrin.h>
392
///
393
/// This intrinsic corresponds to the \c PHSUBD instruction.
394
///
395
/// \param __a
396
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
397
///    horizontal differences between the values are stored in the lower bits of
398
///    the destination.
399
/// \param __b
400
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
401
///    horizontal differences between the values are stored in the upper bits of
402
///    the destination.
403
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
404
///    of both operands.
405
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
406
_mm_hsub_pi32(__m64 __a, __m64 __b)
407
{
408
    return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
409
}
410
 
411
/// Horizontally subtracts the adjacent pairs of values contained in 2
412
///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
413
///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
414
///    saturated to 0x8000.
415
///
416
/// \headerfile <x86intrin.h>
417
///
418
/// This intrinsic corresponds to the \c VPHSUBSW instruction.
419
///
420
/// \param __a
421
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
422
///    horizontal differences between the values are stored in the lower bits of
423
///    the destination.
424
/// \param __b
425
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
426
///    horizontal differences between the values are stored in the upper bits of
427
///    the destination.
428
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
429
///    differences of both operands.
430
static __inline__ __m128i __DEFAULT_FN_ATTRS
431
_mm_hsubs_epi16(__m128i __a, __m128i __b)
432
{
433
    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
434
}
435
 
436
/// Horizontally subtracts the adjacent pairs of values contained in 2
437
///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
438
///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
439
///    saturated to 0x8000.
440
///
441
/// \headerfile <x86intrin.h>
442
///
443
/// This intrinsic corresponds to the \c PHSUBSW instruction.
444
///
445
/// \param __a
446
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
447
///    horizontal differences between the values are stored in the lower bits of
448
///    the destination.
449
/// \param __b
450
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
451
///    horizontal differences between the values are stored in the upper bits of
452
///    the destination.
453
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
454
///    differences of both operands.
455
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
456
_mm_hsubs_pi16(__m64 __a, __m64 __b)
457
{
458
    return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
459
}
460
 
461
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
462
///    values contained in the first source operand and packed 8-bit signed
463
///    integer values contained in the second source operand, adds pairs of
464
///    contiguous products with signed saturation, and writes the 16-bit sums to
465
///    the corresponding bits in the destination.
466
///
467
///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
468
///    both operands are multiplied, and the sum of both results is written to
469
///    bits [15:0] of the destination.
470
///
471
/// \headerfile <x86intrin.h>
472
///
473
/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
474
///
475
/// \param __a
476
///    A 128-bit integer vector containing the first source operand.
477
/// \param __b
478
///    A 128-bit integer vector containing the second source operand.
479
/// \returns A 128-bit integer vector containing the sums of products of both
480
///    operands: \n
481
///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
482
///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
483
///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
484
///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
485
///    \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
486
///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
487
///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
488
///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
489
static __inline__ __m128i __DEFAULT_FN_ATTRS
490
_mm_maddubs_epi16(__m128i __a, __m128i __b)
491
{
492
    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
493
}
494
 
495
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
496
///    values contained in the first source operand and packed 8-bit signed
497
///    integer values contained in the second source operand, adds pairs of
498
///    contiguous products with signed saturation, and writes the 16-bit sums to
499
///    the corresponding bits in the destination.
500
///
501
///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
502
///    both operands are multiplied, and the sum of both results is written to
503
///    bits [15:0] of the destination.
504
///
505
/// \headerfile <x86intrin.h>
506
///
507
/// This intrinsic corresponds to the \c PMADDUBSW instruction.
508
///
509
/// \param __a
510
///    A 64-bit integer vector containing the first source operand.
511
/// \param __b
512
///    A 64-bit integer vector containing the second source operand.
513
/// \returns A 64-bit integer vector containing the sums of products of both
514
///    operands: \n
515
///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
516
///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
517
///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
518
///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
519
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
520
_mm_maddubs_pi16(__m64 __a, __m64 __b)
521
{
522
    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
523
}
524
 
525
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
526
///    products to the 18 most significant bits by right-shifting, rounds the
527
///    truncated value by adding 1, and writes bits [16:1] to the destination.
528
///
529
/// \headerfile <x86intrin.h>
530
///
531
/// This intrinsic corresponds to the \c VPMULHRSW instruction.
532
///
533
/// \param __a
534
///    A 128-bit vector of [8 x i16] containing one of the source operands.
535
/// \param __b
536
///    A 128-bit vector of [8 x i16] containing one of the source operands.
537
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
538
///    products of both operands.
539
static __inline__ __m128i __DEFAULT_FN_ATTRS
540
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
541
{
542
    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
543
}
544
 
545
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546
///    products to the 18 most significant bits by right-shifting, rounds the
547
///    truncated value by adding 1, and writes bits [16:1] to the destination.
548
///
549
/// \headerfile <x86intrin.h>
550
///
551
/// This intrinsic corresponds to the \c PMULHRSW instruction.
552
///
553
/// \param __a
554
///    A 64-bit vector of [4 x i16] containing one of the source operands.
555
/// \param __b
556
///    A 64-bit vector of [4 x i16] containing one of the source operands.
557
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
558
///    products of both operands.
559
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
560
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
561
{
562
    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
563
}
564
 
565
/// Copies the 8-bit integers from a 128-bit integer vector to the
566
///    destination or clears 8-bit values in the destination, as specified by
567
///    the second source operand.
568
///
569
/// \headerfile <x86intrin.h>
570
///
571
/// This intrinsic corresponds to the \c VPSHUFB instruction.
572
///
573
/// \param __a
574
///    A 128-bit integer vector containing the values to be copied.
575
/// \param __b
576
///    A 128-bit integer vector containing control bytes corresponding to
577
///    positions in the destination:
578
///    Bit 7: \n
579
///    1: Clear the corresponding byte in the destination. \n
580
///    0: Copy the selected source byte to the corresponding byte in the
581
///    destination. \n
582
///    Bits [6:4] Reserved.  \n
583
///    Bits [3:0] select the source byte to be copied.
584
/// \returns A 128-bit integer vector containing the copied or cleared values.
585
static __inline__ __m128i __DEFAULT_FN_ATTRS
586
_mm_shuffle_epi8(__m128i __a, __m128i __b)
587
{
588
    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
589
}
590
 
591
/// Copies the 8-bit integers from a 64-bit integer vector to the
592
///    destination or clears 8-bit values in the destination, as specified by
593
///    the second source operand.
594
///
595
/// \headerfile <x86intrin.h>
596
///
597
/// This intrinsic corresponds to the \c PSHUFB instruction.
598
///
599
/// \param __a
600
///    A 64-bit integer vector containing the values to be copied.
601
/// \param __b
602
///    A 64-bit integer vector containing control bytes corresponding to
603
///    positions in the destination:
604
///    Bit 7: \n
605
///    1: Clear the corresponding byte in the destination. \n
606
///    0: Copy the selected source byte to the corresponding byte in the
607
///    destination. \n
608
///    Bits [3:0] select the source byte to be copied.
609
/// \returns A 64-bit integer vector containing the copied or cleared values.
610
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
611
_mm_shuffle_pi8(__m64 __a, __m64 __b)
612
{
613
    return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
614
}
615
 
616
/// For each 8-bit integer in the first source operand, perform one of
617
///    the following actions as specified by the second source operand.
618
///
619
///    If the byte in the second source is negative, calculate the two's
620
///    complement of the corresponding byte in the first source, and write that
621
///    value to the destination. If the byte in the second source is positive,
622
///    copy the corresponding byte from the first source to the destination. If
623
///    the byte in the second source is zero, clear the corresponding byte in
624
///    the destination.
625
///
626
/// \headerfile <x86intrin.h>
627
///
628
/// This intrinsic corresponds to the \c VPSIGNB instruction.
629
///
630
/// \param __a
631
///    A 128-bit integer vector containing the values to be copied.
632
/// \param __b
633
///    A 128-bit integer vector containing control bytes corresponding to
634
///    positions in the destination.
635
/// \returns A 128-bit integer vector containing the resultant values.
636
static __inline__ __m128i __DEFAULT_FN_ATTRS
637
_mm_sign_epi8(__m128i __a, __m128i __b)
638
{
639
    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
640
}
641
 
642
/// For each 16-bit integer in the first source operand, perform one of
643
///    the following actions as specified by the second source operand.
644
///
645
///    If the word in the second source is negative, calculate the two's
646
///    complement of the corresponding word in the first source, and write that
647
///    value to the destination. If the word in the second source is positive,
648
///    copy the corresponding word from the first source to the destination. If
649
///    the word in the second source is zero, clear the corresponding word in
650
///    the destination.
651
///
652
/// \headerfile <x86intrin.h>
653
///
654
/// This intrinsic corresponds to the \c VPSIGNW instruction.
655
///
656
/// \param __a
657
///    A 128-bit integer vector containing the values to be copied.
658
/// \param __b
659
///    A 128-bit integer vector containing control words corresponding to
660
///    positions in the destination.
661
/// \returns A 128-bit integer vector containing the resultant values.
662
static __inline__ __m128i __DEFAULT_FN_ATTRS
663
_mm_sign_epi16(__m128i __a, __m128i __b)
664
{
665
    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
666
}
667
 
668
/// For each 32-bit integer in the first source operand, perform one of
669
///    the following actions as specified by the second source operand.
670
///
671
///    If the doubleword in the second source is negative, calculate the two's
672
///    complement of the corresponding word in the first source, and write that
673
///    value to the destination. If the doubleword in the second source is
674
///    positive, copy the corresponding word from the first source to the
675
///    destination. If the doubleword in the second source is zero, clear the
676
///    corresponding word in the destination.
677
///
678
/// \headerfile <x86intrin.h>
679
///
680
/// This intrinsic corresponds to the \c VPSIGND instruction.
681
///
682
/// \param __a
683
///    A 128-bit integer vector containing the values to be copied.
684
/// \param __b
685
///    A 128-bit integer vector containing control doublewords corresponding to
686
///    positions in the destination.
687
/// \returns A 128-bit integer vector containing the resultant values.
688
static __inline__ __m128i __DEFAULT_FN_ATTRS
689
_mm_sign_epi32(__m128i __a, __m128i __b)
690
{
691
    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
692
}
693
 
694
/// For each 8-bit integer in the first source operand, perform one of
695
///    the following actions as specified by the second source operand.
696
///
697
///    If the byte in the second source is negative, calculate the two's
698
///    complement of the corresponding byte in the first source, and write that
699
///    value to the destination. If the byte in the second source is positive,
700
///    copy the corresponding byte from the first source to the destination. If
701
///    the byte in the second source is zero, clear the corresponding byte in
702
///    the destination.
703
///
704
/// \headerfile <x86intrin.h>
705
///
706
/// This intrinsic corresponds to the \c PSIGNB instruction.
707
///
708
/// \param __a
709
///    A 64-bit integer vector containing the values to be copied.
710
/// \param __b
711
///    A 64-bit integer vector containing control bytes corresponding to
712
///    positions in the destination.
713
/// \returns A 64-bit integer vector containing the resultant values.
714
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
715
_mm_sign_pi8(__m64 __a, __m64 __b)
716
{
717
    return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
718
}
719
 
720
/// For each 16-bit integer in the first source operand, perform one of
721
///    the following actions as specified by the second source operand.
722
///
723
///    If the word in the second source is negative, calculate the two's
724
///    complement of the corresponding word in the first source, and write that
725
///    value to the destination. If the word in the second source is positive,
726
///    copy the corresponding word from the first source to the destination. If
727
///    the word in the second source is zero, clear the corresponding word in
728
///    the destination.
729
///
730
/// \headerfile <x86intrin.h>
731
///
732
/// This intrinsic corresponds to the \c PSIGNW instruction.
733
///
734
/// \param __a
735
///    A 64-bit integer vector containing the values to be copied.
736
/// \param __b
737
///    A 64-bit integer vector containing control words corresponding to
738
///    positions in the destination.
739
/// \returns A 64-bit integer vector containing the resultant values.
740
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
741
_mm_sign_pi16(__m64 __a, __m64 __b)
742
{
743
    return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
744
}
745
 
746
/// For each 32-bit integer in the first source operand, perform one of
747
///    the following actions as specified by the second source operand.
748
///
749
///    If the doubleword in the second source is negative, calculate the two's
750
///    complement of the corresponding doubleword in the first source, and
751
///    write that value to the destination. If the doubleword in the second
752
///    source is positive, copy the corresponding doubleword from the first
753
///    source to the destination. If the doubleword in the second source is
754
///    zero, clear the corresponding doubleword in the destination.
755
///
756
/// \headerfile <x86intrin.h>
757
///
758
/// This intrinsic corresponds to the \c PSIGND instruction.
759
///
760
/// \param __a
761
///    A 64-bit integer vector containing the values to be copied.
762
/// \param __b
763
///    A 64-bit integer vector containing two control doublewords corresponding
764
///    to positions in the destination.
765
/// \returns A 64-bit integer vector containing the resultant values.
766
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
767
_mm_sign_pi32(__m64 __a, __m64 __b)
768
{
769
    return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
770
}
771
 
772
/* Drop the helper attribute macros so they do not leak out of this header. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX

#endif /* __TMMINTRIN_H */