Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9 | #ifndef __IMMINTRIN_H |
||
10 | #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." |
||
11 | #endif |
||
12 | |||
13 | #ifndef __AVX512FINTRIN_H |
||
14 | #define __AVX512FINTRIN_H |
||
15 | |||
/* Element-typed views of a 64-byte (512-bit) vector. The suffix encodes the
 * lane count and element kind, e.g. __v16si is 16 lanes of int. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Interface vector types: 64 bytes wide and explicitly 64-byte aligned. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Same layouts with the alignment requirement lowered to 1 byte.
 * NOTE(review): presumably intended for unaligned loads/stores -- the users
 * are outside this part of the file. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Lane masks: 8 bits wide (used below with 8-lane vectors) and 16 bits wide
 * (used below with 16-lane vectors). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
43 | |||
/* Rounding mode macros. The first four name an explicit rounding direction;
 * _MM_FROUND_CUR_DIRECTION is a separate flag value (0x04).
 * NOTE(review): presumably "use the current rounding mode" -- confirm against
 * the intrinsics that consume these values. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
50 | |||
/* Constants for integer comparison predicates.
 *
 * Enumerators take consecutive values starting at 0, so _MM_CMPINT_UNUSED
 * occupies value 3. _MM_CMPINT_GE and _MM_CMPINT_GT are macro aliases for the
 * "not less than" / "not less than or equal" enumerators rather than separate
 * enumerators. */
typedef enum {
  _MM_CMPINT_EQ,      /* Equal */
  _MM_CMPINT_LT,      /* Less than */
  _MM_CMPINT_LE,      /* Less than or Equal */
  _MM_CMPINT_UNUSED,
  _MM_CMPINT_NE,      /* Not Equal */
  _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
  _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
63 | |||
/* Four-letter permutation selectors.
 *
 * Each letter stands for a 2-bit field (A = 0, B = 1, C = 2, D = 3); the
 * leftmost letter occupies the two most significant bits of the 8-bit value,
 * e.g. _MM_PERM_DCBA == 0xE4 (binary 11 10 01 00). */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
153 | |||
/* Mantissa normalization interval selectors; enumerators take the values
 * 0..3 in declaration order.
 * NOTE(review): presumably consumed by the getmant-style intrinsics defined
 * later in the header -- not visible in this part of the file. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
161 | |||
/* Mantissa sign-handling selectors; enumerators take the values 0..2 in
 * declaration order.
 * NOTE(review): presumably paired with _MM_MANTISSA_NORM_ENUM by the
 * getmant-style intrinsics -- not visible in this part of the file. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)              */
  _MM_MANT_SIGN_zero,   /* sign = 0                      */
  _MM_MANT_SIGN_nan     /* DEST = NaN if sign(SRC) = 1   */
} _MM_MANTISSA_SIGN_ENUM;
168 | |||
/* Define the default attributes for the functions in this file.
 *
 * All three force inlining, suppress debug info and enable the "avx512f"
 * target feature for the annotated function. The 512/128 variants additionally
 * declare a minimum vector width of 512 or 128 bits; the plain variant (used
 * below for the mask<->int conversions) declares none. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
173 | |||
/* Create vectors with repeated elements */
175 | |||
176 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
177 | _mm512_setzero_si512(void) |
||
178 | { |
||
179 | return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
||
180 | } |
||
181 | |||
182 | #define _mm512_setzero_epi32 _mm512_setzero_si512 |
||
183 | |||
184 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
185 | _mm512_undefined_pd(void) |
||
186 | { |
||
187 | return (__m512d)__builtin_ia32_undef512(); |
||
188 | } |
||
189 | |||
190 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
191 | _mm512_undefined(void) |
||
192 | { |
||
193 | return (__m512)__builtin_ia32_undef512(); |
||
194 | } |
||
195 | |||
196 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
197 | _mm512_undefined_ps(void) |
||
198 | { |
||
199 | return (__m512)__builtin_ia32_undef512(); |
||
200 | } |
||
201 | |||
202 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
203 | _mm512_undefined_epi32(void) |
||
204 | { |
||
205 | return (__m512i)__builtin_ia32_undef512(); |
||
206 | } |
||
207 | |||
208 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
209 | _mm512_broadcastd_epi32 (__m128i __A) |
||
210 | { |
||
211 | return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, |
||
212 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
||
213 | } |
||
214 | |||
215 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
216 | _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) |
||
217 | { |
||
218 | return (__m512i)__builtin_ia32_selectd_512(__M, |
||
219 | (__v16si) _mm512_broadcastd_epi32(__A), |
||
220 | (__v16si) __O); |
||
221 | } |
||
222 | |||
223 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
224 | _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) |
||
225 | { |
||
226 | return (__m512i)__builtin_ia32_selectd_512(__M, |
||
227 | (__v16si) _mm512_broadcastd_epi32(__A), |
||
228 | (__v16si) _mm512_setzero_si512()); |
||
229 | } |
||
230 | |||
231 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
232 | _mm512_broadcastq_epi64 (__m128i __A) |
||
233 | { |
||
234 | return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, |
||
235 | 0, 0, 0, 0, 0, 0, 0, 0); |
||
236 | } |
||
237 | |||
238 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
239 | _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) |
||
240 | { |
||
241 | return (__m512i)__builtin_ia32_selectq_512(__M, |
||
242 | (__v8di) _mm512_broadcastq_epi64(__A), |
||
243 | (__v8di) __O); |
||
244 | |||
245 | } |
||
246 | |||
247 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
248 | _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) |
||
249 | { |
||
250 | return (__m512i)__builtin_ia32_selectq_512(__M, |
||
251 | (__v8di) _mm512_broadcastq_epi64(__A), |
||
252 | (__v8di) _mm512_setzero_si512()); |
||
253 | } |
||
254 | |||
255 | |||
256 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
257 | _mm512_setzero_ps(void) |
||
258 | { |
||
259 | return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, |
||
260 | 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; |
||
261 | } |
||
262 | |||
263 | #define _mm512_setzero _mm512_setzero_ps |
||
264 | |||
265 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
266 | _mm512_setzero_pd(void) |
||
267 | { |
||
268 | return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
||
269 | } |
||
270 | |||
271 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
272 | _mm512_set1_ps(float __w) |
||
273 | { |
||
274 | return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, |
||
275 | __w, __w, __w, __w, __w, __w, __w, __w }; |
||
276 | } |
||
277 | |||
278 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
279 | _mm512_set1_pd(double __w) |
||
280 | { |
||
281 | return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; |
||
282 | } |
||
283 | |||
284 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
285 | _mm512_set1_epi8(char __w) |
||
286 | { |
||
287 | return __extension__ (__m512i)(__v64qi){ |
||
288 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
289 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
290 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
291 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
292 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
293 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
294 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
295 | __w, __w, __w, __w, __w, __w, __w, __w }; |
||
296 | } |
||
297 | |||
298 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
299 | _mm512_set1_epi16(short __w) |
||
300 | { |
||
301 | return __extension__ (__m512i)(__v32hi){ |
||
302 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
303 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
304 | __w, __w, __w, __w, __w, __w, __w, __w, |
||
305 | __w, __w, __w, __w, __w, __w, __w, __w }; |
||
306 | } |
||
307 | |||
308 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
309 | _mm512_set1_epi32(int __s) |
||
310 | { |
||
311 | return __extension__ (__m512i)(__v16si){ |
||
312 | __s, __s, __s, __s, __s, __s, __s, __s, |
||
313 | __s, __s, __s, __s, __s, __s, __s, __s }; |
||
314 | } |
||
315 | |||
316 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
317 | _mm512_maskz_set1_epi32(__mmask16 __M, int __A) |
||
318 | { |
||
319 | return (__m512i)__builtin_ia32_selectd_512(__M, |
||
320 | (__v16si)_mm512_set1_epi32(__A), |
||
321 | (__v16si)_mm512_setzero_si512()); |
||
322 | } |
||
323 | |||
324 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
325 | _mm512_set1_epi64(long long __d) |
||
326 | { |
||
327 | return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; |
||
328 | } |
||
329 | |||
330 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
331 | _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) |
||
332 | { |
||
333 | return (__m512i)__builtin_ia32_selectq_512(__M, |
||
334 | (__v8di)_mm512_set1_epi64(__A), |
||
335 | (__v8di)_mm512_setzero_si512()); |
||
336 | } |
||
337 | |||
338 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
339 | _mm512_broadcastss_ps(__m128 __A) |
||
340 | { |
||
341 | return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, |
||
342 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
||
343 | } |
||
344 | |||
345 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
346 | _mm512_set4_epi32 (int __A, int __B, int __C, int __D) |
||
347 | { |
||
348 | return __extension__ (__m512i)(__v16si) |
||
349 | { __D, __C, __B, __A, __D, __C, __B, __A, |
||
350 | __D, __C, __B, __A, __D, __C, __B, __A }; |
||
351 | } |
||
352 | |||
353 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
354 | _mm512_set4_epi64 (long long __A, long long __B, long long __C, |
||
355 | long long __D) |
||
356 | { |
||
357 | return __extension__ (__m512i) (__v8di) |
||
358 | { __D, __C, __B, __A, __D, __C, __B, __A }; |
||
359 | } |
||
360 | |||
361 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
362 | _mm512_set4_pd (double __A, double __B, double __C, double __D) |
||
363 | { |
||
364 | return __extension__ (__m512d) |
||
365 | { __D, __C, __B, __A, __D, __C, __B, __A }; |
||
366 | } |
||
367 | |||
368 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
369 | _mm512_set4_ps (float __A, float __B, float __C, float __D) |
||
370 | { |
||
371 | return __extension__ (__m512) |
||
372 | { __D, __C, __B, __A, __D, __C, __B, __A, |
||
373 | __D, __C, __B, __A, __D, __C, __B, __A }; |
||
374 | } |
||
375 | |||
/* "Reversed" forms of the _mm512_set4_* constructors: the arguments are
 * forwarded in the opposite order, so e0 ends up in lane 0, e1 in lane 1,
 * and so on, with the four-element pattern repeated across the vector. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
387 | |||
388 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
389 | _mm512_broadcastsd_pd(__m128d __A) |
||
390 | { |
||
391 | return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, |
||
392 | 0, 0, 0, 0, 0, 0, 0, 0); |
||
393 | } |
||
394 | |||
/* Cast between vector types */
396 | |||
397 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
398 | _mm512_castpd256_pd512(__m256d __a) |
||
399 | { |
||
400 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); |
||
401 | } |
||
402 | |||
403 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
404 | _mm512_castps256_ps512(__m256 __a) |
||
405 | { |
||
406 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, |
||
407 | -1, -1, -1, -1, -1, -1, -1, -1); |
||
408 | } |
||
409 | |||
410 | static __inline __m128d __DEFAULT_FN_ATTRS512 |
||
411 | _mm512_castpd512_pd128(__m512d __a) |
||
412 | { |
||
413 | return __builtin_shufflevector(__a, __a, 0, 1); |
||
414 | } |
||
415 | |||
416 | static __inline __m256d __DEFAULT_FN_ATTRS512 |
||
417 | _mm512_castpd512_pd256 (__m512d __A) |
||
418 | { |
||
419 | return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); |
||
420 | } |
||
421 | |||
422 | static __inline __m128 __DEFAULT_FN_ATTRS512 |
||
423 | _mm512_castps512_ps128(__m512 __a) |
||
424 | { |
||
425 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); |
||
426 | } |
||
427 | |||
428 | static __inline __m256 __DEFAULT_FN_ATTRS512 |
||
429 | _mm512_castps512_ps256 (__m512 __A) |
||
430 | { |
||
431 | return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); |
||
432 | } |
||
433 | |||
434 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
435 | _mm512_castpd_ps (__m512d __A) |
||
436 | { |
||
437 | return (__m512) (__A); |
||
438 | } |
||
439 | |||
440 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
441 | _mm512_castpd_si512 (__m512d __A) |
||
442 | { |
||
443 | return (__m512i) (__A); |
||
444 | } |
||
445 | |||
446 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
447 | _mm512_castpd128_pd512 (__m128d __A) |
||
448 | { |
||
449 | return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); |
||
450 | } |
||
451 | |||
452 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
453 | _mm512_castps_pd (__m512 __A) |
||
454 | { |
||
455 | return (__m512d) (__A); |
||
456 | } |
||
457 | |||
458 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
459 | _mm512_castps_si512 (__m512 __A) |
||
460 | { |
||
461 | return (__m512i) (__A); |
||
462 | } |
||
463 | |||
464 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
465 | _mm512_castps128_ps512 (__m128 __A) |
||
466 | { |
||
467 | return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); |
||
468 | } |
||
469 | |||
470 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
471 | _mm512_castsi128_si512 (__m128i __A) |
||
472 | { |
||
473 | return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); |
||
474 | } |
||
475 | |||
476 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
477 | _mm512_castsi256_si512 (__m256i __A) |
||
478 | { |
||
479 | return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); |
||
480 | } |
||
481 | |||
482 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
483 | _mm512_castsi512_ps (__m512i __A) |
||
484 | { |
||
485 | return (__m512) (__A); |
||
486 | } |
||
487 | |||
488 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
489 | _mm512_castsi512_pd (__m512i __A) |
||
490 | { |
||
491 | return (__m512d) (__A); |
||
492 | } |
||
493 | |||
494 | static __inline __m128i __DEFAULT_FN_ATTRS512 |
||
495 | _mm512_castsi512_si128 (__m512i __A) |
||
496 | { |
||
497 | return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); |
||
498 | } |
||
499 | |||
500 | static __inline __m256i __DEFAULT_FN_ATTRS512 |
||
501 | _mm512_castsi512_si256 (__m512i __A) |
||
502 | { |
||
503 | return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); |
||
504 | } |
||
505 | |||
506 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
507 | _mm512_int2mask(int __a) |
||
508 | { |
||
509 | return (__mmask16)__a; |
||
510 | } |
||
511 | |||
512 | static __inline__ int __DEFAULT_FN_ATTRS |
||
513 | _mm512_mask2int(__mmask16 __a) |
||
514 | { |
||
515 | return (int)__a; |
||
516 | } |
||
517 | |||
518 | /// Constructs a 512-bit floating-point vector of [8 x double] from a |
||
519 | /// 128-bit floating-point vector of [2 x double]. The lower 128 bits |
||
520 | /// contain the value of the source vector. The upper 384 bits are set |
||
521 | /// to zero. |
||
522 | /// |
||
523 | /// \headerfile <x86intrin.h> |
||
524 | /// |
||
525 | /// This intrinsic has no corresponding instruction. |
||
526 | /// |
||
527 | /// \param __a |
||
528 | /// A 128-bit vector of [2 x double]. |
||
529 | /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits |
||
530 | /// contain the value of the parameter. The upper 384 bits are set to zero. |
||
531 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
532 | _mm512_zextpd128_pd512(__m128d __a) |
||
533 | { |
||
534 | return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); |
||
535 | } |
||
536 | |||
537 | /// Constructs a 512-bit floating-point vector of [8 x double] from a |
||
538 | /// 256-bit floating-point vector of [4 x double]. The lower 256 bits |
||
539 | /// contain the value of the source vector. The upper 256 bits are set |
||
540 | /// to zero. |
||
541 | /// |
||
542 | /// \headerfile <x86intrin.h> |
||
543 | /// |
||
544 | /// This intrinsic has no corresponding instruction. |
||
545 | /// |
||
546 | /// \param __a |
||
547 | /// A 256-bit vector of [4 x double]. |
||
548 | /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits |
||
549 | /// contain the value of the parameter. The upper 256 bits are set to zero. |
||
550 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
551 | _mm512_zextpd256_pd512(__m256d __a) |
||
552 | { |
||
553 | return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); |
||
554 | } |
||
555 | |||
556 | /// Constructs a 512-bit floating-point vector of [16 x float] from a |
||
557 | /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain |
||
558 | /// the value of the source vector. The upper 384 bits are set to zero. |
||
559 | /// |
||
560 | /// \headerfile <x86intrin.h> |
||
561 | /// |
||
562 | /// This intrinsic has no corresponding instruction. |
||
563 | /// |
||
564 | /// \param __a |
||
565 | /// A 128-bit vector of [4 x float]. |
||
566 | /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits |
||
567 | /// contain the value of the parameter. The upper 384 bits are set to zero. |
||
568 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
569 | _mm512_zextps128_ps512(__m128 __a) |
||
570 | { |
||
571 | return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); |
||
572 | } |
||
573 | |||
574 | /// Constructs a 512-bit floating-point vector of [16 x float] from a |
||
575 | /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain |
||
576 | /// the value of the source vector. The upper 256 bits are set to zero. |
||
577 | /// |
||
578 | /// \headerfile <x86intrin.h> |
||
579 | /// |
||
580 | /// This intrinsic has no corresponding instruction. |
||
581 | /// |
||
582 | /// \param __a |
||
583 | /// A 256-bit vector of [8 x float]. |
||
584 | /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits |
||
585 | /// contain the value of the parameter. The upper 256 bits are set to zero. |
||
586 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
587 | _mm512_zextps256_ps512(__m256 __a) |
||
588 | { |
||
589 | return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
||
590 | } |
||
591 | |||
592 | /// Constructs a 512-bit integer vector from a 128-bit integer vector. |
||
593 | /// The lower 128 bits contain the value of the source vector. The upper |
||
594 | /// 384 bits are set to zero. |
||
595 | /// |
||
596 | /// \headerfile <x86intrin.h> |
||
597 | /// |
||
598 | /// This intrinsic has no corresponding instruction. |
||
599 | /// |
||
600 | /// \param __a |
||
601 | /// A 128-bit integer vector. |
||
602 | /// \returns A 512-bit integer vector. The lower 128 bits contain the value of |
||
603 | /// the parameter. The upper 384 bits are set to zero. |
||
604 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
605 | _mm512_zextsi128_si512(__m128i __a) |
||
606 | { |
||
607 | return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); |
||
608 | } |
||
609 | |||
610 | /// Constructs a 512-bit integer vector from a 256-bit integer vector. |
||
611 | /// The lower 256 bits contain the value of the source vector. The upper |
||
612 | /// 256 bits are set to zero. |
||
613 | /// |
||
614 | /// \headerfile <x86intrin.h> |
||
615 | /// |
||
616 | /// This intrinsic has no corresponding instruction. |
||
617 | /// |
||
618 | /// \param __a |
||
619 | /// A 256-bit integer vector. |
||
620 | /// \returns A 512-bit integer vector. The lower 256 bits contain the value of |
||
621 | /// the parameter. The upper 256 bits are set to zero. |
||
622 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
623 | _mm512_zextsi256_si512(__m256i __a) |
||
624 | { |
||
625 | return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); |
||
626 | } |
||
627 | |||
/* Bitwise operators */
629 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
630 | _mm512_and_epi32(__m512i __a, __m512i __b) |
||
631 | { |
||
632 | return (__m512i)((__v16su)__a & (__v16su)__b); |
||
633 | } |
||
634 | |||
635 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
636 | _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) |
||
637 | { |
||
638 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, |
||
639 | (__v16si) _mm512_and_epi32(__a, __b), |
||
640 | (__v16si) __src); |
||
641 | } |
||
642 | |||
643 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
644 | _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) |
||
645 | { |
||
646 | return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), |
||
647 | __k, __a, __b); |
||
648 | } |
||
649 | |||
650 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
651 | _mm512_and_epi64(__m512i __a, __m512i __b) |
||
652 | { |
||
653 | return (__m512i)((__v8du)__a & (__v8du)__b); |
||
654 | } |
||
655 | |||
656 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
657 | _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) |
||
658 | { |
||
659 | return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, |
||
660 | (__v8di) _mm512_and_epi64(__a, __b), |
||
661 | (__v8di) __src); |
||
662 | } |
||
663 | |||
664 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
665 | _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) |
||
666 | { |
||
667 | return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), |
||
668 | __k, __a, __b); |
||
669 | } |
||
670 | |||
671 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
672 | _mm512_andnot_si512 (__m512i __A, __m512i __B) |
||
673 | { |
||
674 | return (__m512i)(~(__v8du)__A & (__v8du)__B); |
||
675 | } |
||
676 | |||
677 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
678 | _mm512_andnot_epi32 (__m512i __A, __m512i __B) |
||
679 | { |
||
680 | return (__m512i)(~(__v16su)__A & (__v16su)__B); |
||
681 | } |
||
682 | |||
683 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
684 | _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
685 | { |
||
686 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
687 | (__v16si)_mm512_andnot_epi32(__A, __B), |
||
688 | (__v16si)__W); |
||
689 | } |
||
690 | |||
691 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
692 | _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) |
||
693 | { |
||
694 | return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), |
||
695 | __U, __A, __B); |
||
696 | } |
||
697 | |||
698 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
699 | _mm512_andnot_epi64(__m512i __A, __m512i __B) |
||
700 | { |
||
701 | return (__m512i)(~(__v8du)__A & (__v8du)__B); |
||
702 | } |
||
703 | |||
704 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
705 | _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
706 | { |
||
707 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
708 | (__v8di)_mm512_andnot_epi64(__A, __B), |
||
709 | (__v8di)__W); |
||
710 | } |
||
711 | |||
712 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
713 | _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) |
||
714 | { |
||
715 | return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), |
||
716 | __U, __A, __B); |
||
717 | } |
||
718 | |||
719 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
720 | _mm512_or_epi32(__m512i __a, __m512i __b) |
||
721 | { |
||
722 | return (__m512i)((__v16su)__a | (__v16su)__b); |
||
723 | } |
||
724 | |||
725 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
726 | _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) |
||
727 | { |
||
728 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, |
||
729 | (__v16si)_mm512_or_epi32(__a, __b), |
||
730 | (__v16si)__src); |
||
731 | } |
||
732 | |||
733 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
734 | _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) |
||
735 | { |
||
736 | return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); |
||
737 | } |
||
738 | |||
739 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
740 | _mm512_or_epi64(__m512i __a, __m512i __b) |
||
741 | { |
||
742 | return (__m512i)((__v8du)__a | (__v8du)__b); |
||
743 | } |
||
744 | |||
745 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
746 | _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) |
||
747 | { |
||
748 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, |
||
749 | (__v8di)_mm512_or_epi64(__a, __b), |
||
750 | (__v8di)__src); |
||
751 | } |
||
752 | |||
753 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
754 | _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) |
||
755 | { |
||
756 | return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); |
||
757 | } |
||
758 | |||
759 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
760 | _mm512_xor_epi32(__m512i __a, __m512i __b) |
||
761 | { |
||
762 | return (__m512i)((__v16su)__a ^ (__v16su)__b); |
||
763 | } |
||
764 | |||
765 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
766 | _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) |
||
767 | { |
||
768 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, |
||
769 | (__v16si)_mm512_xor_epi32(__a, __b), |
||
770 | (__v16si)__src); |
||
771 | } |
||
772 | |||
773 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
774 | _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) |
||
775 | { |
||
776 | return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); |
||
777 | } |
||
778 | |||
779 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
780 | _mm512_xor_epi64(__m512i __a, __m512i __b) |
||
781 | { |
||
782 | return (__m512i)((__v8du)__a ^ (__v8du)__b); |
||
783 | } |
||
784 | |||
785 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
786 | _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) |
||
787 | { |
||
788 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, |
||
789 | (__v8di)_mm512_xor_epi64(__a, __b), |
||
790 | (__v8di)__src); |
||
791 | } |
||
792 | |||
793 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
794 | _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) |
||
795 | { |
||
796 | return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); |
||
797 | } |
||
798 | |||
799 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
800 | _mm512_and_si512(__m512i __a, __m512i __b) |
||
801 | { |
||
802 | return (__m512i)((__v8du)__a & (__v8du)__b); |
||
803 | } |
||
804 | |||
805 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
806 | _mm512_or_si512(__m512i __a, __m512i __b) |
||
807 | { |
||
808 | return (__m512i)((__v8du)__a | (__v8du)__b); |
||
809 | } |
||
810 | |||
811 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
812 | _mm512_xor_si512(__m512i __a, __m512i __b) |
||
813 | { |
||
814 | return (__m512i)((__v8du)__a ^ (__v8du)__b); |
||
815 | } |
||
816 | |||
817 | /* Arithmetic */ |
||
818 | |||
819 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
820 | _mm512_add_pd(__m512d __a, __m512d __b) |
||
821 | { |
||
822 | return (__m512d)((__v8df)__a + (__v8df)__b); |
||
823 | } |
||
824 | |||
825 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
826 | _mm512_add_ps(__m512 __a, __m512 __b) |
||
827 | { |
||
828 | return (__m512)((__v16sf)__a + (__v16sf)__b); |
||
829 | } |
||
830 | |||
831 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
832 | _mm512_mul_pd(__m512d __a, __m512d __b) |
||
833 | { |
||
834 | return (__m512d)((__v8df)__a * (__v8df)__b); |
||
835 | } |
||
836 | |||
837 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
838 | _mm512_mul_ps(__m512 __a, __m512 __b) |
||
839 | { |
||
840 | return (__m512)((__v16sf)__a * (__v16sf)__b); |
||
841 | } |
||
842 | |||
843 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
844 | _mm512_sub_pd(__m512d __a, __m512d __b) |
||
845 | { |
||
846 | return (__m512d)((__v8df)__a - (__v8df)__b); |
||
847 | } |
||
848 | |||
849 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
850 | _mm512_sub_ps(__m512 __a, __m512 __b) |
||
851 | { |
||
852 | return (__m512)((__v16sf)__a - (__v16sf)__b); |
||
853 | } |
||
854 | |||
855 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
856 | _mm512_add_epi64 (__m512i __A, __m512i __B) |
||
857 | { |
||
858 | return (__m512i) ((__v8du) __A + (__v8du) __B); |
||
859 | } |
||
860 | |||
861 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
862 | _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
863 | { |
||
864 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
865 | (__v8di)_mm512_add_epi64(__A, __B), |
||
866 | (__v8di)__W); |
||
867 | } |
||
868 | |||
869 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
870 | _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) |
||
871 | { |
||
872 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
873 | (__v8di)_mm512_add_epi64(__A, __B), |
||
874 | (__v8di)_mm512_setzero_si512()); |
||
875 | } |
||
876 | |||
877 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
878 | _mm512_sub_epi64 (__m512i __A, __m512i __B) |
||
879 | { |
||
880 | return (__m512i) ((__v8du) __A - (__v8du) __B); |
||
881 | } |
||
882 | |||
883 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
884 | _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
885 | { |
||
886 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
887 | (__v8di)_mm512_sub_epi64(__A, __B), |
||
888 | (__v8di)__W); |
||
889 | } |
||
890 | |||
891 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
892 | _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) |
||
893 | { |
||
894 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
895 | (__v8di)_mm512_sub_epi64(__A, __B), |
||
896 | (__v8di)_mm512_setzero_si512()); |
||
897 | } |
||
898 | |||
899 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
900 | _mm512_add_epi32 (__m512i __A, __m512i __B) |
||
901 | { |
||
902 | return (__m512i) ((__v16su) __A + (__v16su) __B); |
||
903 | } |
||
904 | |||
905 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
906 | _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
907 | { |
||
908 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
909 | (__v16si)_mm512_add_epi32(__A, __B), |
||
910 | (__v16si)__W); |
||
911 | } |
||
912 | |||
913 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
914 | _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
||
915 | { |
||
916 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
917 | (__v16si)_mm512_add_epi32(__A, __B), |
||
918 | (__v16si)_mm512_setzero_si512()); |
||
919 | } |
||
920 | |||
921 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
922 | _mm512_sub_epi32 (__m512i __A, __m512i __B) |
||
923 | { |
||
924 | return (__m512i) ((__v16su) __A - (__v16su) __B); |
||
925 | } |
||
926 | |||
927 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
928 | _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
929 | { |
||
930 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
931 | (__v16si)_mm512_sub_epi32(__A, __B), |
||
932 | (__v16si)__W); |
||
933 | } |
||
934 | |||
935 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
936 | _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) |
||
937 | { |
||
938 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
939 | (__v16si)_mm512_sub_epi32(__A, __B), |
||
940 | (__v16si)_mm512_setzero_si512()); |
||
941 | } |
||
942 | |||
/* Packed-double maximum of A and B.  R is forwarded, cast to int, as the
 * control operand of the maxpd512 builtin. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
946 | |||
/* Merge-masked form of _mm512_max_round_pd: lanes selected by U take the
 * maximum, the remaining lanes are copied from W. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_max_round_pd((A), (B), (R)), \
      (__v8df)(W)))
951 | |||
/* Zero-masked form of _mm512_max_round_pd: lanes not selected by U come
 * from _mm512_setzero_pd(). */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_max_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
956 | |||
957 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
958 | _mm512_max_pd(__m512d __A, __m512d __B) |
||
959 | { |
||
960 | return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, |
||
961 | _MM_FROUND_CUR_DIRECTION); |
||
962 | } |
||
963 | |||
964 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
965 | _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
||
966 | { |
||
967 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
968 | (__v8df)_mm512_max_pd(__A, __B), |
||
969 | (__v8df)__W); |
||
970 | } |
||
971 | |||
972 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
973 | _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) |
||
974 | { |
||
975 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
976 | (__v8df)_mm512_max_pd(__A, __B), |
||
977 | (__v8df)_mm512_setzero_pd()); |
||
978 | } |
||
979 | |||
/* Packed-float maximum of A and B.  R is forwarded, cast to int, as the
 * control operand of the maxps512 builtin. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
983 | |||
/* Merge-masked form of _mm512_max_round_ps: lanes selected by U take the
 * maximum, the remaining lanes are copied from W. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
      (__v16sf)(W)))
988 | |||
/* Zero-masked form of _mm512_max_round_ps: lanes not selected by U come
 * from _mm512_setzero_ps(). */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
993 | |||
994 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
995 | _mm512_max_ps(__m512 __A, __m512 __B) |
||
996 | { |
||
997 | return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, |
||
998 | _MM_FROUND_CUR_DIRECTION); |
||
999 | } |
||
1000 | |||
1001 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1002 | _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
||
1003 | { |
||
1004 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1005 | (__v16sf)_mm512_max_ps(__A, __B), |
||
1006 | (__v16sf)__W); |
||
1007 | } |
||
1008 | |||
1009 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1010 | _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) |
||
1011 | { |
||
1012 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1013 | (__v16sf)_mm512_max_ps(__A, __B), |
||
1014 | (__v16sf)_mm512_setzero_ps()); |
||
1015 | } |
||
1016 | |||
1017 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1018 | _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
1019 | return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, |
||
1020 | (__v4sf) __B, |
||
1021 | (__v4sf) __W, |
||
1022 | (__mmask8) __U, |
||
1023 | _MM_FROUND_CUR_DIRECTION); |
||
1024 | } |
||
1025 | |||
1026 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1027 | _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
1028 | return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, |
||
1029 | (__v4sf) __B, |
||
1030 | (__v4sf) _mm_setzero_ps (), |
||
1031 | (__mmask8) __U, |
||
1032 | _MM_FROUND_CUR_DIRECTION); |
||
1033 | } |
||
1034 | |||
/* Scalar single-precision maximum with explicit control operand R.  The
 * builtin receives a zero pass-through vector and the mask (__mmask8)-1,
 * i.e. every mask bit set. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
1040 | |||
/* Merge-masked scalar single-precision maximum with explicit control
 * operand R; W is the pass-through operand and U the mask. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
1046 | |||
/* Zero-masked scalar single-precision maximum with explicit control
 * operand R; the pass-through operand is _mm_setzero_ps(). */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
1052 | |||
1053 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1054 | _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
1055 | return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, |
||
1056 | (__v2df) __B, |
||
1057 | (__v2df) __W, |
||
1058 | (__mmask8) __U, |
||
1059 | _MM_FROUND_CUR_DIRECTION); |
||
1060 | } |
||
1061 | |||
1062 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1063 | _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
1064 | return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, |
||
1065 | (__v2df) __B, |
||
1066 | (__v2df) _mm_setzero_pd (), |
||
1067 | (__mmask8) __U, |
||
1068 | _MM_FROUND_CUR_DIRECTION); |
||
1069 | } |
||
1070 | |||
/* Scalar double-precision maximum with explicit control operand R.  The
 * builtin receives a zero pass-through vector and the mask (__mmask8)-1,
 * i.e. every mask bit set. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
1076 | |||
/* Merge-masked scalar double-precision maximum with explicit control
 * operand R; W is the pass-through operand and U the mask. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
1082 | |||
/* Zero-masked scalar double-precision maximum with explicit control
 * operand R; the pass-through operand is _mm_setzero_pd(). */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
1088 | |||
1089 | static __inline __m512i |
||
1090 | __DEFAULT_FN_ATTRS512 |
||
1091 | _mm512_max_epi32(__m512i __A, __m512i __B) |
||
1092 | { |
||
1093 | return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); |
||
1094 | } |
||
1095 | |||
1096 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1097 | _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
||
1098 | { |
||
1099 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1100 | (__v16si)_mm512_max_epi32(__A, __B), |
||
1101 | (__v16si)__W); |
||
1102 | } |
||
1103 | |||
1104 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1105 | _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
||
1106 | { |
||
1107 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1108 | (__v16si)_mm512_max_epi32(__A, __B), |
||
1109 | (__v16si)_mm512_setzero_si512()); |
||
1110 | } |
||
1111 | |||
1112 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1113 | _mm512_max_epu32(__m512i __A, __m512i __B) |
||
1114 | { |
||
1115 | return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); |
||
1116 | } |
||
1117 | |||
1118 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1119 | _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
||
1120 | { |
||
1121 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1122 | (__v16si)_mm512_max_epu32(__A, __B), |
||
1123 | (__v16si)__W); |
||
1124 | } |
||
1125 | |||
1126 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1127 | _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) |
||
1128 | { |
||
1129 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1130 | (__v16si)_mm512_max_epu32(__A, __B), |
||
1131 | (__v16si)_mm512_setzero_si512()); |
||
1132 | } |
||
1133 | |||
1134 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1135 | _mm512_max_epi64(__m512i __A, __m512i __B) |
||
1136 | { |
||
1137 | return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); |
||
1138 | } |
||
1139 | |||
1140 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1141 | _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
||
1142 | { |
||
1143 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1144 | (__v8di)_mm512_max_epi64(__A, __B), |
||
1145 | (__v8di)__W); |
||
1146 | } |
||
1147 | |||
1148 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1149 | _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) |
||
1150 | { |
||
1151 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1152 | (__v8di)_mm512_max_epi64(__A, __B), |
||
1153 | (__v8di)_mm512_setzero_si512()); |
||
1154 | } |
||
1155 | |||
1156 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1157 | _mm512_max_epu64(__m512i __A, __m512i __B) |
||
1158 | { |
||
1159 | return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); |
||
1160 | } |
||
1161 | |||
1162 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1163 | _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
||
1164 | { |
||
1165 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1166 | (__v8di)_mm512_max_epu64(__A, __B), |
||
1167 | (__v8di)__W); |
||
1168 | } |
||
1169 | |||
1170 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1171 | _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) |
||
1172 | { |
||
1173 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1174 | (__v8di)_mm512_max_epu64(__A, __B), |
||
1175 | (__v8di)_mm512_setzero_si512()); |
||
1176 | } |
||
1177 | |||
/* Packed-double minimum of A and B.  R is forwarded, cast to int, as the
 * control operand of the minpd512 builtin. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
1181 | |||
/* Merge-masked form of _mm512_min_round_pd: lanes selected by U take the
 * minimum, the remaining lanes are copied from W. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)(W)))
1186 | |||
/* Zero-masked form of _mm512_min_round_pd: lanes not selected by U come
 * from _mm512_setzero_pd(). */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_min_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
1191 | |||
1192 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1193 | _mm512_min_pd(__m512d __A, __m512d __B) |
||
1194 | { |
||
1195 | return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, |
||
1196 | _MM_FROUND_CUR_DIRECTION); |
||
1197 | } |
||
1198 | |||
1199 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1200 | _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
||
1201 | { |
||
1202 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
1203 | (__v8df)_mm512_min_pd(__A, __B), |
||
1204 | (__v8df)__W); |
||
1205 | } |
||
1206 | |||
1207 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1208 | _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) |
||
1209 | { |
||
1210 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
1211 | (__v8df)_mm512_min_pd(__A, __B), |
||
1212 | (__v8df)_mm512_setzero_pd()); |
||
1213 | } |
||
1214 | |||
/* Packed-float minimum of A and B.  R is forwarded, cast to int, as the
 * control operand of the minps512 builtin. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
1218 | |||
/* Merge-masked form of _mm512_min_round_ps: lanes selected by U take the
 * minimum, the remaining lanes are copied from W. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)(W)))
1223 | |||
/* Zero-masked form of _mm512_min_round_ps: lanes not selected by U come
 * from _mm512_setzero_ps(). */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
1228 | |||
1229 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1230 | _mm512_min_ps(__m512 __A, __m512 __B) |
||
1231 | { |
||
1232 | return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, |
||
1233 | _MM_FROUND_CUR_DIRECTION); |
||
1234 | } |
||
1235 | |||
1236 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1237 | _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
||
1238 | { |
||
1239 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1240 | (__v16sf)_mm512_min_ps(__A, __B), |
||
1241 | (__v16sf)__W); |
||
1242 | } |
||
1243 | |||
1244 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1245 | _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) |
||
1246 | { |
||
1247 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1248 | (__v16sf)_mm512_min_ps(__A, __B), |
||
1249 | (__v16sf)_mm512_setzero_ps()); |
||
1250 | } |
||
1251 | |||
1252 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1253 | _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
1254 | return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, |
||
1255 | (__v4sf) __B, |
||
1256 | (__v4sf) __W, |
||
1257 | (__mmask8) __U, |
||
1258 | _MM_FROUND_CUR_DIRECTION); |
||
1259 | } |
||
1260 | |||
1261 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1262 | _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
1263 | return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, |
||
1264 | (__v4sf) __B, |
||
1265 | (__v4sf) _mm_setzero_ps (), |
||
1266 | (__mmask8) __U, |
||
1267 | _MM_FROUND_CUR_DIRECTION); |
||
1268 | } |
||
1269 | |||
/* Scalar single-precision minimum with explicit control operand R.  The
 * builtin receives a zero pass-through vector and the mask (__mmask8)-1,
 * i.e. every mask bit set. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))
1275 | |||
/* Merge-masked scalar single-precision minimum with explicit control
 * operand R; W is the pass-through operand and U the mask. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))
1281 | |||
/* Zero-masked scalar single-precision minimum with explicit control
 * operand R; the pass-through operand is _mm_setzero_ps(). */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
1287 | |||
1288 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1289 | _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
1290 | return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, |
||
1291 | (__v2df) __B, |
||
1292 | (__v2df) __W, |
||
1293 | (__mmask8) __U, |
||
1294 | _MM_FROUND_CUR_DIRECTION); |
||
1295 | } |
||
1296 | |||
1297 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1298 | _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
1299 | return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, |
||
1300 | (__v2df) __B, |
||
1301 | (__v2df) _mm_setzero_pd (), |
||
1302 | (__mmask8) __U, |
||
1303 | _MM_FROUND_CUR_DIRECTION); |
||
1304 | } |
||
1305 | |||
/* Scalar double-precision minimum with explicit control operand R.  The
 * builtin receives a zero pass-through vector and the mask (__mmask8)-1,
 * i.e. every mask bit set. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))
1311 | |||
/* Merge-masked scalar double-precision minimum with explicit control
 * operand R; W is the pass-through operand and U the mask. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))
1317 | |||
/* Zero-masked scalar double-precision minimum with explicit control
 * operand R; the pass-through operand is _mm_setzero_pd(). */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
1323 | |||
1324 | static __inline __m512i |
||
1325 | __DEFAULT_FN_ATTRS512 |
||
1326 | _mm512_min_epi32(__m512i __A, __m512i __B) |
||
1327 | { |
||
1328 | return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); |
||
1329 | } |
||
1330 | |||
1331 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1332 | _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
||
1333 | { |
||
1334 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1335 | (__v16si)_mm512_min_epi32(__A, __B), |
||
1336 | (__v16si)__W); |
||
1337 | } |
||
1338 | |||
1339 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1340 | _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
||
1341 | { |
||
1342 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1343 | (__v16si)_mm512_min_epi32(__A, __B), |
||
1344 | (__v16si)_mm512_setzero_si512()); |
||
1345 | } |
||
1346 | |||
1347 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1348 | _mm512_min_epu32(__m512i __A, __m512i __B) |
||
1349 | { |
||
1350 | return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); |
||
1351 | } |
||
1352 | |||
1353 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1354 | _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
||
1355 | { |
||
1356 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1357 | (__v16si)_mm512_min_epu32(__A, __B), |
||
1358 | (__v16si)__W); |
||
1359 | } |
||
1360 | |||
1361 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1362 | _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) |
||
1363 | { |
||
1364 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1365 | (__v16si)_mm512_min_epu32(__A, __B), |
||
1366 | (__v16si)_mm512_setzero_si512()); |
||
1367 | } |
||
1368 | |||
1369 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1370 | _mm512_min_epi64(__m512i __A, __m512i __B) |
||
1371 | { |
||
1372 | return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); |
||
1373 | } |
||
1374 | |||
1375 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1376 | _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
||
1377 | { |
||
1378 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1379 | (__v8di)_mm512_min_epi64(__A, __B), |
||
1380 | (__v8di)__W); |
||
1381 | } |
||
1382 | |||
1383 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1384 | _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) |
||
1385 | { |
||
1386 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1387 | (__v8di)_mm512_min_epi64(__A, __B), |
||
1388 | (__v8di)_mm512_setzero_si512()); |
||
1389 | } |
||
1390 | |||
1391 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1392 | _mm512_min_epu64(__m512i __A, __m512i __B) |
||
1393 | { |
||
1394 | return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); |
||
1395 | } |
||
1396 | |||
1397 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1398 | _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
||
1399 | { |
||
1400 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1401 | (__v8di)_mm512_min_epu64(__A, __B), |
||
1402 | (__v8di)__W); |
||
1403 | } |
||
1404 | |||
1405 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1406 | _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) |
||
1407 | { |
||
1408 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1409 | (__v8di)_mm512_min_epu64(__A, __B), |
||
1410 | (__v8di)_mm512_setzero_si512()); |
||
1411 | } |
||
1412 | |||
1413 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1414 | _mm512_mul_epi32(__m512i __X, __m512i __Y) |
||
1415 | { |
||
1416 | return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); |
||
1417 | } |
||
1418 | |||
1419 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1420 | _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
||
1421 | { |
||
1422 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1423 | (__v8di)_mm512_mul_epi32(__X, __Y), |
||
1424 | (__v8di)__W); |
||
1425 | } |
||
1426 | |||
1427 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1428 | _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) |
||
1429 | { |
||
1430 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1431 | (__v8di)_mm512_mul_epi32(__X, __Y), |
||
1432 | (__v8di)_mm512_setzero_si512 ()); |
||
1433 | } |
||
1434 | |||
1435 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1436 | _mm512_mul_epu32(__m512i __X, __m512i __Y) |
||
1437 | { |
||
1438 | return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); |
||
1439 | } |
||
1440 | |||
1441 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1442 | _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
||
1443 | { |
||
1444 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1445 | (__v8di)_mm512_mul_epu32(__X, __Y), |
||
1446 | (__v8di)__W); |
||
1447 | } |
||
1448 | |||
1449 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1450 | _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) |
||
1451 | { |
||
1452 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1453 | (__v8di)_mm512_mul_epu32(__X, __Y), |
||
1454 | (__v8di)_mm512_setzero_si512 ()); |
||
1455 | } |
||
1456 | |||
1457 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1458 | _mm512_mullo_epi32 (__m512i __A, __m512i __B) |
||
1459 | { |
||
1460 | return (__m512i) ((__v16su) __A * (__v16su) __B); |
||
1461 | } |
||
1462 | |||
1463 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1464 | _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) |
||
1465 | { |
||
1466 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1467 | (__v16si)_mm512_mullo_epi32(__A, __B), |
||
1468 | (__v16si)_mm512_setzero_si512()); |
||
1469 | } |
||
1470 | |||
1471 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1472 | _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
||
1473 | { |
||
1474 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1475 | (__v16si)_mm512_mullo_epi32(__A, __B), |
||
1476 | (__v16si)__W); |
||
1477 | } |
||
1478 | |||
1479 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1480 | _mm512_mullox_epi64 (__m512i __A, __m512i __B) { |
||
1481 | return (__m512i) ((__v8du) __A * (__v8du) __B); |
||
1482 | } |
||
1483 | |||
1484 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1485 | _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { |
||
1486 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
1487 | (__v8di)_mm512_mullox_epi64(__A, __B), |
||
1488 | (__v8di)__W); |
||
1489 | } |
||
1490 | |||
/* Packed-double square root of A.  R is forwarded, cast to int, as the
 * control operand of the sqrtpd512 builtin. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512( \
      (__v8df)(__m512d)(A), (int)(R)))
1493 | |||
/* Merge-masked form of _mm512_sqrt_round_pd: lanes selected by U take the
 * square root, the remaining lanes are copied from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)(__m512d)(W)))
1498 | |||
/* Zero-masked form of _mm512_sqrt_round_pd: lanes not selected by U come
 * from _mm512_setzero_pd(). */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sqrt_round_pd((A), (R)), \
      (__v8df)_mm512_setzero_pd()))
1503 | |||
1504 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1505 | _mm512_sqrt_pd(__m512d __A) |
||
1506 | { |
||
1507 | return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, |
||
1508 | _MM_FROUND_CUR_DIRECTION); |
||
1509 | } |
||
1510 | |||
1511 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1512 | _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
1513 | { |
||
1514 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
1515 | (__v8df)_mm512_sqrt_pd(__A), |
||
1516 | (__v8df)__W); |
||
1517 | } |
||
1518 | |||
1519 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1520 | _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) |
||
1521 | { |
||
1522 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
1523 | (__v8df)_mm512_sqrt_pd(__A), |
||
1524 | (__v8df)_mm512_setzero_pd()); |
||
1525 | } |
||
1526 | |||
/* Square root of the sixteen float lanes of A; R is handed to
 * __builtin_ia32_sqrtps512 as its int rounding argument. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512( \
      (__v16sf)(__m512)(A), \
      (int)(R)))

/* Merge-masked form: lanes selected by U come from
 * _mm512_sqrt_round_ps(A, R), all other lanes come from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes not selected by U are taken from
 * _mm512_setzero_ps(). */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
      (__v16sf)_mm512_setzero_ps()))
||
1539 | |||
1540 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1541 | _mm512_sqrt_ps(__m512 __A) |
||
1542 | { |
||
1543 | return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, |
||
1544 | _MM_FROUND_CUR_DIRECTION); |
||
1545 | } |
||
1546 | |||
1547 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1548 | _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) |
||
1549 | { |
||
1550 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1551 | (__v16sf)_mm512_sqrt_ps(__A), |
||
1552 | (__v16sf)__W); |
||
1553 | } |
||
1554 | |||
1555 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1556 | _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) |
||
1557 | { |
||
1558 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
1559 | (__v16sf)_mm512_sqrt_ps(__A), |
||
1560 | (__v16sf)_mm512_setzero_ps()); |
||
1561 | } |
||
1562 | |||
1563 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1564 | _mm512_rsqrt14_pd(__m512d __A) |
||
1565 | { |
||
1566 | return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
||
1567 | (__v8df) |
||
1568 | _mm512_setzero_pd (), |
||
1569 | (__mmask8) -1);} |
||
1570 | |||
1571 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1572 | _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
1573 | { |
||
1574 | return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
||
1575 | (__v8df) __W, |
||
1576 | (__mmask8) __U); |
||
1577 | } |
||
1578 | |||
1579 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1580 | _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) |
||
1581 | { |
||
1582 | return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
||
1583 | (__v8df) |
||
1584 | _mm512_setzero_pd (), |
||
1585 | (__mmask8) __U); |
||
1586 | } |
||
1587 | |||
1588 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1589 | _mm512_rsqrt14_ps(__m512 __A) |
||
1590 | { |
||
1591 | return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
||
1592 | (__v16sf) |
||
1593 | _mm512_setzero_ps (), |
||
1594 | (__mmask16) -1); |
||
1595 | } |
||
1596 | |||
1597 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1598 | _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
1599 | { |
||
1600 | return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
||
1601 | (__v16sf) __W, |
||
1602 | (__mmask16) __U); |
||
1603 | } |
||
1604 | |||
1605 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1606 | _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) |
||
1607 | { |
||
1608 | return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
||
1609 | (__v16sf) |
||
1610 | _mm512_setzero_ps (), |
||
1611 | (__mmask16) __U); |
||
1612 | } |
||
1613 | |||
1614 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1615 | _mm_rsqrt14_ss(__m128 __A, __m128 __B) |
||
1616 | { |
||
1617 | return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, |
||
1618 | (__v4sf) __B, |
||
1619 | (__v4sf) |
||
1620 | _mm_setzero_ps (), |
||
1621 | (__mmask8) -1); |
||
1622 | } |
||
1623 | |||
1624 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1625 | _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
1626 | { |
||
1627 | return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, |
||
1628 | (__v4sf) __B, |
||
1629 | (__v4sf) __W, |
||
1630 | (__mmask8) __U); |
||
1631 | } |
||
1632 | |||
1633 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1634 | _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
1635 | { |
||
1636 | return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, |
||
1637 | (__v4sf) __B, |
||
1638 | (__v4sf) _mm_setzero_ps (), |
||
1639 | (__mmask8) __U); |
||
1640 | } |
||
1641 | |||
1642 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1643 | _mm_rsqrt14_sd(__m128d __A, __m128d __B) |
||
1644 | { |
||
1645 | return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, |
||
1646 | (__v2df) __B, |
||
1647 | (__v2df) |
||
1648 | _mm_setzero_pd (), |
||
1649 | (__mmask8) -1); |
||
1650 | } |
||
1651 | |||
1652 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1653 | _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
1654 | { |
||
1655 | return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, |
||
1656 | (__v2df) __B, |
||
1657 | (__v2df) __W, |
||
1658 | (__mmask8) __U); |
||
1659 | } |
||
1660 | |||
1661 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1662 | _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
1663 | { |
||
1664 | return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, |
||
1665 | (__v2df) __B, |
||
1666 | (__v2df) _mm_setzero_pd (), |
||
1667 | (__mmask8) __U); |
||
1668 | } |
||
1669 | |||
1670 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1671 | _mm512_rcp14_pd(__m512d __A) |
||
1672 | { |
||
1673 | return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
||
1674 | (__v8df) |
||
1675 | _mm512_setzero_pd (), |
||
1676 | (__mmask8) -1); |
||
1677 | } |
||
1678 | |||
1679 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1680 | _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
1681 | { |
||
1682 | return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
||
1683 | (__v8df) __W, |
||
1684 | (__mmask8) __U); |
||
1685 | } |
||
1686 | |||
1687 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1688 | _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) |
||
1689 | { |
||
1690 | return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
||
1691 | (__v8df) |
||
1692 | _mm512_setzero_pd (), |
||
1693 | (__mmask8) __U); |
||
1694 | } |
||
1695 | |||
1696 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1697 | _mm512_rcp14_ps(__m512 __A) |
||
1698 | { |
||
1699 | return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
||
1700 | (__v16sf) |
||
1701 | _mm512_setzero_ps (), |
||
1702 | (__mmask16) -1); |
||
1703 | } |
||
1704 | |||
1705 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1706 | _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
1707 | { |
||
1708 | return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
||
1709 | (__v16sf) __W, |
||
1710 | (__mmask16) __U); |
||
1711 | } |
||
1712 | |||
1713 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1714 | _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) |
||
1715 | { |
||
1716 | return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
||
1717 | (__v16sf) |
||
1718 | _mm512_setzero_ps (), |
||
1719 | (__mmask16) __U); |
||
1720 | } |
||
1721 | |||
1722 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1723 | _mm_rcp14_ss(__m128 __A, __m128 __B) |
||
1724 | { |
||
1725 | return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, |
||
1726 | (__v4sf) __B, |
||
1727 | (__v4sf) |
||
1728 | _mm_setzero_ps (), |
||
1729 | (__mmask8) -1); |
||
1730 | } |
||
1731 | |||
1732 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1733 | _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
1734 | { |
||
1735 | return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, |
||
1736 | (__v4sf) __B, |
||
1737 | (__v4sf) __W, |
||
1738 | (__mmask8) __U); |
||
1739 | } |
||
1740 | |||
1741 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1742 | _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
1743 | { |
||
1744 | return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, |
||
1745 | (__v4sf) __B, |
||
1746 | (__v4sf) _mm_setzero_ps (), |
||
1747 | (__mmask8) __U); |
||
1748 | } |
||
1749 | |||
1750 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1751 | _mm_rcp14_sd(__m128d __A, __m128d __B) |
||
1752 | { |
||
1753 | return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, |
||
1754 | (__v2df) __B, |
||
1755 | (__v2df) |
||
1756 | _mm_setzero_pd (), |
||
1757 | (__mmask8) -1); |
||
1758 | } |
||
1759 | |||
1760 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1761 | _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
1762 | { |
||
1763 | return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, |
||
1764 | (__v2df) __B, |
||
1765 | (__v2df) __W, |
||
1766 | (__mmask8) __U); |
||
1767 | } |
||
1768 | |||
1769 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1770 | _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
1771 | { |
||
1772 | return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, |
||
1773 | (__v2df) __B, |
||
1774 | (__v2df) _mm_setzero_pd (), |
||
1775 | (__mmask8) __U); |
||
1776 | } |
||
1777 | |||
1778 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
1779 | _mm512_floor_ps(__m512 __A) |
||
1780 | { |
||
1781 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
||
1782 | _MM_FROUND_FLOOR, |
||
1783 | (__v16sf) __A, (unsigned short)-1, |
||
1784 | _MM_FROUND_CUR_DIRECTION); |
||
1785 | } |
||
1786 | |||
1787 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1788 | _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
1789 | { |
||
1790 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
||
1791 | _MM_FROUND_FLOOR, |
||
1792 | (__v16sf) __W, __U, |
||
1793 | _MM_FROUND_CUR_DIRECTION); |
||
1794 | } |
||
1795 | |||
1796 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
1797 | _mm512_floor_pd(__m512d __A) |
||
1798 | { |
||
1799 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
||
1800 | _MM_FROUND_FLOOR, |
||
1801 | (__v8df) __A, (unsigned char)-1, |
||
1802 | _MM_FROUND_CUR_DIRECTION); |
||
1803 | } |
||
1804 | |||
1805 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1806 | _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
1807 | { |
||
1808 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
||
1809 | _MM_FROUND_FLOOR, |
||
1810 | (__v8df) __W, __U, |
||
1811 | _MM_FROUND_CUR_DIRECTION); |
||
1812 | } |
||
1813 | |||
1814 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1815 | _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
1816 | { |
||
1817 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
||
1818 | _MM_FROUND_CEIL, |
||
1819 | (__v16sf) __W, __U, |
||
1820 | _MM_FROUND_CUR_DIRECTION); |
||
1821 | } |
||
1822 | |||
1823 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
1824 | _mm512_ceil_ps(__m512 __A) |
||
1825 | { |
||
1826 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, |
||
1827 | _MM_FROUND_CEIL, |
||
1828 | (__v16sf) __A, (unsigned short)-1, |
||
1829 | _MM_FROUND_CUR_DIRECTION); |
||
1830 | } |
||
1831 | |||
1832 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
1833 | _mm512_ceil_pd(__m512d __A) |
||
1834 | { |
||
1835 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
||
1836 | _MM_FROUND_CEIL, |
||
1837 | (__v8df) __A, (unsigned char)-1, |
||
1838 | _MM_FROUND_CUR_DIRECTION); |
||
1839 | } |
||
1840 | |||
1841 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1842 | _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
1843 | { |
||
1844 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, |
||
1845 | _MM_FROUND_CEIL, |
||
1846 | (__v8df) __W, __U, |
||
1847 | _MM_FROUND_CUR_DIRECTION); |
||
1848 | } |
||
1849 | |||
1850 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1851 | _mm512_abs_epi64(__m512i __A) |
||
1852 | { |
||
1853 | return (__m512i)__builtin_elementwise_abs((__v8di)__A); |
||
1854 | } |
||
1855 | |||
1856 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1857 | _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
||
1858 | { |
||
1859 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
1860 | (__v8di)_mm512_abs_epi64(__A), |
||
1861 | (__v8di)__W); |
||
1862 | } |
||
1863 | |||
1864 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1865 | _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) |
||
1866 | { |
||
1867 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
1868 | (__v8di)_mm512_abs_epi64(__A), |
||
1869 | (__v8di)_mm512_setzero_si512()); |
||
1870 | } |
||
1871 | |||
1872 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
1873 | _mm512_abs_epi32(__m512i __A) |
||
1874 | { |
||
1875 | return (__m512i)__builtin_elementwise_abs((__v16si) __A); |
||
1876 | } |
||
1877 | |||
1878 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1879 | _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
||
1880 | { |
||
1881 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
1882 | (__v16si)_mm512_abs_epi32(__A), |
||
1883 | (__v16si)__W); |
||
1884 | } |
||
1885 | |||
1886 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1887 | _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) |
||
1888 | { |
||
1889 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
1890 | (__v16si)_mm512_abs_epi32(__A), |
||
1891 | (__v16si)_mm512_setzero_si512()); |
||
1892 | } |
||
1893 | |||
1894 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1895 | _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
1896 | __A = _mm_add_ss(__A, __B); |
||
1897 | return __builtin_ia32_selectss_128(__U, __A, __W); |
||
1898 | } |
||
1899 | |||
1900 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
1901 | _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
1902 | __A = _mm_add_ss(__A, __B); |
||
1903 | return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); |
||
1904 | } |
||
1905 | |||
/* Scalar-float add with explicit rounding argument R. All three forms call
 * __builtin_ia32_addss_round_mask(A, B, pass-through, mask, R); the unmasked
 * form uses a zero pass-through and an all-ones mask. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
1923 | |||
1924 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1925 | _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
1926 | __A = _mm_add_sd(__A, __B); |
||
1927 | return __builtin_ia32_selectsd_128(__U, __A, __W); |
||
1928 | } |
||
1929 | |||
1930 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
1931 | _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
1932 | __A = _mm_add_sd(__A, __B); |
||
1933 | return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); |
||
1934 | } |
||
/* Scalar-double add with explicit rounding argument R. All three forms call
 * __builtin_ia32_addsd_round_mask(A, B, pass-through, mask, R); the unmasked
 * form uses a zero pass-through and an all-ones mask. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
1952 | |||
1953 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1954 | _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
1955 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
1956 | (__v8df)_mm512_add_pd(__A, __B), |
||
1957 | (__v8df)__W); |
||
1958 | } |
||
1959 | |||
1960 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1961 | _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
1962 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
1963 | (__v8df)_mm512_add_pd(__A, __B), |
||
1964 | (__v8df)_mm512_setzero_pd()); |
||
1965 | } |
||
1966 | |||
1967 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1968 | _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
1969 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
1970 | (__v16sf)_mm512_add_ps(__A, __B), |
||
1971 | (__v16sf)__W); |
||
1972 | } |
||
1973 | |||
1974 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1975 | _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
1976 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
1977 | (__v16sf)_mm512_add_ps(__A, __B), |
||
1978 | (__v16sf)_mm512_setzero_ps()); |
||
1979 | } |
||
1980 | |||
/* Packed-double add with explicit rounding argument R, plus its merge-masked
 * (fallback W) and zero-masked (fallback zero) forms built on
 * __builtin_ia32_selectpd_512. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_add_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))

/* Packed-float counterparts of the three macros above. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
||
2008 | |||
2009 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2010 | _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
2011 | __A = _mm_sub_ss(__A, __B); |
||
2012 | return __builtin_ia32_selectss_128(__U, __A, __W); |
||
2013 | } |
||
2014 | |||
2015 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2016 | _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
2017 | __A = _mm_sub_ss(__A, __B); |
||
2018 | return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); |
||
2019 | } |
||
/* Scalar-float subtract with explicit rounding argument R. All three forms
 * call __builtin_ia32_subss_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
2037 | |||
2038 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2039 | _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
2040 | __A = _mm_sub_sd(__A, __B); |
||
2041 | return __builtin_ia32_selectsd_128(__U, __A, __W); |
||
2042 | } |
||
2043 | |||
2044 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2045 | _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
2046 | __A = _mm_sub_sd(__A, __B); |
||
2047 | return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); |
||
2048 | } |
||
2049 | |||
/* Scalar-double subtract with explicit rounding argument R. All three forms
 * call __builtin_ia32_subsd_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
2067 | |||
2068 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2069 | _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
2070 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2071 | (__v8df)_mm512_sub_pd(__A, __B), |
||
2072 | (__v8df)__W); |
||
2073 | } |
||
2074 | |||
2075 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2076 | _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
2077 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2078 | (__v8df)_mm512_sub_pd(__A, __B), |
||
2079 | (__v8df)_mm512_setzero_pd()); |
||
2080 | } |
||
2081 | |||
2082 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2083 | _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
2084 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2085 | (__v16sf)_mm512_sub_ps(__A, __B), |
||
2086 | (__v16sf)__W); |
||
2087 | } |
||
2088 | |||
2089 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2090 | _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
2091 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2092 | (__v16sf)_mm512_sub_ps(__A, __B), |
||
2093 | (__v16sf)_mm512_setzero_ps()); |
||
2094 | } |
||
2095 | |||
/* Packed-double subtract with explicit rounding argument R, plus its
 * merge-masked (fallback W) and zero-masked (fallback zero) forms built on
 * __builtin_ia32_selectpd_512. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))

/* Packed-float counterparts of the three macros above. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
||
2123 | |||
2124 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2125 | _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
2126 | __A = _mm_mul_ss(__A, __B); |
||
2127 | return __builtin_ia32_selectss_128(__U, __A, __W); |
||
2128 | } |
||
2129 | |||
2130 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2131 | _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
2132 | __A = _mm_mul_ss(__A, __B); |
||
2133 | return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); |
||
2134 | } |
||
/* Scalar-float multiply with explicit rounding argument R. All three forms
 * call __builtin_ia32_mulss_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
2152 | |||
2153 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2154 | _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
2155 | __A = _mm_mul_sd(__A, __B); |
||
2156 | return __builtin_ia32_selectsd_128(__U, __A, __W); |
||
2157 | } |
||
2158 | |||
2159 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2160 | _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
2161 | __A = _mm_mul_sd(__A, __B); |
||
2162 | return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); |
||
2163 | } |
||
2164 | |||
/* Scalar-double multiply with explicit rounding argument R. All three forms
 * call __builtin_ia32_mulsd_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
2182 | |||
2183 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2184 | _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
2185 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2186 | (__v8df)_mm512_mul_pd(__A, __B), |
||
2187 | (__v8df)__W); |
||
2188 | } |
||
2189 | |||
2190 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2191 | _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
2192 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2193 | (__v8df)_mm512_mul_pd(__A, __B), |
||
2194 | (__v8df)_mm512_setzero_pd()); |
||
2195 | } |
||
2196 | |||
2197 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2198 | _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
2199 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2200 | (__v16sf)_mm512_mul_ps(__A, __B), |
||
2201 | (__v16sf)__W); |
||
2202 | } |
||
2203 | |||
2204 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2205 | _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
2206 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2207 | (__v16sf)_mm512_mul_ps(__A, __B), |
||
2208 | (__v16sf)_mm512_setzero_ps()); |
||
2209 | } |
||
2210 | |||
/* Packed-double multiply with explicit rounding argument R, plus its
 * merge-masked (fallback W) and zero-masked (fallback zero) forms built on
 * __builtin_ia32_selectpd_512. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))

/* Packed-float counterparts of the three macros above. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
||
2238 | |||
2239 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2240 | _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { |
||
2241 | __A = _mm_div_ss(__A, __B); |
||
2242 | return __builtin_ia32_selectss_128(__U, __A, __W); |
||
2243 | } |
||
2244 | |||
2245 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
2246 | _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { |
||
2247 | __A = _mm_div_ss(__A, __B); |
||
2248 | return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); |
||
2249 | } |
||
2250 | |||
/* Scalar-float divide with explicit rounding argument R. All three forms
 * call __builtin_ia32_divss_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
2268 | |||
2269 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2270 | _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { |
||
2271 | __A = _mm_div_sd(__A, __B); |
||
2272 | return __builtin_ia32_selectsd_128(__U, __A, __W); |
||
2273 | } |
||
2274 | |||
2275 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
2276 | _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { |
||
2277 | __A = _mm_div_sd(__A, __B); |
||
2278 | return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); |
||
2279 | } |
||
2280 | |||
/* Scalar-double divide with explicit rounding argument R. All three forms
 * call __builtin_ia32_divsd_round_mask(A, B, pass-through, mask, R); the
 * unmasked form uses a zero pass-through and an all-ones mask. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))

/* Merge-masked form: W is the pass-through vector, U the mask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

/* Zero-masked form: zero pass-through vector, caller-supplied mask U. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
2298 | |||
2299 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
2300 | _mm512_div_pd(__m512d __a, __m512d __b) |
||
2301 | { |
||
2302 | return (__m512d)((__v8df)__a/(__v8df)__b); |
||
2303 | } |
||
2304 | |||
2305 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2306 | _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
2307 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2308 | (__v8df)_mm512_div_pd(__A, __B), |
||
2309 | (__v8df)__W); |
||
2310 | } |
||
2311 | |||
2312 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2313 | _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
2314 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
2315 | (__v8df)_mm512_div_pd(__A, __B), |
||
2316 | (__v8df)_mm512_setzero_pd()); |
||
2317 | } |
||
2318 | |||
2319 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
2320 | _mm512_div_ps(__m512 __a, __m512 __b) |
||
2321 | { |
||
2322 | return (__m512)((__v16sf)__a/(__v16sf)__b); |
||
2323 | } |
||
2324 | |||
2325 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2326 | _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
2327 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2328 | (__v16sf)_mm512_div_ps(__A, __B), |
||
2329 | (__v16sf)__W); |
||
2330 | } |
||
2331 | |||
2332 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2333 | _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
2334 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
2335 | (__v16sf)_mm512_div_ps(__A, __B), |
||
2336 | (__v16sf)_mm512_setzero_ps()); |
||
2337 | } |
||
2338 | |||
/* Expands to __builtin_ia32_divpd512 on A and B with R as its int argument. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(R)))
||
2342 | |||
/* Feeds the result of _mm512_div_round_pd(A, B, R), the vector W and the
 * mask U to __builtin_ia32_selectpd_512. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)(__m512d)(W)))
||
2347 | |||
/* Feeds the result of _mm512_div_round_pd(A, B, R), an all-zero vector and
 * the mask U to __builtin_ia32_selectpd_512. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_div_round_pd((A), (B), (R)), \
      (__v8df)_mm512_setzero_pd()))
||
2352 | |||
/* Expands to __builtin_ia32_divps512 on A and B with R as its int argument. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(R)))
||
2356 | |||
/* Feeds the result of _mm512_div_round_ps(A, B, R), the vector W and the
 * mask U to __builtin_ia32_selectps_512. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)(__m512)(W)))
||
2361 | |||
/* Feeds the result of _mm512_div_round_ps(A, B, R), an all-zero vector and
 * the mask U to __builtin_ia32_selectps_512. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
      (__v16sf)_mm512_setzero_ps()))
||
2366 | |||
/* Unmasked form: A is the input vector and B the int immediate; the third
 * operand is _mm512_undefined_ps(), the mask is all ones, and the last
 * argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(B), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
||
2372 | |||
/* Parameter roles: C is the input vector, imm the int immediate, A the third
 * vector operand and B the 16-bit mask. Uses _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
||
2377 | |||
/* Parameter roles: A is the 16-bit mask, B the input vector, imm the int
 * immediate; the third vector operand is all zeros. Uses
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
||
2383 | |||
/* Like _mm512_mask_roundscale_ps, but the final builtin argument is the
 * caller's R instead of _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), (__v16sf)(__m512)(A), \
      (__mmask16)(B), (int)(R)))
||
2388 | |||
/* Like _mm512_maskz_roundscale_ps, but the final builtin argument is the
 * caller's R instead of _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), (int)(R)))
||
2393 | |||
/* Unmasked form with explicit R: input A, immediate imm, third operand
 * _mm512_undefined_ps(), all-ones mask. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))
||
2398 | |||
/* Unmasked form: A is the input vector and B the int immediate; the third
 * operand is _mm512_undefined_pd(), the mask is all ones, and the last
 * argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(B), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
||
2404 | |||
/* Parameter roles: C is the input vector, imm the int immediate, A the third
 * vector operand and B the 8-bit mask. Uses _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
||
2409 | |||
/* Parameter roles: A is the 8-bit mask, B the input vector, imm the int
 * immediate; the third vector operand is all zeros. Uses
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
||
2415 | |||
/* Like _mm512_mask_roundscale_pd, but the final builtin argument is the
 * caller's R instead of _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), (__v8df)(__m512d)(A), \
      (__mmask8)(B), (int)(R)))
||
2420 | |||
/* Like _mm512_maskz_roundscale_pd, but the final builtin argument is the
 * caller's R instead of _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), (int)(R)))
||
2425 | |||
/* Unmasked form with explicit R: input A, immediate imm, third operand
 * _mm512_undefined_pd(), all-ones mask. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))
||
2430 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddpd512_mask with an
 * all-ones mask and R as the int argument. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2436 | |||
2437 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddpd512_mask with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2443 | |||
2444 | |||
/* Passes A, B, C unchanged to the _mask3 flavour of the vfmaddpd512 builtin
 * with mask U and R as the int argument. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2450 | |||
2451 | |||
/* Passes A, B, C unchanged to the _maskz flavour of the vfmaddpd512 builtin
 * with mask U and R as the int argument. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2457 | |||
2458 | |||
/* Built on __builtin_ia32_vfmaddpd512_mask with the third operand negated
 * (-C); all-ones mask, R as the int argument. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2464 | |||
2465 | |||
/* Built on __builtin_ia32_vfmaddpd512_mask with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2471 | |||
2472 | |||
/* Built on __builtin_ia32_vfmaddpd512_maskz with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2478 | |||
2479 | |||
/* Built on __builtin_ia32_vfmaddpd512_mask with the first operand negated
 * (-A); all-ones mask, R as the int argument. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2485 | |||
2486 | |||
/* Built on __builtin_ia32_vfmaddpd512_mask3 with the first operand negated
 * (-A); caller's mask U, R as the int argument. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2492 | |||
2493 | |||
/* Built on __builtin_ia32_vfmaddpd512_maskz with the first operand negated
 * (-A); caller's mask U, R as the int argument. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2499 | |||
2500 | |||
/* Built on __builtin_ia32_vfmaddpd512_mask with both the first and third
 * operands negated (-A, -C); all-ones mask, R as the int argument. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2506 | |||
2507 | |||
/* Built on __builtin_ia32_vfmaddpd512_maskz with both the first and third
 * operands negated (-A, -C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2513 | |||
2514 | |||
2515 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2516 | _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2517 | { |
||
2518 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2519 | (__v8df) __B, |
||
2520 | (__v8df) __C, |
||
2521 | (__mmask8) -1, |
||
2522 | _MM_FROUND_CUR_DIRECTION); |
||
2523 | } |
||
2524 | |||
2525 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2526 | _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
2527 | { |
||
2528 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2529 | (__v8df) __B, |
||
2530 | (__v8df) __C, |
||
2531 | (__mmask8) __U, |
||
2532 | _MM_FROUND_CUR_DIRECTION); |
||
2533 | } |
||
2534 | |||
2535 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2536 | _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
2537 | { |
||
2538 | return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, |
||
2539 | (__v8df) __B, |
||
2540 | (__v8df) __C, |
||
2541 | (__mmask8) __U, |
||
2542 | _MM_FROUND_CUR_DIRECTION); |
||
2543 | } |
||
2544 | |||
2545 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2546 | _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2547 | { |
||
2548 | return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
||
2549 | (__v8df) __B, |
||
2550 | (__v8df) __C, |
||
2551 | (__mmask8) __U, |
||
2552 | _MM_FROUND_CUR_DIRECTION); |
||
2553 | } |
||
2554 | |||
2555 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2556 | _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2557 | { |
||
2558 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2559 | (__v8df) __B, |
||
2560 | -(__v8df) __C, |
||
2561 | (__mmask8) -1, |
||
2562 | _MM_FROUND_CUR_DIRECTION); |
||
2563 | } |
||
2564 | |||
2565 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2566 | _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
2567 | { |
||
2568 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2569 | (__v8df) __B, |
||
2570 | -(__v8df) __C, |
||
2571 | (__mmask8) __U, |
||
2572 | _MM_FROUND_CUR_DIRECTION); |
||
2573 | } |
||
2574 | |||
2575 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2576 | _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2577 | { |
||
2578 | return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, |
||
2579 | (__v8df) __B, |
||
2580 | -(__v8df) __C, |
||
2581 | (__mmask8) __U, |
||
2582 | _MM_FROUND_CUR_DIRECTION); |
||
2583 | } |
||
2584 | |||
2585 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2586 | _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2587 | { |
||
2588 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2589 | -(__v8df) __B, |
||
2590 | (__v8df) __C, |
||
2591 | (__mmask8) -1, |
||
2592 | _MM_FROUND_CUR_DIRECTION); |
||
2593 | } |
||
2594 | |||
2595 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2596 | _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
2597 | { |
||
2598 | return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, |
||
2599 | (__v8df) __B, |
||
2600 | (__v8df) __C, |
||
2601 | (__mmask8) __U, |
||
2602 | _MM_FROUND_CUR_DIRECTION); |
||
2603 | } |
||
2604 | |||
2605 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2606 | _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2607 | { |
||
2608 | return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
||
2609 | (__v8df) __B, |
||
2610 | (__v8df) __C, |
||
2611 | (__mmask8) __U, |
||
2612 | _MM_FROUND_CUR_DIRECTION); |
||
2613 | } |
||
2614 | |||
2615 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2616 | _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2617 | { |
||
2618 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
2619 | -(__v8df) __B, |
||
2620 | -(__v8df) __C, |
||
2621 | (__mmask8) -1, |
||
2622 | _MM_FROUND_CUR_DIRECTION); |
||
2623 | } |
||
2624 | |||
2625 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2626 | _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2627 | { |
||
2628 | return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, |
||
2629 | (__v8df) __B, |
||
2630 | -(__v8df) __C, |
||
2631 | (__mmask8) __U, |
||
2632 | _MM_FROUND_CUR_DIRECTION); |
||
2633 | } |
||
2634 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddps512_mask with an
 * all-ones mask and R as the int argument. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2640 | |||
2641 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddps512_mask with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2647 | |||
2648 | |||
/* Passes A, B, C unchanged to the _mask3 flavour of the vfmaddps512 builtin
 * with mask U and R as the int argument. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2654 | |||
2655 | |||
/* Passes A, B, C unchanged to the _maskz flavour of the vfmaddps512 builtin
 * with mask U and R as the int argument. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2661 | |||
2662 | |||
/* Built on __builtin_ia32_vfmaddps512_mask with the third operand negated
 * (-C); all-ones mask, R as the int argument. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2668 | |||
2669 | |||
/* Built on __builtin_ia32_vfmaddps512_mask with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2675 | |||
2676 | |||
/* Built on __builtin_ia32_vfmaddps512_maskz with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2682 | |||
2683 | |||
/* Built on __builtin_ia32_vfmaddps512_mask with the second operand negated
 * (-B); all-ones mask, R as the int argument. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2689 | |||
2690 | |||
/* Built on __builtin_ia32_vfmaddps512_mask3 with the first operand negated
 * (-A); caller's mask U, R as the int argument. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2696 | |||
2697 | |||
/* Built on __builtin_ia32_vfmaddps512_maskz with the first operand negated
 * (-A); caller's mask U, R as the int argument. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2703 | |||
2704 | |||
/* Built on __builtin_ia32_vfmaddps512_mask with the second and third operands
 * negated (-B, -C); all-ones mask, R as the int argument. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2710 | |||
2711 | |||
/* Built on __builtin_ia32_vfmaddps512_maskz with the first and third operands
 * negated (-A, -C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2717 | |||
2718 | |||
2719 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2720 | _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
||
2721 | { |
||
2722 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2723 | (__v16sf) __B, |
||
2724 | (__v16sf) __C, |
||
2725 | (__mmask16) -1, |
||
2726 | _MM_FROUND_CUR_DIRECTION); |
||
2727 | } |
||
2728 | |||
2729 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2730 | _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
2731 | { |
||
2732 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2733 | (__v16sf) __B, |
||
2734 | (__v16sf) __C, |
||
2735 | (__mmask16) __U, |
||
2736 | _MM_FROUND_CUR_DIRECTION); |
||
2737 | } |
||
2738 | |||
2739 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2740 | _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
2741 | { |
||
2742 | return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, |
||
2743 | (__v16sf) __B, |
||
2744 | (__v16sf) __C, |
||
2745 | (__mmask16) __U, |
||
2746 | _MM_FROUND_CUR_DIRECTION); |
||
2747 | } |
||
2748 | |||
2749 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2750 | _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
2751 | { |
||
2752 | return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
||
2753 | (__v16sf) __B, |
||
2754 | (__v16sf) __C, |
||
2755 | (__mmask16) __U, |
||
2756 | _MM_FROUND_CUR_DIRECTION); |
||
2757 | } |
||
2758 | |||
2759 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2760 | _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) |
||
2761 | { |
||
2762 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2763 | (__v16sf) __B, |
||
2764 | -(__v16sf) __C, |
||
2765 | (__mmask16) -1, |
||
2766 | _MM_FROUND_CUR_DIRECTION); |
||
2767 | } |
||
2768 | |||
2769 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2770 | _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
2771 | { |
||
2772 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2773 | (__v16sf) __B, |
||
2774 | -(__v16sf) __C, |
||
2775 | (__mmask16) __U, |
||
2776 | _MM_FROUND_CUR_DIRECTION); |
||
2777 | } |
||
2778 | |||
2779 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2780 | _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
2781 | { |
||
2782 | return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, |
||
2783 | (__v16sf) __B, |
||
2784 | -(__v16sf) __C, |
||
2785 | (__mmask16) __U, |
||
2786 | _MM_FROUND_CUR_DIRECTION); |
||
2787 | } |
||
2788 | |||
2789 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2790 | _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
||
2791 | { |
||
2792 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2793 | -(__v16sf) __B, |
||
2794 | (__v16sf) __C, |
||
2795 | (__mmask16) -1, |
||
2796 | _MM_FROUND_CUR_DIRECTION); |
||
2797 | } |
||
2798 | |||
2799 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2800 | _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
2801 | { |
||
2802 | return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, |
||
2803 | (__v16sf) __B, |
||
2804 | (__v16sf) __C, |
||
2805 | (__mmask16) __U, |
||
2806 | _MM_FROUND_CUR_DIRECTION); |
||
2807 | } |
||
2808 | |||
2809 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2810 | _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
2811 | { |
||
2812 | return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
||
2813 | (__v16sf) __B, |
||
2814 | (__v16sf) __C, |
||
2815 | (__mmask16) __U, |
||
2816 | _MM_FROUND_CUR_DIRECTION); |
||
2817 | } |
||
2818 | |||
2819 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2820 | _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) |
||
2821 | { |
||
2822 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
2823 | -(__v16sf) __B, |
||
2824 | -(__v16sf) __C, |
||
2825 | (__mmask16) -1, |
||
2826 | _MM_FROUND_CUR_DIRECTION); |
||
2827 | } |
||
2828 | |||
2829 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
2830 | _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
2831 | { |
||
2832 | return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, |
||
2833 | (__v16sf) __B, |
||
2834 | -(__v16sf) __C, |
||
2835 | (__mmask16) __U, |
||
2836 | _MM_FROUND_CUR_DIRECTION); |
||
2837 | } |
||
2838 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubpd512_mask with an
 * all-ones mask and R as the int argument. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2844 | |||
2845 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubpd512_mask with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2851 | |||
2852 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubpd512_mask3 with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2858 | |||
2859 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubpd512_maskz with the
 * caller's mask U and R as the int argument. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2865 | |||
2866 | |||
/* Built on __builtin_ia32_vfmaddsubpd512_mask with the third operand negated
 * (-C); all-ones mask, R as the int argument. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)))
||
2872 | |||
2873 | |||
/* Built on __builtin_ia32_vfmaddsubpd512_mask with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2879 | |||
2880 | |||
/* Built on __builtin_ia32_vfmaddsubpd512_maskz with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
2886 | |||
2887 | |||
2888 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2889 | _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2890 | { |
||
2891 | return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
||
2892 | (__v8df) __B, |
||
2893 | (__v8df) __C, |
||
2894 | (__mmask8) -1, |
||
2895 | _MM_FROUND_CUR_DIRECTION); |
||
2896 | } |
||
2897 | |||
2898 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2899 | _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
2900 | { |
||
2901 | return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
||
2902 | (__v8df) __B, |
||
2903 | (__v8df) __C, |
||
2904 | (__mmask8) __U, |
||
2905 | _MM_FROUND_CUR_DIRECTION); |
||
2906 | } |
||
2907 | |||
2908 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2909 | _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
2910 | { |
||
2911 | return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, |
||
2912 | (__v8df) __B, |
||
2913 | (__v8df) __C, |
||
2914 | (__mmask8) __U, |
||
2915 | _MM_FROUND_CUR_DIRECTION); |
||
2916 | } |
||
2917 | |||
2918 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2919 | _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2920 | { |
||
2921 | return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
||
2922 | (__v8df) __B, |
||
2923 | (__v8df) __C, |
||
2924 | (__mmask8) __U, |
||
2925 | _MM_FROUND_CUR_DIRECTION); |
||
2926 | } |
||
2927 | |||
2928 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2929 | _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) |
||
2930 | { |
||
2931 | return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
||
2932 | (__v8df) __B, |
||
2933 | -(__v8df) __C, |
||
2934 | (__mmask8) -1, |
||
2935 | _MM_FROUND_CUR_DIRECTION); |
||
2936 | } |
||
2937 | |||
2938 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2939 | _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
2940 | { |
||
2941 | return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, |
||
2942 | (__v8df) __B, |
||
2943 | -(__v8df) __C, |
||
2944 | (__mmask8) __U, |
||
2945 | _MM_FROUND_CUR_DIRECTION); |
||
2946 | } |
||
2947 | |||
2948 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
2949 | _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) |
||
2950 | { |
||
2951 | return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, |
||
2952 | (__v8df) __B, |
||
2953 | -(__v8df) __C, |
||
2954 | (__mmask8) __U, |
||
2955 | _MM_FROUND_CUR_DIRECTION); |
||
2956 | } |
||
2957 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubps512_mask with an
 * all-ones mask and R as the int argument. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2963 | |||
2964 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubps512_mask with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2970 | |||
2971 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubps512_mask3 with the
 * caller's mask U and R as the int argument. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2977 | |||
2978 | |||
/* Passes A, B, C unchanged to __builtin_ia32_vfmaddsubps512_maskz with the
 * caller's mask U and R as the int argument. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2984 | |||
2985 | |||
/* Built on __builtin_ia32_vfmaddsubps512_mask with the third operand negated
 * (-C); all-ones mask, R as the int argument. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)))
||
2991 | |||
2992 | |||
/* Built on __builtin_ia32_vfmaddsubps512_mask with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
2998 | |||
2999 | |||
/* Built on __builtin_ia32_vfmaddsubps512_maskz with the third operand negated
 * (-C); caller's mask U, R as the int argument. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
3005 | |||
3006 | |||
3007 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3008 | _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) |
||
3009 | { |
||
3010 | return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
||
3011 | (__v16sf) __B, |
||
3012 | (__v16sf) __C, |
||
3013 | (__mmask16) -1, |
||
3014 | _MM_FROUND_CUR_DIRECTION); |
||
3015 | } |
||
3016 | |||
3017 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3018 | _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
3019 | { |
||
3020 | return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
||
3021 | (__v16sf) __B, |
||
3022 | (__v16sf) __C, |
||
3023 | (__mmask16) __U, |
||
3024 | _MM_FROUND_CUR_DIRECTION); |
||
3025 | } |
||
3026 | |||
3027 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3028 | _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
3029 | { |
||
3030 | return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, |
||
3031 | (__v16sf) __B, |
||
3032 | (__v16sf) __C, |
||
3033 | (__mmask16) __U, |
||
3034 | _MM_FROUND_CUR_DIRECTION); |
||
3035 | } |
||
3036 | |||
3037 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3038 | _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
3039 | { |
||
3040 | return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
||
3041 | (__v16sf) __B, |
||
3042 | (__v16sf) __C, |
||
3043 | (__mmask16) __U, |
||
3044 | _MM_FROUND_CUR_DIRECTION); |
||
3045 | } |
||
3046 | |||
3047 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3048 | _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) |
||
3049 | { |
||
3050 | return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
||
3051 | (__v16sf) __B, |
||
3052 | -(__v16sf) __C, |
||
3053 | (__mmask16) -1, |
||
3054 | _MM_FROUND_CUR_DIRECTION); |
||
3055 | } |
||
3056 | |||
3057 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3058 | _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
3059 | { |
||
3060 | return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, |
||
3061 | (__v16sf) __B, |
||
3062 | -(__v16sf) __C, |
||
3063 | (__mmask16) __U, |
||
3064 | _MM_FROUND_CUR_DIRECTION); |
||
3065 | } |
||
3066 | |||
3067 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3068 | _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) |
||
3069 | { |
||
3070 | return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, |
||
3071 | (__v16sf) __B, |
||
3072 | -(__v16sf) __C, |
||
3073 | (__mmask16) __U, |
||
3074 | _MM_FROUND_CUR_DIRECTION); |
||
3075 | } |
||
3076 | |||
/* Uses the dedicated __builtin_ia32_vfmsubpd512_mask3 builtin (C is passed
 * un-negated) with mask U and R as the int argument. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
3082 | |||
3083 | |||
3084 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3085 | _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
3086 | { |
||
3087 | return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, |
||
3088 | (__v8df) __B, |
||
3089 | (__v8df) __C, |
||
3090 | (__mmask8) __U, |
||
3091 | _MM_FROUND_CUR_DIRECTION); |
||
3092 | } |
||
3093 | |||
/* Uses the dedicated __builtin_ia32_vfmsubps512_mask3 builtin (C is passed
 * un-negated) with mask U and R as the int argument. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
3099 | |||
3100 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3101 | _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
3102 | { |
||
3103 | return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, |
||
3104 | (__v16sf) __B, |
||
3105 | (__v16sf) __C, |
||
3106 | (__mmask16) __U, |
||
3107 | _MM_FROUND_CUR_DIRECTION); |
||
3108 | } |
||
3109 | |||
/* mask3 double-precision fmsubadd with an explicit rounding argument R.
 * Expands to the vfmsubaddpd512 mask3 builtin on A, B, C and mask U. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
3115 | |||
3116 | |||
3117 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3118 | _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
3119 | { |
||
3120 | return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, |
||
3121 | (__v8df) __B, |
||
3122 | (__v8df) __C, |
||
3123 | (__mmask8) __U, |
||
3124 | _MM_FROUND_CUR_DIRECTION); |
||
3125 | } |
||
3126 | |||
/* mask3 single-precision fmsubadd with an explicit rounding argument R.
 * Expands to the vfmsubaddps512 mask3 builtin on A, B, C and mask U. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
3132 | |||
3133 | |||
3134 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3135 | _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
3136 | { |
||
3137 | return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, |
||
3138 | (__v16sf) __B, |
||
3139 | (__v16sf) __C, |
||
3140 | (__mmask16) __U, |
||
3141 | _MM_FROUND_CUR_DIRECTION); |
||
3142 | } |
||
3143 | |||
/* Masked double-precision fnmadd with an explicit rounding argument R.
 * Expressed through the vfmaddpd512 mask builtin with the B operand negated;
 * A and C are forwarded unchanged together with mask U. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
3149 | |||
3150 | |||
3151 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3152 | _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
3153 | { |
||
3154 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
3155 | -(__v8df) __B, |
||
3156 | (__v8df) __C, |
||
3157 | (__mmask8) __U, |
||
3158 | _MM_FROUND_CUR_DIRECTION); |
||
3159 | } |
||
3160 | |||
/* Masked single-precision fnmadd with an explicit rounding argument R.
 * Expressed through the vfmaddps512 mask builtin with the B operand negated;
 * A and C are forwarded unchanged together with mask U. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
3166 | |||
3167 | |||
3168 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3169 | _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
3170 | { |
||
3171 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
3172 | -(__v16sf) __B, |
||
3173 | (__v16sf) __C, |
||
3174 | (__mmask16) __U, |
||
3175 | _MM_FROUND_CUR_DIRECTION); |
||
3176 | } |
||
3177 | |||
/* Masked double-precision fnmsub with an explicit rounding argument R.
 * Uses the vfmaddpd512 mask builtin with both B and C negated; A is
 * forwarded unchanged. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), -(__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))

/* mask3 double-precision fnmsub with an explicit rounding argument R.
 * Uses the vfmsubpd512 mask3 builtin with A negated; B and C are forwarded
 * unchanged. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)))
||
3190 | |||
3191 | |||
3192 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3193 | _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) |
||
3194 | { |
||
3195 | return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, |
||
3196 | -(__v8df) __B, |
||
3197 | -(__v8df) __C, |
||
3198 | (__mmask8) __U, |
||
3199 | _MM_FROUND_CUR_DIRECTION); |
||
3200 | } |
||
3201 | |||
3202 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3203 | _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) |
||
3204 | { |
||
3205 | return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, |
||
3206 | (__v8df) __B, |
||
3207 | (__v8df) __C, |
||
3208 | (__mmask8) __U, |
||
3209 | _MM_FROUND_CUR_DIRECTION); |
||
3210 | } |
||
3211 | |||
/* Masked single-precision fnmsub with an explicit rounding argument R.
 * Uses the vfmaddps512 mask builtin with both B and C negated; A is
 * forwarded unchanged. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), -(__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))

/* mask3 single-precision fnmsub with an explicit rounding argument R.
 * Uses the vfmsubps512 mask3 builtin with A negated; B and C are forwarded
 * unchanged. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)))
||
3224 | |||
3225 | |||
3226 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3227 | _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) |
||
3228 | { |
||
3229 | return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, |
||
3230 | -(__v16sf) __B, |
||
3231 | -(__v16sf) __C, |
||
3232 | (__mmask16) __U, |
||
3233 | _MM_FROUND_CUR_DIRECTION); |
||
3234 | } |
||
3235 | |||
3236 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3237 | _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) |
||
3238 | { |
||
3239 | return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, |
||
3240 | (__v16sf) __B, |
||
3241 | (__v16sf) __C, |
||
3242 | (__mmask16) __U, |
||
3243 | _MM_FROUND_CUR_DIRECTION); |
||
3244 | } |
||
3245 | |||
3246 | |||
3247 | |||
3248 | /* Vector permutations */ |
||
3249 | |||
3250 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3251 | _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) |
||
3252 | { |
||
3253 | return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, |
||
3254 | (__v16si) __B); |
||
3255 | } |
||
3256 | |||
3257 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3258 | _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, |
||
3259 | __m512i __B) |
||
3260 | { |
||
3261 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
3262 | (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), |
||
3263 | (__v16si)__A); |
||
3264 | } |
||
3265 | |||
3266 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3267 | _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, |
||
3268 | __m512i __B) |
||
3269 | { |
||
3270 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
3271 | (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), |
||
3272 | (__v16si)__I); |
||
3273 | } |
||
3274 | |||
3275 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3276 | _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, |
||
3277 | __m512i __B) |
||
3278 | { |
||
3279 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
3280 | (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), |
||
3281 | (__v16si)_mm512_setzero_si512()); |
||
3282 | } |
||
3283 | |||
3284 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3285 | _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) |
||
3286 | { |
||
3287 | return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, |
||
3288 | (__v8di) __B); |
||
3289 | } |
||
3290 | |||
3291 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3292 | _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, |
||
3293 | __m512i __B) |
||
3294 | { |
||
3295 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
3296 | (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), |
||
3297 | (__v8di)__A); |
||
3298 | } |
||
3299 | |||
3300 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3301 | _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, |
||
3302 | __m512i __B) |
||
3303 | { |
||
3304 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
3305 | (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), |
||
3306 | (__v8di)__I); |
||
3307 | } |
||
3308 | |||
3309 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3310 | _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, |
||
3311 | __m512i __B) |
||
3312 | { |
||
3313 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
3314 | (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), |
||
3315 | (__v8di)_mm512_setzero_si512()); |
||
3316 | } |
||
3317 | |||
/* alignr on 64-bit lanes. The base macro forwards A, B and the immediate I
 * to the alignq512 builtin; the mask/maskz forms wrap that result in the
 * selectq builtin with W (mask) or an all-zero vector (maskz) as the other
 * operand. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))

/* alignr on 32-bit lanes; same structure as above using the alignd512 and
 * selectd builtins. */
#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
||
3345 | /* Vector Extract */ |
||
3346 | |||
/* Extract a __m256d from a __m512d via the extractf64x4 mask builtin.
 * The three forms differ only in the third/fourth builtin arguments:
 *   plain : _mm256_undefined_pd() with an all-ones mask
 *   mask  : W with mask U
 *   maskz : _mm256_setzero_pd() with mask U */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(I), \
      (__v4df)_mm256_undefined_pd(), (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

/* Extract a __m128 from a __m512 via the extractf32x4 mask builtin; same
 * plain / mask / maskz pattern as above. */
#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(I), \
      (__v4sf)_mm_undefined_ps(), (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
||
3376 | |||
3377 | /* Vector Blend */ |
||
3378 | |||
3379 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
3380 | _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) |
||
3381 | { |
||
3382 | return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, |
||
3383 | (__v8df) __W, |
||
3384 | (__v8df) __A); |
||
3385 | } |
||
3386 | |||
3387 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
3388 | _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) |
||
3389 | { |
||
3390 | return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, |
||
3391 | (__v16sf) __W, |
||
3392 | (__v16sf) __A); |
||
3393 | } |
||
3394 | |||
3395 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3396 | _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) |
||
3397 | { |
||
3398 | return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, |
||
3399 | (__v8di) __W, |
||
3400 | (__v8di) __A); |
||
3401 | } |
||
3402 | |||
3403 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3404 | _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) |
||
3405 | { |
||
3406 | return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, |
||
3407 | (__v16si) __W, |
||
3408 | (__v16si) __A); |
||
3409 | } |
||
3410 | |||
3411 | /* Compare */ |
||
3412 | |||
/* Packed single-precision compares that yield a __mmask16.
 * The *_round_* macros call the cmpps512 mask builtin directly (the unmasked
 * form passes an all-ones mask). Everything else is a thin alias that pins
 * the rounding argument to _MM_FROUND_CUR_DIRECTION and, for the named
 * predicates, the comparison predicate to a single _CMP_* constant. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
      (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Fixed-predicate shorthands (unmasked form, then masked form). */
#define _mm512_cmpeq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
||
3467 | |||
/* Packed double-precision compares that yield a __mmask8.
 * The *_round_* macros call the cmppd512 mask builtin directly (the unmasked
 * form passes an all-ones mask). Everything else is a thin alias that pins
 * the rounding argument to _MM_FROUND_CUR_DIRECTION and, for the named
 * predicates, the comparison predicate to a single _CMP_* constant. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Fixed-predicate shorthands (unmasked form, then masked form). */
#define _mm512_cmpeq_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
||
3522 | |||
3523 | /* Conversion */ |
||
3524 | |||
/* float -> unsigned 32-bit conversions through the cvttps2udq512 mask builtin
 * with an explicit R argument. Second/third builtin arguments per form:
 *   plain : _mm512_undefined_epi32() with an all-ones mask
 *   mask  : W with mask U
 *   maskz : _mm512_setzero_si512() with mask U */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
||
3539 | |||
3540 | |||
3541 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3542 | _mm512_cvttps_epu32(__m512 __A) |
||
3543 | { |
||
3544 | return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
||
3545 | (__v16si) |
||
3546 | _mm512_setzero_si512 (), |
||
3547 | (__mmask16) -1, |
||
3548 | _MM_FROUND_CUR_DIRECTION); |
||
3549 | } |
||
3550 | |||
3551 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3552 | _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) |
||
3553 | { |
||
3554 | return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
||
3555 | (__v16si) __W, |
||
3556 | (__mmask16) __U, |
||
3557 | _MM_FROUND_CUR_DIRECTION); |
||
3558 | } |
||
3559 | |||
3560 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3561 | _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) |
||
3562 | { |
||
3563 | return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, |
||
3564 | (__v16si) _mm512_setzero_si512 (), |
||
3565 | (__mmask16) __U, |
||
3566 | _MM_FROUND_CUR_DIRECTION); |
||
3567 | } |
||
3568 | |||
/* 32-bit integer -> float conversions with an explicit R argument, through
 * the cvtdq2ps512 mask builtin. The plain and maskz forms pass
 * _mm512_setzero_ps() as the second operand (with an all-ones mask or U
 * respectively); the mask form passes W with mask U. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))

/* Same three forms for the epu32 variants, routed to the cvtudq2ps512 mask
 * builtin. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
||
3598 | |||
3599 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3600 | _mm512_cvtepu32_ps (__m512i __A) |
||
3601 | { |
||
3602 | return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); |
||
3603 | } |
||
3604 | |||
3605 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3606 | _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) |
||
3607 | { |
||
3608 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
3609 | (__v16sf)_mm512_cvtepu32_ps(__A), |
||
3610 | (__v16sf)__W); |
||
3611 | } |
||
3612 | |||
3613 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3614 | _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) |
||
3615 | { |
||
3616 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
3617 | (__v16sf)_mm512_cvtepu32_ps(__A), |
||
3618 | (__v16sf)_mm512_setzero_ps()); |
||
3619 | } |
||
3620 | |||
3621 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
3622 | _mm512_cvtepi32_pd(__m256i __A) |
||
3623 | { |
||
3624 | return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); |
||
3625 | } |
||
3626 | |||
3627 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3628 | _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) |
||
3629 | { |
||
3630 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
3631 | (__v8df)_mm512_cvtepi32_pd(__A), |
||
3632 | (__v8df)__W); |
||
3633 | } |
||
3634 | |||
3635 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3636 | _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) |
||
3637 | { |
||
3638 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
3639 | (__v8df)_mm512_cvtepi32_pd(__A), |
||
3640 | (__v8df)_mm512_setzero_pd()); |
||
3641 | } |
||
3642 | |||
3643 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3644 | _mm512_cvtepi32lo_pd(__m512i __A) |
||
3645 | { |
||
3646 | return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); |
||
3647 | } |
||
3648 | |||
3649 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3650 | _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) |
||
3651 | { |
||
3652 | return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); |
||
3653 | } |
||
3654 | |||
3655 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3656 | _mm512_cvtepi32_ps (__m512i __A) |
||
3657 | { |
||
3658 | return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); |
||
3659 | } |
||
3660 | |||
3661 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3662 | _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) |
||
3663 | { |
||
3664 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
3665 | (__v16sf)_mm512_cvtepi32_ps(__A), |
||
3666 | (__v16sf)__W); |
||
3667 | } |
||
3668 | |||
3669 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3670 | _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) |
||
3671 | { |
||
3672 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
3673 | (__v16sf)_mm512_cvtepi32_ps(__A), |
||
3674 | (__v16sf)_mm512_setzero_ps()); |
||
3675 | } |
||
3676 | |||
3677 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
3678 | _mm512_cvtepu32_pd(__m256i __A) |
||
3679 | { |
||
3680 | return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); |
||
3681 | } |
||
3682 | |||
3683 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3684 | _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) |
||
3685 | { |
||
3686 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
3687 | (__v8df)_mm512_cvtepu32_pd(__A), |
||
3688 | (__v8df)__W); |
||
3689 | } |
||
3690 | |||
3691 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3692 | _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) |
||
3693 | { |
||
3694 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
3695 | (__v8df)_mm512_cvtepu32_pd(__A), |
||
3696 | (__v8df)_mm512_setzero_pd()); |
||
3697 | } |
||
3698 | |||
3699 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3700 | _mm512_cvtepu32lo_pd(__m512i __A) |
||
3701 | { |
||
3702 | return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); |
||
3703 | } |
||
3704 | |||
3705 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
3706 | _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) |
||
3707 | { |
||
3708 | return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); |
||
3709 | } |
||
3710 | |||
/* double -> float conversions with an explicit R argument, through the
 * cvtpd2ps512 mask builtin. The plain and maskz forms pass
 * _mm256_setzero_ps() as the second operand (with an all-ones mask or U
 * respectively); the mask form passes W with mask U. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
3725 | |||
3726 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
3727 | _mm512_cvtpd_ps (__m512d __A) |
||
3728 | { |
||
3729 | return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
||
3730 | (__v8sf) _mm256_undefined_ps (), |
||
3731 | (__mmask8) -1, |
||
3732 | _MM_FROUND_CUR_DIRECTION); |
||
3733 | } |
||
3734 | |||
3735 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
3736 | _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) |
||
3737 | { |
||
3738 | return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
||
3739 | (__v8sf) __W, |
||
3740 | (__mmask8) __U, |
||
3741 | _MM_FROUND_CUR_DIRECTION); |
||
3742 | } |
||
3743 | |||
3744 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
3745 | _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) |
||
3746 | { |
||
3747 | return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, |
||
3748 | (__v8sf) _mm256_setzero_ps (), |
||
3749 | (__mmask8) __U, |
||
3750 | _MM_FROUND_CUR_DIRECTION); |
||
3751 | } |
||
3752 | |||
3753 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3754 | _mm512_cvtpd_pslo (__m512d __A) |
||
3755 | { |
||
3756 | return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), |
||
3757 | (__v8sf) _mm256_setzero_ps (), |
||
3758 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
||
3759 | } |
||
3760 | |||
3761 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3762 | _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) |
||
3763 | { |
||
3764 | return (__m512) __builtin_shufflevector ( |
||
3765 | (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), |
||
3766 | __U, __A), |
||
3767 | (__v8sf) _mm256_setzero_ps (), |
||
3768 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
||
3769 | } |
||
3770 | |||
/* float -> half conversions through the vcvtps2ph512 mask builtin. I is the
 * immediate forwarded as the builtin's second argument.
 *   plain : _mm256_undefined_si256() with an all-ones mask
 *   mask  : W with mask U
 *   maskz : _mm256_setzero_si256() with mask U
 *
 * Parameter names follow the (W, U, ...) / (U, ...) convention used by the
 * other mask/maskz macros in this header: W is the vector operand and U the
 * mask. Only the names changed; argument positions and the expansion are the
 * same as before. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))

/* The non-"round" names are plain aliases of the macros above. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
||
3789 | |||
/* half -> float conversions with an explicit R argument, through the
 * vcvtph2ps512 mask builtin. Second/third builtin arguments per form:
 *   plain : _mm512_undefined_ps() with an all-ones mask
 *   mask  : W with mask U
 *   maskz : _mm512_setzero_ps() with mask U */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
||
3804 | |||
3805 | |||
3806 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
3807 | _mm512_cvtph_ps(__m256i __A) |
||
3808 | { |
||
3809 | return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
||
3810 | (__v16sf) |
||
3811 | _mm512_setzero_ps (), |
||
3812 | (__mmask16) -1, |
||
3813 | _MM_FROUND_CUR_DIRECTION); |
||
3814 | } |
||
3815 | |||
3816 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3817 | _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) |
||
3818 | { |
||
3819 | return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
||
3820 | (__v16sf) __W, |
||
3821 | (__mmask16) __U, |
||
3822 | _MM_FROUND_CUR_DIRECTION); |
||
3823 | } |
||
3824 | |||
3825 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
3826 | _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) |
||
3827 | { |
||
3828 | return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, |
||
3829 | (__v16sf) _mm512_setzero_ps (), |
||
3830 | (__mmask16) __U, |
||
3831 | _MM_FROUND_CUR_DIRECTION); |
||
3832 | } |
||
3833 | |||
/* double -> 32-bit integer conversions with an explicit R argument, through
 * the cvttpd2dq512 mask builtin. The plain and maskz forms pass
 * _mm256_setzero_si256() as the second operand (with an all-ones mask or U
 * respectively); the mask form passes W with mask U. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
||
3848 | |||
3849 | static __inline __m256i __DEFAULT_FN_ATTRS512 |
||
3850 | _mm512_cvttpd_epi32(__m512d __a) |
||
3851 | { |
||
3852 | return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, |
||
3853 | (__v8si)_mm256_setzero_si256(), |
||
3854 | (__mmask8) -1, |
||
3855 | _MM_FROUND_CUR_DIRECTION); |
||
3856 | } |
||
3857 | |||
3858 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
3859 | _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) |
||
3860 | { |
||
3861 | return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
||
3862 | (__v8si) __W, |
||
3863 | (__mmask8) __U, |
||
3864 | _MM_FROUND_CUR_DIRECTION); |
||
3865 | } |
||
3866 | |||
3867 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
3868 | _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) |
||
3869 | { |
||
3870 | return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, |
||
3871 | (__v8si) _mm256_setzero_si256 (), |
||
3872 | (__mmask8) __U, |
||
3873 | _MM_FROUND_CUR_DIRECTION); |
||
3874 | } |
||
3875 | |||
/* float -> 32-bit integer conversions with an explicit R argument, through
 * the cvttps2dq512 mask builtin. The plain and maskz forms pass
 * _mm512_setzero_si512() as the second operand (with an all-ones mask or U
 * respectively); the mask form passes W with mask U. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)))
||
3890 | |||
3891 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
3892 | _mm512_cvttps_epi32(__m512 __a) |
||
3893 | { |
||
3894 | return (__m512i) |
||
3895 | __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, |
||
3896 | (__v16si) _mm512_setzero_si512 (), |
||
3897 | (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); |
||
3898 | } |
||
3899 | |||
3900 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3901 | _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) |
||
3902 | { |
||
3903 | return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
||
3904 | (__v16si) __W, |
||
3905 | (__mmask16) __U, |
||
3906 | _MM_FROUND_CUR_DIRECTION); |
||
3907 | } |
||
3908 | |||
3909 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3910 | _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) |
||
3911 | { |
||
3912 | return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, |
||
3913 | (__v16si) _mm512_setzero_si512 (), |
||
3914 | (__mmask16) __U, |
||
3915 | _MM_FROUND_CUR_DIRECTION); |
||
3916 | } |
||
3917 | |||
/* Explicit rounding-operand wrappers around __builtin_ia32_cvtps2dq512_mask.
 *   A - __m512 source              W - __m512i pass-through operand
 *   U - __mmask16 lane mask        R - rounding operand, forwarded as (int)
 * The masked macro is the primitive; the unmasked and maskz forms forward to
 * it with a zero pass-through (and an all-ones mask for the unmasked form). */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundps_epi32(A, R) \
  _mm512_mask_cvt_roundps_epi32(_mm512_setzero_si512(), \
                                (__mmask16)-1, (A), (R))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  _mm512_mask_cvt_roundps_epi32(_mm512_setzero_si512(), \
                                (U), (A), (R))
||
3932 | |||
3933 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3934 | _mm512_cvtps_epi32 (__m512 __A) |
||
3935 | { |
||
3936 | return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
||
3937 | (__v16si) _mm512_undefined_epi32 (), |
||
3938 | (__mmask16) -1, |
||
3939 | _MM_FROUND_CUR_DIRECTION); |
||
3940 | } |
||
3941 | |||
3942 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3943 | _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) |
||
3944 | { |
||
3945 | return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
||
3946 | (__v16si) __W, |
||
3947 | (__mmask16) __U, |
||
3948 | _MM_FROUND_CUR_DIRECTION); |
||
3949 | } |
||
3950 | |||
3951 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
3952 | _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) |
||
3953 | { |
||
3954 | return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, |
||
3955 | (__v16si) |
||
3956 | _mm512_setzero_si512 (), |
||
3957 | (__mmask16) __U, |
||
3958 | _MM_FROUND_CUR_DIRECTION); |
||
3959 | } |
||
3960 | |||
/* Explicit rounding-operand wrappers around __builtin_ia32_cvtpd2dq512_mask.
 *   A - __m512d source             W - __m256i pass-through operand
 *   U - __mmask8 lane mask         R - rounding operand, forwarded as (int)
 * The masked macro is the primitive; the unmasked and maskz forms forward to
 * it with a zero pass-through (and an all-ones mask for the unmasked form). */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_cvt_roundpd_epi32(A, R) \
  _mm512_mask_cvt_roundpd_epi32(_mm256_setzero_si256(), \
                                (__mmask8)-1, (A), (R))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  _mm512_mask_cvt_roundpd_epi32(_mm256_setzero_si256(), \
                                (U), (A), (R))
||
3975 | |||
3976 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
3977 | _mm512_cvtpd_epi32 (__m512d __A) |
||
3978 | { |
||
3979 | return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
||
3980 | (__v8si) |
||
3981 | _mm256_undefined_si256 (), |
||
3982 | (__mmask8) -1, |
||
3983 | _MM_FROUND_CUR_DIRECTION); |
||
3984 | } |
||
3985 | |||
3986 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
3987 | _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) |
||
3988 | { |
||
3989 | return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
||
3990 | (__v8si) __W, |
||
3991 | (__mmask8) __U, |
||
3992 | _MM_FROUND_CUR_DIRECTION); |
||
3993 | } |
||
3994 | |||
3995 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
3996 | _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) |
||
3997 | { |
||
3998 | return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, |
||
3999 | (__v8si) |
||
4000 | _mm256_setzero_si256 (), |
||
4001 | (__mmask8) __U, |
||
4002 | _MM_FROUND_CUR_DIRECTION); |
||
4003 | } |
||
4004 | |||
/* Explicit rounding-operand wrappers around __builtin_ia32_cvtps2udq512_mask.
 *   A - __m512 source              W - __m512i pass-through operand
 *   U - __mmask16 lane mask        R - rounding operand, forwarded as (int)
 * The masked macro is the primitive; the unmasked and maskz forms forward to
 * it with a zero pass-through (and an all-ones mask for the unmasked form). */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundps_epu32(A, R) \
  _mm512_mask_cvt_roundps_epu32(_mm512_setzero_si512(), \
                                (__mmask16)-1, (A), (R))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  _mm512_mask_cvt_roundps_epu32(_mm512_setzero_si512(), \
                                (U), (A), (R))
||
4019 | |||
4020 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4021 | _mm512_cvtps_epu32 ( __m512 __A) |
||
4022 | { |
||
4023 | return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ |
||
4024 | (__v16si)\ |
||
4025 | _mm512_undefined_epi32 (), |
||
4026 | (__mmask16) -1,\ |
||
4027 | _MM_FROUND_CUR_DIRECTION); |
||
4028 | } |
||
4029 | |||
4030 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4031 | _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) |
||
4032 | { |
||
4033 | return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
||
4034 | (__v16si) __W, |
||
4035 | (__mmask16) __U, |
||
4036 | _MM_FROUND_CUR_DIRECTION); |
||
4037 | } |
||
4038 | |||
4039 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4040 | _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) |
||
4041 | { |
||
4042 | return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, |
||
4043 | (__v16si) |
||
4044 | _mm512_setzero_si512 (), |
||
4045 | (__mmask16) __U , |
||
4046 | _MM_FROUND_CUR_DIRECTION); |
||
4047 | } |
||
4048 | |||
/* Explicit rounding-operand wrappers around __builtin_ia32_cvtpd2udq512_mask.
 *   A - __m512d source             W - __m256i pass-through operand
 *   U - __mmask8 lane mask         R - rounding operand, forwarded as (int)
 * The masked macro is the primitive; the unmasked and maskz forms forward to
 * it with a zero pass-through (and an all-ones mask for the unmasked form). */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_cvt_roundpd_epu32(A, R) \
  _mm512_mask_cvt_roundpd_epu32(_mm256_setzero_si256(), \
                                (__mmask8)-1, (A), (R))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  _mm512_mask_cvt_roundpd_epu32(_mm256_setzero_si256(), \
                                (U), (A), (R))
||
4063 | |||
4064 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
4065 | _mm512_cvtpd_epu32 (__m512d __A) |
||
4066 | { |
||
4067 | return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
||
4068 | (__v8si) |
||
4069 | _mm256_undefined_si256 (), |
||
4070 | (__mmask8) -1, |
||
4071 | _MM_FROUND_CUR_DIRECTION); |
||
4072 | } |
||
4073 | |||
4074 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
4075 | _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) |
||
4076 | { |
||
4077 | return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
||
4078 | (__v8si) __W, |
||
4079 | (__mmask8) __U, |
||
4080 | _MM_FROUND_CUR_DIRECTION); |
||
4081 | } |
||
4082 | |||
4083 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
4084 | _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) |
||
4085 | { |
||
4086 | return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, |
||
4087 | (__v8si) |
||
4088 | _mm256_setzero_si256 (), |
||
4089 | (__mmask8) __U, |
||
4090 | _MM_FROUND_CUR_DIRECTION); |
||
4091 | } |
||
4092 | |||
4093 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
4094 | _mm512_cvtsd_f64(__m512d __a) |
||
4095 | { |
||
4096 | return __a[0]; |
||
4097 | } |
||
4098 | |||
4099 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
4100 | _mm512_cvtss_f32(__m512 __a) |
||
4101 | { |
||
4102 | return __a[0]; |
||
4103 | } |
||
4104 | |||
4105 | /* Unpack and Interleave */ |
||
4106 | |||
4107 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4108 | _mm512_unpackhi_pd(__m512d __a, __m512d __b) |
||
4109 | { |
||
4110 | return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, |
||
4111 | 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); |
||
4112 | } |
||
4113 | |||
4114 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
4115 | _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
||
4116 | { |
||
4117 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
4118 | (__v8df)_mm512_unpackhi_pd(__A, __B), |
||
4119 | (__v8df)__W); |
||
4120 | } |
||
4121 | |||
4122 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
4123 | _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) |
||
4124 | { |
||
4125 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
4126 | (__v8df)_mm512_unpackhi_pd(__A, __B), |
||
4127 | (__v8df)_mm512_setzero_pd()); |
||
4128 | } |
||
4129 | |||
4130 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4131 | _mm512_unpacklo_pd(__m512d __a, __m512d __b) |
||
4132 | { |
||
4133 | return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, |
||
4134 | 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); |
||
4135 | } |
||
4136 | |||
4137 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
4138 | _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
||
4139 | { |
||
4140 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
4141 | (__v8df)_mm512_unpacklo_pd(__A, __B), |
||
4142 | (__v8df)__W); |
||
4143 | } |
||
4144 | |||
4145 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
4146 | _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) |
||
4147 | { |
||
4148 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, |
||
4149 | (__v8df)_mm512_unpacklo_pd(__A, __B), |
||
4150 | (__v8df)_mm512_setzero_pd()); |
||
4151 | } |
||
4152 | |||
4153 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4154 | _mm512_unpackhi_ps(__m512 __a, __m512 __b) |
||
4155 | { |
||
4156 | return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, |
||
4157 | 2, 18, 3, 19, |
||
4158 | 2+4, 18+4, 3+4, 19+4, |
||
4159 | 2+8, 18+8, 3+8, 19+8, |
||
4160 | 2+12, 18+12, 3+12, 19+12); |
||
4161 | } |
||
4162 | |||
4163 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
4164 | _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
||
4165 | { |
||
4166 | return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, |
||
4167 | (__v16sf)_mm512_unpackhi_ps(__A, __B), |
||
4168 | (__v16sf)__W); |
||
4169 | } |
||
4170 | |||
4171 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
4172 | _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) |
||
4173 | { |
||
4174 | return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, |
||
4175 | (__v16sf)_mm512_unpackhi_ps(__A, __B), |
||
4176 | (__v16sf)_mm512_setzero_ps()); |
||
4177 | } |
||
4178 | |||
4179 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4180 | _mm512_unpacklo_ps(__m512 __a, __m512 __b) |
||
4181 | { |
||
4182 | return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, |
||
4183 | 0, 16, 1, 17, |
||
4184 | 0+4, 16+4, 1+4, 17+4, |
||
4185 | 0+8, 16+8, 1+8, 17+8, |
||
4186 | 0+12, 16+12, 1+12, 17+12); |
||
4187 | } |
||
4188 | |||
4189 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
4190 | _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
||
4191 | { |
||
4192 | return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, |
||
4193 | (__v16sf)_mm512_unpacklo_ps(__A, __B), |
||
4194 | (__v16sf)__W); |
||
4195 | } |
||
4196 | |||
4197 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
4198 | _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) |
||
4199 | { |
||
4200 | return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, |
||
4201 | (__v16sf)_mm512_unpacklo_ps(__A, __B), |
||
4202 | (__v16sf)_mm512_setzero_ps()); |
||
4203 | } |
||
4204 | |||
4205 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4206 | _mm512_unpackhi_epi32(__m512i __A, __m512i __B) |
||
4207 | { |
||
4208 | return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, |
||
4209 | 2, 18, 3, 19, |
||
4210 | 2+4, 18+4, 3+4, 19+4, |
||
4211 | 2+8, 18+8, 3+8, 19+8, |
||
4212 | 2+12, 18+12, 3+12, 19+12); |
||
4213 | } |
||
4214 | |||
4215 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4216 | _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
4217 | { |
||
4218 | return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, |
||
4219 | (__v16si)_mm512_unpackhi_epi32(__A, __B), |
||
4220 | (__v16si)__W); |
||
4221 | } |
||
4222 | |||
4223 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4224 | _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) |
||
4225 | { |
||
4226 | return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, |
||
4227 | (__v16si)_mm512_unpackhi_epi32(__A, __B), |
||
4228 | (__v16si)_mm512_setzero_si512()); |
||
4229 | } |
||
4230 | |||
4231 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4232 | _mm512_unpacklo_epi32(__m512i __A, __m512i __B) |
||
4233 | { |
||
4234 | return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, |
||
4235 | 0, 16, 1, 17, |
||
4236 | 0+4, 16+4, 1+4, 17+4, |
||
4237 | 0+8, 16+8, 1+8, 17+8, |
||
4238 | 0+12, 16+12, 1+12, 17+12); |
||
4239 | } |
||
4240 | |||
4241 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4242 | _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
4243 | { |
||
4244 | return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, |
||
4245 | (__v16si)_mm512_unpacklo_epi32(__A, __B), |
||
4246 | (__v16si)__W); |
||
4247 | } |
||
4248 | |||
4249 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4250 | _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) |
||
4251 | { |
||
4252 | return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, |
||
4253 | (__v16si)_mm512_unpacklo_epi32(__A, __B), |
||
4254 | (__v16si)_mm512_setzero_si512()); |
||
4255 | } |
||
4256 | |||
4257 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4258 | _mm512_unpackhi_epi64(__m512i __A, __m512i __B) |
||
4259 | { |
||
4260 | return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, |
||
4261 | 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); |
||
4262 | } |
||
4263 | |||
4264 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4265 | _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
4266 | { |
||
4267 | return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, |
||
4268 | (__v8di)_mm512_unpackhi_epi64(__A, __B), |
||
4269 | (__v8di)__W); |
||
4270 | } |
||
4271 | |||
4272 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4273 | _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) |
||
4274 | { |
||
4275 | return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, |
||
4276 | (__v8di)_mm512_unpackhi_epi64(__A, __B), |
||
4277 | (__v8di)_mm512_setzero_si512()); |
||
4278 | } |
||
4279 | |||
4280 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4281 | _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) |
||
4282 | { |
||
4283 | return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, |
||
4284 | 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); |
||
4285 | } |
||
4286 | |||
4287 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4288 | _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
4289 | { |
||
4290 | return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, |
||
4291 | (__v8di)_mm512_unpacklo_epi64(__A, __B), |
||
4292 | (__v8di)__W); |
||
4293 | } |
||
4294 | |||
4295 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4296 | _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
||
4297 | { |
||
4298 | return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, |
||
4299 | (__v8di)_mm512_unpacklo_epi64(__A, __B), |
||
4300 | (__v8di)_mm512_setzero_si512()); |
||
4301 | } |
||
4302 | |||
4303 | |||
4304 | /* SIMD load ops */ |
||
4305 | |||
4306 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4307 | _mm512_loadu_si512 (void const *__P) |
||
4308 | { |
||
4309 | struct __loadu_si512 { |
||
4310 | __m512i_u __v; |
||
4311 | } __attribute__((__packed__, __may_alias__)); |
||
4312 | return ((const struct __loadu_si512*)__P)->__v; |
||
4313 | } |
||
4314 | |||
4315 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4316 | _mm512_loadu_epi32 (void const *__P) |
||
4317 | { |
||
4318 | struct __loadu_epi32 { |
||
4319 | __m512i_u __v; |
||
4320 | } __attribute__((__packed__, __may_alias__)); |
||
4321 | return ((const struct __loadu_epi32*)__P)->__v; |
||
4322 | } |
||
4323 | |||
4324 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4325 | _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
||
4326 | { |
||
4327 | return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, |
||
4328 | (__v16si) __W, |
||
4329 | (__mmask16) __U); |
||
4330 | } |
||
4331 | |||
4332 | |||
4333 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4334 | _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) |
||
4335 | { |
||
4336 | return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, |
||
4337 | (__v16si) |
||
4338 | _mm512_setzero_si512 (), |
||
4339 | (__mmask16) __U); |
||
4340 | } |
||
4341 | |||
4342 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4343 | _mm512_loadu_epi64 (void const *__P) |
||
4344 | { |
||
4345 | struct __loadu_epi64 { |
||
4346 | __m512i_u __v; |
||
4347 | } __attribute__((__packed__, __may_alias__)); |
||
4348 | return ((const struct __loadu_epi64*)__P)->__v; |
||
4349 | } |
||
4350 | |||
4351 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4352 | _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
||
4353 | { |
||
4354 | return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, |
||
4355 | (__v8di) __W, |
||
4356 | (__mmask8) __U); |
||
4357 | } |
||
4358 | |||
4359 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4360 | _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) |
||
4361 | { |
||
4362 | return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, |
||
4363 | (__v8di) |
||
4364 | _mm512_setzero_si512 (), |
||
4365 | (__mmask8) __U); |
||
4366 | } |
||
4367 | |||
4368 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4369 | _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) |
||
4370 | { |
||
4371 | return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, |
||
4372 | (__v16sf) __W, |
||
4373 | (__mmask16) __U); |
||
4374 | } |
||
4375 | |||
4376 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4377 | _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) |
||
4378 | { |
||
4379 | return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, |
||
4380 | (__v16sf) |
||
4381 | _mm512_setzero_ps (), |
||
4382 | (__mmask16) __U); |
||
4383 | } |
||
4384 | |||
4385 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4386 | _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) |
||
4387 | { |
||
4388 | return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, |
||
4389 | (__v8df) __W, |
||
4390 | (__mmask8) __U); |
||
4391 | } |
||
4392 | |||
4393 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4394 | _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) |
||
4395 | { |
||
4396 | return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, |
||
4397 | (__v8df) |
||
4398 | _mm512_setzero_pd (), |
||
4399 | (__mmask8) __U); |
||
4400 | } |
||
4401 | |||
4402 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4403 | _mm512_loadu_pd(void const *__p) |
||
4404 | { |
||
4405 | struct __loadu_pd { |
||
4406 | __m512d_u __v; |
||
4407 | } __attribute__((__packed__, __may_alias__)); |
||
4408 | return ((const struct __loadu_pd*)__p)->__v; |
||
4409 | } |
||
4410 | |||
4411 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4412 | _mm512_loadu_ps(void const *__p) |
||
4413 | { |
||
4414 | struct __loadu_ps { |
||
4415 | __m512_u __v; |
||
4416 | } __attribute__((__packed__, __may_alias__)); |
||
4417 | return ((const struct __loadu_ps*)__p)->__v; |
||
4418 | } |
||
4419 | |||
4420 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4421 | _mm512_load_ps(void const *__p) |
||
4422 | { |
||
4423 | return *(const __m512*)__p; |
||
4424 | } |
||
4425 | |||
4426 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4427 | _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) |
||
4428 | { |
||
4429 | return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, |
||
4430 | (__v16sf) __W, |
||
4431 | (__mmask16) __U); |
||
4432 | } |
||
4433 | |||
4434 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
4435 | _mm512_maskz_load_ps(__mmask16 __U, void const *__P) |
||
4436 | { |
||
4437 | return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, |
||
4438 | (__v16sf) |
||
4439 | _mm512_setzero_ps (), |
||
4440 | (__mmask16) __U); |
||
4441 | } |
||
4442 | |||
4443 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4444 | _mm512_load_pd(void const *__p) |
||
4445 | { |
||
4446 | return *(const __m512d*)__p; |
||
4447 | } |
||
4448 | |||
4449 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4450 | _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) |
||
4451 | { |
||
4452 | return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, |
||
4453 | (__v8df) __W, |
||
4454 | (__mmask8) __U); |
||
4455 | } |
||
4456 | |||
4457 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
4458 | _mm512_maskz_load_pd(__mmask8 __U, void const *__P) |
||
4459 | { |
||
4460 | return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, |
||
4461 | (__v8df) |
||
4462 | _mm512_setzero_pd (), |
||
4463 | (__mmask8) __U); |
||
4464 | } |
||
4465 | |||
4466 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4467 | _mm512_load_si512 (void const *__P) |
||
4468 | { |
||
4469 | return *(const __m512i *) __P; |
||
4470 | } |
||
4471 | |||
4472 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4473 | _mm512_load_epi32 (void const *__P) |
||
4474 | { |
||
4475 | return *(const __m512i *) __P; |
||
4476 | } |
||
4477 | |||
4478 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
4479 | _mm512_load_epi64 (void const *__P) |
||
4480 | { |
||
4481 | return *(const __m512i *) __P; |
||
4482 | } |
||
4483 | |||
4484 | /* SIMD store ops */ |
||
4485 | |||
4486 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4487 | _mm512_storeu_epi64 (void *__P, __m512i __A) |
||
4488 | { |
||
4489 | struct __storeu_epi64 { |
||
4490 | __m512i_u __v; |
||
4491 | } __attribute__((__packed__, __may_alias__)); |
||
4492 | ((struct __storeu_epi64*)__P)->__v = __A; |
||
4493 | } |
||
4494 | |||
4495 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4496 | _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) |
||
4497 | { |
||
4498 | __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, |
||
4499 | (__mmask8) __U); |
||
4500 | } |
||
4501 | |||
4502 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4503 | _mm512_storeu_si512 (void *__P, __m512i __A) |
||
4504 | { |
||
4505 | struct __storeu_si512 { |
||
4506 | __m512i_u __v; |
||
4507 | } __attribute__((__packed__, __may_alias__)); |
||
4508 | ((struct __storeu_si512*)__P)->__v = __A; |
||
4509 | } |
||
4510 | |||
4511 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4512 | _mm512_storeu_epi32 (void *__P, __m512i __A) |
||
4513 | { |
||
4514 | struct __storeu_epi32 { |
||
4515 | __m512i_u __v; |
||
4516 | } __attribute__((__packed__, __may_alias__)); |
||
4517 | ((struct __storeu_epi32*)__P)->__v = __A; |
||
4518 | } |
||
4519 | |||
4520 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4521 | _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) |
||
4522 | { |
||
4523 | __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, |
||
4524 | (__mmask16) __U); |
||
4525 | } |
||
4526 | |||
4527 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4528 | _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) |
||
4529 | { |
||
4530 | __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); |
||
4531 | } |
||
4532 | |||
4533 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4534 | _mm512_storeu_pd(void *__P, __m512d __A) |
||
4535 | { |
||
4536 | struct __storeu_pd { |
||
4537 | __m512d_u __v; |
||
4538 | } __attribute__((__packed__, __may_alias__)); |
||
4539 | ((struct __storeu_pd*)__P)->__v = __A; |
||
4540 | } |
||
4541 | |||
4542 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4543 | _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) |
||
4544 | { |
||
4545 | __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, |
||
4546 | (__mmask16) __U); |
||
4547 | } |
||
4548 | |||
4549 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4550 | _mm512_storeu_ps(void *__P, __m512 __A) |
||
4551 | { |
||
4552 | struct __storeu_ps { |
||
4553 | __m512_u __v; |
||
4554 | } __attribute__((__packed__, __may_alias__)); |
||
4555 | ((struct __storeu_ps*)__P)->__v = __A; |
||
4556 | } |
||
4557 | |||
4558 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4559 | _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) |
||
4560 | { |
||
4561 | __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); |
||
4562 | } |
||
4563 | |||
4564 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4565 | _mm512_store_pd(void *__P, __m512d __A) |
||
4566 | { |
||
4567 | *(__m512d*)__P = __A; |
||
4568 | } |
||
4569 | |||
4570 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4571 | _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) |
||
4572 | { |
||
4573 | __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, |
||
4574 | (__mmask16) __U); |
||
4575 | } |
||
4576 | |||
4577 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4578 | _mm512_store_ps(void *__P, __m512 __A) |
||
4579 | { |
||
4580 | *(__m512*)__P = __A; |
||
4581 | } |
||
4582 | |||
4583 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4584 | _mm512_store_si512 (void *__P, __m512i __A) |
||
4585 | { |
||
4586 | *(__m512i *) __P = __A; |
||
4587 | } |
||
4588 | |||
4589 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4590 | _mm512_store_epi32 (void *__P, __m512i __A) |
||
4591 | { |
||
4592 | *(__m512i *) __P = __A; |
||
4593 | } |
||
4594 | |||
4595 | static __inline void __DEFAULT_FN_ATTRS512 |
||
4596 | _mm512_store_epi64 (void *__P, __m512i __A) |
||
4597 | { |
||
4598 | *(__m512i *) __P = __A; |
||
4599 | } |
||
4600 | |||
4601 | /* Mask ops */ |
||
4602 | |||
4603 | static __inline __mmask16 __DEFAULT_FN_ATTRS |
||
4604 | _mm512_knot(__mmask16 __M) |
||
4605 | { |
||
4606 | return __builtin_ia32_knothi(__M); |
||
4607 | } |
||
4608 | |||
4609 | /* Integer compare */ |
||
4610 | |||
/* Named-predicate shorthands for the epi32 compares. Each macro forwards its
 * operands to _mm512_cmp_epi32_mask, or to _mm512_mask_cmp_epi32_mask when a
 * mask k is given, with the matching _MM_CMPINT_* predicate constant. */
#define _mm512_cmpeq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
||
4635 | |||
/* Named-predicate shorthands for the epu32 compares. Each macro forwards its
 * operands to _mm512_cmp_epu32_mask, or to _mm512_mask_cmp_epu32_mask when a
 * mask k is given, with the matching _MM_CMPINT_* predicate constant. */
#define _mm512_cmpeq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
||
4660 | |||
/* Named-predicate shorthands for the epi64 compares. Each macro forwards its
 * operands to _mm512_cmp_epi64_mask, or to _mm512_mask_cmp_epi64_mask when a
 * mask k is given, with the matching _MM_CMPINT_* predicate constant. */
#define _mm512_cmpeq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
||
4685 | |||
/* Unsigned 64-bit comparison shorthands.  Every macro forwards its operands
 * unchanged to _mm512_cmp_epu64_mask (or, for the mask_ forms, to
 * _mm512_mask_cmp_epu64_mask with k as the leading mask operand) and fixes
 * the predicate to the _MM_CMPINT_* constant named by the macro. */

/* Equal. */
#define _mm512_cmpeq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)

/* Greater-than-or-equal. */
#define _mm512_cmpge_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)

/* Greater-than. */
#define _mm512_cmpgt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)

/* Less-than-or-equal. */
#define _mm512_cmple_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)

/* Less-than. */
#define _mm512_cmplt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)

/* Not-equal. */
#define _mm512_cmpneq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4710 | |||
4711 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4712 | _mm512_cvtepi8_epi32(__m128i __A) |
||
4713 | { |
||
4714 | /* This function always performs a signed extension, but __v16qi is a char |
||
4715 | which may be signed or unsigned, so use __v16qs. */ |
||
4716 | return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); |
||
4717 | } |
||
4718 | |||
4719 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4720 | _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) |
||
4721 | { |
||
4722 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4723 | (__v16si)_mm512_cvtepi8_epi32(__A), |
||
4724 | (__v16si)__W); |
||
4725 | } |
||
4726 | |||
4727 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4728 | _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) |
||
4729 | { |
||
4730 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4731 | (__v16si)_mm512_cvtepi8_epi32(__A), |
||
4732 | (__v16si)_mm512_setzero_si512()); |
||
4733 | } |
||
4734 | |||
4735 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4736 | _mm512_cvtepi8_epi64(__m128i __A) |
||
4737 | { |
||
4738 | /* This function always performs a signed extension, but __v16qi is a char |
||
4739 | which may be signed or unsigned, so use __v16qs. */ |
||
4740 | return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); |
||
4741 | } |
||
4742 | |||
4743 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4744 | _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) |
||
4745 | { |
||
4746 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4747 | (__v8di)_mm512_cvtepi8_epi64(__A), |
||
4748 | (__v8di)__W); |
||
4749 | } |
||
4750 | |||
4751 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4752 | _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) |
||
4753 | { |
||
4754 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4755 | (__v8di)_mm512_cvtepi8_epi64(__A), |
||
4756 | (__v8di)_mm512_setzero_si512 ()); |
||
4757 | } |
||
4758 | |||
4759 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4760 | _mm512_cvtepi32_epi64(__m256i __X) |
||
4761 | { |
||
4762 | return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); |
||
4763 | } |
||
4764 | |||
4765 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4766 | _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) |
||
4767 | { |
||
4768 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4769 | (__v8di)_mm512_cvtepi32_epi64(__X), |
||
4770 | (__v8di)__W); |
||
4771 | } |
||
4772 | |||
4773 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4774 | _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) |
||
4775 | { |
||
4776 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4777 | (__v8di)_mm512_cvtepi32_epi64(__X), |
||
4778 | (__v8di)_mm512_setzero_si512()); |
||
4779 | } |
||
4780 | |||
4781 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4782 | _mm512_cvtepi16_epi32(__m256i __A) |
||
4783 | { |
||
4784 | return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); |
||
4785 | } |
||
4786 | |||
4787 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4788 | _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) |
||
4789 | { |
||
4790 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4791 | (__v16si)_mm512_cvtepi16_epi32(__A), |
||
4792 | (__v16si)__W); |
||
4793 | } |
||
4794 | |||
4795 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4796 | _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) |
||
4797 | { |
||
4798 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4799 | (__v16si)_mm512_cvtepi16_epi32(__A), |
||
4800 | (__v16si)_mm512_setzero_si512 ()); |
||
4801 | } |
||
4802 | |||
4803 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4804 | _mm512_cvtepi16_epi64(__m128i __A) |
||
4805 | { |
||
4806 | return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); |
||
4807 | } |
||
4808 | |||
4809 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4810 | _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) |
||
4811 | { |
||
4812 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4813 | (__v8di)_mm512_cvtepi16_epi64(__A), |
||
4814 | (__v8di)__W); |
||
4815 | } |
||
4816 | |||
4817 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4818 | _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) |
||
4819 | { |
||
4820 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4821 | (__v8di)_mm512_cvtepi16_epi64(__A), |
||
4822 | (__v8di)_mm512_setzero_si512()); |
||
4823 | } |
||
4824 | |||
4825 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4826 | _mm512_cvtepu8_epi32(__m128i __A) |
||
4827 | { |
||
4828 | return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); |
||
4829 | } |
||
4830 | |||
4831 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4832 | _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) |
||
4833 | { |
||
4834 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4835 | (__v16si)_mm512_cvtepu8_epi32(__A), |
||
4836 | (__v16si)__W); |
||
4837 | } |
||
4838 | |||
4839 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4840 | _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) |
||
4841 | { |
||
4842 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4843 | (__v16si)_mm512_cvtepu8_epi32(__A), |
||
4844 | (__v16si)_mm512_setzero_si512()); |
||
4845 | } |
||
4846 | |||
4847 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4848 | _mm512_cvtepu8_epi64(__m128i __A) |
||
4849 | { |
||
4850 | return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); |
||
4851 | } |
||
4852 | |||
4853 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4854 | _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) |
||
4855 | { |
||
4856 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4857 | (__v8di)_mm512_cvtepu8_epi64(__A), |
||
4858 | (__v8di)__W); |
||
4859 | } |
||
4860 | |||
4861 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4862 | _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) |
||
4863 | { |
||
4864 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4865 | (__v8di)_mm512_cvtepu8_epi64(__A), |
||
4866 | (__v8di)_mm512_setzero_si512()); |
||
4867 | } |
||
4868 | |||
4869 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4870 | _mm512_cvtepu32_epi64(__m256i __X) |
||
4871 | { |
||
4872 | return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); |
||
4873 | } |
||
4874 | |||
4875 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4876 | _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) |
||
4877 | { |
||
4878 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4879 | (__v8di)_mm512_cvtepu32_epi64(__X), |
||
4880 | (__v8di)__W); |
||
4881 | } |
||
4882 | |||
4883 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4884 | _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) |
||
4885 | { |
||
4886 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4887 | (__v8di)_mm512_cvtepu32_epi64(__X), |
||
4888 | (__v8di)_mm512_setzero_si512()); |
||
4889 | } |
||
4890 | |||
4891 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4892 | _mm512_cvtepu16_epi32(__m256i __A) |
||
4893 | { |
||
4894 | return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); |
||
4895 | } |
||
4896 | |||
4897 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4898 | _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) |
||
4899 | { |
||
4900 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4901 | (__v16si)_mm512_cvtepu16_epi32(__A), |
||
4902 | (__v16si)__W); |
||
4903 | } |
||
4904 | |||
4905 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4906 | _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) |
||
4907 | { |
||
4908 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
4909 | (__v16si)_mm512_cvtepu16_epi32(__A), |
||
4910 | (__v16si)_mm512_setzero_si512()); |
||
4911 | } |
||
4912 | |||
4913 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4914 | _mm512_cvtepu16_epi64(__m128i __A) |
||
4915 | { |
||
4916 | return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); |
||
4917 | } |
||
4918 | |||
4919 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4920 | _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) |
||
4921 | { |
||
4922 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4923 | (__v8di)_mm512_cvtepu16_epi64(__A), |
||
4924 | (__v8di)__W); |
||
4925 | } |
||
4926 | |||
4927 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4928 | _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) |
||
4929 | { |
||
4930 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
4931 | (__v8di)_mm512_cvtepu16_epi64(__A), |
||
4932 | (__v8di)_mm512_setzero_si512()); |
||
4933 | } |
||
4934 | |||
4935 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4936 | _mm512_rorv_epi32 (__m512i __A, __m512i __B) |
||
4937 | { |
||
4938 | return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); |
||
4939 | } |
||
4940 | |||
4941 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4942 | _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
4943 | { |
||
4944 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
4945 | (__v16si)_mm512_rorv_epi32(__A, __B), |
||
4946 | (__v16si)__W); |
||
4947 | } |
||
4948 | |||
4949 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4950 | _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
||
4951 | { |
||
4952 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
4953 | (__v16si)_mm512_rorv_epi32(__A, __B), |
||
4954 | (__v16si)_mm512_setzero_si512()); |
||
4955 | } |
||
4956 | |||
4957 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4958 | _mm512_rorv_epi64 (__m512i __A, __m512i __B) |
||
4959 | { |
||
4960 | return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); |
||
4961 | } |
||
4962 | |||
4963 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4964 | _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
4965 | { |
||
4966 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
4967 | (__v8di)_mm512_rorv_epi64(__A, __B), |
||
4968 | (__v8di)__W); |
||
4969 | } |
||
4970 | |||
4971 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
4972 | _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
||
4973 | { |
||
4974 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
4975 | (__v8di)_mm512_rorv_epi64(__A, __B), |
||
4976 | (__v8di)_mm512_setzero_si512()); |
||
4977 | } |
||
4978 | |||
4979 | |||
4980 | |||
/* Generic integer comparisons producing a mask.  a and b are the vectors to
 * compare and p is the predicate, passed to the builtin as an int.  These
 * are macros rather than functions; presumably p must be a compile-time
 * constant for the underlying builtin (confirm against the builtin's
 * definition).
 *
 * The unmasked forms pass an all-ones mask as the builtin's last operand;
 * the mask_ forms pass the caller's m instead.  The unsigned (epu) forms
 * still cast their operands to the signed vector types, and differ from the
 * signed forms only in the builtin they call (ucmp* instead of cmp*). */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), (__mmask8)(m)))
5020 | |||
/* Rotate-left by the count b, which is passed to the builtin as an int.
 * a is the source vector.  The mask_ forms select, per lane under mask U,
 * between the rotated value and W; the maskz_ forms select between the
 * rotated value and an all-zero vector. */

/* 32-bit elements (prold512 builtin). */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_rol_epi32((a), (b)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_rol_epi32((a), (b)), \
      (__v16si)_mm512_setzero_si512()))

/* 64-bit elements (prolq512 builtin). */
#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_rol_epi64((a), (b)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_rol_epi64((a), (b)), \
      (__v8di)_mm512_setzero_si512()))
5046 | |||
5047 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5048 | _mm512_rolv_epi32 (__m512i __A, __m512i __B) |
||
5049 | { |
||
5050 | return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); |
||
5051 | } |
||
5052 | |||
5053 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5054 | _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
||
5055 | { |
||
5056 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
5057 | (__v16si)_mm512_rolv_epi32(__A, __B), |
||
5058 | (__v16si)__W); |
||
5059 | } |
||
5060 | |||
5061 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5062 | _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
||
5063 | { |
||
5064 | return (__m512i)__builtin_ia32_selectd_512(__U, |
||
5065 | (__v16si)_mm512_rolv_epi32(__A, __B), |
||
5066 | (__v16si)_mm512_setzero_si512()); |
||
5067 | } |
||
5068 | |||
5069 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5070 | _mm512_rolv_epi64 (__m512i __A, __m512i __B) |
||
5071 | { |
||
5072 | return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); |
||
5073 | } |
||
5074 | |||
5075 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5076 | _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
||
5077 | { |
||
5078 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
5079 | (__v8di)_mm512_rolv_epi64(__A, __B), |
||
5080 | (__v8di)__W); |
||
5081 | } |
||
5082 | |||
5083 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5084 | _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
||
5085 | { |
||
5086 | return (__m512i)__builtin_ia32_selectq_512(__U, |
||
5087 | (__v8di)_mm512_rolv_epi64(__A, __B), |
||
5088 | (__v8di)_mm512_setzero_si512()); |
||
5089 | } |
||
5090 | |||
/* Rotate-right by the count B, which is passed to the builtin as an int.
 * A is the source vector.  The mask_ forms select, per lane under mask U,
 * between the rotated value and W; the maskz_ forms select between the
 * rotated value and an all-zero vector. */

/* 32-bit elements (prord512 builtin). */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_ror_epi32((A), (B)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_ror_epi32((A), (B)), \
      (__v16si)_mm512_setzero_si512()))

/* 64-bit elements (prorq512 builtin). */
#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_ror_epi64((A), (B)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_ror_epi64((A), (B)), \
      (__v8di)_mm512_setzero_si512()))
5116 | |||
5117 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5118 | _mm512_slli_epi32(__m512i __A, unsigned int __B) |
||
5119 | { |
||
5120 | return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B); |
||
5121 | } |
||
5122 | |||
5123 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5124 | _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, |
||
5125 | unsigned int __B) |
||
5126 | { |
||
5127 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5128 | (__v16si)_mm512_slli_epi32(__A, __B), |
||
5129 | (__v16si)__W); |
||
5130 | } |
||
5131 | |||
5132 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5133 | _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { |
||
5134 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5135 | (__v16si)_mm512_slli_epi32(__A, __B), |
||
5136 | (__v16si)_mm512_setzero_si512()); |
||
5137 | } |
||
5138 | |||
5139 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5140 | _mm512_slli_epi64(__m512i __A, unsigned int __B) |
||
5141 | { |
||
5142 | return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B); |
||
5143 | } |
||
5144 | |||
5145 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5146 | _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) |
||
5147 | { |
||
5148 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5149 | (__v8di)_mm512_slli_epi64(__A, __B), |
||
5150 | (__v8di)__W); |
||
5151 | } |
||
5152 | |||
5153 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5154 | _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) |
||
5155 | { |
||
5156 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5157 | (__v8di)_mm512_slli_epi64(__A, __B), |
||
5158 | (__v8di)_mm512_setzero_si512()); |
||
5159 | } |
||
5160 | |||
5161 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5162 | _mm512_srli_epi32(__m512i __A, unsigned int __B) |
||
5163 | { |
||
5164 | return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B); |
||
5165 | } |
||
5166 | |||
5167 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5168 | _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, |
||
5169 | unsigned int __B) |
||
5170 | { |
||
5171 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5172 | (__v16si)_mm512_srli_epi32(__A, __B), |
||
5173 | (__v16si)__W); |
||
5174 | } |
||
5175 | |||
5176 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5177 | _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { |
||
5178 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5179 | (__v16si)_mm512_srli_epi32(__A, __B), |
||
5180 | (__v16si)_mm512_setzero_si512()); |
||
5181 | } |
||
5182 | |||
5183 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5184 | _mm512_srli_epi64(__m512i __A, unsigned int __B) |
||
5185 | { |
||
5186 | return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B); |
||
5187 | } |
||
5188 | |||
5189 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5190 | _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, |
||
5191 | unsigned int __B) |
||
5192 | { |
||
5193 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5194 | (__v8di)_mm512_srli_epi64(__A, __B), |
||
5195 | (__v8di)__W); |
||
5196 | } |
||
5197 | |||
5198 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5199 | _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, |
||
5200 | unsigned int __B) |
||
5201 | { |
||
5202 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5203 | (__v8di)_mm512_srli_epi64(__A, __B), |
||
5204 | (__v8di)_mm512_setzero_si512()); |
||
5205 | } |
||
5206 | |||
5207 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5208 | _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
||
5209 | { |
||
5210 | return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
||
5211 | (__v16si) __W, |
||
5212 | (__mmask16) __U); |
||
5213 | } |
||
5214 | |||
5215 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5216 | _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) |
||
5217 | { |
||
5218 | return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
||
5219 | (__v16si) |
||
5220 | _mm512_setzero_si512 (), |
||
5221 | (__mmask16) __U); |
||
5222 | } |
||
5223 | |||
5224 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
5225 | _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) |
||
5226 | { |
||
5227 | __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, |
||
5228 | (__mmask16) __U); |
||
5229 | } |
||
5230 | |||
5231 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5232 | _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
||
5233 | { |
||
5234 | return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, |
||
5235 | (__v16si) __A, |
||
5236 | (__v16si) __W); |
||
5237 | } |
||
5238 | |||
5239 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5240 | _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) |
||
5241 | { |
||
5242 | return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, |
||
5243 | (__v16si) __A, |
||
5244 | (__v16si) _mm512_setzero_si512 ()); |
||
5245 | } |
||
5246 | |||
5247 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5248 | _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
||
5249 | { |
||
5250 | return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, |
||
5251 | (__v8di) __A, |
||
5252 | (__v8di) __W); |
||
5253 | } |
||
5254 | |||
5255 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5256 | _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) |
||
5257 | { |
||
5258 | return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, |
||
5259 | (__v8di) __A, |
||
5260 | (__v8di) _mm512_setzero_si512 ()); |
||
5261 | } |
||
5262 | |||
5263 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5264 | _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
||
5265 | { |
||
5266 | return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
||
5267 | (__v8di) __W, |
||
5268 | (__mmask8) __U); |
||
5269 | } |
||
5270 | |||
5271 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5272 | _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) |
||
5273 | { |
||
5274 | return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
||
5275 | (__v8di) |
||
5276 | _mm512_setzero_si512 (), |
||
5277 | (__mmask8) __U); |
||
5278 | } |
||
5279 | |||
5280 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
5281 | _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) |
||
5282 | { |
||
5283 | __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, |
||
5284 | (__mmask8) __U); |
||
5285 | } |
||
5286 | |||
5287 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
5288 | _mm512_movedup_pd (__m512d __A) |
||
5289 | { |
||
5290 | return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, |
||
5291 | 0, 0, 2, 2, 4, 4, 6, 6); |
||
5292 | } |
||
5293 | |||
5294 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
5295 | _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
5296 | { |
||
5297 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
5298 | (__v8df)_mm512_movedup_pd(__A), |
||
5299 | (__v8df)__W); |
||
5300 | } |
||
5301 | |||
5302 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
5303 | _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) |
||
5304 | { |
||
5305 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
5306 | (__v8df)_mm512_movedup_pd(__A), |
||
5307 | (__v8df)_mm512_setzero_pd()); |
||
5308 | } |
||
5309 | |||
/* Packed double-precision fixupimm wrappers.
 *
 * Operands: A and B are __m512d vectors, C is an __m512i vector, imm is an
 * int immediate.  The _round forms take the rounding argument R; the others
 * pass _MM_FROUND_CUR_DIRECTION.  Unmasked forms pass an all-ones mask.
 *
 * Argument order differs between the masked flavours: the mask_ forms take
 * (A, U, ...) and call the *_mask builtin, the maskz_ forms take (U, A, ...)
 * and call the *_maskz builtin. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
5349 | |||
/* Packed single-precision fixupimm wrappers.
 *
 * Operands: A and B are __m512 vectors, C is an __m512i vector, imm is an
 * int immediate.  The _round forms take the rounding argument R; the others
 * pass _MM_FROUND_CUR_DIRECTION.  Unmasked forms pass an all-ones mask.
 *
 * Argument order differs between the masked flavours: the mask_ forms take
 * (A, U, ...) and call the *_mask builtin, the maskz_ forms take (U, A, ...)
 * and call the *_maskz builtin. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
5389 | |||
/* Scalar double-precision fixupimm wrappers.
 *
 * Operands: A and B are __m128d vectors, C is an __m128i vector, imm is an
 * int immediate.  The _round forms take the rounding argument R; the others
 * pass _MM_FROUND_CUR_DIRECTION.  Unmasked forms pass an all-ones mask.
 *
 * Argument order differs between the masked flavours: the mask_ forms take
 * (A, U, ...) and call the *_mask builtin, the maskz_ forms take (U, A, ...)
 * and call the *_maskz builtin. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
5428 | |||
/* Scalar single-precision fixupimm wrappers.
 *
 * Operands: A and B are __m128 vectors, C is an __m128i vector, imm is an
 * int immediate.  The _round forms take the rounding argument R; the others
 * pass _MM_FROUND_CUR_DIRECTION.  Unmasked forms pass an all-ones mask.
 *
 * Argument order differs between the masked flavours: the mask_ forms take
 * (A, U, ...) and call the *_mask builtin, the maskz_ forms take (U, A, ...)
 * and call the *_maskz builtin. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
5467 | |||
5468 | #define _mm_getexp_round_sd(A, B, R) \ |
||
5469 | ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ |
||
5470 | (__v2df)(__m128d)(B), \ |
||
5471 | (__v2df)_mm_setzero_pd(), \ |
||
5472 | (__mmask8)-1, (int)(R))) |
||
5473 | |||
5474 | |||
5475 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
5476 | _mm_getexp_sd (__m128d __A, __m128d __B) |
||
5477 | { |
||
5478 | return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, |
||
5479 | (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); |
||
5480 | } |
||
5481 | |||
5482 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
5483 | _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
5484 | { |
||
5485 | return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, |
||
5486 | (__v2df) __B, |
||
5487 | (__v2df) __W, |
||
5488 | (__mmask8) __U, |
||
5489 | _MM_FROUND_CUR_DIRECTION); |
||
5490 | } |
||
5491 | |||
5492 | #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ |
||
5493 | ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ |
||
5494 | (__v2df)(__m128d)(B), \ |
||
5495 | (__v2df)(__m128d)(W), \ |
||
5496 | (__mmask8)(U), (int)(R))) |
||
5497 | |||
5498 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
5499 | _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
5500 | { |
||
5501 | return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, |
||
5502 | (__v2df) __B, |
||
5503 | (__v2df) _mm_setzero_pd (), |
||
5504 | (__mmask8) __U, |
||
5505 | _MM_FROUND_CUR_DIRECTION); |
||
5506 | } |
||
5507 | |||
5508 | #define _mm_maskz_getexp_round_sd(U, A, B, R) \ |
||
5509 | ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ |
||
5510 | (__v2df)(__m128d)(B), \ |
||
5511 | (__v2df)_mm_setzero_pd(), \ |
||
5512 | (__mmask8)(U), (int)(R))) |
||
5513 | |||
5514 | #define _mm_getexp_round_ss(A, B, R) \ |
||
5515 | ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ |
||
5516 | (__v4sf)(__m128)(B), \ |
||
5517 | (__v4sf)_mm_setzero_ps(), \ |
||
5518 | (__mmask8)-1, (int)(R))) |
||
5519 | |||
5520 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
5521 | _mm_getexp_ss (__m128 __A, __m128 __B) |
||
5522 | { |
||
5523 | return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, |
||
5524 | (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); |
||
5525 | } |
||
5526 | |||
5527 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
5528 | _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
5529 | { |
||
5530 | return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, |
||
5531 | (__v4sf) __B, |
||
5532 | (__v4sf) __W, |
||
5533 | (__mmask8) __U, |
||
5534 | _MM_FROUND_CUR_DIRECTION); |
||
5535 | } |
||
5536 | |||
5537 | #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ |
||
5538 | ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ |
||
5539 | (__v4sf)(__m128)(B), \ |
||
5540 | (__v4sf)(__m128)(W), \ |
||
5541 | (__mmask8)(U), (int)(R))) |
||
5542 | |||
5543 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
5544 | _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
5545 | { |
||
5546 | return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, |
||
5547 | (__v4sf) __B, |
||
5548 | (__v4sf) _mm_setzero_ps (), |
||
5549 | (__mmask8) __U, |
||
5550 | _MM_FROUND_CUR_DIRECTION); |
||
5551 | } |
||
5552 | |||
5553 | #define _mm_maskz_getexp_round_ss(U, A, B, R) \ |
||
5554 | ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ |
||
5555 | (__v4sf)(__m128)(B), \ |
||
5556 | (__v4sf)_mm_setzero_ps(), \ |
||
5557 | (__mmask8)(U), (int)(R))) |
||
5558 | |||
/// Scalar double-precision getmant wrappers around
/// __builtin_ia32_getmantsd_round_mask.  The builtin's immediate is built
/// from the two selector arguments as ((D) << 2) | (C).  Unmasked and
/// _maskz_ forms pass a zero vector as passthrough, _mask_ forms pass W;
/// unmasked forms use an all-ones mask.  Forms without an R argument pass
/// _MM_FROUND_CUR_DIRECTION.
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)))
||
5603 | |||
/// Scalar single-precision getmant wrappers around
/// __builtin_ia32_getmantss_round_mask.  The builtin's immediate is built
/// from the two selector arguments as ((D) << 2) | (C).  Unmasked and
/// _maskz_ forms pass a zero vector as passthrough, _mask_ forms pass W;
/// unmasked forms use an all-ones mask.  Forms without an R argument pass
/// _MM_FROUND_CUR_DIRECTION.
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)))
||
5648 | |||
5649 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
5650 | _mm512_kmov (__mmask16 __A) |
||
5651 | { |
||
5652 | return __A; |
||
5653 | } |
||
5654 | |||
/// Scalar compare wrappers: forward A and B, the comparison predicate P and
/// the control value R to the vcomisd / vcomiss builtins and yield the
/// builtin's result as an int.
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), \
                               (__v2df)(__m128d)(B), \
                               (int)(P), \
                               (int)(R)))

#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), \
                               (__v4sf)(__m128)(B), \
                               (int)(P), \
                               (int)(R)))
||
5662 | |||
#if defined(__x86_64__)
/// Only defined when targeting x86-64: passes A and the control value R to
/// __builtin_ia32_vcvtsd2si64 and yields the result as a long long.
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), \
                                         (int)(R)))
#endif /* __x86_64__ */
||
5667 | |||
5668 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5669 | _mm512_sll_epi32(__m512i __A, __m128i __B) |
||
5670 | { |
||
5671 | return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); |
||
5672 | } |
||
5673 | |||
5674 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5675 | _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
||
5676 | { |
||
5677 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5678 | (__v16si)_mm512_sll_epi32(__A, __B), |
||
5679 | (__v16si)__W); |
||
5680 | } |
||
5681 | |||
5682 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5683 | _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) |
||
5684 | { |
||
5685 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5686 | (__v16si)_mm512_sll_epi32(__A, __B), |
||
5687 | (__v16si)_mm512_setzero_si512()); |
||
5688 | } |
||
5689 | |||
5690 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5691 | _mm512_sll_epi64(__m512i __A, __m128i __B) |
||
5692 | { |
||
5693 | return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); |
||
5694 | } |
||
5695 | |||
5696 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5697 | _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
||
5698 | { |
||
5699 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5700 | (__v8di)_mm512_sll_epi64(__A, __B), |
||
5701 | (__v8di)__W); |
||
5702 | } |
||
5703 | |||
5704 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5705 | _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) |
||
5706 | { |
||
5707 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5708 | (__v8di)_mm512_sll_epi64(__A, __B), |
||
5709 | (__v8di)_mm512_setzero_si512()); |
||
5710 | } |
||
5711 | |||
5712 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5713 | _mm512_sllv_epi32(__m512i __X, __m512i __Y) |
||
5714 | { |
||
5715 | return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); |
||
5716 | } |
||
5717 | |||
5718 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5719 | _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
||
5720 | { |
||
5721 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5722 | (__v16si)_mm512_sllv_epi32(__X, __Y), |
||
5723 | (__v16si)__W); |
||
5724 | } |
||
5725 | |||
5726 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5727 | _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) |
||
5728 | { |
||
5729 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5730 | (__v16si)_mm512_sllv_epi32(__X, __Y), |
||
5731 | (__v16si)_mm512_setzero_si512()); |
||
5732 | } |
||
5733 | |||
5734 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5735 | _mm512_sllv_epi64(__m512i __X, __m512i __Y) |
||
5736 | { |
||
5737 | return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); |
||
5738 | } |
||
5739 | |||
5740 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5741 | _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
||
5742 | { |
||
5743 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5744 | (__v8di)_mm512_sllv_epi64(__X, __Y), |
||
5745 | (__v8di)__W); |
||
5746 | } |
||
5747 | |||
5748 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5749 | _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) |
||
5750 | { |
||
5751 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5752 | (__v8di)_mm512_sllv_epi64(__X, __Y), |
||
5753 | (__v8di)_mm512_setzero_si512()); |
||
5754 | } |
||
5755 | |||
5756 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5757 | _mm512_sra_epi32(__m512i __A, __m128i __B) |
||
5758 | { |
||
5759 | return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); |
||
5760 | } |
||
5761 | |||
5762 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5763 | _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
||
5764 | { |
||
5765 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5766 | (__v16si)_mm512_sra_epi32(__A, __B), |
||
5767 | (__v16si)__W); |
||
5768 | } |
||
5769 | |||
5770 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5771 | _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) |
||
5772 | { |
||
5773 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5774 | (__v16si)_mm512_sra_epi32(__A, __B), |
||
5775 | (__v16si)_mm512_setzero_si512()); |
||
5776 | } |
||
5777 | |||
5778 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5779 | _mm512_sra_epi64(__m512i __A, __m128i __B) |
||
5780 | { |
||
5781 | return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); |
||
5782 | } |
||
5783 | |||
5784 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5785 | _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
||
5786 | { |
||
5787 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5788 | (__v8di)_mm512_sra_epi64(__A, __B), |
||
5789 | (__v8di)__W); |
||
5790 | } |
||
5791 | |||
5792 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5793 | _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) |
||
5794 | { |
||
5795 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5796 | (__v8di)_mm512_sra_epi64(__A, __B), |
||
5797 | (__v8di)_mm512_setzero_si512()); |
||
5798 | } |
||
5799 | |||
5800 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5801 | _mm512_srav_epi32(__m512i __X, __m512i __Y) |
||
5802 | { |
||
5803 | return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); |
||
5804 | } |
||
5805 | |||
5806 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5807 | _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
||
5808 | { |
||
5809 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5810 | (__v16si)_mm512_srav_epi32(__X, __Y), |
||
5811 | (__v16si)__W); |
||
5812 | } |
||
5813 | |||
5814 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5815 | _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) |
||
5816 | { |
||
5817 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5818 | (__v16si)_mm512_srav_epi32(__X, __Y), |
||
5819 | (__v16si)_mm512_setzero_si512()); |
||
5820 | } |
||
5821 | |||
5822 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5823 | _mm512_srav_epi64(__m512i __X, __m512i __Y) |
||
5824 | { |
||
5825 | return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); |
||
5826 | } |
||
5827 | |||
5828 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5829 | _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
||
5830 | { |
||
5831 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5832 | (__v8di)_mm512_srav_epi64(__X, __Y), |
||
5833 | (__v8di)__W); |
||
5834 | } |
||
5835 | |||
5836 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5837 | _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) |
||
5838 | { |
||
5839 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5840 | (__v8di)_mm512_srav_epi64(__X, __Y), |
||
5841 | (__v8di)_mm512_setzero_si512()); |
||
5842 | } |
||
5843 | |||
5844 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5845 | _mm512_srl_epi32(__m512i __A, __m128i __B) |
||
5846 | { |
||
5847 | return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); |
||
5848 | } |
||
5849 | |||
5850 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5851 | _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
||
5852 | { |
||
5853 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5854 | (__v16si)_mm512_srl_epi32(__A, __B), |
||
5855 | (__v16si)__W); |
||
5856 | } |
||
5857 | |||
5858 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5859 | _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) |
||
5860 | { |
||
5861 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5862 | (__v16si)_mm512_srl_epi32(__A, __B), |
||
5863 | (__v16si)_mm512_setzero_si512()); |
||
5864 | } |
||
5865 | |||
5866 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5867 | _mm512_srl_epi64(__m512i __A, __m128i __B) |
||
5868 | { |
||
5869 | return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); |
||
5870 | } |
||
5871 | |||
5872 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5873 | _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
||
5874 | { |
||
5875 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5876 | (__v8di)_mm512_srl_epi64(__A, __B), |
||
5877 | (__v8di)__W); |
||
5878 | } |
||
5879 | |||
5880 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5881 | _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) |
||
5882 | { |
||
5883 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5884 | (__v8di)_mm512_srl_epi64(__A, __B), |
||
5885 | (__v8di)_mm512_setzero_si512()); |
||
5886 | } |
||
5887 | |||
5888 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5889 | _mm512_srlv_epi32(__m512i __X, __m512i __Y) |
||
5890 | { |
||
5891 | return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); |
||
5892 | } |
||
5893 | |||
5894 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5895 | _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
||
5896 | { |
||
5897 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5898 | (__v16si)_mm512_srlv_epi32(__X, __Y), |
||
5899 | (__v16si)__W); |
||
5900 | } |
||
5901 | |||
5902 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5903 | _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) |
||
5904 | { |
||
5905 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
5906 | (__v16si)_mm512_srlv_epi32(__X, __Y), |
||
5907 | (__v16si)_mm512_setzero_si512()); |
||
5908 | } |
||
5909 | |||
5910 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5911 | _mm512_srlv_epi64 (__m512i __X, __m512i __Y) |
||
5912 | { |
||
5913 | return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); |
||
5914 | } |
||
5915 | |||
5916 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5917 | _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
||
5918 | { |
||
5919 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5920 | (__v8di)_mm512_srlv_epi64(__X, __Y), |
||
5921 | (__v8di)__W); |
||
5922 | } |
||
5923 | |||
5924 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
5925 | _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) |
||
5926 | { |
||
5927 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
5928 | (__v8di)_mm512_srlv_epi64(__X, __Y), |
||
5929 | (__v8di)_mm512_setzero_si512()); |
||
5930 | } |
||
5931 | |||
/// \enum _MM_TERNLOG_ENUM
///    Building blocks for the \a imm operand of the ternary-logic
///    intrinsics.  Each constant is the 8-bit truth-table column of one
///    operand: bit i holds that operand's value for input combination
///    i = (A << 2) | (B << 1) | C.
typedef enum {
  _MM_TERNLOG_A = 0xF0, /* 1111 0000 */
  _MM_TERNLOG_B = 0xCC, /* 1100 1100 */
  _MM_TERNLOG_C = 0xAA  /* 1010 1010 */
} _MM_TERNLOG_ENUM;
||
5940 | |||
/// Ternary-logic wrappers on 32-bit lanes.  A, B and C are forwarded as
/// 16 x i32 vectors together with the 8-bit immediate imm.  The unmasked
/// and _mask_ forms call the *_mask builtin (the unmasked form with an
/// all-ones mask); the _maskz_ form calls the *_maskz builtin.
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask16)-1))

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask16)(U)))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                              (__v16si)(__m512i)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask16)(U)))
||
5955 | |||
/// Ternary-logic wrappers on 64-bit lanes.  A, B and C are forwarded as
/// 8 x i64 vectors together with the 8-bit immediate imm.  The unmasked
/// and _mask_ forms call the *_mask builtin (the unmasked form with an
/// all-ones mask); the _maskz_ form calls the *_maskz builtin.
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                              (__v8di)(__m512i)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
||
5970 | |||
5971 | #ifdef __x86_64__ |
||
5972 | #define _mm_cvt_roundsd_i64(A, R) \ |
||
5973 | ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) |
||
5974 | #endif |
||
5975 | |||
5976 | #define _mm_cvt_roundsd_si32(A, R) \ |
||
5977 | ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) |
||
5978 | |||
5979 | #define _mm_cvt_roundsd_i32(A, R) \ |
||
5980 | ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) |
||
5981 | |||
5982 | #define _mm_cvt_roundsd_u32(A, R) \ |
||
5983 | ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))) |
||
5984 | |||
5985 | static __inline__ unsigned __DEFAULT_FN_ATTRS128 |
||
5986 | _mm_cvtsd_u32 (__m128d __A) |
||
5987 | { |
||
5988 | return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, |
||
5989 | _MM_FROUND_CUR_DIRECTION); |
||
5990 | } |
||
5991 | |||
5992 | #ifdef __x86_64__ |
||
5993 | #define _mm_cvt_roundsd_u64(A, R) \ |
||
5994 | ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ |
||
5995 | (int)(R))) |
||
5996 | |||
5997 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 |
||
5998 | _mm_cvtsd_u64 (__m128d __A) |
||
5999 | { |
||
6000 | return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) |
||
6001 | __A, |
||
6002 | _MM_FROUND_CUR_DIRECTION); |
||
6003 | } |
||
6004 | #endif |
||
6005 | |||
6006 | #define _mm_cvt_roundss_si32(A, R) \ |
||
6007 | ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) |
||
6008 | |||
6009 | #define _mm_cvt_roundss_i32(A, R) \ |
||
6010 | ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) |
||
6011 | |||
6012 | #ifdef __x86_64__ |
||
6013 | #define _mm_cvt_roundss_si64(A, R) \ |
||
6014 | ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) |
||
6015 | |||
6016 | #define _mm_cvt_roundss_i64(A, R) \ |
||
6017 | ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) |
||
6018 | #endif |
||
6019 | |||
6020 | #define _mm_cvt_roundss_u32(A, R) \ |
||
6021 | ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))) |
||
6022 | |||
6023 | static __inline__ unsigned __DEFAULT_FN_ATTRS128 |
||
6024 | _mm_cvtss_u32 (__m128 __A) |
||
6025 | { |
||
6026 | return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, |
||
6027 | _MM_FROUND_CUR_DIRECTION); |
||
6028 | } |
||
6029 | |||
6030 | #ifdef __x86_64__ |
||
6031 | #define _mm_cvt_roundss_u64(A, R) \ |
||
6032 | ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ |
||
6033 | (int)(R))) |
||
6034 | |||
6035 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 |
||
6036 | _mm_cvtss_u64 (__m128 __A) |
||
6037 | { |
||
6038 | return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) |
||
6039 | __A, |
||
6040 | _MM_FROUND_CUR_DIRECTION); |
||
6041 | } |
||
6042 | #endif |
||
6043 | |||
6044 | #define _mm_cvtt_roundsd_i32(A, R) \ |
||
6045 | ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) |
||
6046 | |||
6047 | #define _mm_cvtt_roundsd_si32(A, R) \ |
||
6048 | ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) |
||
6049 | |||
6050 | static __inline__ int __DEFAULT_FN_ATTRS128 |
||
6051 | _mm_cvttsd_i32 (__m128d __A) |
||
6052 | { |
||
6053 | return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, |
||
6054 | _MM_FROUND_CUR_DIRECTION); |
||
6055 | } |
||
6056 | |||
6057 | #ifdef __x86_64__ |
||
6058 | #define _mm_cvtt_roundsd_si64(A, R) \ |
||
6059 | ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) |
||
6060 | |||
6061 | #define _mm_cvtt_roundsd_i64(A, R) \ |
||
6062 | ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) |
||
6063 | |||
6064 | static __inline__ long long __DEFAULT_FN_ATTRS128 |
||
6065 | _mm_cvttsd_i64 (__m128d __A) |
||
6066 | { |
||
6067 | return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, |
||
6068 | _MM_FROUND_CUR_DIRECTION); |
||
6069 | } |
||
6070 | #endif |
||
6071 | |||
6072 | #define _mm_cvtt_roundsd_u32(A, R) \ |
||
6073 | ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))) |
||
6074 | |||
6075 | static __inline__ unsigned __DEFAULT_FN_ATTRS128 |
||
6076 | _mm_cvttsd_u32 (__m128d __A) |
||
6077 | { |
||
6078 | return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, |
||
6079 | _MM_FROUND_CUR_DIRECTION); |
||
6080 | } |
||
6081 | |||
6082 | #ifdef __x86_64__ |
||
6083 | #define _mm_cvtt_roundsd_u64(A, R) \ |
||
6084 | ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ |
||
6085 | (int)(R))) |
||
6086 | |||
6087 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 |
||
6088 | _mm_cvttsd_u64 (__m128d __A) |
||
6089 | { |
||
6090 | return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) |
||
6091 | __A, |
||
6092 | _MM_FROUND_CUR_DIRECTION); |
||
6093 | } |
||
6094 | #endif |
||
6095 | |||
6096 | #define _mm_cvtt_roundss_i32(A, R) \ |
||
6097 | ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) |
||
6098 | |||
6099 | #define _mm_cvtt_roundss_si32(A, R) \ |
||
6100 | ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) |
||
6101 | |||
6102 | static __inline__ int __DEFAULT_FN_ATTRS128 |
||
6103 | _mm_cvttss_i32 (__m128 __A) |
||
6104 | { |
||
6105 | return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, |
||
6106 | _MM_FROUND_CUR_DIRECTION); |
||
6107 | } |
||
6108 | |||
6109 | #ifdef __x86_64__ |
||
6110 | #define _mm_cvtt_roundss_i64(A, R) \ |
||
6111 | ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) |
||
6112 | |||
6113 | #define _mm_cvtt_roundss_si64(A, R) \ |
||
6114 | ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) |
||
6115 | |||
6116 | static __inline__ long long __DEFAULT_FN_ATTRS128 |
||
6117 | _mm_cvttss_i64 (__m128 __A) |
||
6118 | { |
||
6119 | return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, |
||
6120 | _MM_FROUND_CUR_DIRECTION); |
||
6121 | } |
||
6122 | #endif |
||
6123 | |||
6124 | #define _mm_cvtt_roundss_u32(A, R) \ |
||
6125 | ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))) |
||
6126 | |||
6127 | static __inline__ unsigned __DEFAULT_FN_ATTRS128 |
||
6128 | _mm_cvttss_u32 (__m128 __A) |
||
6129 | { |
||
6130 | return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, |
||
6131 | _MM_FROUND_CUR_DIRECTION); |
||
6132 | } |
||
6133 | |||
6134 | #ifdef __x86_64__ |
||
6135 | #define _mm_cvtt_roundss_u64(A, R) \ |
||
6136 | ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ |
||
6137 | (int)(R))) |
||
6138 | |||
6139 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 |
||
6140 | _mm_cvttss_u64 (__m128 __A) |
||
6141 | { |
||
6142 | return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) |
||
6143 | __A, |
||
6144 | _MM_FROUND_CUR_DIRECTION); |
||
6145 | } |
||
6146 | #endif |
||
6147 | |||
/// Immediate-controlled permute of 8 x double via
/// __builtin_ia32_vpermilpd512(X, C).  The masked forms blend that result
/// under mask U: set bits keep the permuted lane, clear bits take the lane
/// from W (_mask_) or zero (_maskz_).
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), \
                                        (int)(C)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))
||
6160 | |||
/// Immediate-controlled permute of 16 x float via
/// __builtin_ia32_vpermilps512(X, C).  The masked forms blend that result
/// under mask U: set bits keep the permuted lane, clear bits take the lane
/// from W (_mask_) or zero (_maskz_).
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), \
                                       (int)(C)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()))
||
6173 | |||
6174 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6175 | _mm512_permutevar_pd(__m512d __A, __m512i __C) |
||
6176 | { |
||
6177 | return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); |
||
6178 | } |
||
6179 | |||
6180 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6181 | _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) |
||
6182 | { |
||
6183 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
6184 | (__v8df)_mm512_permutevar_pd(__A, __C), |
||
6185 | (__v8df)__W); |
||
6186 | } |
||
6187 | |||
6188 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6189 | _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) |
||
6190 | { |
||
6191 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
6192 | (__v8df)_mm512_permutevar_pd(__A, __C), |
||
6193 | (__v8df)_mm512_setzero_pd()); |
||
6194 | } |
||
6195 | |||
6196 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6197 | _mm512_permutevar_ps(__m512 __A, __m512i __C) |
||
6198 | { |
||
6199 | return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); |
||
6200 | } |
||
6201 | |||
6202 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6203 | _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) |
||
6204 | { |
||
6205 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
6206 | (__v16sf)_mm512_permutevar_ps(__A, __C), |
||
6207 | (__v16sf)__W); |
||
6208 | } |
||
6209 | |||
6210 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6211 | _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) |
||
6212 | { |
||
6213 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
6214 | (__v16sf)_mm512_permutevar_ps(__A, __C), |
||
6215 | (__v16sf)_mm512_setzero_ps()); |
||
6216 | } |
||
6217 | |||
6218 | static __inline __m512d __DEFAULT_FN_ATTRS512 |
||
6219 | _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) |
||
6220 | { |
||
6221 | return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, |
||
6222 | (__v8df)__B); |
||
6223 | } |
||
6224 | |||
6225 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6226 | _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) |
||
6227 | { |
||
6228 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
6229 | (__v8df)_mm512_permutex2var_pd(__A, __I, __B), |
||
6230 | (__v8df)__A); |
||
6231 | } |
||
6232 | |||
6233 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6234 | _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, |
||
6235 | __m512d __B) |
||
6236 | { |
||
6237 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
6238 | (__v8df)_mm512_permutex2var_pd(__A, __I, __B), |
||
6239 | (__v8df)(__m512d)__I); |
||
6240 | } |
||
6241 | |||
6242 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6243 | _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, |
||
6244 | __m512d __B) |
||
6245 | { |
||
6246 | return (__m512d)__builtin_ia32_selectpd_512(__U, |
||
6247 | (__v8df)_mm512_permutex2var_pd(__A, __I, __B), |
||
6248 | (__v8df)_mm512_setzero_pd()); |
||
6249 | } |
||
6250 | |||
6251 | static __inline __m512 __DEFAULT_FN_ATTRS512 |
||
6252 | _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) |
||
6253 | { |
||
6254 | return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, |
||
6255 | (__v16sf) __B); |
||
6256 | } |
||
6257 | |||
6258 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6259 | _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) |
||
6260 | { |
||
6261 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
6262 | (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), |
||
6263 | (__v16sf)__A); |
||
6264 | } |
||
6265 | |||
6266 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6267 | _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) |
||
6268 | { |
||
6269 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
6270 | (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), |
||
6271 | (__v16sf)(__m512)__I); |
||
6272 | } |
||
6273 | |||
6274 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6275 | _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) |
||
6276 | { |
||
6277 | return (__m512)__builtin_ia32_selectps_512(__U, |
||
6278 | (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), |
||
6279 | (__v16sf)_mm512_setzero_ps()); |
||
6280 | } |
||
6281 | |||
6282 | |||
/* Calls __builtin_ia32_cvttpd2udq512_mask on A with an undefined 256-bit
 * second operand, mask -1 and rounding argument R; yields an __m256i. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1, (int)(R)))
6287 | |||
/* Calls __builtin_ia32_cvttpd2udq512_mask on A with W as second operand,
 * mask U and rounding argument R; yields an __m256i. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)))
6292 | |||
/* Calls __builtin_ia32_cvttpd2udq512_mask on A with _mm256_setzero_si256()
 * as second operand, mask U and rounding argument R; yields an __m256i. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)))
6297 | |||
6298 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6299 | _mm512_cvttpd_epu32 (__m512d __A) |
||
6300 | { |
||
6301 | return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
||
6302 | (__v8si) |
||
6303 | _mm256_undefined_si256 (), |
||
6304 | (__mmask8) -1, |
||
6305 | _MM_FROUND_CUR_DIRECTION); |
||
6306 | } |
||
6307 | |||
6308 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6309 | _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) |
||
6310 | { |
||
6311 | return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
||
6312 | (__v8si) __W, |
||
6313 | (__mmask8) __U, |
||
6314 | _MM_FROUND_CUR_DIRECTION); |
||
6315 | } |
||
6316 | |||
6317 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6318 | _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) |
||
6319 | { |
||
6320 | return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, |
||
6321 | (__v8si) |
||
6322 | _mm256_setzero_si256 (), |
||
6323 | (__mmask8) __U, |
||
6324 | _MM_FROUND_CUR_DIRECTION); |
||
6325 | } |
||
6326 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with
 * _mm_setzero_pd() as third operand, mask -1, immediate imm and rounding R. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), (int)(R)))
6333 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with
 * _mm_setzero_pd() as third operand, mask -1, immediate imm and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
6340 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with W as third
 * operand, mask U, immediate imm and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(imm), _MM_FROUND_CUR_DIRECTION))
6347 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with W as third
 * operand, mask U, immediate I and rounding argument R. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(I), (int)(R)))
6354 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with
 * _mm_setzero_pd() as third operand, mask U, immediate I and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
6361 | |||
/* Calls __builtin_ia32_rndscalesd_round_mask on A and B with
 * _mm_setzero_pd() as third operand, mask U, immediate I and rounding R. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), (int)(R)))
6368 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with
 * _mm_setzero_ps() as third operand, mask -1, immediate imm and rounding R. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), (int)(R)))
6375 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with
 * _mm_setzero_ps() as third operand, mask -1, immediate imm and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
6382 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with W as third
 * operand, mask U, immediate I and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
6389 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with W as third
 * operand, mask U, immediate I and rounding argument R. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), (int)(R)))
6396 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with
 * _mm_setzero_ps() as third operand, mask U, immediate I and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
6403 | |||
/* Calls __builtin_ia32_rndscaless_round_mask on A and B with
 * _mm_setzero_ps() as third operand, mask U, immediate I and rounding R. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), (int)(R)))
6410 | |||
/* Calls __builtin_ia32_scalefpd512_mask on A and B with an undefined third
 * operand, mask -1 and rounding argument R. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))
6416 | |||
/* Calls __builtin_ia32_scalefpd512_mask on A and B with W as third operand,
 * mask U and rounding argument R. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))
6422 | |||
/* Calls __builtin_ia32_scalefpd512_mask on A and B with _mm512_setzero_pd()
 * as third operand, mask U and rounding argument R. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6428 | |||
6429 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6430 | _mm512_scalef_pd (__m512d __A, __m512d __B) |
||
6431 | { |
||
6432 | return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
||
6433 | (__v8df) __B, |
||
6434 | (__v8df) |
||
6435 | _mm512_undefined_pd (), |
||
6436 | (__mmask8) -1, |
||
6437 | _MM_FROUND_CUR_DIRECTION); |
||
6438 | } |
||
6439 | |||
6440 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6441 | _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) |
||
6442 | { |
||
6443 | return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
||
6444 | (__v8df) __B, |
||
6445 | (__v8df) __W, |
||
6446 | (__mmask8) __U, |
||
6447 | _MM_FROUND_CUR_DIRECTION); |
||
6448 | } |
||
6449 | |||
6450 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6451 | _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) |
||
6452 | { |
||
6453 | return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, |
||
6454 | (__v8df) __B, |
||
6455 | (__v8df) |
||
6456 | _mm512_setzero_pd (), |
||
6457 | (__mmask8) __U, |
||
6458 | _MM_FROUND_CUR_DIRECTION); |
||
6459 | } |
||
6460 | |||
/* Calls __builtin_ia32_scalefps512_mask on A and B with an undefined third
 * operand, mask -1 and rounding argument R. */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))
6466 | |||
/* Calls __builtin_ia32_scalefps512_mask on A and B with W as third operand,
 * mask U and rounding argument R. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))
6472 | |||
/* Calls __builtin_ia32_scalefps512_mask on A and B with _mm512_setzero_ps()
 * as third operand, mask U and rounding argument R. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
6478 | |||
6479 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6480 | _mm512_scalef_ps (__m512 __A, __m512 __B) |
||
6481 | { |
||
6482 | return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
||
6483 | (__v16sf) __B, |
||
6484 | (__v16sf) |
||
6485 | _mm512_undefined_ps (), |
||
6486 | (__mmask16) -1, |
||
6487 | _MM_FROUND_CUR_DIRECTION); |
||
6488 | } |
||
6489 | |||
6490 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6491 | _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) |
||
6492 | { |
||
6493 | return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
||
6494 | (__v16sf) __B, |
||
6495 | (__v16sf) __W, |
||
6496 | (__mmask16) __U, |
||
6497 | _MM_FROUND_CUR_DIRECTION); |
||
6498 | } |
||
6499 | |||
6500 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6501 | _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) |
||
6502 | { |
||
6503 | return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, |
||
6504 | (__v16sf) __B, |
||
6505 | (__v16sf) |
||
6506 | _mm512_setzero_ps (), |
||
6507 | (__mmask16) __U, |
||
6508 | _MM_FROUND_CUR_DIRECTION); |
||
6509 | } |
||
6510 | |||
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with _mm_setzero_pd()
 * as third operand, mask -1 and rounding argument R. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
6516 | |||
6517 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
6518 | _mm_scalef_sd (__m128d __A, __m128d __B) |
||
6519 | { |
||
6520 | return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, |
||
6521 | (__v2df)( __B), (__v2df) _mm_setzero_pd(), |
||
6522 | (__mmask8) -1, |
||
6523 | _MM_FROUND_CUR_DIRECTION); |
||
6524 | } |
||
6525 | |||
6526 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
6527 | _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
6528 | { |
||
6529 | return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, |
||
6530 | (__v2df) __B, |
||
6531 | (__v2df) __W, |
||
6532 | (__mmask8) __U, |
||
6533 | _MM_FROUND_CUR_DIRECTION); |
||
6534 | } |
||
6535 | |||
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with W as third
 * operand, mask U and rounding argument R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
6541 | |||
6542 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
6543 | _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
6544 | { |
||
6545 | return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, |
||
6546 | (__v2df) __B, |
||
6547 | (__v2df) _mm_setzero_pd (), |
||
6548 | (__mmask8) __U, |
||
6549 | _MM_FROUND_CUR_DIRECTION); |
||
6550 | } |
||
6551 | |||
/* Calls __builtin_ia32_scalefsd_round_mask on A and B with _mm_setzero_pd()
 * as third operand, mask U and rounding argument R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6557 | |||
/* Calls __builtin_ia32_scalefss_round_mask on A and B with _mm_setzero_ps()
 * as third operand, mask -1 and rounding argument R. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
6563 | |||
6564 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
6565 | _mm_scalef_ss (__m128 __A, __m128 __B) |
||
6566 | { |
||
6567 | return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, |
||
6568 | (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), |
||
6569 | (__mmask8) -1, |
||
6570 | _MM_FROUND_CUR_DIRECTION); |
||
6571 | } |
||
6572 | |||
6573 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
6574 | _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
6575 | { |
||
6576 | return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, |
||
6577 | (__v4sf) __B, |
||
6578 | (__v4sf) __W, |
||
6579 | (__mmask8) __U, |
||
6580 | _MM_FROUND_CUR_DIRECTION); |
||
6581 | } |
||
6582 | |||
/* Calls __builtin_ia32_scalefss_round_mask on A and B with W as third
 * operand, mask U and rounding argument R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
6588 | |||
6589 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
6590 | _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
6591 | { |
||
6592 | return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, |
||
6593 | (__v4sf) __B, |
||
6594 | (__v4sf) _mm_setzero_ps (), |
||
6595 | (__mmask8) __U, |
||
6596 | _MM_FROUND_CUR_DIRECTION); |
||
6597 | } |
||
6598 | |||
/* Calls __builtin_ia32_scalefss_round_mask on A and B with _mm_setzero_ps()
 * as third operand, mask U and rounding argument R. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
6605 | |||
6606 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6607 | _mm512_srai_epi32(__m512i __A, unsigned int __B) |
||
6608 | { |
||
6609 | return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B); |
||
6610 | } |
||
6611 | |||
6612 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6613 | _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, |
||
6614 | unsigned int __B) |
||
6615 | { |
||
6616 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
6617 | (__v16si)_mm512_srai_epi32(__A, __B), |
||
6618 | (__v16si)__W); |
||
6619 | } |
||
6620 | |||
6621 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6622 | _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, |
||
6623 | unsigned int __B) { |
||
6624 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, |
||
6625 | (__v16si)_mm512_srai_epi32(__A, __B), |
||
6626 | (__v16si)_mm512_setzero_si512()); |
||
6627 | } |
||
6628 | |||
6629 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6630 | _mm512_srai_epi64(__m512i __A, unsigned int __B) |
||
6631 | { |
||
6632 | return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B); |
||
6633 | } |
||
6634 | |||
6635 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6636 | _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) |
||
6637 | { |
||
6638 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
6639 | (__v8di)_mm512_srai_epi64(__A, __B), |
||
6640 | (__v8di)__W); |
||
6641 | } |
||
6642 | |||
6643 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6644 | _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) |
||
6645 | { |
||
6646 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
6647 | (__v8di)_mm512_srai_epi64(__A, __B), |
||
6648 | (__v8di)_mm512_setzero_si512()); |
||
6649 | } |
||
6650 | |||
/* Wraps __builtin_ia32_shuf_f32x4 on A and B (as sixteen floats each) with
 * the control imm converted to int. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(imm)))
6654 | |||
/* Masked form of _mm512_shuffle_f32x4: its result and W go to
 * __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))
6659 | |||
/* Zero-masked form of _mm512_shuffle_f32x4: its result and
 * _mm512_setzero_ps() go to __builtin_ia32_selectps_512 under mask U. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
6664 | |||
/* Wraps __builtin_ia32_shuf_f64x2 on A and B (as eight doubles each) with
 * the control imm converted to int. */
#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(imm)))
6668 | |||
/* Masked form of _mm512_shuffle_f64x2: its result and W go to
 * __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))
6673 | |||
/* Zero-masked form of _mm512_shuffle_f64x2: its result and
 * _mm512_setzero_pd() go to __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
6678 | |||
/* Wraps __builtin_ia32_shuf_i32x4 on A and B (as sixteen 32-bit ints each)
 * with the control imm converted to int. */
#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm)))
6682 | |||
/* Masked form of _mm512_shuffle_i32x4: its result and W go to
 * __builtin_ia32_selectd_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))
6687 | |||
/* Zero-masked form of _mm512_shuffle_i32x4: its result and
 * _mm512_setzero_si512() go to __builtin_ia32_selectd_512 under mask U. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
6692 | |||
/* Wraps __builtin_ia32_shuf_i64x2 on A and B (as eight 64-bit ints each)
 * with the control imm converted to int. */
#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm)))
6696 | |||
/* Masked form of _mm512_shuffle_i64x2: its result and W go to
 * __builtin_ia32_selectq_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))
6701 | |||
/* Zero-masked form of _mm512_shuffle_i64x2: its result and
 * _mm512_setzero_si512() go to __builtin_ia32_selectq_512 under mask U. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
6706 | |||
/* Wraps __builtin_ia32_shufpd512 on A and B (as eight doubles each) with the
 * control M converted to int. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(M)))
6710 | |||
/* Masked form of _mm512_shuffle_pd: its result and W go to
 * __builtin_ia32_selectpd_512 under the 8-bit mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)))
6715 | |||
/* Zero-masked form of _mm512_shuffle_pd: its result and _mm512_setzero_pd()
 * go to __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()))
6720 | |||
/* Wraps __builtin_ia32_shufps512 on A and B (as sixteen floats each) with
 * the control M converted to int. */
#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(M)))
6724 | |||
/* Masked form of _mm512_shuffle_ps: its result and W go to
 * __builtin_ia32_selectps_512 under the 16-bit mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)))
6729 | |||
/* Zero-masked form of _mm512_shuffle_ps: its result and _mm512_setzero_ps()
 * go to __builtin_ia32_selectps_512 under mask U. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()))
6734 | |||
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with _mm_setzero_pd() as
 * third operand, mask -1 and rounding argument R. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
6740 | |||
6741 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
6742 | _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
6743 | { |
||
6744 | return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, |
||
6745 | (__v2df) __B, |
||
6746 | (__v2df) __W, |
||
6747 | (__mmask8) __U, |
||
6748 | _MM_FROUND_CUR_DIRECTION); |
||
6749 | } |
||
6750 | |||
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with W as third operand,
 * mask U and rounding argument R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))
6756 | |||
6757 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
6758 | _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
6759 | { |
||
6760 | return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, |
||
6761 | (__v2df) __B, |
||
6762 | (__v2df) _mm_setzero_pd (), |
||
6763 | (__mmask8) __U, |
||
6764 | _MM_FROUND_CUR_DIRECTION); |
||
6765 | } |
||
6766 | |||
/* Calls __builtin_ia32_sqrtsd_round_mask on A and B with _mm_setzero_pd() as
 * third operand, mask U and rounding argument R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
6772 | |||
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with _mm_setzero_ps() as
 * third operand, mask -1 and rounding argument R. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
6778 | |||
6779 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
6780 | _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
6781 | { |
||
6782 | return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, |
||
6783 | (__v4sf) __B, |
||
6784 | (__v4sf) __W, |
||
6785 | (__mmask8) __U, |
||
6786 | _MM_FROUND_CUR_DIRECTION); |
||
6787 | } |
||
6788 | |||
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with W as third operand,
 * mask U and rounding argument R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))
6794 | |||
6795 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
6796 | _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
6797 | { |
||
6798 | return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, |
||
6799 | (__v4sf) __B, |
||
6800 | (__v4sf) _mm_setzero_ps (), |
||
6801 | (__mmask8) __U, |
||
6802 | _MM_FROUND_CUR_DIRECTION); |
||
6803 | } |
||
6804 | |||
/* Calls __builtin_ia32_sqrtss_round_mask on A and B with _mm_setzero_ps() as
 * third operand, mask U and rounding argument R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
6810 | |||
6811 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6812 | _mm512_broadcast_f32x4(__m128 __A) |
||
6813 | { |
||
6814 | return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, |
||
6815 | 0, 1, 2, 3, 0, 1, 2, 3, |
||
6816 | 0, 1, 2, 3, 0, 1, 2, 3); |
||
6817 | } |
||
6818 | |||
6819 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6820 | _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) |
||
6821 | { |
||
6822 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
6823 | (__v16sf)_mm512_broadcast_f32x4(__A), |
||
6824 | (__v16sf)__O); |
||
6825 | } |
||
6826 | |||
6827 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6828 | _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) |
||
6829 | { |
||
6830 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
6831 | (__v16sf)_mm512_broadcast_f32x4(__A), |
||
6832 | (__v16sf)_mm512_setzero_ps()); |
||
6833 | } |
||
6834 | |||
6835 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6836 | _mm512_broadcast_f64x4(__m256d __A) |
||
6837 | { |
||
6838 | return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, |
||
6839 | 0, 1, 2, 3, 0, 1, 2, 3); |
||
6840 | } |
||
6841 | |||
6842 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6843 | _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) |
||
6844 | { |
||
6845 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, |
||
6846 | (__v8df)_mm512_broadcast_f64x4(__A), |
||
6847 | (__v8df)__O); |
||
6848 | } |
||
6849 | |||
6850 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6851 | _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) |
||
6852 | { |
||
6853 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, |
||
6854 | (__v8df)_mm512_broadcast_f64x4(__A), |
||
6855 | (__v8df)_mm512_setzero_pd()); |
||
6856 | } |
||
6857 | |||
6858 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6859 | _mm512_broadcast_i32x4(__m128i __A) |
||
6860 | { |
||
6861 | return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, |
||
6862 | 0, 1, 2, 3, 0, 1, 2, 3, |
||
6863 | 0, 1, 2, 3, 0, 1, 2, 3); |
||
6864 | } |
||
6865 | |||
6866 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6867 | _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) |
||
6868 | { |
||
6869 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
6870 | (__v16si)_mm512_broadcast_i32x4(__A), |
||
6871 | (__v16si)__O); |
||
6872 | } |
||
6873 | |||
6874 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6875 | _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) |
||
6876 | { |
||
6877 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
6878 | (__v16si)_mm512_broadcast_i32x4(__A), |
||
6879 | (__v16si)_mm512_setzero_si512()); |
||
6880 | } |
||
6881 | |||
6882 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6883 | _mm512_broadcast_i64x4(__m256i __A) |
||
6884 | { |
||
6885 | return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, |
||
6886 | 0, 1, 2, 3, 0, 1, 2, 3); |
||
6887 | } |
||
6888 | |||
6889 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6890 | _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) |
||
6891 | { |
||
6892 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
6893 | (__v8di)_mm512_broadcast_i64x4(__A), |
||
6894 | (__v8di)__O); |
||
6895 | } |
||
6896 | |||
6897 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
6898 | _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) |
||
6899 | { |
||
6900 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
6901 | (__v8di)_mm512_broadcast_i64x4(__A), |
||
6902 | (__v8di)_mm512_setzero_si512()); |
||
6903 | } |
||
6904 | |||
6905 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6906 | _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) |
||
6907 | { |
||
6908 | return (__m512d)__builtin_ia32_selectpd_512(__M, |
||
6909 | (__v8df) _mm512_broadcastsd_pd(__A), |
||
6910 | (__v8df) __O); |
||
6911 | } |
||
6912 | |||
6913 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
6914 | _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) |
||
6915 | { |
||
6916 | return (__m512d)__builtin_ia32_selectpd_512(__M, |
||
6917 | (__v8df) _mm512_broadcastsd_pd(__A), |
||
6918 | (__v8df) _mm512_setzero_pd()); |
||
6919 | } |
||
6920 | |||
6921 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6922 | _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) |
||
6923 | { |
||
6924 | return (__m512)__builtin_ia32_selectps_512(__M, |
||
6925 | (__v16sf) _mm512_broadcastss_ps(__A), |
||
6926 | (__v16sf) __O); |
||
6927 | } |
||
6928 | |||
6929 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
6930 | _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) |
||
6931 | { |
||
6932 | return (__m512)__builtin_ia32_selectps_512(__M, |
||
6933 | (__v16sf) _mm512_broadcastss_ps(__A), |
||
6934 | (__v16sf) _mm512_setzero_ps()); |
||
6935 | } |
||
6936 | |||
6937 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
6938 | _mm512_cvtsepi32_epi8 (__m512i __A) |
||
6939 | { |
||
6940 | return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
||
6941 | (__v16qi) _mm_undefined_si128 (), |
||
6942 | (__mmask16) -1); |
||
6943 | } |
||
6944 | |||
6945 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
6946 | _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
||
6947 | { |
||
6948 | return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
||
6949 | (__v16qi) __O, __M); |
||
6950 | } |
||
6951 | |||
6952 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
6953 | _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) |
||
6954 | { |
||
6955 | return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, |
||
6956 | (__v16qi) _mm_setzero_si128 (), |
||
6957 | __M); |
||
6958 | } |
||
6959 | |||
6960 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
6961 | _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
||
6962 | { |
||
6963 | __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
||
6964 | } |
||
6965 | |||
6966 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6967 | _mm512_cvtsepi32_epi16 (__m512i __A) |
||
6968 | { |
||
6969 | return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
||
6970 | (__v16hi) _mm256_undefined_si256 (), |
||
6971 | (__mmask16) -1); |
||
6972 | } |
||
6973 | |||
6974 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6975 | _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
||
6976 | { |
||
6977 | return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
||
6978 | (__v16hi) __O, __M); |
||
6979 | } |
||
6980 | |||
6981 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
6982 | _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) |
||
6983 | { |
||
6984 | return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, |
||
6985 | (__v16hi) _mm256_setzero_si256 (), |
||
6986 | __M); |
||
6987 | } |
||
6988 | |||
6989 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
6990 | _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) |
||
6991 | { |
||
6992 | __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); |
||
6993 | } |
||
6994 | |||
6995 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
6996 | _mm512_cvtsepi64_epi8 (__m512i __A) |
||
6997 | { |
||
6998 | return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
||
6999 | (__v16qi) _mm_undefined_si128 (), |
||
7000 | (__mmask8) -1); |
||
7001 | } |
||
7002 | |||
7003 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7004 | _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7005 | { |
||
7006 | return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
||
7007 | (__v16qi) __O, __M); |
||
7008 | } |
||
7009 | |||
7010 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7011 | _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) |
||
7012 | { |
||
7013 | return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, |
||
7014 | (__v16qi) _mm_setzero_si128 (), |
||
7015 | __M); |
||
7016 | } |
||
7017 | |||
7018 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7019 | _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
||
7020 | { |
||
7021 | __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
||
7022 | } |
||
7023 | |||
7024 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7025 | _mm512_cvtsepi64_epi32 (__m512i __A) |
||
7026 | { |
||
7027 | return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
||
7028 | (__v8si) _mm256_undefined_si256 (), |
||
7029 | (__mmask8) -1); |
||
7030 | } |
||
7031 | |||
7032 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7033 | _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
||
7034 | { |
||
7035 | return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
||
7036 | (__v8si) __O, __M); |
||
7037 | } |
||
7038 | |||
7039 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7040 | _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) |
||
7041 | { |
||
7042 | return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, |
||
7043 | (__v8si) _mm256_setzero_si256 (), |
||
7044 | __M); |
||
7045 | } |
||
7046 | |||
7047 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7048 | _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) |
||
7049 | { |
||
7050 | __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); |
||
7051 | } |
||
7052 | |||
7053 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7054 | _mm512_cvtsepi64_epi16 (__m512i __A) |
||
7055 | { |
||
7056 | return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
||
7057 | (__v8hi) _mm_undefined_si128 (), |
||
7058 | (__mmask8) -1); |
||
7059 | } |
||
7060 | |||
7061 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7062 | _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7063 | { |
||
7064 | return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
||
7065 | (__v8hi) __O, __M); |
||
7066 | } |
||
7067 | |||
7068 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7069 | _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) |
||
7070 | { |
||
7071 | return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, |
||
7072 | (__v8hi) _mm_setzero_si128 (), |
||
7073 | __M); |
||
7074 | } |
||
7075 | |||
7076 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7077 | _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) |
||
7078 | { |
||
7079 | __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); |
||
7080 | } |
||
7081 | |||
7082 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7083 | _mm512_cvtusepi32_epi8 (__m512i __A) |
||
7084 | { |
||
7085 | return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
||
7086 | (__v16qi) _mm_undefined_si128 (), |
||
7087 | (__mmask16) -1); |
||
7088 | } |
||
7089 | |||
7090 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7091 | _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
||
7092 | { |
||
7093 | return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
||
7094 | (__v16qi) __O, |
||
7095 | __M); |
||
7096 | } |
||
7097 | |||
7098 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7099 | _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) |
||
7100 | { |
||
7101 | return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, |
||
7102 | (__v16qi) _mm_setzero_si128 (), |
||
7103 | __M); |
||
7104 | } |
||
7105 | |||
7106 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7107 | _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
||
7108 | { |
||
7109 | __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
||
7110 | } |
||
7111 | |||
7112 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7113 | _mm512_cvtusepi32_epi16 (__m512i __A) |
||
7114 | { |
||
7115 | return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
||
7116 | (__v16hi) _mm256_undefined_si256 (), |
||
7117 | (__mmask16) -1); |
||
7118 | } |
||
7119 | |||
7120 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7121 | _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
||
7122 | { |
||
7123 | return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
||
7124 | (__v16hi) __O, |
||
7125 | __M); |
||
7126 | } |
||
7127 | |||
7128 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7129 | _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) |
||
7130 | { |
||
7131 | return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, |
||
7132 | (__v16hi) _mm256_setzero_si256 (), |
||
7133 | __M); |
||
7134 | } |
||
7135 | |||
7136 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7137 | _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) |
||
7138 | { |
||
7139 | __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); |
||
7140 | } |
||
7141 | |||
7142 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7143 | _mm512_cvtusepi64_epi8 (__m512i __A) |
||
7144 | { |
||
7145 | return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
||
7146 | (__v16qi) _mm_undefined_si128 (), |
||
7147 | (__mmask8) -1); |
||
7148 | } |
||
7149 | |||
7150 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7151 | _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7152 | { |
||
7153 | return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
||
7154 | (__v16qi) __O, |
||
7155 | __M); |
||
7156 | } |
||
7157 | |||
7158 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7159 | _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) |
||
7160 | { |
||
7161 | return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, |
||
7162 | (__v16qi) _mm_setzero_si128 (), |
||
7163 | __M); |
||
7164 | } |
||
7165 | |||
7166 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7167 | _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
||
7168 | { |
||
7169 | __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
||
7170 | } |
||
7171 | |||
7172 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7173 | _mm512_cvtusepi64_epi32 (__m512i __A) |
||
7174 | { |
||
7175 | return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
||
7176 | (__v8si) _mm256_undefined_si256 (), |
||
7177 | (__mmask8) -1); |
||
7178 | } |
||
7179 | |||
7180 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7181 | _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
||
7182 | { |
||
7183 | return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
||
7184 | (__v8si) __O, __M); |
||
7185 | } |
||
7186 | |||
7187 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7188 | _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) |
||
7189 | { |
||
7190 | return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, |
||
7191 | (__v8si) _mm256_setzero_si256 (), |
||
7192 | __M); |
||
7193 | } |
||
7194 | |||
7195 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7196 | _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) |
||
7197 | { |
||
7198 | __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); |
||
7199 | } |
||
7200 | |||
7201 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7202 | _mm512_cvtusepi64_epi16 (__m512i __A) |
||
7203 | { |
||
7204 | return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
||
7205 | (__v8hi) _mm_undefined_si128 (), |
||
7206 | (__mmask8) -1); |
||
7207 | } |
||
7208 | |||
7209 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7210 | _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7211 | { |
||
7212 | return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
||
7213 | (__v8hi) __O, __M); |
||
7214 | } |
||
7215 | |||
7216 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7217 | _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) |
||
7218 | { |
||
7219 | return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, |
||
7220 | (__v8hi) _mm_setzero_si128 (), |
||
7221 | __M); |
||
7222 | } |
||
7223 | |||
7224 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7225 | _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) |
||
7226 | { |
||
7227 | __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); |
||
7228 | } |
||
7229 | |||
7230 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7231 | _mm512_cvtepi32_epi8 (__m512i __A) |
||
7232 | { |
||
7233 | return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
||
7234 | (__v16qi) _mm_undefined_si128 (), |
||
7235 | (__mmask16) -1); |
||
7236 | } |
||
7237 | |||
7238 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7239 | _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) |
||
7240 | { |
||
7241 | return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
||
7242 | (__v16qi) __O, __M); |
||
7243 | } |
||
7244 | |||
7245 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7246 | _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) |
||
7247 | { |
||
7248 | return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, |
||
7249 | (__v16qi) _mm_setzero_si128 (), |
||
7250 | __M); |
||
7251 | } |
||
7252 | |||
7253 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7254 | _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) |
||
7255 | { |
||
7256 | __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); |
||
7257 | } |
||
7258 | |||
7259 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7260 | _mm512_cvtepi32_epi16 (__m512i __A) |
||
7261 | { |
||
7262 | return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
||
7263 | (__v16hi) _mm256_undefined_si256 (), |
||
7264 | (__mmask16) -1); |
||
7265 | } |
||
7266 | |||
7267 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7268 | _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) |
||
7269 | { |
||
7270 | return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
||
7271 | (__v16hi) __O, __M); |
||
7272 | } |
||
7273 | |||
7274 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7275 | _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) |
||
7276 | { |
||
7277 | return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, |
||
7278 | (__v16hi) _mm256_setzero_si256 (), |
||
7279 | __M); |
||
7280 | } |
||
7281 | |||
7282 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7283 | _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) |
||
7284 | { |
||
7285 | __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); |
||
7286 | } |
||
7287 | |||
7288 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7289 | _mm512_cvtepi64_epi8 (__m512i __A) |
||
7290 | { |
||
7291 | return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
||
7292 | (__v16qi) _mm_undefined_si128 (), |
||
7293 | (__mmask8) -1); |
||
7294 | } |
||
7295 | |||
7296 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7297 | _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7298 | { |
||
7299 | return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
||
7300 | (__v16qi) __O, __M); |
||
7301 | } |
||
7302 | |||
7303 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7304 | _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) |
||
7305 | { |
||
7306 | return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, |
||
7307 | (__v16qi) _mm_setzero_si128 (), |
||
7308 | __M); |
||
7309 | } |
||
7310 | |||
7311 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7312 | _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) |
||
7313 | { |
||
7314 | __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); |
||
7315 | } |
||
7316 | |||
7317 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7318 | _mm512_cvtepi64_epi32 (__m512i __A) |
||
7319 | { |
||
7320 | return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
||
7321 | (__v8si) _mm256_undefined_si256 (), |
||
7322 | (__mmask8) -1); |
||
7323 | } |
||
7324 | |||
7325 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7326 | _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) |
||
7327 | { |
||
7328 | return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
||
7329 | (__v8si) __O, __M); |
||
7330 | } |
||
7331 | |||
7332 | static __inline__ __m256i __DEFAULT_FN_ATTRS512 |
||
7333 | _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) |
||
7334 | { |
||
7335 | return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, |
||
7336 | (__v8si) _mm256_setzero_si256 (), |
||
7337 | __M); |
||
7338 | } |
||
7339 | |||
7340 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7341 | _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) |
||
7342 | { |
||
7343 | __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); |
||
7344 | } |
||
7345 | |||
7346 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7347 | _mm512_cvtepi64_epi16 (__m512i __A) |
||
7348 | { |
||
7349 | return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
||
7350 | (__v8hi) _mm_undefined_si128 (), |
||
7351 | (__mmask8) -1); |
||
7352 | } |
||
7353 | |||
7354 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7355 | _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) |
||
7356 | { |
||
7357 | return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
||
7358 | (__v8hi) __O, __M); |
||
7359 | } |
||
7360 | |||
7361 | static __inline__ __m128i __DEFAULT_FN_ATTRS512 |
||
7362 | _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) |
||
7363 | { |
||
7364 | return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, |
||
7365 | (__v8hi) _mm_setzero_si128 (), |
||
7366 | __M); |
||
7367 | } |
||
7368 | |||
7369 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
7370 | _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) |
||
7371 | { |
||
7372 | __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); |
||
7373 | } |
||
7374 | |||
/* 128-bit extraction from a 512-bit integer vector via
 * __builtin_ia32_extracti32x4_mask.  imm is forwarded as int (presumably a
 * compile-time lane selector).  Unmasked: undefined pass-through + all-ones
 * mask; mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)_mm_undefined_si128(), (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask( \
      (__v16si)(__m512i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
7389 | |||
/* 256-bit extraction from a 512-bit integer vector via
 * __builtin_ia32_extracti64x4_mask.  imm is forwarded as int (presumably a
 * compile-time half selector).  Unmasked: undefined pass-through + all-ones
 * mask; mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_undefined_si256(), (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask( \
      (__v8di)(__m512i)(A), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(U)))
7404 | |||
/* Insert a 256-bit double vector B into the 512-bit double vector A via
 * __builtin_ia32_insertf64x4; imm is forwarded as int.  The mask_/maskz_
 * forms run the unmasked macro and then blend per element with
 * __builtin_ia32_selectpd_512 against W or a zero vector respectively.  */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4( \
      (__v8df)(__m512d)(A), (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
7418 | |||
/* Insert a 256-bit integer vector B into the 512-bit integer vector A via
 * __builtin_ia32_inserti64x4; imm is forwarded as int.  The mask_/maskz_
 * forms run the unmasked macro and then blend per 64-bit element with
 * __builtin_ia32_selectq_512 against W or a zero vector respectively.  */
#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4( \
      (__v8di)(__m512i)(A), (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
7432 | |||
/* Insert a 128-bit float vector B into the 512-bit float vector A via
 * __builtin_ia32_insertf32x4; imm is forwarded as int.  The mask_/maskz_
 * forms run the unmasked macro and then blend per element with
 * __builtin_ia32_selectps_512 against W or a zero vector respectively.  */
#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4( \
      (__v16sf)(__m512)(A), (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
7446 | |||
/* Insert a 128-bit integer vector B into the 512-bit integer vector A via
 * __builtin_ia32_inserti32x4; imm is forwarded as int.  The mask_/maskz_
 * forms run the unmasked macro and then blend per 32-bit element with
 * __builtin_ia32_selectd_512 against W or a zero vector respectively.  */
#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4( \
      (__v16si)(__m512i)(A), (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
7460 | |||
/* Double-precision getmant with an explicit rounding/SAE argument R, via
 * __builtin_ia32_getmantpd512_mask.  B and C are packed into one immediate
 * as ((C) << 2) | (B).  Unmasked: undefined pass-through + all-ones mask;
 * mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
7478 | |||
/* Unmasked double-precision getmant using the current rounding direction,
 * via __builtin_ia32_getmantpd512_mask.  B and C are packed into one
 * immediate as ((C) << 2) | (B).
 *
 * The mask is all-ones, so the pass-through operand should never be selected
 * (standard merge-mask behaviour; not visible in this file).  It is therefore
 * an undefined vector, matching _mm512_getmant_round_pd and
 * _mm512_getmant_ps, rather than a zero vector that would have to be
 * materialised for nothing.  */
#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))
7485 | |||
/* Masked double-precision getmant using the current rounding direction, via
 * __builtin_ia32_getmantpd512_mask.  B and C are packed into one immediate
 * as ((C) << 2) | (B).  mask_: W is the pass-through; maskz_: zero
 * pass-through.  */
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
7499 | |||
/* Single-precision getmant with an explicit rounding/SAE argument R, via
 * __builtin_ia32_getmantps512_mask.  B and C are packed into one immediate
 * as ((C) << 2) | (B).  Unmasked: undefined pass-through + all-ones mask;
 * mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
7517 | |||
/* Single-precision getmant using the current rounding direction, via
 * __builtin_ia32_getmantps512_mask.  B and C are packed into one immediate
 * as ((C) << 2) | (B).  Unmasked: undefined pass-through + all-ones mask;
 * mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2)|(B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
7538 | |||
/* Double-precision getexp with an explicit rounding/SAE argument R, via
 * __builtin_ia32_getexppd512_mask.  Unmasked: undefined pass-through +
 * all-ones mask; mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
7553 | |||
7554 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
7555 | _mm512_getexp_pd (__m512d __A) |
||
7556 | { |
||
7557 | return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
||
7558 | (__v8df) _mm512_undefined_pd (), |
||
7559 | (__mmask8) -1, |
||
7560 | _MM_FROUND_CUR_DIRECTION); |
||
7561 | } |
||
7562 | |||
7563 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
7564 | _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
7565 | { |
||
7566 | return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
||
7567 | (__v8df) __W, |
||
7568 | (__mmask8) __U, |
||
7569 | _MM_FROUND_CUR_DIRECTION); |
||
7570 | } |
||
7571 | |||
7572 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
7573 | _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) |
||
7574 | { |
||
7575 | return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, |
||
7576 | (__v8df) _mm512_setzero_pd (), |
||
7577 | (__mmask8) __U, |
||
7578 | _MM_FROUND_CUR_DIRECTION); |
||
7579 | } |
||
7580 | |||
/* Single-precision getexp with an explicit rounding/SAE argument R, via
 * __builtin_ia32_getexpps512_mask.  Unmasked: undefined pass-through +
 * all-ones mask; mask_: W is the pass-through; maskz_: zero pass-through.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)))
7595 | |||
7596 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
7597 | _mm512_getexp_ps (__m512 __A) |
||
7598 | { |
||
7599 | return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
||
7600 | (__v16sf) _mm512_undefined_ps (), |
||
7601 | (__mmask16) -1, |
||
7602 | _MM_FROUND_CUR_DIRECTION); |
||
7603 | } |
||
7604 | |||
7605 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
7606 | _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
7607 | { |
||
7608 | return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
||
7609 | (__v16sf) __W, |
||
7610 | (__mmask16) __U, |
||
7611 | _MM_FROUND_CUR_DIRECTION); |
||
7612 | } |
||
7613 | |||
7614 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
7615 | _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) |
||
7616 | { |
||
7617 | return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, |
||
7618 | (__v16sf) _mm512_setzero_ps (), |
||
7619 | (__mmask16) __U, |
||
7620 | _MM_FROUND_CUR_DIRECTION); |
||
7621 | } |
||
7622 | |||
/* Gather of floats addressed by eight 64-bit indices, via
 * __builtin_ia32_gatherdiv16sf; the result is a 256-bit float vector.
 * Unmasked: undefined source + all-ones mask.  mask_: v1_old is the source
 * operand and mask selects lanes.  scale is forwarded as int.  */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)(__m256)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7634 | |||
/* Gather of 32-bit integers addressed by eight 64-bit indices, via
 * __builtin_ia32_gatherdiv16si; the result is a 256-bit integer vector.
 * Unmasked: undefined source + all-ones mask.  mask_: v1_old is the source
 * operand and mask selects lanes.  scale is forwarded as int.  */
#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)_mm256_undefined_si256(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7646 | |||
/* Gather of doubles addressed by eight 64-bit indices, via
 * __builtin_ia32_gatherdiv8df.  Unmasked: undefined source + all-ones mask.
 * mask_: v1_old is the source operand and mask selects lanes.  scale is
 * forwarded as int.  */
#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7658 | |||
/* Gather of 64-bit integers addressed by eight 64-bit indices, via
 * __builtin_ia32_gatherdiv8di.  Unmasked: undefined source (obtained from
 * _mm512_undefined_epi32 and viewed as __v8di) + all-ones mask.  mask_:
 * v1_old is the source operand and mask selects lanes.  scale is forwarded
 * as int.  */
#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), (int)(scale)))
7670 | |||
/* Gather of floats addressed by sixteen 32-bit indices, via
 * __builtin_ia32_gathersiv16sf.  Unmasked: undefined source + all-ones mask.
 * mask_: v1_old is the source operand and mask selects lanes.  scale is
 * forwarded as int.
 *
 * index is an integer vector, so it is cast through __m512i like in the other
 * gather/scatter macros.  The earlier (__m512) cast was a same-size vector
 * bitcast, so the generated code is unchanged; only the stated type is
 * corrected.  */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), (int)(scale)))
7682 | |||
/* Gather of 32-bit integers addressed by sixteen 32-bit indices, via
 * __builtin_ia32_gathersiv16si.  Unmasked: undefined source + all-ones mask.
 * mask_: v1_old is the source operand and mask selects lanes.  scale is
 * forwarded as int.  */
#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), (int)(scale)))
7694 | |||
/* Gather of doubles addressed by eight 32-bit indices (a 256-bit index
 * vector), via __builtin_ia32_gathersiv8df.  Unmasked: undefined source +
 * all-ones mask.  mask_: v1_old is the source operand and mask selects
 * lanes.  scale is forwarded as int.  */
#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
7706 | |||
/* Gather of 64-bit integers addressed by eight 32-bit indices (a 256-bit
 * index vector), via __builtin_ia32_gathersiv8di.  Unmasked: undefined
 * source + all-ones mask.  mask_: v1_old is the source operand and mask
 * selects lanes.  scale is forwarded as int.  */
#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
7718 | |||
/* Scatter of a 256-bit float vector v1 through eight 64-bit indices, via
 * __builtin_ia32_scatterdiv16sf.  The unmasked form passes an all-ones mask;
 * the mask_ form forwards mask.  scale is forwarded as int.  */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf( \
      (void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), (int)(scale))
7728 | |||
/* Scatter of a 256-bit vector of 32-bit integers v1 through eight 64-bit
 * indices, via __builtin_ia32_scatterdiv16si.  The unmasked form passes an
 * all-ones mask; the mask_ form forwards mask.  scale is forwarded as int.  */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si( \
      (void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), (int)(scale))
7738 | |||
/* Scatter of a 512-bit double vector v1 through eight 64-bit indices, via
 * __builtin_ia32_scatterdiv8df.  The unmasked form passes an all-ones mask;
 * the mask_ form forwards mask.  scale is forwarded as int.  */
#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df( \
      (void *)(addr), (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))
7748 | |||
7749 | #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ |
||
7750 | __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \ |
||
7751 | (__v8di)(__m512i)(index), \ |
||
7752 | (__v8di)(__m512i)(v1), (int)(scale)) |
||
7753 | |||
7754 | #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ |
||
7755 | __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \ |
||
7756 | (__v8di)(__m512i)(index), \ |
||
7757 | (__v8di)(__m512i)(v1), (int)(scale)) |
||
7758 | |||
/* Scatter stores addressed by 32-bit indices.  The ps/epi32 forms use sixteen
 * indices (__v16si) with a 16-bit mask; the pd/epi64 forms use eight indices
 * (__v8si) with an 8-bit mask.  Unmasked macros pass an all-ones mask. */

/* Source: sixteen floats. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), (__mmask16)-1, (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf( \
      (void *)(addr), (__mmask16)(mask), (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), (int)(scale))

/* Source: sixteen 32-bit integers. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), (__mmask16)-1, (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si( \
      (void *)(addr), (__mmask16)(mask), (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), (int)(scale))

/* Source: eight doubles; indices come from a 256-bit vector. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), (int)(scale))

/* Source: eight 64-bit integers; indices come from a 256-bit vector. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), (__mmask8)-1, (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di( \
      (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), (int)(scale))
||
7798 | |||
7799 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7800 | _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
7801 | { |
||
7802 | return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, |
||
7803 | (__v4sf)__A, |
||
7804 | (__v4sf)__B, |
||
7805 | (__mmask8)__U, |
||
7806 | _MM_FROUND_CUR_DIRECTION); |
||
7807 | } |
||
7808 | |||
/* Scalar float fmadd with an explicit rounding argument R.  The unmasked form
 * passes (A, B, C) with an all-ones mask. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, A, B) under mask U. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
||
7820 | |||
7821 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7822 | _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) |
||
7823 | { |
||
7824 | return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, |
||
7825 | (__v4sf)__B, |
||
7826 | (__v4sf)__C, |
||
7827 | (__mmask8)__U, |
||
7828 | _MM_FROUND_CUR_DIRECTION); |
||
7829 | } |
||
7830 | |||
/* _maskz scalar float fmadd with explicit rounding argument R. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
||
7836 | |||
7837 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7838 | _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) |
||
7839 | { |
||
7840 | return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, |
||
7841 | (__v4sf)__X, |
||
7842 | (__v4sf)__Y, |
||
7843 | (__mmask8)__U, |
||
7844 | _MM_FROUND_CUR_DIRECTION); |
||
7845 | } |
||
7846 | |||
/* _mask3 scalar float fmadd with explicit rounding argument R. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
||
7852 | |||
7853 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7854 | _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
7855 | { |
||
7856 | return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, |
||
7857 | (__v4sf)__A, |
||
7858 | -(__v4sf)__B, |
||
7859 | (__mmask8)__U, |
||
7860 | _MM_FROUND_CUR_DIRECTION); |
||
7861 | } |
||
7862 | |||
/* Scalar float fmsub with explicit rounding argument R: the addend is negated
 * and the multiply-add builtin is reused.  Unmasked form uses an all-ones
 * mask. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, A, -B) under mask U. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
||
7874 | |||
7875 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7876 | _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) |
||
7877 | { |
||
7878 | return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, |
||
7879 | (__v4sf)__B, |
||
7880 | -(__v4sf)__C, |
||
7881 | (__mmask8)__U, |
||
7882 | _MM_FROUND_CUR_DIRECTION); |
||
7883 | } |
||
7884 | |||
/* _maskz scalar float fmsub with explicit rounding argument R; C is negated
 * before the multiply-add builtin is invoked. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
||
7890 | |||
7891 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7892 | _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) |
||
7893 | { |
||
7894 | return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, |
||
7895 | (__v4sf)__X, |
||
7896 | (__v4sf)__Y, |
||
7897 | (__mmask8)__U, |
||
7898 | _MM_FROUND_CUR_DIRECTION); |
||
7899 | } |
||
7900 | |||
/* _mask3 scalar float fmsub with explicit rounding argument R; uses the
 * dedicated vfmsubss3_mask3 builtin with un-negated operands. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
||
7906 | |||
7907 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7908 | _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
7909 | { |
||
7910 | return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, |
||
7911 | -(__v4sf)__A, |
||
7912 | (__v4sf)__B, |
||
7913 | (__mmask8)__U, |
||
7914 | _MM_FROUND_CUR_DIRECTION); |
||
7915 | } |
||
7916 | |||
/* Scalar float fnmadd with explicit rounding argument R: the second builtin
 * operand is negated.  Unmasked form uses an all-ones mask. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), (__v4sf)(__m128)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, -A, B) under mask U. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
||
7928 | |||
7929 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7930 | _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) |
||
7931 | { |
||
7932 | return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, |
||
7933 | -(__v4sf)__B, |
||
7934 | (__v4sf)__C, |
||
7935 | (__mmask8)__U, |
||
7936 | _MM_FROUND_CUR_DIRECTION); |
||
7937 | } |
||
7938 | |||
/* _maskz scalar float fnmadd with explicit rounding argument R; B is negated
 * before the multiply-add builtin is invoked. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), (__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
||
7944 | |||
7945 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7946 | _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) |
||
7947 | { |
||
7948 | return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, |
||
7949 | -(__v4sf)__X, |
||
7950 | (__v4sf)__Y, |
||
7951 | (__mmask8)__U, |
||
7952 | _MM_FROUND_CUR_DIRECTION); |
||
7953 | } |
||
7954 | |||
/* _mask3 scalar float fnmadd with explicit rounding argument R; X is negated
 * before the mask3 multiply-add builtin is invoked. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
||
7960 | |||
7961 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7962 | _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
7963 | { |
||
7964 | return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, |
||
7965 | -(__v4sf)__A, |
||
7966 | -(__v4sf)__B, |
||
7967 | (__mmask8)__U, |
||
7968 | _MM_FROUND_CUR_DIRECTION); |
||
7969 | } |
||
7970 | |||
/* Scalar float fnmsub with explicit rounding argument R: the second and third
 * builtin operands are both negated.  Unmasked form uses an all-ones mask. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, -A, -B) under mask U. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), \
      (__mmask8)(U), (int)(R)))
||
7982 | |||
7983 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
7984 | _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) |
||
7985 | { |
||
7986 | return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, |
||
7987 | -(__v4sf)__B, |
||
7988 | -(__v4sf)__C, |
||
7989 | (__mmask8)__U, |
||
7990 | _MM_FROUND_CUR_DIRECTION); |
||
7991 | } |
||
7992 | |||
/* _maskz scalar float fnmsub with explicit rounding argument R; B and C are
 * negated before the multiply-add builtin is invoked. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), -(__v4sf)(__m128)(B), -(__v4sf)(__m128)(C), \
      (__mmask8)(U), (int)(R)))
||
7998 | |||
7999 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
8000 | _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) |
||
8001 | { |
||
8002 | return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, |
||
8003 | -(__v4sf)__X, |
||
8004 | (__v4sf)__Y, |
||
8005 | (__mmask8)__U, |
||
8006 | _MM_FROUND_CUR_DIRECTION); |
||
8007 | } |
||
8008 | |||
/* _mask3 scalar float fnmsub with explicit rounding argument R; X is negated
 * and the dedicated vfmsubss3_mask3 builtin is used. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), -(__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
      (__mmask8)(U), (int)(R)))
||
8014 | |||
8015 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8016 | _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
8017 | { |
||
8018 | return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, |
||
8019 | (__v2df)__A, |
||
8020 | (__v2df)__B, |
||
8021 | (__mmask8)__U, |
||
8022 | _MM_FROUND_CUR_DIRECTION); |
||
8023 | } |
||
8024 | |||
/* Scalar double fmadd with an explicit rounding argument R.  The unmasked
 * form passes (A, B, C) with an all-ones mask. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, A, B) under mask U. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
||
8036 | |||
8037 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8038 | _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) |
||
8039 | { |
||
8040 | return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, |
||
8041 | (__v2df)__B, |
||
8042 | (__v2df)__C, |
||
8043 | (__mmask8)__U, |
||
8044 | _MM_FROUND_CUR_DIRECTION); |
||
8045 | } |
||
8046 | |||
/* _maskz scalar double fmadd with explicit rounding argument R. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
||
8052 | |||
8053 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8054 | _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) |
||
8055 | { |
||
8056 | return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, |
||
8057 | (__v2df)__X, |
||
8058 | (__v2df)__Y, |
||
8059 | (__mmask8)__U, |
||
8060 | _MM_FROUND_CUR_DIRECTION); |
||
8061 | } |
||
8062 | |||
/* _mask3 scalar double fmadd with explicit rounding argument R. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
||
8068 | |||
8069 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8070 | _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
8071 | { |
||
8072 | return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, |
||
8073 | (__v2df)__A, |
||
8074 | -(__v2df)__B, |
||
8075 | (__mmask8)__U, |
||
8076 | _MM_FROUND_CUR_DIRECTION); |
||
8077 | } |
||
8078 | |||
/* Scalar double fmsub with explicit rounding argument R: the addend is
 * negated and the multiply-add builtin is reused.  Unmasked form uses an
 * all-ones mask. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, A, -B) under mask U. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
||
8090 | |||
8091 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8092 | _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) |
||
8093 | { |
||
8094 | return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, |
||
8095 | (__v2df)__B, |
||
8096 | -(__v2df)__C, |
||
8097 | (__mmask8)__U, |
||
8098 | _MM_FROUND_CUR_DIRECTION); |
||
8099 | } |
||
8100 | |||
/* _maskz scalar double fmsub with explicit rounding argument R; C is negated
 * before the multiply-add builtin is invoked. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
||
8106 | |||
8107 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8108 | _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) |
||
8109 | { |
||
8110 | return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, |
||
8111 | (__v2df)__X, |
||
8112 | (__v2df)__Y, |
||
8113 | (__mmask8)__U, |
||
8114 | _MM_FROUND_CUR_DIRECTION); |
||
8115 | } |
||
8116 | |||
/* _mask3 scalar double fmsub with explicit rounding argument R; uses the
 * dedicated vfmsubsd3_mask3 builtin with un-negated operands. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
||
8122 | |||
8123 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8124 | _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
8125 | { |
||
8126 | return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, |
||
8127 | -(__v2df)__A, |
||
8128 | (__v2df)__B, |
||
8129 | (__mmask8)__U, |
||
8130 | _MM_FROUND_CUR_DIRECTION); |
||
8131 | } |
||
8132 | |||
/* Scalar double fnmadd with explicit rounding argument R: the second builtin
 * operand is negated.  Unmasked form uses an all-ones mask. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, -A, B) under mask U. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
||
8144 | |||
8145 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8146 | _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) |
||
8147 | { |
||
8148 | return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, |
||
8149 | -(__v2df)__B, |
||
8150 | (__v2df)__C, |
||
8151 | (__mmask8)__U, |
||
8152 | _MM_FROUND_CUR_DIRECTION); |
||
8153 | } |
||
8154 | |||
/* _maskz scalar double fnmadd with explicit rounding argument R; B is negated
 * before the multiply-add builtin is invoked. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), (__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
||
8160 | |||
8161 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8162 | _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) |
||
8163 | { |
||
8164 | return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, |
||
8165 | -(__v2df)__X, |
||
8166 | (__v2df)__Y, |
||
8167 | (__mmask8)__U, |
||
8168 | _MM_FROUND_CUR_DIRECTION); |
||
8169 | } |
||
8170 | |||
/* _mask3 scalar double fnmadd with explicit rounding argument R; X is negated
 * before the mask3 multiply-add builtin is invoked. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
||
8176 | |||
8177 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8178 | _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
8179 | { |
||
8180 | return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, |
||
8181 | -(__v2df)__A, |
||
8182 | -(__v2df)__B, |
||
8183 | (__mmask8)__U, |
||
8184 | _MM_FROUND_CUR_DIRECTION); |
||
8185 | } |
||
8186 | |||
/* Scalar double fnmsub with explicit rounding argument R: the second and
 * third builtin operands are both negated.  Unmasked form uses an all-ones
 * mask. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)-1, (int)(R)))

/* _mask form: operands are (W, -A, -B) under mask U. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), \
      (__mmask8)(U), (int)(R)))
||
8198 | |||
8199 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8200 | _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) |
||
8201 | { |
||
8202 | return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, |
||
8203 | -(__v2df)__B, |
||
8204 | -(__v2df)__C, |
||
8205 | (__mmask8)__U, |
||
8206 | _MM_FROUND_CUR_DIRECTION); |
||
8207 | } |
||
8208 | |||
/* _maskz scalar double fnmsub with explicit rounding argument R; B and C are
 * negated before the multiply-add builtin is invoked. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), -(__v2df)(__m128d)(B), -(__v2df)(__m128d)(C), \
      (__mmask8)(U), (int)(R)))
||
8215 | |||
8216 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8217 | _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) |
||
8218 | { |
||
8219 | return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, |
||
8220 | -(__v2df)__X, |
||
8221 | (__v2df)__Y, |
||
8222 | (__mmask8)__U, |
||
8223 | _MM_FROUND_CUR_DIRECTION); |
||
8224 | } |
||
8225 | |||
/* _mask3 scalar double fnmsub with explicit rounding argument R; X is negated
 * and the dedicated vfmsubsd3_mask3 builtin is used. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), -(__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
      (__mmask8)(U), (int)(R)))
||
8231 | |||
/* Permute the eight doubles of X under control value C, which is cast to int
 * and forwarded to the permdf512 builtin. */
#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

/* Masked variants: the permuted vector and a fallback vector are handed to
 * the select builtin together with mask U.  The fallback is W for _mask and
 * an all-zero vector for _maskz. */
#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()))

/* 64-bit integer counterparts of the three macros above. */
#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()))
||
8257 | |||
8258 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8259 | _mm512_permutexvar_pd (__m512i __X, __m512d __Y) |
||
8260 | { |
||
8261 | return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); |
||
8262 | } |
||
8263 | |||
8264 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8265 | _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) |
||
8266 | { |
||
8267 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
8268 | (__v8df)_mm512_permutexvar_pd(__X, __Y), |
||
8269 | (__v8df)__W); |
||
8270 | } |
||
8271 | |||
8272 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8273 | _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) |
||
8274 | { |
||
8275 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
8276 | (__v8df)_mm512_permutexvar_pd(__X, __Y), |
||
8277 | (__v8df)_mm512_setzero_pd()); |
||
8278 | } |
||
8279 | |||
8280 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8281 | _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) |
||
8282 | { |
||
8283 | return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); |
||
8284 | } |
||
8285 | |||
8286 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8287 | _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) |
||
8288 | { |
||
8289 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
8290 | (__v8di)_mm512_permutexvar_epi64(__X, __Y), |
||
8291 | (__v8di)_mm512_setzero_si512()); |
||
8292 | } |
||
8293 | |||
8294 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8295 | _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, |
||
8296 | __m512i __Y) |
||
8297 | { |
||
8298 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
8299 | (__v8di)_mm512_permutexvar_epi64(__X, __Y), |
||
8300 | (__v8di)__W); |
||
8301 | } |
||
8302 | |||
8303 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8304 | _mm512_permutexvar_ps (__m512i __X, __m512 __Y) |
||
8305 | { |
||
8306 | return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); |
||
8307 | } |
||
8308 | |||
8309 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8310 | _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) |
||
8311 | { |
||
8312 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8313 | (__v16sf)_mm512_permutexvar_ps(__X, __Y), |
||
8314 | (__v16sf)__W); |
||
8315 | } |
||
8316 | |||
8317 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8318 | _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) |
||
8319 | { |
||
8320 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8321 | (__v16sf)_mm512_permutexvar_ps(__X, __Y), |
||
8322 | (__v16sf)_mm512_setzero_ps()); |
||
8323 | } |
||
8324 | |||
8325 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8326 | _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) |
||
8327 | { |
||
8328 | return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); |
||
8329 | } |
||
8330 | |||
8331 | #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 |
||
8332 | |||
8333 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8334 | _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) |
||
8335 | { |
||
8336 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
8337 | (__v16si)_mm512_permutexvar_epi32(__X, __Y), |
||
8338 | (__v16si)_mm512_setzero_si512()); |
||
8339 | } |
||
8340 | |||
8341 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8342 | _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, |
||
8343 | __m512i __Y) |
||
8344 | { |
||
8345 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
8346 | (__v16si)_mm512_permutexvar_epi32(__X, __Y), |
||
8347 | (__v16si)__W); |
||
8348 | } |
||
8349 | |||
8350 | #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 |
||
8351 | |||
8352 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8353 | _mm512_kand (__mmask16 __A, __mmask16 __B) |
||
8354 | { |
||
8355 | return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); |
||
8356 | } |
||
8357 | |||
8358 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8359 | _mm512_kandn (__mmask16 __A, __mmask16 __B) |
||
8360 | { |
||
8361 | return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); |
||
8362 | } |
||
8363 | |||
8364 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8365 | _mm512_kor (__mmask16 __A, __mmask16 __B) |
||
8366 | { |
||
8367 | return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); |
||
8368 | } |
||
8369 | |||
8370 | static __inline__ int __DEFAULT_FN_ATTRS |
||
8371 | _mm512_kortestc (__mmask16 __A, __mmask16 __B) |
||
8372 | { |
||
8373 | return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); |
||
8374 | } |
||
8375 | |||
8376 | static __inline__ int __DEFAULT_FN_ATTRS |
||
8377 | _mm512_kortestz (__mmask16 __A, __mmask16 __B) |
||
8378 | { |
||
8379 | return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); |
||
8380 | } |
||
8381 | |||
8382 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
8383 | _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) |
||
8384 | { |
||
8385 | return (unsigned char)__builtin_ia32_kortestchi(__A, __B); |
||
8386 | } |
||
8387 | |||
8388 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
8389 | _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) |
||
8390 | { |
||
8391 | return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); |
||
8392 | } |
||
8393 | |||
8394 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
8395 | _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { |
||
8396 | *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); |
||
8397 | return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); |
||
8398 | } |
||
8399 | |||
8400 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8401 | _mm512_kunpackb (__mmask16 __A, __mmask16 __B) |
||
8402 | { |
||
8403 | return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); |
||
8404 | } |
||
8405 | |||
8406 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8407 | _mm512_kxnor (__mmask16 __A, __mmask16 __B) |
||
8408 | { |
||
8409 | return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); |
||
8410 | } |
||
8411 | |||
8412 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8413 | _mm512_kxor (__mmask16 __A, __mmask16 __B) |
||
8414 | { |
||
8415 | return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); |
||
8416 | } |
||
8417 | |||
/* _k*_mask16 spellings: plain aliases for the _mm512_k* mask functions. */
#define _kand_mask16  _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16  _mm512_knot
#define _kor_mask16   _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16  _mm512_kxor

/* Mask shifts: A is the 16-bit mask and I the shift count, which is cast to
 * unsigned int and forwarded to the builtin. */
#define _kshiftli_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))

#define _kshiftri_mask16(A, I) \
  ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
||
8430 | |||
8431 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
8432 | _cvtmask16_u32(__mmask16 __A) { |
||
8433 | return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); |
||
8434 | } |
||
8435 | |||
8436 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8437 | _cvtu32_mask16(unsigned int __A) { |
||
8438 | return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); |
||
8439 | } |
||
8440 | |||
8441 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
8442 | _load_mask16(__mmask16 *__A) { |
||
8443 | return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A); |
||
8444 | } |
||
8445 | |||
8446 | static __inline__ void __DEFAULT_FN_ATTRS |
||
8447 | _store_mask16(__mmask16 *__A, __mmask16 __B) { |
||
8448 | *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B); |
||
8449 | } |
||
8450 | |||
8451 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
8452 | _mm512_stream_si512 (void * __P, __m512i __A) |
||
8453 | { |
||
8454 | typedef __v8di __v8di_aligned __attribute__((aligned(64))); |
||
8455 | __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); |
||
8456 | } |
||
8457 | |||
8458 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8459 | _mm512_stream_load_si512 (void const *__P) |
||
8460 | { |
||
8461 | typedef __v8di __v8di_aligned __attribute__((aligned(64))); |
||
8462 | return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); |
||
8463 | } |
||
8464 | |||
8465 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
8466 | _mm512_stream_pd (void *__P, __m512d __A) |
||
8467 | { |
||
8468 | typedef __v8df __v8df_aligned __attribute__((aligned(64))); |
||
8469 | __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); |
||
8470 | } |
||
8471 | |||
8472 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
8473 | _mm512_stream_ps (void *__P, __m512 __A) |
||
8474 | { |
||
8475 | typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); |
||
8476 | __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); |
||
8477 | } |
||
8478 | |||
8479 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8480 | _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
8481 | { |
||
8482 | return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, |
||
8483 | (__v8df) __W, |
||
8484 | (__mmask8) __U); |
||
8485 | } |
||
8486 | |||
8487 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8488 | _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) |
||
8489 | { |
||
8490 | return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, |
||
8491 | (__v8df) |
||
8492 | _mm512_setzero_pd (), |
||
8493 | (__mmask8) __U); |
||
8494 | } |
||
8495 | |||
8496 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8497 | _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
||
8498 | { |
||
8499 | return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, |
||
8500 | (__v8di) __W, |
||
8501 | (__mmask8) __U); |
||
8502 | } |
||
8503 | |||
8504 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8505 | _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) |
||
8506 | { |
||
8507 | return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, |
||
8508 | (__v8di) |
||
8509 | _mm512_setzero_si512 (), |
||
8510 | (__mmask8) __U); |
||
8511 | } |
||
8512 | |||
8513 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8514 | _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
8515 | { |
||
8516 | return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, |
||
8517 | (__v16sf) __W, |
||
8518 | (__mmask16) __U); |
||
8519 | } |
||
8520 | |||
8521 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8522 | _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) |
||
8523 | { |
||
8524 | return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, |
||
8525 | (__v16sf) |
||
8526 | _mm512_setzero_ps (), |
||
8527 | (__mmask16) __U); |
||
8528 | } |
||
8529 | |||
8530 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8531 | _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
||
8532 | { |
||
8533 | return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, |
||
8534 | (__v16si) __W, |
||
8535 | (__mmask16) __U); |
||
8536 | } |
||
8537 | |||
8538 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8539 | _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) |
||
8540 | { |
||
8541 | return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, |
||
8542 | (__v16si) |
||
8543 | _mm512_setzero_si512 (), |
||
8544 | (__mmask16) __U); |
||
8545 | } |
||
8546 | |||
/* Scalar single-precision compares that yield a mask.  All four expand to
 * __builtin_ia32_cmpss_mask with predicate P; they differ only in the mask
 * operand ((__mmask8)-1 or the caller's M) and in the last operand (the
 * caller's R or _MM_FROUND_CUR_DIRECTION). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (int)(P), \
      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
||
8568 | |||
/* Scalar double-precision compares that yield a mask.  All four expand to
 * __builtin_ia32_cmpsd_mask with predicate P; they differ only in the mask
 * operand ((__mmask8)-1 or the caller's M) and in the last operand (the
 * caller's R or _MM_FROUND_CUR_DIRECTION). */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (int)(P), \
      (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
||
8590 | |||
8591 | /* Bit Test */ |
||
8592 | |||
8593 | static __inline __mmask16 __DEFAULT_FN_ATTRS512 |
||
8594 | _mm512_test_epi32_mask (__m512i __A, __m512i __B) |
||
8595 | { |
||
8596 | return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), |
||
8597 | _mm512_setzero_si512()); |
||
8598 | } |
||
8599 | |||
8600 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 |
||
8601 | _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
||
8602 | { |
||
8603 | return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), |
||
8604 | _mm512_setzero_si512()); |
||
8605 | } |
||
8606 | |||
8607 | static __inline __mmask8 __DEFAULT_FN_ATTRS512 |
||
8608 | _mm512_test_epi64_mask (__m512i __A, __m512i __B) |
||
8609 | { |
||
8610 | return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), |
||
8611 | _mm512_setzero_si512()); |
||
8612 | } |
||
8613 | |||
8614 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 |
||
8615 | _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
||
8616 | { |
||
8617 | return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), |
||
8618 | _mm512_setzero_si512()); |
||
8619 | } |
||
8620 | |||
8621 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 |
||
8622 | _mm512_testn_epi32_mask (__m512i __A, __m512i __B) |
||
8623 | { |
||
8624 | return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), |
||
8625 | _mm512_setzero_si512()); |
||
8626 | } |
||
8627 | |||
8628 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 |
||
8629 | _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) |
||
8630 | { |
||
8631 | return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), |
||
8632 | _mm512_setzero_si512()); |
||
8633 | } |
||
8634 | |||
8635 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 |
||
8636 | _mm512_testn_epi64_mask (__m512i __A, __m512i __B) |
||
8637 | { |
||
8638 | return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), |
||
8639 | _mm512_setzero_si512()); |
||
8640 | } |
||
8641 | |||
8642 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 |
||
8643 | _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) |
||
8644 | { |
||
8645 | return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), |
||
8646 | _mm512_setzero_si512()); |
||
8647 | } |
||
8648 | |||
8649 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8650 | _mm512_movehdup_ps (__m512 __A) |
||
8651 | { |
||
8652 | return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, |
||
8653 | 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); |
||
8654 | } |
||
8655 | |||
8656 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8657 | _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
8658 | { |
||
8659 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8660 | (__v16sf)_mm512_movehdup_ps(__A), |
||
8661 | (__v16sf)__W); |
||
8662 | } |
||
8663 | |||
8664 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8665 | _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) |
||
8666 | { |
||
8667 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8668 | (__v16sf)_mm512_movehdup_ps(__A), |
||
8669 | (__v16sf)_mm512_setzero_ps()); |
||
8670 | } |
||
8671 | |||
8672 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8673 | _mm512_moveldup_ps (__m512 __A) |
||
8674 | { |
||
8675 | return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, |
||
8676 | 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); |
||
8677 | } |
||
8678 | |||
8679 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8680 | _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
8681 | { |
||
8682 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8683 | (__v16sf)_mm512_moveldup_ps(__A), |
||
8684 | (__v16sf)__W); |
||
8685 | } |
||
8686 | |||
8687 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8688 | _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) |
||
8689 | { |
||
8690 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
8691 | (__v16sf)_mm512_moveldup_ps(__A), |
||
8692 | (__v16sf)_mm512_setzero_ps()); |
||
8693 | } |
||
8694 | |||
8695 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
8696 | _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
||
8697 | { |
||
8698 | return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); |
||
8699 | } |
||
8700 | |||
8701 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
8702 | _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) |
||
8703 | { |
||
8704 | return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), |
||
8705 | _mm_setzero_ps()); |
||
8706 | } |
||
8707 | |||
8708 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8709 | _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
||
8710 | { |
||
8711 | return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); |
||
8712 | } |
||
8713 | |||
8714 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8715 | _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) |
||
8716 | { |
||
8717 | return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), |
||
8718 | _mm_setzero_pd()); |
||
8719 | } |
||
8720 | |||
8721 | static __inline__ void __DEFAULT_FN_ATTRS128 |
||
8722 | _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) |
||
8723 | { |
||
8724 | __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); |
||
8725 | } |
||
8726 | |||
8727 | static __inline__ void __DEFAULT_FN_ATTRS128 |
||
8728 | _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) |
||
8729 | { |
||
8730 | __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); |
||
8731 | } |
||
8732 | |||
8733 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
8734 | _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) |
||
8735 | { |
||
8736 | __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, |
||
8737 | (__v4sf)_mm_setzero_ps(), |
||
8738 | 0, 4, 4, 4); |
||
8739 | |||
8740 | return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1); |
||
8741 | } |
||
8742 | |||
8743 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
8744 | _mm_maskz_load_ss (__mmask8 __U, const float* __A) |
||
8745 | { |
||
8746 | return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A, |
||
8747 | (__v4sf) _mm_setzero_ps(), |
||
8748 | __U & 1); |
||
8749 | } |
||
8750 | |||
8751 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8752 | _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) |
||
8753 | { |
||
8754 | __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, |
||
8755 | (__v2df)_mm_setzero_pd(), |
||
8756 | 0, 2); |
||
8757 | |||
8758 | return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1); |
||
8759 | } |
||
8760 | |||
8761 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
8762 | _mm_maskz_load_sd (__mmask8 __U, const double* __A) |
||
8763 | { |
||
8764 | return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, |
||
8765 | (__v2df) _mm_setzero_pd(), |
||
8766 | __U & 1); |
||
8767 | } |
||
8768 | |||
/* 32-bit element shuffle controlled by the immediate I (pshufd512 builtin).
 * The mask/maskz forms run the unmasked shuffle and then blend with W or
 * with zero through selectd_512 under mask U. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()))
||
8781 | |||
8782 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8783 | _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
8784 | { |
||
8785 | return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, |
||
8786 | (__v8df) __W, |
||
8787 | (__mmask8) __U); |
||
8788 | } |
||
8789 | |||
8790 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8791 | _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) |
||
8792 | { |
||
8793 | return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, |
||
8794 | (__v8df) _mm512_setzero_pd (), |
||
8795 | (__mmask8) __U); |
||
8796 | } |
||
8797 | |||
8798 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8799 | _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
||
8800 | { |
||
8801 | return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, |
||
8802 | (__v8di) __W, |
||
8803 | (__mmask8) __U); |
||
8804 | } |
||
8805 | |||
8806 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8807 | _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) |
||
8808 | { |
||
8809 | return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, |
||
8810 | (__v8di) _mm512_setzero_si512 (), |
||
8811 | (__mmask8) __U); |
||
8812 | } |
||
8813 | |||
8814 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8815 | _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) |
||
8816 | { |
||
8817 | return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, |
||
8818 | (__v8df) __W, |
||
8819 | (__mmask8) __U); |
||
8820 | } |
||
8821 | |||
8822 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8823 | _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) |
||
8824 | { |
||
8825 | return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, |
||
8826 | (__v8df) _mm512_setzero_pd(), |
||
8827 | (__mmask8) __U); |
||
8828 | } |
||
8829 | |||
8830 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8831 | _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) |
||
8832 | { |
||
8833 | return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, |
||
8834 | (__v8di) __W, |
||
8835 | (__mmask8) __U); |
||
8836 | } |
||
8837 | |||
8838 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8839 | _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) |
||
8840 | { |
||
8841 | return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, |
||
8842 | (__v8di) _mm512_setzero_si512(), |
||
8843 | (__mmask8) __U); |
||
8844 | } |
||
8845 | |||
8846 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8847 | _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) |
||
8848 | { |
||
8849 | return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, |
||
8850 | (__v16sf) __W, |
||
8851 | (__mmask16) __U); |
||
8852 | } |
||
8853 | |||
8854 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8855 | _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) |
||
8856 | { |
||
8857 | return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, |
||
8858 | (__v16sf) _mm512_setzero_ps(), |
||
8859 | (__mmask16) __U); |
||
8860 | } |
||
8861 | |||
8862 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8863 | _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) |
||
8864 | { |
||
8865 | return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, |
||
8866 | (__v16si) __W, |
||
8867 | (__mmask16) __U); |
||
8868 | } |
||
8869 | |||
8870 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8871 | _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) |
||
8872 | { |
||
8873 | return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, |
||
8874 | (__v16si) _mm512_setzero_si512(), |
||
8875 | (__mmask16) __U); |
||
8876 | } |
||
8877 | |||
8878 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8879 | _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
8880 | { |
||
8881 | return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, |
||
8882 | (__v16sf) __W, |
||
8883 | (__mmask16) __U); |
||
8884 | } |
||
8885 | |||
8886 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8887 | _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) |
||
8888 | { |
||
8889 | return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, |
||
8890 | (__v16sf) _mm512_setzero_ps(), |
||
8891 | (__mmask16) __U); |
||
8892 | } |
||
8893 | |||
8894 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8895 | _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
||
8896 | { |
||
8897 | return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, |
||
8898 | (__v16si) __W, |
||
8899 | (__mmask16) __U); |
||
8900 | } |
||
8901 | |||
8902 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
8903 | _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) |
||
8904 | { |
||
8905 | return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, |
||
8906 | (__v16si) _mm512_setzero_si512(), |
||
8907 | (__mmask16) __U); |
||
8908 | } |
||
8909 | |||
/* Float -> double conversion of a 256-bit source with an explicit R operand.
 * All three expand to __builtin_ia32_cvtps2pd512_mask; they differ in the
 * pass-through operand (undefined, W, or zero) and the mask ((__mmask8)-1
 * or U). */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
8924 | |||
8925 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8926 | _mm512_cvtps_pd (__m256 __A) |
||
8927 | { |
||
8928 | return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); |
||
8929 | } |
||
8930 | |||
8931 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8932 | _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) |
||
8933 | { |
||
8934 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
8935 | (__v8df)_mm512_cvtps_pd(__A), |
||
8936 | (__v8df)__W); |
||
8937 | } |
||
8938 | |||
8939 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8940 | _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) |
||
8941 | { |
||
8942 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
8943 | (__v8df)_mm512_cvtps_pd(__A), |
||
8944 | (__v8df)_mm512_setzero_pd()); |
||
8945 | } |
||
8946 | |||
8947 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8948 | _mm512_cvtpslo_pd (__m512 __A) |
||
8949 | { |
||
8950 | return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); |
||
8951 | } |
||
8952 | |||
8953 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8954 | _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) |
||
8955 | { |
||
8956 | return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); |
||
8957 | } |
||
8958 | |||
8959 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8960 | _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) |
||
8961 | { |
||
8962 | return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, |
||
8963 | (__v8df) __A, |
||
8964 | (__v8df) __W); |
||
8965 | } |
||
8966 | |||
8967 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
8968 | _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) |
||
8969 | { |
||
8970 | return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, |
||
8971 | (__v8df) __A, |
||
8972 | (__v8df) _mm512_setzero_pd ()); |
||
8973 | } |
||
8974 | |||
8975 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8976 | _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) |
||
8977 | { |
||
8978 | return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, |
||
8979 | (__v16sf) __A, |
||
8980 | (__v16sf) __W); |
||
8981 | } |
||
8982 | |||
8983 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
8984 | _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) |
||
8985 | { |
||
8986 | return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, |
||
8987 | (__v16sf) __A, |
||
8988 | (__v16sf) _mm512_setzero_ps ()); |
||
8989 | } |
||
8990 | |||
8991 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
8992 | _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) |
||
8993 | { |
||
8994 | __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, |
||
8995 | (__mmask8) __U); |
||
8996 | } |
||
8997 | |||
8998 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
8999 | _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) |
||
9000 | { |
||
9001 | __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, |
||
9002 | (__mmask8) __U); |
||
9003 | } |
||
9004 | |||
9005 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
9006 | _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) |
||
9007 | { |
||
9008 | __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, |
||
9009 | (__mmask16) __U); |
||
9010 | } |
||
9011 | |||
9012 | static __inline__ void __DEFAULT_FN_ATTRS512 |
||
9013 | _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) |
||
9014 | { |
||
9015 | __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, |
||
9016 | (__mmask16) __U); |
||
9017 | } |
||
9018 | |||
/* Scalar double -> float conversion with an explicit R operand.  All three
 * expand to __builtin_ia32_cvtsd2ss_round_mask(A, B, passthru, mask, R);
 * they differ in the pass-through operand (undefined, W, or zero) and the
 * mask ((__mmask8)-1 or U). */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
||
9036 | |||
9037 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
9038 | _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) |
||
9039 | { |
||
9040 | return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, |
||
9041 | (__v2df)__B, |
||
9042 | (__v4sf)__W, |
||
9043 | (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); |
||
9044 | } |
||
9045 | |||
9046 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
9047 | _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) |
||
9048 | { |
||
9049 | return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, |
||
9050 | (__v2df)__B, |
||
9051 | (__v4sf)_mm_setzero_ps(), |
||
9052 | (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); |
||
9053 | } |
||
9054 | |||
/* Alternate "i32"/"i64" spellings mapped onto the "si32"/"si64" scalar
 * conversion intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

/* The 64-bit aliases are only provided when targeting x86-64. */
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
||
9065 | |||
/* Signed integer -> scalar floating-point conversions with an explicit R
 * operand.  Each "i" spelling expands to exactly the same builtin call as
 * its "si" twin. */
#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

/* The 64-bit source forms are only provided when targeting x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)))
#endif
||
9091 | |||
/* Scalar float -> double conversion with an explicit R operand.  All three
 * expand to __builtin_ia32_cvtss2sd_round_mask(A, B, passthru, mask, R);
 * they differ in the pass-through operand (undefined, W, or zero) and the
 * mask ((__mmask8)-1 or U). */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
||
9109 | |||
9110 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
9111 | _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) |
||
9112 | { |
||
9113 | return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, |
||
9114 | (__v4sf)__B, |
||
9115 | (__v2df)__W, |
||
9116 | (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); |
||
9117 | } |
||
9118 | |||
9119 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
9120 | _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) |
||
9121 | { |
||
9122 | return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, |
||
9123 | (__v4sf)__B, |
||
9124 | (__v2df)_mm_setzero_pd(), |
||
9125 | (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); |
||
9126 | } |
||
9127 | |||
9128 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
9129 | _mm_cvtu32_sd (__m128d __A, unsigned __B) |
||
9130 | { |
||
9131 | __A[0] = __B; |
||
9132 | return __A; |
||
9133 | } |
||
9134 | |||
9135 | #ifdef __x86_64__ |
||
9136 | #define _mm_cvt_roundu64_sd(A, B, R) \ |
||
9137 | ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ |
||
9138 | (unsigned long long)(B), (int)(R))) |
||
9139 | |||
9140 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
||
9141 | _mm_cvtu64_sd (__m128d __A, unsigned long long __B) |
||
9142 | { |
||
9143 | __A[0] = __B; |
||
9144 | return __A; |
||
9145 | } |
||
9146 | #endif |
||
9147 | |||
/* Unsigned 32-bit integer -> scalar float with an explicit R operand
 * (cvtusi2ss32 builtin). */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32( \
      (__v4sf)(__m128)(A), (unsigned int)(B), (int)(R)))
||
9151 | |||
9152 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
9153 | _mm_cvtu32_ss (__m128 __A, unsigned __B) |
||
9154 | { |
||
9155 | __A[0] = __B; |
||
9156 | return __A; |
||
9157 | } |
||
9158 | |||
9159 | #ifdef __x86_64__ |
||
9160 | #define _mm_cvt_roundu64_ss(A, B, R) \ |
||
9161 | ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ |
||
9162 | (unsigned long long)(B), (int)(R))) |
||
9163 | |||
9164 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
||
9165 | _mm_cvtu64_ss (__m128 __A, unsigned long long __B) |
||
9166 | { |
||
9167 | __A[0] = __B; |
||
9168 | return __A; |
||
9169 | } |
||
9170 | #endif |
||
9171 | |||
9172 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
9173 | _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) |
||
9174 | { |
||
9175 | return (__m512i) __builtin_ia32_selectd_512(__M, |
||
9176 | (__v16si) _mm512_set1_epi32(__A), |
||
9177 | (__v16si) __O); |
||
9178 | } |
||
9179 | |||
9180 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
9181 | _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) |
||
9182 | { |
||
9183 | return (__m512i) __builtin_ia32_selectq_512(__M, |
||
9184 | (__v8di) _mm512_set1_epi64(__A), |
||
9185 | (__v8di) __O); |
||
9186 | } |
||
9187 | |||
9188 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
9189 | _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, |
||
9190 | char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, |
||
9191 | char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, |
||
9192 | char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, |
||
9193 | char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, |
||
9194 | char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, |
||
9195 | char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, |
||
9196 | char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, |
||
9197 | char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, |
||
9198 | char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, |
||
9199 | char __e4, char __e3, char __e2, char __e1, char __e0) { |
||
9200 | |||
9201 | return __extension__ (__m512i)(__v64qi) |
||
9202 | {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, |
||
9203 | __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, |
||
9204 | __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, |
||
9205 | __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31, |
||
9206 | __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39, |
||
9207 | __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47, |
||
9208 | __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55, |
||
9209 | __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; |
||
9210 | } |
||
9211 | |||
9212 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
9213 | _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, |
||
9214 | short __e27, short __e26, short __e25, short __e24, short __e23, |
||
9215 | short __e22, short __e21, short __e20, short __e19, short __e18, |
||
9216 | short __e17, short __e16, short __e15, short __e14, short __e13, |
||
9217 | short __e12, short __e11, short __e10, short __e9, short __e8, |
||
9218 | short __e7, short __e6, short __e5, short __e4, short __e3, |
||
9219 | short __e2, short __e1, short __e0) { |
||
9220 | return __extension__ (__m512i)(__v32hi) |
||
9221 | {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, |
||
9222 | __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, |
||
9223 | __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, |
||
9224 | __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; |
||
9225 | } |
||
9226 | |||
9227 | static __inline __m512i __DEFAULT_FN_ATTRS512 |
||
9228 | _mm512_set_epi32 (int __A, int __B, int __C, int __D, |
||
9229 | int __E, int __F, int __G, int __H, |
||
9230 | int __I, int __J, int __K, int __L, |
||
9231 | int __M, int __N, int __O, int __P) |
||
9232 | { |
||
9233 | return __extension__ (__m512i)(__v16si) |
||
9234 | { __P, __O, __N, __M, __L, __K, __J, __I, |
||
9235 | __H, __G, __F, __E, __D, __C, __B, __A }; |
||
9236 | } |
||
9237 | |||
/* "Reversed" set: e0 becomes element 0.  Implemented by passing the
 * arguments to _mm512_set_epi32 in the opposite order. */
#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                   (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
||
9242 | |||
9243 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
9244 | _mm512_set_epi64 (long long __A, long long __B, long long __C, |
||
9245 | long long __D, long long __E, long long __F, |
||
9246 | long long __G, long long __H) |
||
9247 | { |
||
9248 | return __extension__ (__m512i) (__v8di) |
||
9249 | { __H, __G, __F, __E, __D, __C, __B, __A }; |
||
9250 | } |
||
9251 | |||
/* "Reversed" set: e0 becomes element 0.  Implemented by passing the
 * arguments to _mm512_set_epi64 in the opposite order. */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
||
9254 | |||
9255 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
9256 | _mm512_set_pd (double __A, double __B, double __C, double __D, |
||
9257 | double __E, double __F, double __G, double __H) |
||
9258 | { |
||
9259 | return __extension__ (__m512d) |
||
9260 | { __H, __G, __F, __E, __D, __C, __B, __A }; |
||
9261 | } |
||
9262 | |||
/* "Reversed" set: e0 becomes element 0.  Implemented by passing the
 * arguments to _mm512_set_pd in the opposite order. */
#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
||
9265 | |||
9266 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
9267 | _mm512_set_ps (float __A, float __B, float __C, float __D, |
||
9268 | float __E, float __F, float __G, float __H, |
||
9269 | float __I, float __J, float __K, float __L, |
||
9270 | float __M, float __N, float __O, float __P) |
||
9271 | { |
||
9272 | return __extension__ (__m512) |
||
9273 | { __P, __O, __N, __M, __L, __K, __J, __I, |
||
9274 | __H, __G, __F, __E, __D, __C, __B, __A }; |
||
9275 | } |
||
9276 | |||
/* "Reversed" set: e0 becomes element 0.  Implemented by passing the
 * arguments to _mm512_set_ps in the opposite order. */
#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), \
                (e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
||
9280 | |||
9281 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
9282 | _mm512_abs_ps(__m512 __A) |
||
9283 | { |
||
9284 | return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; |
||
9285 | } |
||
9286 | |||
9287 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
9288 | _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) |
||
9289 | { |
||
9290 | return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; |
||
9291 | } |
||
9292 | |||
9293 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
9294 | _mm512_abs_pd(__m512d __A) |
||
9295 | { |
||
9296 | return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; |
||
9297 | } |
||
9298 | |||
9299 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
9300 | _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) |
||
9301 | { |
||
9302 | return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); |
||
9303 | } |
||
9304 | |||
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the input elements of V.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 *    vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 *    produce unspecified results.
 *
 * The bisection method is used: at each step, the vector produced by the
 * previous step is split in half and the operation is performed on its two
 * halves. This takes log2(n) steps, where n is the number of elements in the
 * vector.
 */
9320 | |||
9321 | static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { |
||
9322 | return __builtin_reduce_add((__v8di)__W); |
||
9323 | } |
||
9324 | |||
9325 | static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { |
||
9326 | return __builtin_reduce_mul((__v8di)__W); |
||
9327 | } |
||
9328 | |||
9329 | static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { |
||
9330 | return __builtin_reduce_and((__v8di)__W); |
||
9331 | } |
||
9332 | |||
9333 | static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { |
||
9334 | return __builtin_reduce_or((__v8di)__W); |
||
9335 | } |
||
9336 | |||
9337 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9338 | _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { |
||
9339 | __W = _mm512_maskz_mov_epi64(__M, __W); |
||
9340 | return __builtin_reduce_add((__v8di)__W); |
||
9341 | } |
||
9342 | |||
9343 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9344 | _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { |
||
9345 | __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); |
||
9346 | return __builtin_reduce_mul((__v8di)__W); |
||
9347 | } |
||
9348 | |||
9349 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9350 | _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { |
||
9351 | __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W); |
||
9352 | return __builtin_reduce_and((__v8di)__W); |
||
9353 | } |
||
9354 | |||
9355 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9356 | _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { |
||
9357 | __W = _mm512_maskz_mov_epi64(__M, __W); |
||
9358 | return __builtin_reduce_or((__v8di)__W); |
||
9359 | } |
||
9360 | |||
9361 | // -0.0 is used to ignore the start value since it is the neutral value of |
||
9362 | // floating point addition. For more information, please refer to |
||
9363 | // https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic |
||
9364 | static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { |
||
9365 | return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); |
||
9366 | } |
||
9367 | |||
9368 | static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { |
||
9369 | return __builtin_ia32_reduce_fmul_pd512(1.0, __W); |
||
9370 | } |
||
9371 | |||
9372 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9373 | _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { |
||
9374 | __W = _mm512_maskz_mov_pd(__M, __W); |
||
9375 | return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); |
||
9376 | } |
||
9377 | |||
9378 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9379 | _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { |
||
9380 | __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); |
||
9381 | return __builtin_ia32_reduce_fmul_pd512(1.0, __W); |
||
9382 | } |
||
9383 | |||
9384 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9385 | _mm512_reduce_add_epi32(__m512i __W) { |
||
9386 | return __builtin_reduce_add((__v16si)__W); |
||
9387 | } |
||
9388 | |||
9389 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9390 | _mm512_reduce_mul_epi32(__m512i __W) { |
||
9391 | return __builtin_reduce_mul((__v16si)__W); |
||
9392 | } |
||
9393 | |||
9394 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9395 | _mm512_reduce_and_epi32(__m512i __W) { |
||
9396 | return __builtin_reduce_and((__v16si)__W); |
||
9397 | } |
||
9398 | |||
9399 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9400 | _mm512_reduce_or_epi32(__m512i __W) { |
||
9401 | return __builtin_reduce_or((__v16si)__W); |
||
9402 | } |
||
9403 | |||
9404 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9405 | _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { |
||
9406 | __W = _mm512_maskz_mov_epi32(__M, __W); |
||
9407 | return __builtin_reduce_add((__v16si)__W); |
||
9408 | } |
||
9409 | |||
9410 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9411 | _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { |
||
9412 | __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); |
||
9413 | return __builtin_reduce_mul((__v16si)__W); |
||
9414 | } |
||
9415 | |||
9416 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9417 | _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { |
||
9418 | __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W); |
||
9419 | return __builtin_reduce_and((__v16si)__W); |
||
9420 | } |
||
9421 | |||
9422 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9423 | _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { |
||
9424 | __W = _mm512_maskz_mov_epi32(__M, __W); |
||
9425 | return __builtin_reduce_or((__v16si)__W); |
||
9426 | } |
||
9427 | |||
9428 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9429 | _mm512_reduce_add_ps(__m512 __W) { |
||
9430 | return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); |
||
9431 | } |
||
9432 | |||
9433 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9434 | _mm512_reduce_mul_ps(__m512 __W) { |
||
9435 | return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); |
||
9436 | } |
||
9437 | |||
9438 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9439 | _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { |
||
9440 | __W = _mm512_maskz_mov_ps(__M, __W); |
||
9441 | return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); |
||
9442 | } |
||
9443 | |||
9444 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9445 | _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { |
||
9446 | __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); |
||
9447 | return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); |
||
9448 | } |
||
9449 | |||
9450 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9451 | _mm512_reduce_max_epi64(__m512i __V) { |
||
9452 | return __builtin_reduce_max((__v8di)__V); |
||
9453 | } |
||
9454 | |||
9455 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 |
||
9456 | _mm512_reduce_max_epu64(__m512i __V) { |
||
9457 | return __builtin_reduce_max((__v8du)__V); |
||
9458 | } |
||
9459 | |||
9460 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9461 | _mm512_reduce_min_epi64(__m512i __V) { |
||
9462 | return __builtin_reduce_min((__v8di)__V); |
||
9463 | } |
||
9464 | |||
9465 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 |
||
9466 | _mm512_reduce_min_epu64(__m512i __V) { |
||
9467 | return __builtin_reduce_min((__v8du)__V); |
||
9468 | } |
||
9469 | |||
9470 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9471 | _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { |
||
9472 | __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); |
||
9473 | return __builtin_reduce_max((__v8di)__V); |
||
9474 | } |
||
9475 | |||
9476 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 |
||
9477 | _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { |
||
9478 | __V = _mm512_maskz_mov_epi64(__M, __V); |
||
9479 | return __builtin_reduce_max((__v8du)__V); |
||
9480 | } |
||
9481 | |||
9482 | static __inline__ long long __DEFAULT_FN_ATTRS512 |
||
9483 | _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { |
||
9484 | __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); |
||
9485 | return __builtin_reduce_min((__v8di)__V); |
||
9486 | } |
||
9487 | |||
9488 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 |
||
9489 | _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { |
||
9490 | __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V); |
||
9491 | return __builtin_reduce_min((__v8du)__V); |
||
9492 | } |
||
9493 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9494 | _mm512_reduce_max_epi32(__m512i __V) { |
||
9495 | return __builtin_reduce_max((__v16si)__V); |
||
9496 | } |
||
9497 | |||
9498 | static __inline__ unsigned int __DEFAULT_FN_ATTRS512 |
||
9499 | _mm512_reduce_max_epu32(__m512i __V) { |
||
9500 | return __builtin_reduce_max((__v16su)__V); |
||
9501 | } |
||
9502 | |||
9503 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9504 | _mm512_reduce_min_epi32(__m512i __V) { |
||
9505 | return __builtin_reduce_min((__v16si)__V); |
||
9506 | } |
||
9507 | |||
9508 | static __inline__ unsigned int __DEFAULT_FN_ATTRS512 |
||
9509 | _mm512_reduce_min_epu32(__m512i __V) { |
||
9510 | return __builtin_reduce_min((__v16su)__V); |
||
9511 | } |
||
9512 | |||
9513 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9514 | _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { |
||
9515 | __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); |
||
9516 | return __builtin_reduce_max((__v16si)__V); |
||
9517 | } |
||
9518 | |||
9519 | static __inline__ unsigned int __DEFAULT_FN_ATTRS512 |
||
9520 | _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { |
||
9521 | __V = _mm512_maskz_mov_epi32(__M, __V); |
||
9522 | return __builtin_reduce_max((__v16su)__V); |
||
9523 | } |
||
9524 | |||
9525 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9526 | _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { |
||
9527 | __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); |
||
9528 | return __builtin_reduce_min((__v16si)__V); |
||
9529 | } |
||
9530 | |||
9531 | static __inline__ unsigned int __DEFAULT_FN_ATTRS512 |
||
9532 | _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { |
||
9533 | __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V); |
||
9534 | return __builtin_reduce_min((__v16su)__V); |
||
9535 | } |
||
9536 | |||
9537 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9538 | _mm512_reduce_max_pd(__m512d __V) { |
||
9539 | return __builtin_ia32_reduce_fmax_pd512(__V); |
||
9540 | } |
||
9541 | |||
9542 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9543 | _mm512_reduce_min_pd(__m512d __V) { |
||
9544 | return __builtin_ia32_reduce_fmin_pd512(__V); |
||
9545 | } |
||
9546 | |||
9547 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9548 | _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { |
||
9549 | __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V); |
||
9550 | return __builtin_ia32_reduce_fmax_pd512(__V); |
||
9551 | } |
||
9552 | |||
9553 | static __inline__ double __DEFAULT_FN_ATTRS512 |
||
9554 | _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { |
||
9555 | __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); |
||
9556 | return __builtin_ia32_reduce_fmin_pd512(__V); |
||
9557 | } |
||
9558 | |||
9559 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9560 | _mm512_reduce_max_ps(__m512 __V) { |
||
9561 | return __builtin_ia32_reduce_fmax_ps512(__V); |
||
9562 | } |
||
9563 | |||
9564 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9565 | _mm512_reduce_min_ps(__m512 __V) { |
||
9566 | return __builtin_ia32_reduce_fmin_ps512(__V); |
||
9567 | } |
||
9568 | |||
9569 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9570 | _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { |
||
9571 | __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); |
||
9572 | return __builtin_ia32_reduce_fmax_ps512(__V); |
||
9573 | } |
||
9574 | |||
9575 | static __inline__ float __DEFAULT_FN_ATTRS512 |
||
9576 | _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { |
||
9577 | __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); |
||
9578 | return __builtin_ia32_reduce_fmin_ps512(__V); |
||
9579 | } |
||
9580 | |||
9581 | /// Moves the least significant 32 bits of a vector of [16 x i32] to a |
||
9582 | /// 32-bit signed integer value. |
||
9583 | /// |
||
9584 | /// \headerfile <x86intrin.h> |
||
9585 | /// |
||
9586 | /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. |
||
9587 | /// |
||
9588 | /// \param __A |
||
9589 | /// A vector of [16 x i32]. The least significant 32 bits are moved to the |
||
9590 | /// destination. |
||
9591 | /// \returns A 32-bit signed integer containing the moved value. |
||
9592 | static __inline__ int __DEFAULT_FN_ATTRS512 |
||
9593 | _mm512_cvtsi512_si32(__m512i __A) { |
||
9594 | __v16si __b = (__v16si)__A; |
||
9595 | return __b[0]; |
||
9596 | } |
||
9597 | |||
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in the lower half of \a vindex scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale) \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex),   \
                      (base_addr),                      \
                      (scale))
9615 | |||
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored
/// in the lower half of \a vindex scaled by \a scale, and writes them into
/// dst under writemask \a mask (an element is copied from \a src when its
/// mask bit is not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_pd((src),                                       \
                           (mask),                                      \
                           _mm512_castsi512_si256(vindex),              \
                           (base_addr),                                 \
                           (scale))
9640 | |||
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale, and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex),   \
                         (base_addr),                      \
                         (scale))
9658 | |||
/// Loads 8 64-bit integer elements from memory starting at location
/// \a base_addr at packed 32-bit integer indices stored in the lower half of
/// \a vindex scaled by \a scale, and stores them in dst under writemask
/// \a mask (an element is copied from \a src when its mask bit is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale) \
  _mm512_mask_i32gather_epi64((src),                                       \
                              (mask),                                      \
                              _mm512_castsi512_si256(vindex),              \
                              (base_addr),                                 \
                              (scale))
9682 | |||
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_pd((base_addr),                          \
                       _mm512_castsi512_si256(vindex),       \
                       (v1),                                 \
                       (scale))
9699 | |||
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in the lower half of \a vindex scaled by \a scale.
/// Only those elements whose corresponding mask bit is set in writemask
/// \a mask are written to memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_pd((base_addr),                                \
                            (mask),                                     \
                            _mm512_castsi512_si256(vindex),             \
                            (v1),                                       \
                            (scale))
9721 | |||
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale) \
  _mm512_i32scatter_epi64((base_addr),                          \
                          _mm512_castsi512_si256(vindex),       \
                          (v1),                                 \
                          (scale))
9739 | |||
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in the lower half of \a vindex scaled by \a scale, using
/// writemask \a mask (elements whose corresponding mask bit is not set are
/// not written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \
  _mm512_mask_i32scatter_epi64((base_addr),                                \
                               (mask),                                     \
                               _mm512_castsi512_si256(vindex),             \
                               (v1),                                       \
                               (scale))
9760 | |||
9761 | #undef __DEFAULT_FN_ATTRS512 |
||
9762 | #undef __DEFAULT_FN_ATTRS128 |
||
9763 | #undef __DEFAULT_FN_ATTRS |
||
9764 | |||
9765 | #endif /* __AVX512FINTRIN_H */ |