Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | |||
10 | #ifndef __IMMINTRIN_H |
||
11 | #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." |
||
12 | #endif |
||
13 | |||
14 | #ifndef __AVX512DQINTRIN_H |
||
15 | #define __AVX512DQINTRIN_H |
||
16 | |||
/* Default attributes for the functions in this file: always inlined, no
   debug info, and targeting "avx512dq".  The 512 variant additionally
   carries __min_vector_width__(512). */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
20 | |||
21 | static __inline __mmask8 __DEFAULT_FN_ATTRS |
||
22 | _knot_mask8(__mmask8 __M) |
||
23 | { |
||
24 | return __builtin_ia32_knotqi(__M); |
||
25 | } |
||
26 | |||
27 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
28 | _kand_mask8(__mmask8 __A, __mmask8 __B) |
||
29 | { |
||
30 | return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); |
||
31 | } |
||
32 | |||
33 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
34 | _kandn_mask8(__mmask8 __A, __mmask8 __B) |
||
35 | { |
||
36 | return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); |
||
37 | } |
||
38 | |||
39 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
40 | _kor_mask8(__mmask8 __A, __mmask8 __B) |
||
41 | { |
||
42 | return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); |
||
43 | } |
||
44 | |||
45 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
46 | _kxnor_mask8(__mmask8 __A, __mmask8 __B) |
||
47 | { |
||
48 | return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); |
||
49 | } |
||
50 | |||
51 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
52 | _kxor_mask8(__mmask8 __A, __mmask8 __B) |
||
53 | { |
||
54 | return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); |
||
55 | } |
||
56 | |||
57 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
58 | _kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) |
||
59 | { |
||
60 | return (unsigned char)__builtin_ia32_kortestcqi(__A, __B); |
||
61 | } |
||
62 | |||
63 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
64 | _kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) |
||
65 | { |
||
66 | return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); |
||
67 | } |
||
68 | |||
69 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
70 | _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { |
||
71 | *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B); |
||
72 | return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); |
||
73 | } |
||
74 | |||
75 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
76 | _ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) |
||
77 | { |
||
78 | return (unsigned char)__builtin_ia32_ktestcqi(__A, __B); |
||
79 | } |
||
80 | |||
81 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
82 | _ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) |
||
83 | { |
||
84 | return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); |
||
85 | } |
||
86 | |||
87 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
88 | _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { |
||
89 | *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B); |
||
90 | return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); |
||
91 | } |
||
92 | |||
93 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
94 | _ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) |
||
95 | { |
||
96 | return (unsigned char)__builtin_ia32_ktestchi(__A, __B); |
||
97 | } |
||
98 | |||
99 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
100 | _ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) |
||
101 | { |
||
102 | return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); |
||
103 | } |
||
104 | |||
105 | static __inline__ unsigned char __DEFAULT_FN_ATTRS |
||
106 | _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { |
||
107 | *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B); |
||
108 | return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); |
||
109 | } |
||
110 | |||
111 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
112 | _kadd_mask8(__mmask8 __A, __mmask8 __B) |
||
113 | { |
||
114 | return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); |
||
115 | } |
||
116 | |||
117 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS |
||
118 | _kadd_mask16(__mmask16 __A, __mmask16 __B) |
||
119 | { |
||
120 | return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); |
||
121 | } |
||
122 | |||
/* Expands to __builtin_ia32_kshiftliqi on A (cast to __mmask8) and I (cast
   to unsigned int); the result is cast to __mmask8. */
#define _kshiftli_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftliqi( \
      (__mmask8)(A), (unsigned int)(I)))
125 | |||
/* Expands to __builtin_ia32_kshiftriqi on A (cast to __mmask8) and I (cast
   to unsigned int); the result is cast to __mmask8. */
#define _kshiftri_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftriqi( \
      (__mmask8)(A), (unsigned int)(I)))
128 | |||
129 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
130 | _cvtmask8_u32(__mmask8 __A) { |
||
131 | return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A); |
||
132 | } |
||
133 | |||
134 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
135 | _cvtu32_mask8(unsigned int __A) { |
||
136 | return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A); |
||
137 | } |
||
138 | |||
139 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS |
||
140 | _load_mask8(__mmask8 *__A) { |
||
141 | return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A); |
||
142 | } |
||
143 | |||
144 | static __inline__ void __DEFAULT_FN_ATTRS |
||
145 | _store_mask8(__mmask8 *__A, __mmask8 __B) { |
||
146 | *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); |
||
147 | } |
||
148 | |||
149 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
150 | _mm512_mullo_epi64 (__m512i __A, __m512i __B) { |
||
151 | return (__m512i) ((__v8du) __A * (__v8du) __B); |
||
152 | } |
||
153 | |||
154 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
155 | _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { |
||
156 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
157 | (__v8di)_mm512_mullo_epi64(__A, __B), |
||
158 | (__v8di)__W); |
||
159 | } |
||
160 | |||
161 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
162 | _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { |
||
163 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, |
||
164 | (__v8di)_mm512_mullo_epi64(__A, __B), |
||
165 | (__v8di)_mm512_setzero_si512()); |
||
166 | } |
||
167 | |||
168 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
169 | _mm512_xor_pd(__m512d __A, __m512d __B) { |
||
170 | return (__m512d)((__v8du)__A ^ (__v8du)__B); |
||
171 | } |
||
172 | |||
173 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
174 | _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
175 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
176 | (__v8df)_mm512_xor_pd(__A, __B), |
||
177 | (__v8df)__W); |
||
178 | } |
||
179 | |||
180 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
181 | _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
182 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
183 | (__v8df)_mm512_xor_pd(__A, __B), |
||
184 | (__v8df)_mm512_setzero_pd()); |
||
185 | } |
||
186 | |||
187 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
188 | _mm512_xor_ps (__m512 __A, __m512 __B) { |
||
189 | return (__m512)((__v16su)__A ^ (__v16su)__B); |
||
190 | } |
||
191 | |||
192 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
193 | _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
194 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
195 | (__v16sf)_mm512_xor_ps(__A, __B), |
||
196 | (__v16sf)__W); |
||
197 | } |
||
198 | |||
199 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
200 | _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
201 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
202 | (__v16sf)_mm512_xor_ps(__A, __B), |
||
203 | (__v16sf)_mm512_setzero_ps()); |
||
204 | } |
||
205 | |||
206 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
207 | _mm512_or_pd(__m512d __A, __m512d __B) { |
||
208 | return (__m512d)((__v8du)__A | (__v8du)__B); |
||
209 | } |
||
210 | |||
211 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
212 | _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
213 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
214 | (__v8df)_mm512_or_pd(__A, __B), |
||
215 | (__v8df)__W); |
||
216 | } |
||
217 | |||
218 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
219 | _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
220 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
221 | (__v8df)_mm512_or_pd(__A, __B), |
||
222 | (__v8df)_mm512_setzero_pd()); |
||
223 | } |
||
224 | |||
225 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
226 | _mm512_or_ps(__m512 __A, __m512 __B) { |
||
227 | return (__m512)((__v16su)__A | (__v16su)__B); |
||
228 | } |
||
229 | |||
230 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
231 | _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
232 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
233 | (__v16sf)_mm512_or_ps(__A, __B), |
||
234 | (__v16sf)__W); |
||
235 | } |
||
236 | |||
237 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
238 | _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
239 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
240 | (__v16sf)_mm512_or_ps(__A, __B), |
||
241 | (__v16sf)_mm512_setzero_ps()); |
||
242 | } |
||
243 | |||
244 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
245 | _mm512_and_pd(__m512d __A, __m512d __B) { |
||
246 | return (__m512d)((__v8du)__A & (__v8du)__B); |
||
247 | } |
||
248 | |||
249 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
250 | _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
251 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
252 | (__v8df)_mm512_and_pd(__A, __B), |
||
253 | (__v8df)__W); |
||
254 | } |
||
255 | |||
256 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
257 | _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
258 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
259 | (__v8df)_mm512_and_pd(__A, __B), |
||
260 | (__v8df)_mm512_setzero_pd()); |
||
261 | } |
||
262 | |||
263 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
264 | _mm512_and_ps(__m512 __A, __m512 __B) { |
||
265 | return (__m512)((__v16su)__A & (__v16su)__B); |
||
266 | } |
||
267 | |||
268 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
269 | _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
270 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
271 | (__v16sf)_mm512_and_ps(__A, __B), |
||
272 | (__v16sf)__W); |
||
273 | } |
||
274 | |||
275 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
276 | _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
277 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
278 | (__v16sf)_mm512_and_ps(__A, __B), |
||
279 | (__v16sf)_mm512_setzero_ps()); |
||
280 | } |
||
281 | |||
282 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
283 | _mm512_andnot_pd(__m512d __A, __m512d __B) { |
||
284 | return (__m512d)(~(__v8du)__A & (__v8du)__B); |
||
285 | } |
||
286 | |||
287 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
288 | _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { |
||
289 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
290 | (__v8df)_mm512_andnot_pd(__A, __B), |
||
291 | (__v8df)__W); |
||
292 | } |
||
293 | |||
294 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
295 | _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { |
||
296 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
297 | (__v8df)_mm512_andnot_pd(__A, __B), |
||
298 | (__v8df)_mm512_setzero_pd()); |
||
299 | } |
||
300 | |||
301 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
302 | _mm512_andnot_ps(__m512 __A, __m512 __B) { |
||
303 | return (__m512)(~(__v16su)__A & (__v16su)__B); |
||
304 | } |
||
305 | |||
306 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
307 | _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { |
||
308 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
309 | (__v16sf)_mm512_andnot_ps(__A, __B), |
||
310 | (__v16sf)__W); |
||
311 | } |
||
312 | |||
313 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
314 | _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { |
||
315 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, |
||
316 | (__v16sf)_mm512_andnot_ps(__A, __B), |
||
317 | (__v16sf)_mm512_setzero_ps()); |
||
318 | } |
||
319 | |||
320 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
321 | _mm512_cvtpd_epi64 (__m512d __A) { |
||
322 | return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, |
||
323 | (__v8di) _mm512_setzero_si512(), |
||
324 | (__mmask8) -1, |
||
325 | _MM_FROUND_CUR_DIRECTION); |
||
326 | } |
||
327 | |||
328 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
329 | _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { |
||
330 | return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, |
||
331 | (__v8di) __W, |
||
332 | (__mmask8) __U, |
||
333 | _MM_FROUND_CUR_DIRECTION); |
||
334 | } |
||
335 | |||
336 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
337 | _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { |
||
338 | return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, |
||
339 | (__v8di) _mm512_setzero_si512(), |
||
340 | (__mmask8) __U, |
||
341 | _MM_FROUND_CUR_DIRECTION); |
||
342 | } |
||
343 | |||
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero vector as the
   second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
348 | |||
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
353 | |||
/* Expands to __builtin_ia32_cvtpd2qq512_mask on A with a zero vector as the
   second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
358 | |||
359 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
360 | _mm512_cvtpd_epu64 (__m512d __A) { |
||
361 | return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, |
||
362 | (__v8di) _mm512_setzero_si512(), |
||
363 | (__mmask8) -1, |
||
364 | _MM_FROUND_CUR_DIRECTION); |
||
365 | } |
||
366 | |||
367 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
368 | _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { |
||
369 | return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, |
||
370 | (__v8di) __W, |
||
371 | (__mmask8) __U, |
||
372 | _MM_FROUND_CUR_DIRECTION); |
||
373 | } |
||
374 | |||
375 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
376 | _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { |
||
377 | return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, |
||
378 | (__v8di) _mm512_setzero_si512(), |
||
379 | (__mmask8) __U, |
||
380 | _MM_FROUND_CUR_DIRECTION); |
||
381 | } |
||
382 | |||
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
387 | |||
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
392 | |||
/* Expands to __builtin_ia32_cvtpd2uqq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
397 | |||
398 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
399 | _mm512_cvtps_epi64 (__m256 __A) { |
||
400 | return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, |
||
401 | (__v8di) _mm512_setzero_si512(), |
||
402 | (__mmask8) -1, |
||
403 | _MM_FROUND_CUR_DIRECTION); |
||
404 | } |
||
405 | |||
406 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
407 | _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { |
||
408 | return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, |
||
409 | (__v8di) __W, |
||
410 | (__mmask8) __U, |
||
411 | _MM_FROUND_CUR_DIRECTION); |
||
412 | } |
||
413 | |||
414 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
415 | _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { |
||
416 | return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, |
||
417 | (__v8di) _mm512_setzero_si512(), |
||
418 | (__mmask8) __U, |
||
419 | _MM_FROUND_CUR_DIRECTION); |
||
420 | } |
||
421 | |||
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero vector as the
   second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
426 | |||
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
431 | |||
/* Expands to __builtin_ia32_cvtps2qq512_mask on A with a zero vector as the
   second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
436 | |||
437 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
438 | _mm512_cvtps_epu64 (__m256 __A) { |
||
439 | return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, |
||
440 | (__v8di) _mm512_setzero_si512(), |
||
441 | (__mmask8) -1, |
||
442 | _MM_FROUND_CUR_DIRECTION); |
||
443 | } |
||
444 | |||
445 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
446 | _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { |
||
447 | return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, |
||
448 | (__v8di) __W, |
||
449 | (__mmask8) __U, |
||
450 | _MM_FROUND_CUR_DIRECTION); |
||
451 | } |
||
452 | |||
453 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
454 | _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { |
||
455 | return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, |
||
456 | (__v8di) _mm512_setzero_si512(), |
||
457 | (__mmask8) __U, |
||
458 | _MM_FROUND_CUR_DIRECTION); |
||
459 | } |
||
460 | |||
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
465 | |||
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
470 | |||
/* Expands to __builtin_ia32_cvtps2uqq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
475 | |||
476 | |||
477 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
478 | _mm512_cvtepi64_pd (__m512i __A) { |
||
479 | return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); |
||
480 | } |
||
481 | |||
482 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
483 | _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { |
||
484 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
485 | (__v8df)_mm512_cvtepi64_pd(__A), |
||
486 | (__v8df)__W); |
||
487 | } |
||
488 | |||
489 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
490 | _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { |
||
491 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
492 | (__v8df)_mm512_cvtepi64_pd(__A), |
||
493 | (__v8df)_mm512_setzero_pd()); |
||
494 | } |
||
495 | |||
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero vector as the
   second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
500 | |||
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))
505 | |||
/* Expands to __builtin_ia32_cvtqq2pd512_mask on A with a zero vector as the
   second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
510 | |||
511 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
512 | _mm512_cvtepi64_ps (__m512i __A) { |
||
513 | return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, |
||
514 | (__v8sf) _mm256_setzero_ps(), |
||
515 | (__mmask8) -1, |
||
516 | _MM_FROUND_CUR_DIRECTION); |
||
517 | } |
||
518 | |||
519 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
520 | _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { |
||
521 | return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, |
||
522 | (__v8sf) __W, |
||
523 | (__mmask8) __U, |
||
524 | _MM_FROUND_CUR_DIRECTION); |
||
525 | } |
||
526 | |||
527 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
528 | _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { |
||
529 | return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, |
||
530 | (__v8sf) _mm256_setzero_ps(), |
||
531 | (__mmask8) __U, |
||
532 | _MM_FROUND_CUR_DIRECTION); |
||
533 | } |
||
534 | |||
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero 256-bit
   vector as the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)))
539 | |||
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)))
544 | |||
/* Expands to __builtin_ia32_cvtqq2ps512_mask on A with a zero 256-bit
   vector as the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask( \
      (__v8di)(__m512i)(A), (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)))
549 | |||
550 | |||
551 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
552 | _mm512_cvttpd_epi64 (__m512d __A) { |
||
553 | return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, |
||
554 | (__v8di) _mm512_setzero_si512(), |
||
555 | (__mmask8) -1, |
||
556 | _MM_FROUND_CUR_DIRECTION); |
||
557 | } |
||
558 | |||
559 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
560 | _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { |
||
561 | return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, |
||
562 | (__v8di) __W, |
||
563 | (__mmask8) __U, |
||
564 | _MM_FROUND_CUR_DIRECTION); |
||
565 | } |
||
566 | |||
567 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
568 | _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { |
||
569 | return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, |
||
570 | (__v8di) _mm512_setzero_si512(), |
||
571 | (__mmask8) __U, |
||
572 | _MM_FROUND_CUR_DIRECTION); |
||
573 | } |
||
574 | |||
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
579 | |||
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
584 | |||
/* Expands to __builtin_ia32_cvttpd2qq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
589 | |||
590 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
591 | _mm512_cvttpd_epu64 (__m512d __A) { |
||
592 | return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, |
||
593 | (__v8di) _mm512_setzero_si512(), |
||
594 | (__mmask8) -1, |
||
595 | _MM_FROUND_CUR_DIRECTION); |
||
596 | } |
||
597 | |||
598 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
599 | _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { |
||
600 | return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, |
||
601 | (__v8di) __W, |
||
602 | (__mmask8) __U, |
||
603 | _MM_FROUND_CUR_DIRECTION); |
||
604 | } |
||
605 | |||
606 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
607 | _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { |
||
608 | return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, |
||
609 | (__v8di) _mm512_setzero_si512(), |
||
610 | (__mmask8) __U, |
||
611 | _MM_FROUND_CUR_DIRECTION); |
||
612 | } |
||
613 | |||
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
618 | |||
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
623 | |||
/* Expands to __builtin_ia32_cvttpd2uqq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask( \
      (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
628 | |||
629 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
630 | _mm512_cvttps_epi64 (__m256 __A) { |
||
631 | return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, |
||
632 | (__v8di) _mm512_setzero_si512(), |
||
633 | (__mmask8) -1, |
||
634 | _MM_FROUND_CUR_DIRECTION); |
||
635 | } |
||
636 | |||
637 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
638 | _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { |
||
639 | return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, |
||
640 | (__v8di) __W, |
||
641 | (__mmask8) __U, |
||
642 | _MM_FROUND_CUR_DIRECTION); |
||
643 | } |
||
644 | |||
645 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
646 | _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { |
||
647 | return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, |
||
648 | (__v8di) _mm512_setzero_si512(), |
||
649 | (__mmask8) __U, |
||
650 | _MM_FROUND_CUR_DIRECTION); |
||
651 | } |
||
652 | |||
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
657 | |||
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
662 | |||
/* Expands to __builtin_ia32_cvttps2qq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
667 | |||
668 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
669 | _mm512_cvttps_epu64 (__m256 __A) { |
||
670 | return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, |
||
671 | (__v8di) _mm512_setzero_si512(), |
||
672 | (__mmask8) -1, |
||
673 | _MM_FROUND_CUR_DIRECTION); |
||
674 | } |
||
675 | |||
676 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
677 | _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { |
||
678 | return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, |
||
679 | (__v8di) __W, |
||
680 | (__mmask8) __U, |
||
681 | _MM_FROUND_CUR_DIRECTION); |
||
682 | } |
||
683 | |||
684 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
685 | _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { |
||
686 | return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, |
||
687 | (__v8di) _mm512_setzero_si512(), |
||
688 | (__mmask8) __U, |
||
689 | _MM_FROUND_CUR_DIRECTION); |
||
690 | } |
||
691 | |||
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvtt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, (int)(R)))
696 | |||
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
      (__mmask8)(U), (int)(R)))
701 | |||
/* Expands to __builtin_ia32_cvttps2uqq512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), (int)(R)))
706 | |||
707 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
708 | _mm512_cvtepu64_pd (__m512i __A) { |
||
709 | return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); |
||
710 | } |
||
711 | |||
712 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
713 | _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { |
||
714 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
715 | (__v8df)_mm512_cvtepu64_pd(__A), |
||
716 | (__v8df)__W); |
||
717 | } |
||
718 | |||
719 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
720 | _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { |
||
721 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, |
||
722 | (__v8df)_mm512_cvtepu64_pd(__A), |
||
723 | (__v8df)_mm512_setzero_pd()); |
||
724 | } |
||
725 | |||
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with a zero vector as
   the second operand, an all-ones mask and rounding argument R. */
#define _mm512_cvt_roundepu64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, (int)(R)))
730 | |||
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with W as the second
   operand, U as the mask and rounding argument R. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)(__m512d)(W), \
      (__mmask8)(U), (int)(R)))
735 | |||
736 | |||
/* Expands to __builtin_ia32_cvtuqq2pd512_mask on A with a zero vector as
   the second operand, U as the mask and rounding argument R. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), (int)(R)))
741 | |||
742 | |||
743 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
744 | _mm512_cvtepu64_ps (__m512i __A) { |
||
745 | return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, |
||
746 | (__v8sf) _mm256_setzero_ps(), |
||
747 | (__mmask8) -1, |
||
748 | _MM_FROUND_CUR_DIRECTION); |
||
749 | } |
||
750 | |||
751 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
752 | _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { |
||
753 | return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, |
||
754 | (__v8sf) __W, |
||
755 | (__mmask8) __U, |
||
756 | _MM_FROUND_CUR_DIRECTION); |
||
757 | } |
||
758 | |||
759 | static __inline__ __m256 __DEFAULT_FN_ATTRS512 |
||
760 | _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { |
||
761 | return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, |
||
762 | (__v8sf) _mm256_setzero_ps(), |
||
763 | (__mmask8) __U, |
||
764 | _MM_FROUND_CUR_DIRECTION); |
||
765 | } |
||
766 | |||
/* Explicit-rounding wrappers around __builtin_ia32_cvtuqq2ps512_mask.
 * Builtin arguments, in order: A viewed as __v8di, a __v8sf second operand
 * (zero vector, or W for the mask form), an 8-bit mask (-1 for the unmasked
 * form, otherwise U), and R cast to int. The result is cast to __m256. */
#define _mm512_cvt_roundepu64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), \
      (int)(R)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)))
||
781 | |||
/* Wrappers around __builtin_ia32_rangepd512_mask with the last argument fixed
 * to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A and B viewed as
 * __v8df, C cast to int, a __v8df fourth operand (zero vector, or W for the
 * mask form), an 8-bit mask (-1 for the unmasked form, otherwise U). */
#define _mm512_range_pd(A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
||
801 | |||
/* Wrappers around __builtin_ia32_rangepd512_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A and B viewed as __v8df, C cast to int, a __v8df fourth operand
 * (zero vector, or W for the mask form), an 8-bit mask (-1 or U). */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
||
819 | |||
/* Wrappers around __builtin_ia32_rangeps512_mask with the last argument fixed
 * to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A and B viewed as
 * __v16sf, C cast to int, a __v16sf fourth operand (zero vector, or W for the
 * mask form), a 16-bit mask (-1 for the unmasked form, otherwise U). */
#define _mm512_range_ps(A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
||
839 | |||
/* Wrappers around __builtin_ia32_rangeps512_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A and B viewed as __v16sf, C cast to int, a __v16sf fourth operand
 * (zero vector, or W for the mask form), a 16-bit mask (-1 or U). */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
||
857 | |||
/* 128-bit single-precision range macros.
 * The *_round_* forms call __builtin_ia32_rangess128_round_mask with, in
 * order: A and B viewed as __v4sf, a __v4sf third operand (zero vector, or W
 * for the mask form), an 8-bit mask (-1 or U), C cast to int, R cast to int.
 * The forms without "round" expand to the matching *_round_* macro with
 * R = _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)))

#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
||
884 | |||
/* 128-bit double-precision range macros.
 * The *_round_* forms call __builtin_ia32_rangesd128_round_mask with, in
 * order: A and B viewed as __v2df, a __v2df third operand (zero vector, or W
 * for the mask form), an 8-bit mask (-1 or U), C cast to int, R cast to int.
 * The forms without "round" expand to the matching *_round_* macro with
 * R = _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)))

#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
||
911 | |||
/* Wrappers around __builtin_ia32_reducepd512_mask with the last argument
 * fixed to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A viewed as
 * __v8df, B cast to int, a __v8df third operand (zero vector, or W for the
 * mask form), an 8-bit mask (-1 for the unmasked form, otherwise U). */
#define _mm512_reduce_pd(A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
||
929 | |||
/* Wrappers around __builtin_ia32_reduceps512_mask with the last argument
 * fixed to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A viewed as
 * __v16sf, B cast to int, a __v16sf third operand (zero vector, or W for the
 * mask form), a 16-bit mask (-1 for the unmasked form, otherwise U). */
#define _mm512_reduce_ps(A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
||
947 | |||
/* Wrappers around __builtin_ia32_reducepd512_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A viewed as __v8df, B cast to int, a __v8df third operand (zero
 * vector, or W for the mask form), an 8-bit mask (-1 or U). */
#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)))
||
962 | |||
/* Wrappers around __builtin_ia32_reduceps512_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A viewed as __v16sf, B cast to int, a __v16sf third operand (zero
 * vector, or W for the mask form), a 16-bit mask (-1 or U). */
#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)))
||
977 | |||
/* Wrappers around __builtin_ia32_reducess_mask with the last argument fixed
 * to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A and B viewed as
 * __v4sf, a __v4sf third operand (zero vector, or W for the mask form), an
 * 8-bit mask (-1 for the unmasked form, otherwise U), C cast to int. */
#define _mm_reduce_ss(A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
||
996 | |||
/* Wrappers around __builtin_ia32_reducess_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A and B viewed as __v4sf, a __v4sf third operand (zero vector, or W
 * for the mask form), an 8-bit mask (-1 or U), C cast to int. */
#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
||
1014 | |||
/* Wrappers around __builtin_ia32_reducesd_mask with the last argument fixed
 * to _MM_FROUND_CUR_DIRECTION. Builtin arguments, in order: A and B viewed as
 * __v2df, a __v2df third operand (zero vector, or W for the mask form), an
 * 8-bit mask (-1 for the unmasked form, otherwise U), C cast to int. */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION))
||
1034 | |||
/* Wrappers around __builtin_ia32_reducesd_mask that forward a
 * caller-supplied final argument R (cast to int). Remaining arguments, in
 * order: A and B viewed as __v2df, a __v2df third operand (zero vector, or W
 * for the mask form), an 8-bit mask (-1 or U), C cast to int. */
#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)))
||
1052 | |||
1053 | static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 |
||
1054 | _mm512_movepi32_mask (__m512i __A) |
||
1055 | { |
||
1056 | return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); |
||
1057 | } |
||
1058 | |||
1059 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1060 | _mm512_movm_epi32 (__mmask16 __A) |
||
1061 | { |
||
1062 | return (__m512i) __builtin_ia32_cvtmask2d512 (__A); |
||
1063 | } |
||
1064 | |||
1065 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1066 | _mm512_movm_epi64 (__mmask8 __A) |
||
1067 | { |
||
1068 | return (__m512i) __builtin_ia32_cvtmask2q512 (__A); |
||
1069 | } |
||
1070 | |||
1071 | static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 |
||
1072 | _mm512_movepi64_mask (__m512i __A) |
||
1073 | { |
||
1074 | return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); |
||
1075 | } |
||
1076 | |||
1077 | |||
1078 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1079 | _mm512_broadcast_f32x2 (__m128 __A) |
||
1080 | { |
||
1081 | return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, |
||
1082 | 0, 1, 0, 1, 0, 1, 0, 1, |
||
1083 | 0, 1, 0, 1, 0, 1, 0, 1); |
||
1084 | } |
||
1085 | |||
1086 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1087 | _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) |
||
1088 | { |
||
1089 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
1090 | (__v16sf)_mm512_broadcast_f32x2(__A), |
||
1091 | (__v16sf)__O); |
||
1092 | } |
||
1093 | |||
1094 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1095 | _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) |
||
1096 | { |
||
1097 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
1098 | (__v16sf)_mm512_broadcast_f32x2(__A), |
||
1099 | (__v16sf)_mm512_setzero_ps()); |
||
1100 | } |
||
1101 | |||
1102 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1103 | _mm512_broadcast_f32x8(__m256 __A) |
||
1104 | { |
||
1105 | return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, |
||
1106 | 0, 1, 2, 3, 4, 5, 6, 7, |
||
1107 | 0, 1, 2, 3, 4, 5, 6, 7); |
||
1108 | } |
||
1109 | |||
1110 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1111 | _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) |
||
1112 | { |
||
1113 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
1114 | (__v16sf)_mm512_broadcast_f32x8(__A), |
||
1115 | (__v16sf)__O); |
||
1116 | } |
||
1117 | |||
1118 | static __inline__ __m512 __DEFAULT_FN_ATTRS512 |
||
1119 | _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) |
||
1120 | { |
||
1121 | return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, |
||
1122 | (__v16sf)_mm512_broadcast_f32x8(__A), |
||
1123 | (__v16sf)_mm512_setzero_ps()); |
||
1124 | } |
||
1125 | |||
1126 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1127 | _mm512_broadcast_f64x2(__m128d __A) |
||
1128 | { |
||
1129 | return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, |
||
1130 | 0, 1, 0, 1, 0, 1, 0, 1); |
||
1131 | } |
||
1132 | |||
1133 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1134 | _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) |
||
1135 | { |
||
1136 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, |
||
1137 | (__v8df)_mm512_broadcast_f64x2(__A), |
||
1138 | (__v8df)__O); |
||
1139 | } |
||
1140 | |||
1141 | static __inline__ __m512d __DEFAULT_FN_ATTRS512 |
||
1142 | _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) |
||
1143 | { |
||
1144 | return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, |
||
1145 | (__v8df)_mm512_broadcast_f64x2(__A), |
||
1146 | (__v8df)_mm512_setzero_pd()); |
||
1147 | } |
||
1148 | |||
1149 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1150 | _mm512_broadcast_i32x2 (__m128i __A) |
||
1151 | { |
||
1152 | return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, |
||
1153 | 0, 1, 0, 1, 0, 1, 0, 1, |
||
1154 | 0, 1, 0, 1, 0, 1, 0, 1); |
||
1155 | } |
||
1156 | |||
1157 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1158 | _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) |
||
1159 | { |
||
1160 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1161 | (__v16si)_mm512_broadcast_i32x2(__A), |
||
1162 | (__v16si)__O); |
||
1163 | } |
||
1164 | |||
1165 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1166 | _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) |
||
1167 | { |
||
1168 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1169 | (__v16si)_mm512_broadcast_i32x2(__A), |
||
1170 | (__v16si)_mm512_setzero_si512()); |
||
1171 | } |
||
1172 | |||
1173 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1174 | _mm512_broadcast_i32x8(__m256i __A) |
||
1175 | { |
||
1176 | return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, |
||
1177 | 0, 1, 2, 3, 4, 5, 6, 7, |
||
1178 | 0, 1, 2, 3, 4, 5, 6, 7); |
||
1179 | } |
||
1180 | |||
1181 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1182 | _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) |
||
1183 | { |
||
1184 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1185 | (__v16si)_mm512_broadcast_i32x8(__A), |
||
1186 | (__v16si)__O); |
||
1187 | } |
||
1188 | |||
1189 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1190 | _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) |
||
1191 | { |
||
1192 | return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, |
||
1193 | (__v16si)_mm512_broadcast_i32x8(__A), |
||
1194 | (__v16si)_mm512_setzero_si512()); |
||
1195 | } |
||
1196 | |||
1197 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1198 | _mm512_broadcast_i64x2(__m128i __A) |
||
1199 | { |
||
1200 | return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, |
||
1201 | 0, 1, 0, 1, 0, 1, 0, 1); |
||
1202 | } |
||
1203 | |||
1204 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1205 | _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) |
||
1206 | { |
||
1207 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1208 | (__v8di)_mm512_broadcast_i64x2(__A), |
||
1209 | (__v8di)__O); |
||
1210 | } |
||
1211 | |||
1212 | static __inline__ __m512i __DEFAULT_FN_ATTRS512 |
||
1213 | _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) |
||
1214 | { |
||
1215 | return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, |
||
1216 | (__v8di)_mm512_broadcast_i64x2(__A), |
||
1217 | (__v8di)_mm512_setzero_si512()); |
||
1218 | } |
||
1219 | |||
/* Wrappers around __builtin_ia32_extractf32x8_mask. Builtin arguments, in
 * order: A viewed as __v16sf, imm cast to int, a __v8sf third operand
 * (_mm256_undefined_ps() for the unmasked form, W for the mask form, zero
 * vector for the maskz form), and an 8-bit mask (-1 or U). */
#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_undefined_ps(), \
      (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
||
1234 | |||
/* Wrappers around __builtin_ia32_extractf64x2_512_mask. Builtin arguments, in
 * order: A viewed as __v8df, imm cast to int, a __v2df third operand
 * (_mm_undefined_pd() for the unmasked form, W for the mask form, zero
 * vector for the maskz form), and an 8-bit mask (-1 or U). */
#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)))
||
1252 | |||
/* Wrappers around __builtin_ia32_extracti32x8_mask. Builtin arguments, in
 * order: A viewed as __v16si, imm cast to int, a __v8si third operand
 * (_mm256_undefined_si256() for the unmasked form, W for the mask form, zero
 * vector for the maskz form), and an 8-bit mask (-1 or U). */
#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)))
||
1267 | |||
/* Wrappers around __builtin_ia32_extracti64x2_512_mask. Builtin arguments, in
 * order: A viewed as __v8di, imm cast to int, a __v2di third operand
 * (_mm_undefined_si128() for the unmasked form, W for the mask form, zero
 * vector for the maskz form), and an 8-bit mask (-1 or U). */
#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_undefined_si128(), \
      (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_si128(), \
      (__mmask8)(U)))
||
1285 | |||
/* _mm512_insertf32x8 forwards A (as __v16sf), B (as __v8sf) and imm (as int)
 * to __builtin_ia32_insertf32x8. The mask/maskz forms feed that result to
 * __builtin_ia32_selectps_512 together with a 16-bit selector U and either W
 * or an all-zero vector as the alternative. */
#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm)))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
      (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()))
||
1299 | |||
/* _mm512_insertf64x2 forwards A (as __v8df), B (as __v2df) and imm (as int)
 * to __builtin_ia32_insertf64x2_512. The mask/maskz forms feed that result to
 * __builtin_ia32_selectpd_512 together with an 8-bit selector U and either W
 * or an all-zero vector as the alternative. */
#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm)))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
      (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()))
||
1313 | |||
/* _mm512_inserti32x8 forwards A (as __v16si), B (as __v8si) and imm (as int)
 * to __builtin_ia32_inserti32x8. The mask/maskz forms feed that result to
 * __builtin_ia32_selectd_512 together with a 16-bit selector U and either W
 * or an all-zero vector as the alternative. */
#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm)))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
      (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()))
||
1327 | |||
/* _mm512_inserti64x2 forwards A (as __v8di), B (as __v2di) and imm (as int)
 * to __builtin_ia32_inserti64x2_512. The mask/maskz forms feed that result to
 * __builtin_ia32_selectq_512 together with an 8-bit selector U and either W
 * or an all-zero vector as the alternative. */
#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm)))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
      (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()))
||
1341 | |||
/* Wrappers around __builtin_ia32_fpclassps512_mask. Builtin arguments, in
 * order: A viewed as __v16sf, imm cast to int, and a 16-bit mask (U for the
 * mask form, -1 for the unmasked form). The result is a __mmask16. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)(U)))

#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)-1))
||
1349 | |||
/* Wrappers around __builtin_ia32_fpclasspd512_mask. Builtin arguments, in
 * order: A viewed as __v8df, imm cast to int, and an 8-bit mask (U for the
 * mask form, -1 for the unmasked form). The result is a __mmask8. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)(U)))

#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)-1))
||
1357 | |||
/* Wrappers around __builtin_ia32_fpclasssd_mask. Builtin arguments, in
 * order: A viewed as __v2df, imm cast to int, and an 8-bit mask (-1 for the
 * unmasked form, U for the mask form). The result is a __mmask8. */
#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1))

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)))
||
1365 | |||
/* Wrappers around __builtin_ia32_fpclassss_mask. Builtin arguments, in
 * order: A viewed as __v4sf, imm cast to int, and an 8-bit mask (-1 for the
 * unmasked form, U for the mask form). The result is a __mmask8. */
#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1))

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)))
||
1373 | |||
1374 | #undef __DEFAULT_FN_ATTRS512 |
||
1375 | #undef __DEFAULT_FN_ATTRS |
||
1376 | |||
1377 | #endif |