/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
  return (__m256i) ((__v4du) __A * (__v4du) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
          (__v4di)_mm256_mullo_epi64(__A, __B),
          (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
          (__v4di)_mm256_mullo_epi64(__A, __B),
          (__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
  return (__m128i) ((__v2du) __A * (__v2du) __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
          (__v2di)_mm_mullo_epi64(__A, __B),
          (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
          (__v2di)_mm_mullo_epi64(__A, __B),
          (__v2di)_mm_setzero_si128());
}
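
/* Usage note: the _mm*_mask_* forms merge-mask (lanes whose mask bit is 0 keep
 * the value from __W), while the _mm*_maskz_* forms zero those lanes. An
 * illustrative sketch, assuming <immintrin.h> and -mavx512vl -mavx512dq:
 *
 *   __m256i a = _mm256_set1_epi64x(3), b = _mm256_set1_epi64x(5);
 *   __m256i w = _mm256_set1_epi64x(-1);
 *   __m256i merged = _mm256_mask_mullo_epi64(w, 0x5, a, b);  // {15, -1, 15, -1}
 *   __m256i zeroed = _mm256_maskz_mullo_epi64(0x5, a, b);    // {15,  0, 15,  0}
 */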

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_andnot_pd(__A, __B),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_andnot_pd(__A, __B),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_andnot_pd(__A, __B),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_andnot_pd(__A, __B),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_andnot_ps(__A, __B),
          (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_andnot_ps(__A, __B),
          (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_andnot_ps(__A, __B),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_andnot_ps(__A, __B),
          (__v4sf)_mm_setzero_ps());
}
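
/* Usage note: as with the SSE/AVX andnot intrinsics they wrap, the masked
 * forms above compute (NOT __A) AND __B on the raw bit patterns of the
 * elements before the write mask is applied. Illustrative sketch, for some
 * __m128d x:
 *
 *   __m128d sign = _mm_set1_pd(-0.0);                   // only the sign bits set
 *   __m128d absx = _mm_maskz_andnot_pd(0x3, sign, x);   // |x| in both lanes
 */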

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_and_pd(__A, __B),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_and_pd(__A, __B),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_and_pd(__A, __B),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_and_pd(__A, __B),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_and_ps(__A, __B),
          (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_and_ps(__A, __B),
          (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_and_ps(__A, __B),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_and_ps(__A, __B),
          (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_xor_pd(__A, __B),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_xor_pd(__A, __B),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_xor_pd(__A, __B),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_xor_pd(__A, __B),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_xor_ps(__A, __B),
          (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_xor_ps(__A, __B),
          (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_xor_ps(__A, __B),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_xor_ps(__A, __B),
          (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_or_pd(__A, __B),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_or_pd(__A, __B),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_or_pd(__A, __B),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_or_pd(__A, __B),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_or_ps(__A, __B),
          (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
          (__v8sf)_mm256_or_ps(__A, __B),
          (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_or_ps(__A, __B),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm_or_ps(__A, __B),
          (__v4sf)_mm_setzero_ps());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtpd_epi64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtpd_epi64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtpd_epu64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtpd_epu64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}
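
/* Usage note: the _mm*_cvt{pd,ps}_ep{i,u}64 conversions use the current MXCSR
 * rounding mode; the _mm*_cvtt* variants further below truncate toward zero.
 * Illustrative sketch under the default round-to-nearest-even mode:
 *
 *   __m128d v = _mm_set_pd(2.5, -1.5);
 *   __m128i r = _mm_cvtpd_epi64(v);    // { -2, 2 }  (ties to even)
 *   __m128i t = _mm_cvttpd_epi64(v);   // { -1, 2 }  (truncation)
 */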

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtps_epi64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtps_epi64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtps_epu64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtps_epu64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtepi64_pd (__m128i __A) {
  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_cvtepi64_pd(__A),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_cvtepi64_pd(__A),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_pd (__m256i __A) {
  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_cvtepi64_pd(__A),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_cvtepi64_pd(__A),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtepi64_ps (__m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
          (__v4sf) _mm_setzero_ps(),
          (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
          (__v4sf) __W,
          (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
          (__v4sf) _mm_setzero_ps(),
          (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_ps (__m256i __A) {
  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm256_cvtepi64_ps(__A),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm256_cvtepi64_ps(__A),
          (__v4sf)_mm_setzero_ps());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttpd_epi64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttpd_epi64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttpd_epu64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttpd_epu64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttps_epi64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttps_epi64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttps_epu64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
          (__v2di) __W,
          (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
          (__v2di) _mm_setzero_si128(),
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttps_epu64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
          (__v4di) __W,
          (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
          (__v4di) _mm256_setzero_si256(),
          (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtepu64_pd (__m128i __A) {
  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_cvtepu64_pd(__A),
          (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
          (__v2df)_mm_cvtepu64_pd(__A),
          (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_pd (__m256i __A) {
  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_cvtepu64_pd(__A),
          (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
          (__v4df)_mm256_cvtepu64_pd(__A),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtepu64_ps (__m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
          (__v4sf) _mm_setzero_ps(),
          (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
          (__v4sf) __W,
          (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
          (__v4sf) _mm_setzero_ps(),
          (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_ps (__m256i __A) {
  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm256_cvtepu64_ps(__A),
          (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
          (__v4sf)_mm256_cvtepu64_ps(__A),
          (__v4sf)_mm_setzero_ps());
}

#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
          (__v2df)(__m128d)(B), (int)(C), \
          (__v2df)_mm_setzero_pd(), \
          (__mmask8)-1))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
          (__v2df)(__m128d)(B), (int)(C), \
          (__v2df)(__m128d)(W), \
          (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
          (__v2df)(__m128d)(B), (int)(C), \
          (__v2df)_mm_setzero_pd(), \
          (__mmask8)(U)))

#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
          (__v4df)(__m256d)(B), (int)(C), \
          (__v4df)_mm256_setzero_pd(), \
          (__mmask8)-1))

#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
          (__v4df)(__m256d)(B), (int)(C), \
          (__v4df)(__m256d)(W), \
          (__mmask8)(U)))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
          (__v4df)(__m256d)(B), (int)(C), \
          (__v4df)_mm256_setzero_pd(), \
          (__mmask8)(U)))

#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
          (__v4sf)(__m128)(B), (int)(C), \
          (__v4sf)_mm_setzero_ps(), \
          (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
          (__v4sf)(__m128)(B), (int)(C), \
          (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
          (__v4sf)(__m128)(B), (int)(C), \
          (__v4sf)_mm_setzero_ps(), \
          (__mmask8)(U)))

#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
          (__v8sf)(__m256)(B), (int)(C), \
          (__v8sf)_mm256_setzero_ps(), \
          (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
          (__v8sf)(__m256)(B), (int)(C), \
          (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
          (__v8sf)(__m256)(B), (int)(C), \
          (__v8sf)_mm256_setzero_ps(), \
          (__mmask8)(U)))
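
/* Usage note: the _mm*_range_* macros wrap VRANGEPD/VRANGEPS; the immediate C
 * selects the operation (roughly: the low two bits pick min, max, or their
 * absolute-value variants, and the next two bits control how the result's
 * sign is chosen -- see the Intel intrinsics guide for the exact encoding).
 * Illustrative sketch, for arbitrary __m128d values a and b:
 *
 *   __m128d r  = _mm_range_pd(a, b, 0);
 *   __m128d rm = _mm_maskz_range_pd(0x1, a, b, 0);   // only lane 0 computed
 */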

#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
          (__v2df)_mm_setzero_pd(), \
          (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
          (__v2df)(__m128d)(W), \
          (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
          (__v2df)_mm_setzero_pd(), \
          (__mmask8)(U)))

#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
          (__v4df)_mm256_setzero_pd(), \
          (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
          (__v4df)(__m256d)(W), \
          (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
          (__v4df)_mm256_setzero_pd(), \
          (__mmask8)(U)))

#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
          (__v4sf)_mm_setzero_ps(), \
          (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
          (__v4sf)(__m128)(W), \
          (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
          (__v4sf)_mm_setzero_ps(), \
          (__mmask8)(U)))

#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
          (__v8sf)_mm256_setzero_ps(), \
          (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
          (__v8sf)(__m256)(W), \
          (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
          (__v8sf)_mm256_setzero_ps(), \
          (__mmask8)(U)))
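
/* Usage note: despite the name, the _mm*_reduce_* macros are per-element
 * wrappers around VREDUCEPD/VREDUCEPS, not horizontal reductions: each element
 * is replaced by its residual after rounding to a number of fraction bits
 * encoded in the upper bits of the immediate B (the low bits select the
 * rounding behaviour; see the Intel intrinsics guide for the exact layout).
 * Illustrative sketch, for some __m128d x:
 *
 *   __m128d frac = _mm_reduce_pd(x, 0);   // x minus x rounded with imm8 == 0
 */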

static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
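
/* Usage note: _mm*_movepi{32,64}_mask copy the sign (most significant) bit of
 * each element into a __mmask8, and _mm*_movm_epi{32,64} expand each mask bit
 * back into an all-ones or all-zeros element. Illustrative sketch:
 *
 *   __mmask8 m = _mm256_movepi32_mask(_mm256_set1_epi32(-1));   // 0xFF
 *   __m256i  v = _mm256_movm_epi32(0x0F);   // lanes 0..3 = -1, lanes 4..7 = 0
 */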

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_broadcast_f32x2 (__m128 __A)
{
  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
          (__v8sf)_mm256_broadcast_f32x2(__A),
          (__v8sf)__O);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
          (__v8sf)_mm256_broadcast_f32x2(__A),
          (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_broadcast_f64x2(__m128d __A)
{
  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
          0, 1, 0, 1);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
{
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
          (__v4df)_mm256_broadcast_f64x2(__A),
          (__v4df)__O);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
{
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
          (__v4df)_mm256_broadcast_f64x2(__A),
          (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcast_i32x2 (__m128i __A)
{
  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
          0, 1, 0, 1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
          (__v4si)_mm_broadcast_i32x2(__A),
          (__v4si)__O);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
          (__v4si)_mm_broadcast_i32x2(__A),
          (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcast_i32x2 (__m128i __A)
{
  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
          (__v8si)_mm256_broadcast_i32x2(__A),
          (__v8si)__O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
          (__v8si)_mm256_broadcast_i32x2(__A),
          (__v8si)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcast_i64x2(__m128i __A)
{
  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
          0, 1, 0, 1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
          (__v4di)_mm256_broadcast_i64x2(__A),
          (__v4di)__O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
          (__v4di)_mm256_broadcast_i64x2(__A),
          (__v4di)_mm256_setzero_si256());
}
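
/* Usage note: the broadcast_{f,i}{32x2,64x2} helpers repeat a 64-bit or
 * 128-bit element group across the wider vector, as the shufflevector index
 * patterns above show. Illustrative sketch:
 *
 *   __m128d pair = _mm_set_pd(2.0, 1.0);           // {1.0, 2.0}
 *   __m256d quad = _mm256_broadcast_f64x2(pair);   // {1.0, 2.0, 1.0, 2.0}
 */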

#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
          (int)(imm), \
          (__v2df)_mm_undefined_pd(), \
          (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
          (int)(imm), \
          (__v2df)(__m128d)(W), \
          (__mmask8)(U)))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
          (int)(imm), \
          (__v2df)_mm_setzero_pd(), \
          (__mmask8)(U)))

#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
          (int)(imm), \
          (__v2di)_mm_undefined_si128(), \
          (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
          (int)(imm), \
          (__v2di)(__m128i)(W), \
          (__mmask8)(U)))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
          (int)(imm), \
          (__v2di)_mm_setzero_si128(), \
          (__mmask8)(U)))

#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
          (__v2df)(__m128d)(B), (int)(imm)))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
          (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
          (__v4df)(__m256d)(W)))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
          (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
          (__v4df)_mm256_setzero_pd()))

#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
          (__v2di)(__m128i)(B), (int)(imm)))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
          (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
          (__v4di)(__m256i)(W)))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
          (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
          (__v4di)_mm256_setzero_si256()))
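
/* Usage note: the extract/insert 64x2 macros move an aligned 128-bit lane out
 * of or into a 256-bit vector; with only two such lanes, imm is effectively 0
 * (low lane) or 1 (high lane). Illustrative sketch, for some __m256i v:
 *
 *   __m128i hi = _mm256_extracti64x2_epi64(v, 1);   // upper 128 bits of v
 *   __m256i w  = _mm256_inserti64x2(v, hi, 0);      // hi copied into the low lane
 */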

#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
          (__mmask8)(U)))

#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
          (__mmask8)-1))

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
          (__mmask8)(U)))

#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
          (__mmask8)-1))

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
          (__mmask8)(U)))

#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
          (__mmask8)-1))

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
          (__mmask8)(U)))

#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
          (__mmask8)-1))
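
/* Usage note: the _mm*_fpclass_* macros wrap VFPCLASSPD/VFPCLASSPS; each bit
 * of imm enables one category test (quiet NaN, +/-0, +/-infinity, denormal,
 * negative, signaling NaN), and the result mask has a bit set for every
 * element matching any enabled category. The exact bit assignment is in the
 * Intel intrinsics guide and worth double-checking there; assuming bits 3 and
 * 4 select the two infinities:
 *
 *   __mmask8 is_inf = _mm256_fpclass_pd_mask(v, 0x18);   // +/-inf lanes of v
 */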

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif