Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- immintrin.h - Intel intrinsics -----------------------------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | |||
10 | #ifndef __IMMINTRIN_H |
||
11 | #define __IMMINTRIN_H |
||
12 | |||
13 | #if !defined(__i386__) && !defined(__x86_64__) |
||
14 | #error "This header is only meant to be used on x86 and x64 architecture" |
||
15 | #endif |
||
16 | |||
17 | #include <x86gprintrin.h> |
||
18 | |||
19 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
20 | defined(__MMX__) |
||
21 | #include <mmintrin.h> |
||
22 | #endif |
||
23 | |||
24 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
25 | defined(__SSE__) |
||
26 | #include <xmmintrin.h> |
||
27 | #endif |
||
28 | |||
29 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
30 | defined(__SSE2__) |
||
31 | #include <emmintrin.h> |
||
32 | #endif |
||
33 | |||
34 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
35 | defined(__SSE3__) |
||
36 | #include <pmmintrin.h> |
||
37 | #endif |
||
38 | |||
39 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
40 | defined(__SSSE3__) |
||
41 | #include <tmmintrin.h> |
||
42 | #endif |
||
43 | |||
44 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
45 | (defined(__SSE4_2__) || defined(__SSE4_1__)) |
||
46 | #include <smmintrin.h> |
||
47 | #endif |
||
48 | |||
49 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
50 | (defined(__AES__) || defined(__PCLMUL__)) |
||
51 | #include <wmmintrin.h> |
||
52 | #endif |
||
53 | |||
54 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
55 | defined(__CLFLUSHOPT__) |
||
56 | #include <clflushoptintrin.h> |
||
57 | #endif |
||
58 | |||
59 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
60 | defined(__CLWB__) |
||
61 | #include <clwbintrin.h> |
||
62 | #endif |
||
63 | |||
64 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
65 | defined(__AVX__) |
||
66 | #include <avxintrin.h> |
||
67 | #endif |
||
68 | |||
69 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
70 | defined(__AVX2__) |
||
71 | #include <avx2intrin.h> |
||
72 | #endif |
||
73 | |||
74 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
75 | defined(__F16C__) |
||
76 | #include <f16cintrin.h> |
||
77 | #endif |
||
78 | |||
79 | /* No feature check desired due to internal checks */ |
||
80 | #include <bmiintrin.h> |
||
81 | |||
82 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
83 | defined(__BMI2__) |
||
84 | #include <bmi2intrin.h> |
||
85 | #endif |
||
86 | |||
87 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
88 | defined(__LZCNT__) |
||
89 | #include <lzcntintrin.h> |
||
90 | #endif |
||
91 | |||
92 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
93 | defined(__POPCNT__) |
||
94 | #include <popcntintrin.h> |
||
95 | #endif |
||
96 | |||
97 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
98 | defined(__FMA__) |
||
99 | #include <fmaintrin.h> |
||
100 | #endif |
||
101 | |||
102 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
103 | defined(__AVX512F__) |
||
104 | #include <avx512fintrin.h> |
||
105 | #endif |
||
106 | |||
107 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
108 | defined(__AVX512VL__) |
||
109 | #include <avx512vlintrin.h> |
||
110 | #endif |
||
111 | |||
112 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
113 | defined(__AVX512BW__) |
||
114 | #include <avx512bwintrin.h> |
||
115 | #endif |
||
116 | |||
117 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
118 | defined(__AVX512BITALG__) |
||
119 | #include <avx512bitalgintrin.h> |
||
120 | #endif |
||
121 | |||
122 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
123 | defined(__AVX512CD__) |
||
124 | #include <avx512cdintrin.h> |
||
125 | #endif |
||
126 | |||
127 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
128 | defined(__AVX512VPOPCNTDQ__) |
||
129 | #include <avx512vpopcntdqintrin.h> |
||
130 | #endif |
||
131 | |||
132 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
133 | (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) |
||
134 | #include <avx512vpopcntdqvlintrin.h> |
||
135 | #endif |
||
136 | |||
137 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
138 | defined(__AVX512VNNI__) |
||
139 | #include <avx512vnniintrin.h> |
||
140 | #endif |
||
141 | |||
142 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
143 | (defined(__AVX512VL__) && defined(__AVX512VNNI__)) |
||
144 | #include <avx512vlvnniintrin.h> |
||
145 | #endif |
||
146 | |||
147 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
148 | defined(__AVXVNNI__) |
||
149 | #include <avxvnniintrin.h> |
||
150 | #endif |
||
151 | |||
152 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
153 | defined(__AVX512DQ__) |
||
154 | #include <avx512dqintrin.h> |
||
155 | #endif |
||
156 | |||
157 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
158 | (defined(__AVX512VL__) && defined(__AVX512BITALG__)) |
||
159 | #include <avx512vlbitalgintrin.h> |
||
160 | #endif |
||
161 | |||
162 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
163 | (defined(__AVX512VL__) && defined(__AVX512BW__)) |
||
164 | #include <avx512vlbwintrin.h> |
||
165 | #endif |
||
166 | |||
167 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
168 | (defined(__AVX512VL__) && defined(__AVX512CD__)) |
||
169 | #include <avx512vlcdintrin.h> |
||
170 | #endif |
||
171 | |||
172 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
173 | (defined(__AVX512VL__) && defined(__AVX512DQ__)) |
||
174 | #include <avx512vldqintrin.h> |
||
175 | #endif |
||
176 | |||
177 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
178 | defined(__AVX512ER__) |
||
179 | #include <avx512erintrin.h> |
||
180 | #endif |
||
181 | |||
182 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
183 | defined(__AVX512IFMA__) |
||
184 | #include <avx512ifmaintrin.h> |
||
185 | #endif |
||
186 | |||
187 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
188 | (defined(__AVX512IFMA__) && defined(__AVX512VL__)) |
||
189 | #include <avx512ifmavlintrin.h> |
||
190 | #endif |
||
191 | |||
192 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
193 | defined(__AVXIFMA__) |
||
194 | #include <avxifmaintrin.h> |
||
195 | #endif |
||
196 | |||
197 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
198 | defined(__AVX512VBMI__) |
||
199 | #include <avx512vbmiintrin.h> |
||
200 | #endif |
||
201 | |||
202 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
203 | (defined(__AVX512VBMI__) && defined(__AVX512VL__)) |
||
204 | #include <avx512vbmivlintrin.h> |
||
205 | #endif |
||
206 | |||
207 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
208 | defined(__AVX512VBMI2__) |
||
209 | #include <avx512vbmi2intrin.h> |
||
210 | #endif |
||
211 | |||
212 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
213 | (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) |
||
214 | #include <avx512vlvbmi2intrin.h> |
||
215 | #endif |
||
216 | |||
217 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
218 | defined(__AVX512PF__) |
||
219 | #include <avx512pfintrin.h> |
||
220 | #endif |
||
221 | |||
222 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
223 | defined(__AVX512FP16__) |
||
224 | #include <avx512fp16intrin.h> |
||
225 | #endif |
||
226 | |||
227 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
228 | (defined(__AVX512VL__) && defined(__AVX512FP16__)) |
||
229 | #include <avx512vlfp16intrin.h> |
||
230 | #endif |
||
231 | |||
232 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
233 | defined(__AVX512BF16__) |
||
234 | #include <avx512bf16intrin.h> |
||
235 | #endif |
||
236 | |||
237 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
238 | (defined(__AVX512VL__) && defined(__AVX512BF16__)) |
||
239 | #include <avx512vlbf16intrin.h> |
||
240 | #endif |
||
241 | |||
242 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
243 | defined(__PKU__) |
||
244 | #include <pkuintrin.h> |
||
245 | #endif |
||
246 | |||
247 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
248 | defined(__VPCLMULQDQ__) |
||
249 | #include <vpclmulqdqintrin.h> |
||
250 | #endif |
||
251 | |||
252 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
253 | defined(__VAES__) |
||
254 | #include <vaesintrin.h> |
||
255 | #endif |
||
256 | |||
257 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
258 | defined(__GFNI__) |
||
259 | #include <gfniintrin.h> |
||
260 | #endif |
||
261 | |||
262 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
263 | defined(__AVXVNNIINT8__) |
||
264 | #include <avxvnniint8intrin.h> |
||
265 | #endif |
||
266 | |||
267 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
268 | defined(__AVXNECONVERT__) |
||
269 | #include <avxneconvertintrin.h> |
||
270 | #endif |
||
271 | |||
272 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
273 | defined(__RDPID__) |
||
/// Reads the IA32_TSC_AUX MSR (0xc0000103) and returns its value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The value produced by \c __builtin_ia32_rdpid as an
///    <c> unsigned int </c>.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  return __builtin_ia32_rdpid();
}
||
283 | #endif // __RDPID__ |
||
284 | |||
285 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
286 | defined(__RDRND__) |
||
/// Requests a 16-bit hardware random value and stores it at \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Location that receives the 16-bit value.
/// \returns The status reported by \c __builtin_ia32_rdrand16_step converted
///    to \c int (1 when a value was produced, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  const int __status = (int)__builtin_ia32_rdrand16_step(__p);
  return __status;
}
||
292 | |||
/// Requests a 32-bit hardware random value and stores it at \a __p.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Location that receives the 32-bit value.
/// \returns The status reported by \c __builtin_ia32_rdrand32_step converted
///    to \c int (1 when a value was produced, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  const int __status = (int)__builtin_ia32_rdrand32_step(__p);
  return __status;
}
||
298 | |||
299 | #ifdef __x86_64__ |
||
/// Requests a 64-bit hardware random value and stores it at \a __p.
/// This is the native variant used when \c __x86_64__ is defined.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Location that receives the 64-bit value.
/// \returns The status reported by \c __builtin_ia32_rdrand64_step converted
///    to \c int (1 when a value was produced, 0 otherwise).
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  const int __status = (int)__builtin_ia32_rdrand64_step(__p);
  return __status;
}
||
305 | #else |
||
// Fallback for targets without __x86_64__: a 64-bit result is assembled from
// two 32-bit RDRAND draws.
//
// __p receives (high draw << 32) | low draw when both draws succeed, and 0
// otherwise. Returns 1 on success and 0 on failure.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
  unsigned int __low_half, __high_half;
  // Both draws are always issued, even when the first one reports failure.
  unsigned int __low_ok = __builtin_ia32_rdrand32_step(&__low_half);
  unsigned int __high_ok = __builtin_ia32_rdrand32_step(&__high_half);

  if (!(__low_ok && __high_ok)) {
    *__p = 0;
    return 0;
  }

  *__p = ((unsigned long long)__high_half << 32) |
         (unsigned long long)__low_half;
  return 1;
}
||
322 | #endif |
||
323 | #endif /* __RDRND__ */ |
||
324 | |||
325 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
326 | defined(__FSGSBASE__) |
||
327 | #ifdef __x86_64__ |
||
/// Reads the FS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The value returned by \c __builtin_ia32_rdfsbase32.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  return __builtin_ia32_rdfsbase32();
}
||
333 | |||
/// Reads the FS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The value returned by \c __builtin_ia32_rdfsbase64.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  return __builtin_ia32_rdfsbase64();
}
||
339 | |||
/// Reads the GS base register using the 32-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The value returned by \c __builtin_ia32_rdgsbase32.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  return __builtin_ia32_rdgsbase32();
}
||
345 | |||
/// Reads the GS base register using the 64-bit form of the builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The value returned by \c __builtin_ia32_rdgsbase64.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  return __builtin_ia32_rdgsbase64();
}
||
351 | |||
/// Writes \a __V to the FS base register using the 32-bit form of the
/// builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value passed to \c __builtin_ia32_wrfsbase32.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
||
357 | |||
/// Writes \a __V to the FS base register using the 64-bit form of the
/// builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value passed to \c __builtin_ia32_wrfsbase64.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
||
363 | |||
/// Writes \a __V to the GS base register using the 32-bit form of the
/// builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value passed to \c __builtin_ia32_wrgsbase32.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
||
369 | |||
/// Writes \a __V to the GS base register using the 64-bit form of the
/// builtin.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value passed to \c __builtin_ia32_wrgsbase64.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrgsbase64(__V);
}
||
375 | |||
376 | #endif |
||
377 | #endif /* __FSGSBASE__ */ |
||
378 | |||
379 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
380 | defined(__MOVBE__) |
||
381 | |||
382 | /* The structs used below are to force the load/store to be unaligned. This |
||
383 | * is accomplished with the __packed__ attribute. The __may_alias__ prevents |
||
384 | * tbaa metadata from being generated based on the struct and the type of the |
||
385 | * field inside of it. |
||
386 | */ |
||
387 | |||
/// Loads a 16-bit value from \a __P and returns it with its bytes swapped.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Address of the 16-bit value to load. It is read through a packed,
///    may-alias struct, so no alignment is assumed.
/// \returns The loaded value after \c __builtin_bswap16, as a \c short.
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const *__P)
{
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));

  const struct __loadu_i16 *__src = (const struct __loadu_i16 *)__P;
  return (short)__builtin_bswap16(__src->__v);
}
||
395 | |||
/// Swaps the bytes of the 16-bit value \a __D and stores the result at
/// \a __P.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Destination address. It is written through a packed, may-alias struct,
///    so no alignment is assumed.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void *__P, short __D)
{
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));

  struct __storeu_i16 *__dst = (struct __storeu_i16 *)__P;
  __dst->__v = __builtin_bswap16((unsigned short)__D);
}
||
403 | |||
/// Loads a 32-bit value from \a __P and returns it with its bytes swapped.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Address of the 32-bit value to load. It is read through a packed,
///    may-alias struct, so no alignment is assumed.
/// \returns The loaded value after \c __builtin_bswap32, as an \c int.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const *__P)
{
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));

  const struct __loadu_i32 *__src = (const struct __loadu_i32 *)__P;
  return (int)__builtin_bswap32(__src->__v);
}
||
411 | |||
/// Swaps the bytes of the 32-bit value \a __D and stores the result at
/// \a __P.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Destination address. It is written through a packed, may-alias struct,
///    so no alignment is assumed.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void *__P, int __D)
{
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));

  struct __storeu_i32 *__dst = (struct __storeu_i32 *)__P;
  __dst->__v = __builtin_bswap32((unsigned int)__D);
}
||
419 | |||
420 | #ifdef __x86_64__ |
||
/// Loads a 64-bit value from \a __P and returns it with its bytes swapped.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Address of the 64-bit value to load. It is read through a packed,
///    may-alias struct, so no alignment is assumed.
/// \returns The loaded value after \c __builtin_bswap64, as a
///    <c> long long </c>.
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const *__P)
{
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));

  const struct __loadu_i64 *__src = (const struct __loadu_i64 *)__P;
  return (long long)__builtin_bswap64(__src->__v);
}
||
428 | |||
/// Swaps the bytes of the 64-bit value \a __D and stores the result at
/// \a __P.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> MOVBE </c> instruction.
///
/// \param __P
///    Destination address. It is written through a packed, may-alias struct,
///    so no alignment is assumed.
/// \param __D
///    Value to byte-swap and store.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void *__P, long long __D)
{
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));

  struct __storeu_i64 *__dst = (struct __storeu_i64 *)__P;
  __dst->__v = __builtin_bswap64((unsigned long long)__D);
}
||
436 | #endif |
||
437 | #endif /* __MOVBE__ */ |
||
438 | |||
439 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
440 | defined(__RTM__) |
||
441 | #include <rtmintrin.h> |
||
442 | #include <xtestintrin.h> |
||
443 | #endif |
||
444 | |||
445 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
446 | defined(__SHA__) |
||
447 | #include <shaintrin.h> |
||
448 | #endif |
||
449 | |||
450 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
451 | defined(__FXSR__) |
||
452 | #include <fxsrintrin.h> |
||
453 | #endif |
||
454 | |||
455 | /* No feature check desired due to internal MSC_VER checks */ |
||
456 | #include <xsaveintrin.h> |
||
457 | |||
458 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
459 | defined(__XSAVEOPT__) |
||
460 | #include <xsaveoptintrin.h> |
||
461 | #endif |
||
462 | |||
463 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
464 | defined(__XSAVEC__) |
||
465 | #include <xsavecintrin.h> |
||
466 | #endif |
||
467 | |||
468 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
469 | defined(__XSAVES__) |
||
470 | #include <xsavesintrin.h> |
||
471 | #endif |
||
472 | |||
473 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
474 | defined(__SHSTK__) |
||
475 | #include <cetintrin.h> |
||
476 | #endif |
||
477 | |||
478 | /* Some intrinsics inside adxintrin.h are available only on processors with ADX, |
||
479 | * whereas others are available at all times. */ |
||
480 | #include <adxintrin.h> |
||
481 | |||
482 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
483 | defined(__RDSEED__) |
||
484 | #include <rdseedintrin.h> |
||
485 | #endif |
||
486 | |||
487 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
488 | defined(__WBNOINVD__) |
||
489 | #include <wbnoinvdintrin.h> |
||
490 | #endif |
||
491 | |||
492 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
493 | defined(__CLDEMOTE__) |
||
494 | #include <cldemoteintrin.h> |
||
495 | #endif |
||
496 | |||
497 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
498 | defined(__WAITPKG__) |
||
499 | #include <waitpkgintrin.h> |
||
500 | #endif |
||
501 | |||
502 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
503 | defined(__MOVDIRI__) || defined(__MOVDIR64B__) |
||
504 | #include <movdirintrin.h> |
||
505 | #endif |
||
506 | |||
507 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
508 | defined(__PCONFIG__) |
||
509 | #include <pconfigintrin.h> |
||
510 | #endif |
||
511 | |||
512 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
513 | defined(__SGX__) |
||
514 | #include <sgxintrin.h> |
||
515 | #endif |
||
516 | |||
517 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
518 | defined(__PTWRITE__) |
||
519 | #include <ptwriteintrin.h> |
||
520 | #endif |
||
521 | |||
522 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
523 | defined(__INVPCID__) |
||
524 | #include <invpcidintrin.h> |
||
525 | #endif |
||
526 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
527 | defined(__AMX_FP16__) |
||
528 | #include <amxfp16intrin.h> |
||
529 | #endif |
||
530 | |||
531 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
532 | defined(__KL__) || defined(__WIDEKL__) |
||
533 | #include <keylockerintrin.h> |
||
534 | #endif |
||
535 | |||
536 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
537 | defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) |
||
538 | #include <amxintrin.h> |
||
539 | #endif |
||
540 | |||
541 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
542 | defined(__AVX512VP2INTERSECT__) |
||
543 | #include <avx512vp2intersectintrin.h> |
||
544 | #endif |
||
545 | |||
546 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
547 | (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) |
||
548 | #include <avx512vlvp2intersectintrin.h> |
||
549 | #endif |
||
550 | |||
551 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
552 | defined(__ENQCMD__) |
||
553 | #include <enqcmdintrin.h> |
||
554 | #endif |
||
555 | |||
556 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
557 | defined(__SERIALIZE__) |
||
558 | #include <serializeintrin.h> |
||
559 | #endif |
||
560 | |||
561 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
562 | defined(__TSXLDTRK__) |
||
563 | #include <tsxldtrkintrin.h> |
||
564 | #endif |
||
565 | |||
566 | #if defined(_MSC_VER) && __has_extension(gnu_asm) |
||
567 | /* Define the default attributes for these intrinsics */ |
||
568 | #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) |
||
569 | #ifdef __cplusplus |
||
570 | extern "C" { |
||
571 | #endif |
||
572 | /*----------------------------------------------------------------------------*\ |
||
573 | |* Interlocked Exchange HLE |
||
574 | \*----------------------------------------------------------------------------*/ |
||
575 | #if defined(__i386__) || defined(__x86_64__) |
||
576 | static __inline__ long __DEFAULT_FN_ATTRS |
||
577 | _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { |
||
578 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" |
||
579 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
||
580 | return _Value; |
||
581 | } |
||
582 | static __inline__ long __DEFAULT_FN_ATTRS |
||
583 | _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { |
||
584 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" |
||
585 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
||
586 | return _Value; |
||
587 | } |
||
588 | #endif |
||
589 | #if defined(__x86_64__) |
||
590 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
||
591 | _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { |
||
592 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" |
||
593 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
||
594 | return _Value; |
||
595 | } |
||
596 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
||
597 | _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { |
||
598 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" |
||
599 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
||
600 | return _Value; |
||
601 | } |
||
602 | #endif |
||
603 | /*----------------------------------------------------------------------------*\ |
||
604 | |* Interlocked Compare Exchange HLE |
||
605 | \*----------------------------------------------------------------------------*/ |
||
606 | #if defined(__i386__) || defined(__x86_64__) |
||
607 | static __inline__ long __DEFAULT_FN_ATTRS |
||
608 | _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, |
||
609 | long _Exchange, long _Comparand) { |
||
610 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" |
||
611 | : "+a" (_Comparand), "+m" (*_Destination) |
||
612 | : "r" (_Exchange) : "memory"); |
||
613 | return _Comparand; |
||
614 | } |
||
615 | static __inline__ long __DEFAULT_FN_ATTRS |
||
616 | _InterlockedCompareExchange_HLERelease(long volatile *_Destination, |
||
617 | long _Exchange, long _Comparand) { |
||
618 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" |
||
619 | : "+a" (_Comparand), "+m" (*_Destination) |
||
620 | : "r" (_Exchange) : "memory"); |
||
621 | return _Comparand; |
||
622 | } |
||
623 | #endif |
||
624 | #if defined(__x86_64__) |
||
625 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
||
626 | _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, |
||
627 | __int64 _Exchange, __int64 _Comparand) { |
||
628 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" |
||
629 | : "+a" (_Comparand), "+m" (*_Destination) |
||
630 | : "r" (_Exchange) : "memory"); |
||
631 | return _Comparand; |
||
632 | } |
||
633 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
||
634 | _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, |
||
635 | __int64 _Exchange, __int64 _Comparand) { |
||
636 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" |
||
637 | : "+a" (_Comparand), "+m" (*_Destination) |
||
638 | : "r" (_Exchange) : "memory"); |
||
639 | return _Comparand; |
||
640 | } |
||
641 | #endif |
||
642 | #ifdef __cplusplus |
||
643 | } |
||
644 | #endif |
||
645 | |||
646 | #undef __DEFAULT_FN_ATTRS |
||
647 | |||
648 | #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ |
||
649 | |||
650 | #endif /* __IMMINTRIN_H */ |