Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- avx512vpopcntdqvlintrin.h - AVX512VPOPCNTDQ intrinsics -----------=== |
2 | * |
||
3 | * |
||
4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
5 | * See https://llvm.org/LICENSE.txt for license information. |
||
6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
7 | * |
||
8 | *===-----------------------------------------------------------------------=== |
||
9 | */ |
||
10 | #ifndef __IMMINTRIN_H |
||
11 | #error \ |
||
12 | "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead." |
||
13 | #endif |
||
14 | |||
15 | #ifndef __AVX512VPOPCNTDQVLINTRIN_H |
||
16 | #define __AVX512VPOPCNTDQVLINTRIN_H |
||
17 | |||
18 | /* Define the default attributes for the functions in this file. */ |
||
/* Attribute set for the 128-bit intrinsics below: forced inlining, no debug
 * info, the "avx512vpopcntdq,avx512vl" target features, and a minimum vector
 * width of 128. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vpopcntdq,avx512vl"),                       \
                 __min_vector_width__(128)))

/* Same attribute set for the 256-bit intrinsics; only the minimum vector
 * width differs (256). */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vpopcntdq,avx512vl"),                       \
                 __min_vector_width__(256)))
||
23 | |||
24 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
25 | _mm_popcnt_epi64(__m128i __A) { |
||
26 | return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); |
||
27 | } |
||
28 | |||
29 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
30 | _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { |
||
31 | return (__m128i)__builtin_ia32_selectq_128( |
||
32 | (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); |
||
33 | } |
||
34 | |||
35 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
36 | _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { |
||
37 | return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); |
||
38 | } |
||
39 | |||
40 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
41 | _mm_popcnt_epi32(__m128i __A) { |
||
42 | return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); |
||
43 | } |
||
44 | |||
45 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
46 | _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { |
||
47 | return (__m128i)__builtin_ia32_selectd_128( |
||
48 | (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); |
||
49 | } |
||
50 | |||
51 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
52 | _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { |
||
53 | return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); |
||
54 | } |
||
55 | |||
56 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
57 | _mm256_popcnt_epi64(__m256i __A) { |
||
58 | return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); |
||
59 | } |
||
60 | |||
61 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
62 | _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { |
||
63 | return (__m256i)__builtin_ia32_selectq_256( |
||
64 | (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); |
||
65 | } |
||
66 | |||
67 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
68 | _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { |
||
69 | return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); |
||
70 | } |
||
71 | |||
72 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
73 | _mm256_popcnt_epi32(__m256i __A) { |
||
74 | return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); |
||
75 | } |
||
76 | |||
77 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
78 | _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { |
||
79 | return (__m256i)__builtin_ia32_selectd_256( |
||
80 | (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); |
||
81 | } |
||
82 | |||
83 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
84 | _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { |
||
85 | return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); |
||
86 | } |
||
87 | |||
88 | #undef __DEFAULT_FN_ATTRS128 |
||
89 | #undef __DEFAULT_FN_ATTRS256 |
||
90 | |||
91 | #endif |