Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | #ifndef __IMMINTRIN_H |
||
10 | #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead." |
||
11 | #endif |
||
12 | |||
13 | #ifndef __AVX512VLCDINTRIN_H |
||
14 | #define __AVX512VLCDINTRIN_H |
||
15 | |||
/* Attribute sets applied to every function defined in this file.
 *
 * Both variants carry __always_inline__, __nodebug__ and a __target__ of
 * "avx512vl,avx512cd"; they differ only in the __min_vector_width__ value
 * (128 for the __m128i functions, 256 for the __m256i functions).
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512cd"),                              \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512cd"),                              \
                 __min_vector_width__(256)))
||
19 | |||
20 | |||
21 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
22 | _mm_broadcastmb_epi64 (__mmask8 __A) |
||
23 | { |
||
24 | return (__m128i) _mm_set1_epi64x((long long) __A); |
||
25 | } |
||
26 | |||
27 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
28 | _mm256_broadcastmb_epi64 (__mmask8 __A) |
||
29 | { |
||
30 | return (__m256i) _mm256_set1_epi64x((long long)__A); |
||
31 | } |
||
32 | |||
33 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
34 | _mm_broadcastmw_epi32 (__mmask16 __A) |
||
35 | { |
||
36 | return (__m128i) _mm_set1_epi32((int)__A); |
||
37 | } |
||
38 | |||
39 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
40 | _mm256_broadcastmw_epi32 (__mmask16 __A) |
||
41 | { |
||
42 | return (__m256i) _mm256_set1_epi32((int)__A); |
||
43 | } |
||
44 | |||
45 | |||
46 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
47 | _mm_conflict_epi64 (__m128i __A) |
||
48 | { |
||
49 | return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); |
||
50 | } |
||
51 | |||
52 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
53 | _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) |
||
54 | { |
||
55 | return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, |
||
56 | (__v2di)_mm_conflict_epi64(__A), |
||
57 | (__v2di)__W); |
||
58 | } |
||
59 | |||
60 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
61 | _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) |
||
62 | { |
||
63 | return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, |
||
64 | (__v2di)_mm_conflict_epi64(__A), |
||
65 | (__v2di)_mm_setzero_si128()); |
||
66 | } |
||
67 | |||
68 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
69 | _mm256_conflict_epi64 (__m256i __A) |
||
70 | { |
||
71 | return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); |
||
72 | } |
||
73 | |||
74 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
75 | _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) |
||
76 | { |
||
77 | return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, |
||
78 | (__v4di)_mm256_conflict_epi64(__A), |
||
79 | (__v4di)__W); |
||
80 | } |
||
81 | |||
82 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
83 | _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) |
||
84 | { |
||
85 | return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, |
||
86 | (__v4di)_mm256_conflict_epi64(__A), |
||
87 | (__v4di)_mm256_setzero_si256()); |
||
88 | } |
||
89 | |||
90 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
91 | _mm_conflict_epi32 (__m128i __A) |
||
92 | { |
||
93 | return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); |
||
94 | } |
||
95 | |||
96 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
97 | _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) |
||
98 | { |
||
99 | return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, |
||
100 | (__v4si)_mm_conflict_epi32(__A), |
||
101 | (__v4si)__W); |
||
102 | } |
||
103 | |||
104 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
105 | _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) |
||
106 | { |
||
107 | return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, |
||
108 | (__v4si)_mm_conflict_epi32(__A), |
||
109 | (__v4si)_mm_setzero_si128()); |
||
110 | } |
||
111 | |||
112 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
113 | _mm256_conflict_epi32 (__m256i __A) |
||
114 | { |
||
115 | return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); |
||
116 | } |
||
117 | |||
118 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
119 | _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) |
||
120 | { |
||
121 | return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, |
||
122 | (__v8si)_mm256_conflict_epi32(__A), |
||
123 | (__v8si)__W); |
||
124 | } |
||
125 | |||
126 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
127 | _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) |
||
128 | { |
||
129 | return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, |
||
130 | (__v8si)_mm256_conflict_epi32(__A), |
||
131 | (__v8si)_mm256_setzero_si256()); |
||
132 | } |
||
133 | |||
134 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
135 | _mm_lzcnt_epi32 (__m128i __A) |
||
136 | { |
||
137 | return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); |
||
138 | } |
||
139 | |||
140 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
141 | _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) |
||
142 | { |
||
143 | return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, |
||
144 | (__v4si)_mm_lzcnt_epi32(__A), |
||
145 | (__v4si)__W); |
||
146 | } |
||
147 | |||
148 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
149 | _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) |
||
150 | { |
||
151 | return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, |
||
152 | (__v4si)_mm_lzcnt_epi32(__A), |
||
153 | (__v4si)_mm_setzero_si128()); |
||
154 | } |
||
155 | |||
156 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
157 | _mm256_lzcnt_epi32 (__m256i __A) |
||
158 | { |
||
159 | return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); |
||
160 | } |
||
161 | |||
162 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
163 | _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) |
||
164 | { |
||
165 | return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, |
||
166 | (__v8si)_mm256_lzcnt_epi32(__A), |
||
167 | (__v8si)__W); |
||
168 | } |
||
169 | |||
170 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
171 | _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) |
||
172 | { |
||
173 | return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, |
||
174 | (__v8si)_mm256_lzcnt_epi32(__A), |
||
175 | (__v8si)_mm256_setzero_si256()); |
||
176 | } |
||
177 | |||
178 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
179 | _mm_lzcnt_epi64 (__m128i __A) |
||
180 | { |
||
181 | return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); |
||
182 | } |
||
183 | |||
184 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
185 | _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) |
||
186 | { |
||
187 | return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, |
||
188 | (__v2di)_mm_lzcnt_epi64(__A), |
||
189 | (__v2di)__W); |
||
190 | } |
||
191 | |||
192 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
||
193 | _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) |
||
194 | { |
||
195 | return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, |
||
196 | (__v2di)_mm_lzcnt_epi64(__A), |
||
197 | (__v2di)_mm_setzero_si128()); |
||
198 | } |
||
199 | |||
200 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
201 | _mm256_lzcnt_epi64 (__m256i __A) |
||
202 | { |
||
203 | return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); |
||
204 | } |
||
205 | |||
206 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
207 | _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) |
||
208 | { |
||
209 | return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, |
||
210 | (__v4di)_mm256_lzcnt_epi64(__A), |
||
211 | (__v4di)__W); |
||
212 | } |
||
213 | |||
214 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
||
215 | _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) |
||
216 | { |
||
217 | return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, |
||
218 | (__v4di)_mm256_lzcnt_epi64(__A), |
||
219 | (__v4di)_mm256_setzero_si256()); |
||
220 | } |
||
221 | |||
222 | #undef __DEFAULT_FN_ATTRS128 |
||
223 | #undef __DEFAULT_FN_ATTRS256 |
||
224 | |||
225 | #endif /* __AVX512VLCDINTRIN_H */ |