Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | |||
10 | #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H |
||
11 | #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." |
||
12 | #endif |
||
13 | |||
14 | #ifndef __BMIINTRIN_H |
||
15 | #define __BMIINTRIN_H |
||
16 | |||
17 | /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT |
||
18 | instruction behaves as BSF on non-BMI targets, there is code that expects |
||
19 | to use it as a potentially faster version of BSF. */ |
||
20 | #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) |
||
21 | |||
/* Single-underscore spelling: expands to a call to __tzcnt_u16 with the
   argument passed through unchanged. */
#define _tzcnt_u16(__a) (__tzcnt_u16((__a)))
||
23 | |||
24 | /// Counts the number of trailing zero bits in the operand. |
||
25 | /// |
||
26 | /// \headerfile <x86intrin.h> |
||
27 | /// |
||
28 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
29 | /// |
||
30 | /// \param __X |
||
31 | /// An unsigned 16-bit integer whose trailing zeros are to be counted. |
||
32 | /// \returns An unsigned 16-bit integer containing the number of trailing zero |
||
33 | /// bits in the operand. |
||
34 | static __inline__ unsigned short __RELAXED_FN_ATTRS |
||
35 | __tzcnt_u16(unsigned short __X) |
||
36 | { |
||
37 | return __builtin_ia32_tzcnt_u16(__X); |
||
38 | } |
||
39 | |||
40 | /// Counts the number of trailing zero bits in the operand. |
||
41 | /// |
||
42 | /// \headerfile <x86intrin.h> |
||
43 | /// |
||
44 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
45 | /// |
||
46 | /// \param __X |
||
47 | /// An unsigned 32-bit integer whose trailing zeros are to be counted. |
||
48 | /// \returns An unsigned 32-bit integer containing the number of trailing zero |
||
49 | /// bits in the operand. |
||
50 | /// \see _mm_tzcnt_32 |
||
51 | static __inline__ unsigned int __RELAXED_FN_ATTRS |
||
52 | __tzcnt_u32(unsigned int __X) |
||
53 | { |
||
54 | return __builtin_ia32_tzcnt_u32(__X); |
||
55 | } |
||
56 | |||
57 | /// Counts the number of trailing zero bits in the operand. |
||
58 | /// |
||
59 | /// \headerfile <x86intrin.h> |
||
60 | /// |
||
61 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
62 | /// |
||
63 | /// \param __X |
||
64 | /// An unsigned 32-bit integer whose trailing zeros are to be counted. |
||
65 | /// \returns An 32-bit integer containing the number of trailing zero bits in |
||
66 | /// the operand. |
||
67 | /// \see __tzcnt_u32 |
||
68 | static __inline__ int __RELAXED_FN_ATTRS |
||
69 | _mm_tzcnt_32(unsigned int __X) |
||
70 | { |
||
71 | return (int)__builtin_ia32_tzcnt_u32(__X); |
||
72 | } |
||
73 | |||
/* Single-underscore spelling: expands to a call to __tzcnt_u32 with the
   argument passed through unchanged. */
#define _tzcnt_u32(__a) (__tzcnt_u32((__a)))
||
75 | |||
76 | #ifdef __x86_64__ |
||
77 | |||
78 | /// Counts the number of trailing zero bits in the operand. |
||
79 | /// |
||
80 | /// \headerfile <x86intrin.h> |
||
81 | /// |
||
82 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
83 | /// |
||
84 | /// \param __X |
||
85 | /// An unsigned 64-bit integer whose trailing zeros are to be counted. |
||
86 | /// \returns An unsigned 64-bit integer containing the number of trailing zero |
||
87 | /// bits in the operand. |
||
88 | /// \see _mm_tzcnt_64 |
||
89 | static __inline__ unsigned long long __RELAXED_FN_ATTRS |
||
90 | __tzcnt_u64(unsigned long long __X) |
||
91 | { |
||
92 | return __builtin_ia32_tzcnt_u64(__X); |
||
93 | } |
||
94 | |||
95 | /// Counts the number of trailing zero bits in the operand. |
||
96 | /// |
||
97 | /// \headerfile <x86intrin.h> |
||
98 | /// |
||
99 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
100 | /// |
||
101 | /// \param __X |
||
102 | /// An unsigned 64-bit integer whose trailing zeros are to be counted. |
||
103 | /// \returns An 64-bit integer containing the number of trailing zero bits in |
||
104 | /// the operand. |
||
105 | /// \see __tzcnt_u64 |
||
106 | static __inline__ long long __RELAXED_FN_ATTRS |
||
107 | _mm_tzcnt_64(unsigned long long __X) |
||
108 | { |
||
109 | return (long long)__builtin_ia32_tzcnt_u64(__X); |
||
110 | } |
||
111 | |||
/* Single-underscore spelling: expands to a call to __tzcnt_u64 with the
   argument passed through unchanged. */
#define _tzcnt_u64(__a) (__tzcnt_u64((__a)))
||
113 | |||
114 | #endif /* __x86_64__ */ |
||
115 | |||
116 | #undef __RELAXED_FN_ATTRS |
||
117 | |||
118 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
119 | defined(__BMI__) |
||
120 | |||
121 | /* Define the default attributes for the functions in this file. */ |
||
122 | #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) |
||
123 | |||
/* Single-underscore spellings that forward to the double-underscore 32-bit
   functions of the same name. */
#define _andn_u32(__a, __b) (__andn_u32((__a), (__b)))

/* BEXTR has no alias here: _bextr_u32 and __bextr_u32 are separate functions
   that take different parameter lists. */
#define _blsi_u32(__a) (__blsi_u32((__a)))

#define _blsmsk_u32(__a) (__blsmsk_u32((__a)))

#define _blsr_u32(__a) (__blsr_u32((__a)))
||
132 | |||
133 | /// Performs a bitwise AND of the second operand with the one's |
||
134 | /// complement of the first operand. |
||
135 | /// |
||
136 | /// \headerfile <x86intrin.h> |
||
137 | /// |
||
138 | /// This intrinsic corresponds to the <c> ANDN </c> instruction. |
||
139 | /// |
||
140 | /// \param __X |
||
141 | /// An unsigned integer containing one of the operands. |
||
142 | /// \param __Y |
||
143 | /// An unsigned integer containing one of the operands. |
||
144 | /// \returns An unsigned integer containing the bitwise AND of the second |
||
145 | /// operand with the one's complement of the first operand. |
||
146 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
147 | __andn_u32(unsigned int __X, unsigned int __Y) |
||
148 | { |
||
149 | return ~__X & __Y; |
||
150 | } |
||
151 | |||
152 | /* AMD-specified, double-leading-underscore version of BEXTR */ |
||
153 | /// Extracts the specified bits from the first operand and returns them |
||
154 | /// in the least significant bits of the result. |
||
155 | /// |
||
156 | /// \headerfile <x86intrin.h> |
||
157 | /// |
||
158 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
159 | /// |
||
160 | /// \param __X |
||
161 | /// An unsigned integer whose bits are to be extracted. |
||
162 | /// \param __Y |
||
163 | /// An unsigned integer used to specify which bits are extracted. Bits [7:0] |
||
164 | /// specify the index of the least significant bit. Bits [15:8] specify the |
||
165 | /// number of bits to be extracted. |
||
166 | /// \returns An unsigned integer whose least significant bits contain the |
||
167 | /// extracted bits. |
||
168 | /// \see _bextr_u32 |
||
169 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
170 | __bextr_u32(unsigned int __X, unsigned int __Y) |
||
171 | { |
||
172 | return __builtin_ia32_bextr_u32(__X, __Y); |
||
173 | } |
||
174 | |||
175 | /* Intel-specified, single-leading-underscore version of BEXTR */ |
||
176 | /// Extracts the specified bits from the first operand and returns them |
||
177 | /// in the least significant bits of the result. |
||
178 | /// |
||
179 | /// \headerfile <x86intrin.h> |
||
180 | /// |
||
181 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
182 | /// |
||
183 | /// \param __X |
||
184 | /// An unsigned integer whose bits are to be extracted. |
||
185 | /// \param __Y |
||
186 | /// An unsigned integer used to specify the index of the least significant |
||
187 | /// bit for the bits to be extracted. Bits [7:0] specify the index. |
||
188 | /// \param __Z |
||
189 | /// An unsigned integer used to specify the number of bits to be extracted. |
||
190 | /// Bits [7:0] specify the number of bits. |
||
191 | /// \returns An unsigned integer whose least significant bits contain the |
||
192 | /// extracted bits. |
||
193 | /// \see __bextr_u32 |
||
194 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
195 | _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) |
||
196 | { |
||
197 | return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); |
||
198 | } |
||
199 | |||
200 | /* Intel-specified, single-leading-underscore version of BEXTR2 */ |
||
201 | /// Extracts the specified bits from the first operand and returns them |
||
202 | /// in the least significant bits of the result. |
||
203 | /// |
||
204 | /// \headerfile <x86intrin.h> |
||
205 | /// |
||
206 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
207 | /// |
||
208 | /// \param __X |
||
209 | /// An unsigned integer whose bits are to be extracted. |
||
210 | /// \param __Y |
||
211 | /// An unsigned integer used to specify which bits are extracted. Bits [7:0] |
||
212 | /// specify the index of the least significant bit. Bits [15:8] specify the |
||
213 | /// number of bits to be extracted. |
||
214 | /// \returns An unsigned integer whose least significant bits contain the |
||
215 | /// extracted bits. |
||
216 | /// \see __bextr_u32 |
||
217 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
218 | _bextr2_u32(unsigned int __X, unsigned int __Y) { |
||
219 | return __builtin_ia32_bextr_u32(__X, __Y); |
||
220 | } |
||
221 | |||
222 | /// Clears all bits in the source except for the least significant bit |
||
223 | /// containing a value of 1 and returns the result. |
||
224 | /// |
||
225 | /// \headerfile <x86intrin.h> |
||
226 | /// |
||
227 | /// This intrinsic corresponds to the <c> BLSI </c> instruction. |
||
228 | /// |
||
229 | /// \param __X |
||
230 | /// An unsigned integer whose bits are to be cleared. |
||
231 | /// \returns An unsigned integer containing the result of clearing the bits from |
||
232 | /// the source operand. |
||
233 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
234 | __blsi_u32(unsigned int __X) |
||
235 | { |
||
236 | return __X & -__X; |
||
237 | } |
||
238 | |||
239 | /// Creates a mask whose bits are set to 1, using bit 0 up to and |
||
240 | /// including the least significant bit that is set to 1 in the source |
||
241 | /// operand and returns the result. |
||
242 | /// |
||
243 | /// \headerfile <x86intrin.h> |
||
244 | /// |
||
245 | /// This intrinsic corresponds to the <c> BLSMSK </c> instruction. |
||
246 | /// |
||
247 | /// \param __X |
||
248 | /// An unsigned integer used to create the mask. |
||
249 | /// \returns An unsigned integer containing the newly created mask. |
||
250 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
251 | __blsmsk_u32(unsigned int __X) |
||
252 | { |
||
253 | return __X ^ (__X - 1); |
||
254 | } |
||
255 | |||
256 | /// Clears the least significant bit that is set to 1 in the source |
||
257 | /// operand and returns the result. |
||
258 | /// |
||
259 | /// \headerfile <x86intrin.h> |
||
260 | /// |
||
261 | /// This intrinsic corresponds to the <c> BLSR </c> instruction. |
||
262 | /// |
||
263 | /// \param __X |
||
264 | /// An unsigned integer containing the operand to be cleared. |
||
265 | /// \returns An unsigned integer containing the result of clearing the source |
||
266 | /// operand. |
||
267 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
268 | __blsr_u32(unsigned int __X) |
||
269 | { |
||
270 | return __X & (__X - 1); |
||
271 | } |
||
272 | |||
273 | #ifdef __x86_64__ |
||
274 | |||
/* Single-underscore spellings that forward to the double-underscore 64-bit
   functions of the same name. */
#define _andn_u64(__a, __b) (__andn_u64((__a), (__b)))

/* BEXTR has no alias here: _bextr_u64 and __bextr_u64 are separate functions
   that take different parameter lists. */
#define _blsi_u64(__a) (__blsi_u64((__a)))

#define _blsmsk_u64(__a) (__blsmsk_u64((__a)))

#define _blsr_u64(__a) (__blsr_u64((__a)))
||
283 | |||
284 | /// Performs a bitwise AND of the second operand with the one's |
||
285 | /// complement of the first operand. |
||
286 | /// |
||
287 | /// \headerfile <x86intrin.h> |
||
288 | /// |
||
289 | /// This intrinsic corresponds to the <c> ANDN </c> instruction. |
||
290 | /// |
||
291 | /// \param __X |
||
292 | /// An unsigned 64-bit integer containing one of the operands. |
||
293 | /// \param __Y |
||
294 | /// An unsigned 64-bit integer containing one of the operands. |
||
295 | /// \returns An unsigned 64-bit integer containing the bitwise AND of the second |
||
296 | /// operand with the one's complement of the first operand. |
||
297 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
298 | __andn_u64 (unsigned long long __X, unsigned long long __Y) |
||
299 | { |
||
300 | return ~__X & __Y; |
||
301 | } |
||
302 | |||
303 | /* AMD-specified, double-leading-underscore version of BEXTR */ |
||
304 | /// Extracts the specified bits from the first operand and returns them |
||
305 | /// in the least significant bits of the result. |
||
306 | /// |
||
307 | /// \headerfile <x86intrin.h> |
||
308 | /// |
||
309 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
310 | /// |
||
311 | /// \param __X |
||
312 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
313 | /// \param __Y |
||
314 | /// An unsigned 64-bit integer used to specify which bits are extracted. Bits |
||
315 | /// [7:0] specify the index of the least significant bit. Bits [15:8] specify |
||
316 | /// the number of bits to be extracted. |
||
317 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
318 | /// extracted bits. |
||
319 | /// \see _bextr_u64 |
||
320 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
321 | __bextr_u64(unsigned long long __X, unsigned long long __Y) |
||
322 | { |
||
323 | return __builtin_ia32_bextr_u64(__X, __Y); |
||
324 | } |
||
325 | |||
326 | /* Intel-specified, single-leading-underscore version of BEXTR */ |
||
327 | /// Extracts the specified bits from the first operand and returns them |
||
328 | /// in the least significant bits of the result. |
||
329 | /// |
||
330 | /// \headerfile <x86intrin.h> |
||
331 | /// |
||
332 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
333 | /// |
||
334 | /// \param __X |
||
335 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
336 | /// \param __Y |
||
337 | /// An unsigned integer used to specify the index of the least significant |
||
338 | /// bit for the bits to be extracted. Bits [7:0] specify the index. |
||
339 | /// \param __Z |
||
340 | /// An unsigned integer used to specify the number of bits to be extracted. |
||
341 | /// Bits [7:0] specify the number of bits. |
||
342 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
343 | /// extracted bits. |
||
344 | /// \see __bextr_u64 |
||
345 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
346 | _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) |
||
347 | { |
||
348 | return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); |
||
349 | } |
||
350 | |||
351 | /* Intel-specified, single-leading-underscore version of BEXTR2 */ |
||
352 | /// Extracts the specified bits from the first operand and returns them |
||
353 | /// in the least significant bits of the result. |
||
354 | /// |
||
355 | /// \headerfile <x86intrin.h> |
||
356 | /// |
||
357 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
358 | /// |
||
359 | /// \param __X |
||
360 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
361 | /// \param __Y |
||
362 | /// An unsigned 64-bit integer used to specify which bits are extracted. Bits |
||
363 | /// [7:0] specify the index of the least significant bit. Bits [15:8] specify |
||
364 | /// the number of bits to be extracted. |
||
365 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
366 | /// extracted bits. |
||
367 | /// \see __bextr_u64 |
||
368 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
369 | _bextr2_u64(unsigned long long __X, unsigned long long __Y) { |
||
370 | return __builtin_ia32_bextr_u64(__X, __Y); |
||
371 | } |
||
372 | |||
373 | /// Clears all bits in the source except for the least significant bit |
||
374 | /// containing a value of 1 and returns the result. |
||
375 | /// |
||
376 | /// \headerfile <x86intrin.h> |
||
377 | /// |
||
378 | /// This intrinsic corresponds to the <c> BLSI </c> instruction. |
||
379 | /// |
||
380 | /// \param __X |
||
381 | /// An unsigned 64-bit integer whose bits are to be cleared. |
||
382 | /// \returns An unsigned 64-bit integer containing the result of clearing the |
||
383 | /// bits from the source operand. |
||
384 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
385 | __blsi_u64(unsigned long long __X) |
||
386 | { |
||
387 | return __X & -__X; |
||
388 | } |
||
389 | |||
390 | /// Creates a mask whose bits are set to 1, using bit 0 up to and |
||
391 | /// including the least significant bit that is set to 1 in the source |
||
392 | /// operand and returns the result. |
||
393 | /// |
||
394 | /// \headerfile <x86intrin.h> |
||
395 | /// |
||
396 | /// This intrinsic corresponds to the <c> BLSMSK </c> instruction. |
||
397 | /// |
||
398 | /// \param __X |
||
399 | /// An unsigned 64-bit integer used to create the mask. |
||
400 | /// \returns An unsigned 64-bit integer containing the newly created mask. |
||
401 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
402 | __blsmsk_u64(unsigned long long __X) |
||
403 | { |
||
404 | return __X ^ (__X - 1); |
||
405 | } |
||
406 | |||
407 | /// Clears the least significant bit that is set to 1 in the source |
||
408 | /// operand and returns the result. |
||
409 | /// |
||
410 | /// \headerfile <x86intrin.h> |
||
411 | /// |
||
412 | /// This intrinsic corresponds to the <c> BLSR </c> instruction. |
||
413 | /// |
||
414 | /// \param __X |
||
415 | /// An unsigned 64-bit integer containing the operand to be cleared. |
||
416 | /// \returns An unsigned 64-bit integer containing the result of clearing the |
||
417 | /// source operand. |
||
418 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
419 | __blsr_u64(unsigned long long __X) |
||
420 | { |
||
421 | return __X & (__X - 1); |
||
422 | } |
||
423 | |||
424 | #endif /* __x86_64__ */ |
||
425 | |||
426 | #undef __DEFAULT_FN_ATTRS |
||
427 | |||
428 | #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ |
||
429 | || defined(__BMI__) */ |
||
430 | |||
431 | #endif /* __BMIINTRIN_H */ |