Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== |
| 2 | * |
||
| 3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | * See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | * |
||
| 7 | *===-----------------------------------------------------------------------=== |
||
| 8 | */ |
||
| 9 | |||
| 10 | #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H |
||
| 11 | #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." |
||
| 12 | #endif |
||
| 13 | |||
| 14 | #ifndef __BMIINTRIN_H |
||
| 15 | #define __BMIINTRIN_H |
||
| 16 | |||
| 17 | /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT |
||
| 18 | instruction behaves as BSF on non-BMI targets, there is code that expects |
||
| 19 | to use it as a potentially faster version of BSF. */ |
||
| 20 | #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) |
||
| 21 | |||
| 22 | #define _tzcnt_u16(a) (__tzcnt_u16((a))) |
||
| 23 | |||
| 24 | /// Counts the number of trailing zero bits in the operand. |
||
| 25 | /// |
||
| 26 | /// \headerfile <x86intrin.h> |
||
| 27 | /// |
||
| 28 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
| 29 | /// |
||
| 30 | /// \param __X |
||
| 31 | /// An unsigned 16-bit integer whose trailing zeros are to be counted. |
||
| 32 | /// \returns An unsigned 16-bit integer containing the number of trailing zero |
||
| 33 | /// bits in the operand. |
||
| 34 | static __inline__ unsigned short __RELAXED_FN_ATTRS |
||
| 35 | __tzcnt_u16(unsigned short __X) |
||
| 36 | { |
||
| 37 | return __builtin_ia32_tzcnt_u16(__X); |
||
| 38 | } |
||
| 39 | |||
| 40 | /// Counts the number of trailing zero bits in the operand. |
||
| 41 | /// |
||
| 42 | /// \headerfile <x86intrin.h> |
||
| 43 | /// |
||
| 44 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
| 45 | /// |
||
| 46 | /// \param __X |
||
| 47 | /// An unsigned 32-bit integer whose trailing zeros are to be counted. |
||
| 48 | /// \returns An unsigned 32-bit integer containing the number of trailing zero |
||
| 49 | /// bits in the operand. |
||
| 50 | /// \see _mm_tzcnt_32 |
||
| 51 | static __inline__ unsigned int __RELAXED_FN_ATTRS |
||
| 52 | __tzcnt_u32(unsigned int __X) |
||
| 53 | { |
||
| 54 | return __builtin_ia32_tzcnt_u32(__X); |
||
| 55 | } |
||
| 56 | |||
| 57 | /// Counts the number of trailing zero bits in the operand. |
||
| 58 | /// |
||
| 59 | /// \headerfile <x86intrin.h> |
||
| 60 | /// |
||
| 61 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
| 62 | /// |
||
| 63 | /// \param __X |
||
| 64 | /// An unsigned 32-bit integer whose trailing zeros are to be counted. |
||
| 65 | /// \returns A 32-bit integer containing the number of trailing zero bits in |
||
| 66 | /// the operand. |
||
| 67 | /// \see __tzcnt_u32 |
||
| 68 | static __inline__ int __RELAXED_FN_ATTRS |
||
| 69 | _mm_tzcnt_32(unsigned int __X) |
||
| 70 | { |
||
| 71 | return (int)__builtin_ia32_tzcnt_u32(__X); |
||
| 72 | } |
||
| 73 | |||
| 74 | #define _tzcnt_u32(a) (__tzcnt_u32((a))) |
||
| 75 | |||
| 76 | #ifdef __x86_64__ |
||
| 77 | |||
| 78 | /// Counts the number of trailing zero bits in the operand. |
||
| 79 | /// |
||
| 80 | /// \headerfile <x86intrin.h> |
||
| 81 | /// |
||
| 82 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
| 83 | /// |
||
| 84 | /// \param __X |
||
| 85 | /// An unsigned 64-bit integer whose trailing zeros are to be counted. |
||
| 86 | /// \returns An unsigned 64-bit integer containing the number of trailing zero |
||
| 87 | /// bits in the operand. |
||
| 88 | /// \see _mm_tzcnt_64 |
||
| 89 | static __inline__ unsigned long long __RELAXED_FN_ATTRS |
||
| 90 | __tzcnt_u64(unsigned long long __X) |
||
| 91 | { |
||
| 92 | return __builtin_ia32_tzcnt_u64(__X); |
||
| 93 | } |
||
| 94 | |||
| 95 | /// Counts the number of trailing zero bits in the operand. |
||
| 96 | /// |
||
| 97 | /// \headerfile <x86intrin.h> |
||
| 98 | /// |
||
| 99 | /// This intrinsic corresponds to the <c> TZCNT </c> instruction. |
||
| 100 | /// |
||
| 101 | /// \param __X |
||
| 102 | /// An unsigned 64-bit integer whose trailing zeros are to be counted. |
||
| 103 | /// \returns A 64-bit integer containing the number of trailing zero bits in |
||
| 104 | /// the operand. |
||
| 105 | /// \see __tzcnt_u64 |
||
| 106 | static __inline__ long long __RELAXED_FN_ATTRS |
||
| 107 | _mm_tzcnt_64(unsigned long long __X) |
||
| 108 | { |
||
| 109 | return (long long)__builtin_ia32_tzcnt_u64(__X); |
||
| 110 | } |
||
| 111 | |||
| 112 | #define _tzcnt_u64(a) (__tzcnt_u64((a))) |
||
| 113 | |||
| 114 | #endif /* __x86_64__ */ |
||
| 115 | |||
| 116 | #undef __RELAXED_FN_ATTRS |
||
| 117 | |||
| 118 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
||
| 119 | defined(__BMI__) |
||
| 120 | |||
| 121 | /* Define the default attributes for the functions in this file. */ |
||
| 122 | #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) |
||
| 123 | |||
| 124 | #define _andn_u32(a, b) (__andn_u32((a), (b))) |
||
| 125 | |||
| 126 | /* _bextr_u32 != __bextr_u32 */ |
||
| 127 | #define _blsi_u32(a) (__blsi_u32((a))) |
||
| 128 | |||
| 129 | #define _blsmsk_u32(a) (__blsmsk_u32((a))) |
||
| 130 | |||
| 131 | #define _blsr_u32(a) (__blsr_u32((a))) |
||
| 132 | |||
| 133 | /// Performs a bitwise AND of the second operand with the one's |
||
| 134 | /// complement of the first operand. |
||
| 135 | /// |
||
| 136 | /// \headerfile <x86intrin.h> |
||
| 137 | /// |
||
| 138 | /// This intrinsic corresponds to the <c> ANDN </c> instruction. |
||
| 139 | /// |
||
| 140 | /// \param __X |
||
| 141 | /// An unsigned integer containing the operand whose one's complement is used. |
||
| 142 | /// \param __Y |
||
| 143 | /// An unsigned integer containing the operand ANDed with the one's complement of \a __X. |
||
| 144 | /// \returns An unsigned integer containing the bitwise AND of the second |
||
| 145 | /// operand with the one's complement of the first operand. |
||
| 146 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 147 | __andn_u32(unsigned int __X, unsigned int __Y) |
||
| 148 | { |
||
| 149 | return ~__X & __Y; |
||
| 150 | } |
||
| 151 | |||
| 152 | /* AMD-specified, double-leading-underscore version of BEXTR */ |
||
| 153 | /// Extracts the specified bits from the first operand and returns them |
||
| 154 | /// in the least significant bits of the result. |
||
| 155 | /// |
||
| 156 | /// \headerfile <x86intrin.h> |
||
| 157 | /// |
||
| 158 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 159 | /// |
||
| 160 | /// \param __X |
||
| 161 | /// An unsigned integer whose bits are to be extracted. |
||
| 162 | /// \param __Y |
||
| 163 | /// An unsigned integer used to specify which bits are extracted. Bits [7:0] |
||
| 164 | /// specify the index of the least significant bit. Bits [15:8] specify the |
||
| 165 | /// number of bits to be extracted. |
||
| 166 | /// \returns An unsigned integer whose least significant bits contain the |
||
| 167 | /// extracted bits. |
||
| 168 | /// \see _bextr_u32 |
||
| 169 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 170 | __bextr_u32(unsigned int __X, unsigned int __Y) |
||
| 171 | { |
||
| 172 | return __builtin_ia32_bextr_u32(__X, __Y); |
||
| 173 | } |
||
| 174 | |||
| 175 | /* Intel-specified, single-leading-underscore version of BEXTR */ |
||
| 176 | /// Extracts the specified bits from the first operand and returns them |
||
| 177 | /// in the least significant bits of the result. |
||
| 178 | /// |
||
| 179 | /// \headerfile <x86intrin.h> |
||
| 180 | /// |
||
| 181 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 182 | /// |
||
| 183 | /// \param __X |
||
| 184 | /// An unsigned integer whose bits are to be extracted. |
||
| 185 | /// \param __Y |
||
| 186 | /// An unsigned integer used to specify the index of the least significant |
||
| 187 | /// bit for the bits to be extracted. Bits [7:0] specify the index. |
||
| 188 | /// \param __Z |
||
| 189 | /// An unsigned integer used to specify the number of bits to be extracted. |
||
| 190 | /// Bits [7:0] specify the number of bits. |
||
| 191 | /// \returns An unsigned integer whose least significant bits contain the |
||
| 192 | /// extracted bits. |
||
| 193 | /// \see __bextr_u32 |
||
| 194 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 195 | _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) |
||
| 196 | { |
||
| 197 | return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); |
||
| 198 | } |
||
| 199 | |||
| 200 | /* Intel-specified, single-leading-underscore version of BEXTR2 */ |
||
| 201 | /// Extracts the specified bits from the first operand and returns them |
||
| 202 | /// in the least significant bits of the result. |
||
| 203 | /// |
||
| 204 | /// \headerfile <x86intrin.h> |
||
| 205 | /// |
||
| 206 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 207 | /// |
||
| 208 | /// \param __X |
||
| 209 | /// An unsigned integer whose bits are to be extracted. |
||
| 210 | /// \param __Y |
||
| 211 | /// An unsigned integer used to specify which bits are extracted. Bits [7:0] |
||
| 212 | /// specify the index of the least significant bit. Bits [15:8] specify the |
||
| 213 | /// number of bits to be extracted. |
||
| 214 | /// \returns An unsigned integer whose least significant bits contain the |
||
| 215 | /// extracted bits. |
||
| 216 | /// \see __bextr_u32 |
||
| 217 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 218 | _bextr2_u32(unsigned int __X, unsigned int __Y) { |
||
| 219 | return __builtin_ia32_bextr_u32(__X, __Y); |
||
| 220 | } |
||
| 221 | |||
| 222 | /// Clears all bits in the source except for the least significant bit |
||
| 223 | /// containing a value of 1 and returns the result. |
||
| 224 | /// |
||
| 225 | /// \headerfile <x86intrin.h> |
||
| 226 | /// |
||
| 227 | /// This intrinsic corresponds to the <c> BLSI </c> instruction. |
||
| 228 | /// |
||
| 229 | /// \param __X |
||
| 230 | /// An unsigned integer whose bits are to be cleared. |
||
| 231 | /// \returns An unsigned integer containing the result of clearing the bits from |
||
| 232 | /// the source operand. |
||
| 233 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 234 | __blsi_u32(unsigned int __X) |
||
| 235 | { |
||
| 236 | return __X & -__X; |
||
| 237 | } |
||
| 238 | |||
| 239 | /// Creates a mask whose bits are set to 1, using bit 0 up to and |
||
| 240 | /// including the least significant bit that is set to 1 in the source |
||
| 241 | /// operand and returns the result. |
||
| 242 | /// |
||
| 243 | /// \headerfile <x86intrin.h> |
||
| 244 | /// |
||
| 245 | /// This intrinsic corresponds to the <c> BLSMSK </c> instruction. |
||
| 246 | /// |
||
| 247 | /// \param __X |
||
| 248 | /// An unsigned integer used to create the mask. |
||
| 249 | /// \returns An unsigned integer containing the newly created mask. |
||
| 250 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 251 | __blsmsk_u32(unsigned int __X) |
||
| 252 | { |
||
| 253 | return __X ^ (__X - 1); |
||
| 254 | } |
||
| 255 | |||
| 256 | /// Clears the least significant bit that is set to 1 in the source |
||
| 257 | /// operand and returns the result. |
||
| 258 | /// |
||
| 259 | /// \headerfile <x86intrin.h> |
||
| 260 | /// |
||
| 261 | /// This intrinsic corresponds to the <c> BLSR </c> instruction. |
||
| 262 | /// |
||
| 263 | /// \param __X |
||
| 264 | /// An unsigned integer containing the operand to be cleared. |
||
| 265 | /// \returns An unsigned integer containing the source operand with its least |
||
| 266 | /// significant set bit cleared. |
||
| 267 | static __inline__ unsigned int __DEFAULT_FN_ATTRS |
||
| 268 | __blsr_u32(unsigned int __X) |
||
| 269 | { |
||
| 270 | return __X & (__X - 1); |
||
| 271 | } |
||
| 272 | |||
| 273 | #ifdef __x86_64__ |
||
| 274 | |||
| 275 | #define _andn_u64(a, b) (__andn_u64((a), (b))) |
||
| 276 | |||
| 277 | /* _bextr_u64 != __bextr_u64 */ |
||
| 278 | #define _blsi_u64(a) (__blsi_u64((a))) |
||
| 279 | |||
| 280 | #define _blsmsk_u64(a) (__blsmsk_u64((a))) |
||
| 281 | |||
| 282 | #define _blsr_u64(a) (__blsr_u64((a))) |
||
| 283 | |||
| 284 | /// Performs a bitwise AND of the second operand with the one's |
||
| 285 | /// complement of the first operand. |
||
| 286 | /// |
||
| 287 | /// \headerfile <x86intrin.h> |
||
| 288 | /// |
||
| 289 | /// This intrinsic corresponds to the <c> ANDN </c> instruction. |
||
| 290 | /// |
||
| 291 | /// \param __X |
||
| 292 | /// An unsigned 64-bit integer containing the operand whose one's complement is used. |
||
| 293 | /// \param __Y |
||
| 294 | /// An unsigned 64-bit integer containing the operand ANDed with the one's complement of \a __X. |
||
| 295 | /// \returns An unsigned 64-bit integer containing the bitwise AND of the second |
||
| 296 | /// operand with the one's complement of the first operand. |
||
| 297 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 298 | __andn_u64 (unsigned long long __X, unsigned long long __Y) |
||
| 299 | { |
||
| 300 | return ~__X & __Y; |
||
| 301 | } |
||
| 302 | |||
| 303 | /* AMD-specified, double-leading-underscore version of BEXTR */ |
||
| 304 | /// Extracts the specified bits from the first operand and returns them |
||
| 305 | /// in the least significant bits of the result. |
||
| 306 | /// |
||
| 307 | /// \headerfile <x86intrin.h> |
||
| 308 | /// |
||
| 309 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 310 | /// |
||
| 311 | /// \param __X |
||
| 312 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
| 313 | /// \param __Y |
||
| 314 | /// An unsigned 64-bit integer used to specify which bits are extracted. Bits |
||
| 315 | /// [7:0] specify the index of the least significant bit. Bits [15:8] specify |
||
| 316 | /// the number of bits to be extracted. |
||
| 317 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
| 318 | /// extracted bits. |
||
| 319 | /// \see _bextr_u64 |
||
| 320 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 321 | __bextr_u64(unsigned long long __X, unsigned long long __Y) |
||
| 322 | { |
||
| 323 | return __builtin_ia32_bextr_u64(__X, __Y); |
||
| 324 | } |
||
| 325 | |||
| 326 | /* Intel-specified, single-leading-underscore version of BEXTR */ |
||
| 327 | /// Extracts the specified bits from the first operand and returns them |
||
| 328 | /// in the least significant bits of the result. |
||
| 329 | /// |
||
| 330 | /// \headerfile <x86intrin.h> |
||
| 331 | /// |
||
| 332 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 333 | /// |
||
| 334 | /// \param __X |
||
| 335 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
| 336 | /// \param __Y |
||
| 337 | /// An unsigned integer used to specify the index of the least significant |
||
| 338 | /// bit for the bits to be extracted. Bits [7:0] specify the index. |
||
| 339 | /// \param __Z |
||
| 340 | /// An unsigned integer used to specify the number of bits to be extracted. |
||
| 341 | /// Bits [7:0] specify the number of bits. |
||
| 342 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
| 343 | /// extracted bits. |
||
| 344 | /// \see __bextr_u64 |
||
| 345 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 346 | _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) |
||
| 347 | { |
||
| 348 | return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); |
||
| 349 | } |
||
| 350 | |||
| 351 | /* Intel-specified, single-leading-underscore version of BEXTR2 */ |
||
| 352 | /// Extracts the specified bits from the first operand and returns them |
||
| 353 | /// in the least significant bits of the result. |
||
| 354 | /// |
||
| 355 | /// \headerfile <x86intrin.h> |
||
| 356 | /// |
||
| 357 | /// This intrinsic corresponds to the <c> BEXTR </c> instruction. |
||
| 358 | /// |
||
| 359 | /// \param __X |
||
| 360 | /// An unsigned 64-bit integer whose bits are to be extracted. |
||
| 361 | /// \param __Y |
||
| 362 | /// An unsigned 64-bit integer used to specify which bits are extracted. Bits |
||
| 363 | /// [7:0] specify the index of the least significant bit. Bits [15:8] specify |
||
| 364 | /// the number of bits to be extracted. |
||
| 365 | /// \returns An unsigned 64-bit integer whose least significant bits contain the |
||
| 366 | /// extracted bits. |
||
| 367 | /// \see __bextr_u64 |
||
| 368 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 369 | _bextr2_u64(unsigned long long __X, unsigned long long __Y) { |
||
| 370 | return __builtin_ia32_bextr_u64(__X, __Y); |
||
| 371 | } |
||
| 372 | |||
| 373 | /// Clears all bits in the source except for the least significant bit |
||
| 374 | /// containing a value of 1 and returns the result. |
||
| 375 | /// |
||
| 376 | /// \headerfile <x86intrin.h> |
||
| 377 | /// |
||
| 378 | /// This intrinsic corresponds to the <c> BLSI </c> instruction. |
||
| 379 | /// |
||
| 380 | /// \param __X |
||
| 381 | /// An unsigned 64-bit integer whose bits are to be cleared. |
||
| 382 | /// \returns An unsigned 64-bit integer containing the result of clearing the |
||
| 383 | /// bits from the source operand. |
||
| 384 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 385 | __blsi_u64(unsigned long long __X) |
||
| 386 | { |
||
| 387 | return __X & -__X; |
||
| 388 | } |
||
| 389 | |||
| 390 | /// Creates a mask whose bits are set to 1, using bit 0 up to and |
||
| 391 | /// including the least significant bit that is set to 1 in the source |
||
| 392 | /// operand and returns the result. |
||
| 393 | /// |
||
| 394 | /// \headerfile <x86intrin.h> |
||
| 395 | /// |
||
| 396 | /// This intrinsic corresponds to the <c> BLSMSK </c> instruction. |
||
| 397 | /// |
||
| 398 | /// \param __X |
||
| 399 | /// An unsigned 64-bit integer used to create the mask. |
||
| 400 | /// \returns An unsigned 64-bit integer containing the newly created mask. |
||
| 401 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 402 | __blsmsk_u64(unsigned long long __X) |
||
| 403 | { |
||
| 404 | return __X ^ (__X - 1); |
||
| 405 | } |
||
| 406 | |||
| 407 | /// Clears the least significant bit that is set to 1 in the source |
||
| 408 | /// operand and returns the result. |
||
| 409 | /// |
||
| 410 | /// \headerfile <x86intrin.h> |
||
| 411 | /// |
||
| 412 | /// This intrinsic corresponds to the <c> BLSR </c> instruction. |
||
| 413 | /// |
||
| 414 | /// \param __X |
||
| 415 | /// An unsigned 64-bit integer containing the operand to be cleared. |
||
| 416 | /// \returns An unsigned 64-bit integer containing the source operand with its |
||
| 417 | /// least significant set bit cleared. |
||
| 418 | static __inline__ unsigned long long __DEFAULT_FN_ATTRS |
||
| 419 | __blsr_u64(unsigned long long __X) |
||
| 420 | { |
||
| 421 | return __X & (__X - 1); |
||
| 422 | } |
||
| 423 | |||
| 424 | #endif /* __x86_64__ */ |
||
| 425 | |||
| 426 | #undef __DEFAULT_FN_ATTRS |
||
| 427 | |||
| 428 | #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ |
||
| 429 | || defined(__BMI__) */ |
||
| 430 | |||
| 431 | #endif /* __BMIINTRIN_H */ |