Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __AMMINTRIN_H
  11. #define __AMMINTRIN_H
  12.  
  13. #if !defined(__i386__) && !defined(__x86_64__)
  14. #error "This header is only meant to be used on x86 and x64 architecture"
  15. #endif
  16.  
  17. #include <pmmintrin.h>
  18.  
  19. #define __DEFAULT_FN_ATTRS /* attributes shared by every function in this file */ \
  20.   __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
  21.  
  22. /// Pulls a bit field out of the low 64 bits of a 128-bit integer vector,
  23. ///    using the immediate length \a len and starting bit position \a idx.
  24. ///
  25. /// \headerfile <x86intrin.h>
  26. ///
  27. /// \code
  28. /// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
  29. /// \endcode
  30. ///
  31. /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
  32. ///
  33. /// \param x
  34. ///    The source vector; the field is taken from its lower 64 bits.
  35. /// \param len
  36. ///    Field length, taken from bits [5:0]; all other bits are ignored. A
  37. ///    value of zero in bits [5:0] is interpreted as a length of 64.
  38. /// \param idx
  39. ///    Position of the field's least significant bit, taken from bits [5:0];
  40. ///    all other bits are ignored. The result is undefined when index plus
  41. ///    length exceeds 64, and also when the length is zero while the index is
  42. ///    non-zero. When both the length and the index are zero, bits [63:0] of
  43. ///    parameter \a x are extracted.
  44. /// \returns A 128-bit integer vector holding the extracted bits in its lower
  45. ///    64 bits.
  46. #define _mm_extracti_si64(x, len, idx)                                   \
  47.   ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), (char)(len),    \
  48.                                   (char)(idx)))
  49.  
  50. /// Pulls a bit field out of the low 64 bits of a 128-bit integer vector;
  51. ///    the field's length and starting bit position are read from \a __y.
  52. ///
  53. /// \headerfile <x86intrin.h>
  54. ///
  55. /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
  56. ///
  57. /// \param __x
  58. ///    The source vector; the field is taken from its lower 64 bits.
  59. /// \param __y
  60. ///    Control operand: bits [5:0] give the length and bits [13:8] give the
  61. ///    index of the least significant bit; all other bits are ignored. A zero
  62. ///    in bits [5:0] is interpreted as a length of 64. The result is undefined
  63. ///    when index plus length exceeds 64, and also when the length is zero
  64. ///    while the index is non-zero. When both are zero, bits [63:0] of
  65. ///    parameter \a __x are extracted.
  66. /// \returns A 128-bit vector holding the extracted bits in its lower 64 bits.
  67. static __inline__ __m128i __DEFAULT_FN_ATTRS
  68. _mm_extract_si64(__m128i __x, __m128i __y)
  69. {
  70.   /* The builtin receives the control operand as a __v16qi. */
  71.   __v16qi __ctl = (__v16qi)__y;
  72.   return (__m128i)__builtin_ia32_extrq((__v2di)__x, __ctl);
  73. }
  74.  
  75. /// Copies a bit field from the low end of the source integer vector \a y
  76. ///    into the lower 64 bits of the destination integer vector \a x; the
  77. ///    field is \a len bits long and is placed at bit position \a idx.
  78. ///
  79. /// \headerfile <x86intrin.h>
  80. ///
  81. /// \code
  82. /// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
  83. /// const int idx);
  84. /// \endcode
  85. ///
  86. /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
  87. ///
  88. /// \param x
  89. ///    The destination operand that receives the bits. The replaced field is
  90. ///    \a len bits long and its least significant bit sits at index
  91. ///    \a idx.
  92. /// \param y
  93. ///    The source operand supplying the bits. The field consists of the
  94. ///    \a len least significant bits of operand \a y.
  95. /// \param len
  96. ///    Field length, taken from bits [5:0]; all other bits are ignored. A
  97. ///    value of zero in bits [5:0] is interpreted as a length of 64.
  98. /// \param idx
  99. ///    Position of the field's least significant bit, taken from bits [5:0];
  100. ///    all other bits are ignored. The result is undefined when index plus
  101. ///    length exceeds 64, and also when the length is zero while the index is
  102. ///    non-zero. When both the length and the index are zero, bits [63:0] of
  103. ///    parameter \a y are inserted into parameter \a x.
  104. /// \returns A 128-bit integer vector whose lower 64 bits are those of
  105. ///    destination operand \a x with the selected bit field replaced by the
  106. ///    low-order bits of source operand \a y. The upper 64 bits of the
  107. ///    return value are undefined.
  108. #define _mm_inserti_si64(x, y, len, idx)                                 \
  109.   ((__m128i)__builtin_ia32_insertqi(                                     \
  110.       (__v2di)(__m128i)(x), (__v2di)(__m128i)(y),                        \
  111.       (char)(len), (char)(idx)))
  112.  
  113. /// Copies a bit field from the low end of the source integer vector
  114. ///    \a __y into the lower 64 bits of the destination integer vector
  115. ///    \a __x; the field's length and position are also read from \a __y.
  116. ///
  117. /// \headerfile <x86intrin.h>
  118. ///
  119. /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
  120. ///
  121. /// \param __x
  122. ///    The destination operand that receives the bits. The length of the
  123. ///    replaced field and the index of its least significant bit are given
  124. ///    by operand \a __y.
  125. /// \param __y
  126. ///    The source operand supplying the bits, taken from its least
  127. ///    significant end. Bits [69:64] give the length and bits [77:72] give
  128. ///    the destination index; all other bits are ignored. A zero in bits
  129. ///    [69:64] is interpreted as a length of 64. The result is undefined when
  130. ///    index plus length exceeds 64, and also when the length is zero while
  131. ///    the index is non-zero. When both are zero, bits [63:0] of parameter
  132. ///    \a __y are inserted into parameter \a __x.
  133. /// \returns A 128-bit integer vector whose lower 64 bits are those of
  134. ///    destination operand \a __x with the selected bit field replaced by the
  135. ///    low-order bits of source operand \a __y. The upper 64 bits of the
  136. ///    return value are undefined.
  137. static __inline__ __m128i __DEFAULT_FN_ATTRS
  138. _mm_insert_si64(__m128i __x, __m128i __y)
  139. {
  140.   __v2di __dst = (__v2di)__x;
  141.   __v2di __src = (__v2di)__y;
  142.   return (__m128i)__builtin_ia32_insertq(__dst, __src);
  143. }
  144.  
  145. /// Stores a 64-bit double-precision value to a 64-bit memory location as
  146. ///    non-temporal data (unlikely to be used again soon) to minimize caching.
  147. ///
  148. /// \headerfile <x86intrin.h>
  149. ///
  150. /// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
  151. ///
  152. /// \param __p
  153. ///    The 64-bit memory location that receives the register value.
  154. /// \param __a
  155. ///    The 64-bit double-precision floating-point register value to store.
  156. static __inline__ void __DEFAULT_FN_ATTRS
  157. _mm_stream_sd(double *__p, __m128d __a)
  158. {
  159.   __v2df __val = (__v2df)__a;
  160.   __builtin_ia32_movntsd(__p, __val);
  161. }
  162.  
  163. /// Stores a 32-bit single-precision value to a 32-bit memory location as
  164. ///    non-temporal data (unlikely to be used again soon) to minimize caching.
  165. ///
  166. /// \headerfile <x86intrin.h>
  167. ///
  168. /// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
  169. ///
  170. /// \param __p
  171. ///    The 32-bit memory location that receives the register value.
  172. /// \param __a
  173. ///    The 32-bit single-precision floating-point register value to store.
  174. static __inline__ void __DEFAULT_FN_ATTRS
  175. _mm_stream_ss(float *__p, __m128 __a)
  176. {
  177.   __v4sf __val = (__v4sf)__a;
  178.   __builtin_ia32_movntss(__p, __val);
  179. }
  180.  
  181. #undef __DEFAULT_FN_ATTRS
  182.  
  183. #endif /* __AMMINTRIN_H */
  184.