Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __IMMINTRIN_H
  11. #error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
  12. #endif
  13.  
  14. #ifndef __AVXIFMAINTRIN_H
  15. #define __AVXIFMAINTRIN_H
  16.  
  /* Attribute sets applied to every intrinsic defined in this header. Both
   * variants request the "avxifma" target feature and differ only in the
   * minimum vector width they declare (128 or 256). */
  #define __DEFAULT_FN_ATTRS128                                                \
    __attribute__((__always_inline__,                                          \
                   __nodebug__,                                                \
                   __target__("avxifma"),                                      \
                   __min_vector_width__(128)))
  #define __DEFAULT_FN_ATTRS256                                                \
    __attribute__((__always_inline__,                                          \
                   __nodebug__,                                                \
                   __target__("avxifma"),                                      \
                   __min_vector_width__(256)))
  24.  
  25. // The intrinsics below must use the VEX encoding.
  26.  
  27. /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
  28. /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
  29. /// unsigned integer from the intermediate result with the corresponding
  30. /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
  31. ///
  32. /// \headerfile <immintrin.h>
  33. ///
  34. /// \code
  35. /// __m128i
  36. /// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
  37. /// \endcode
  38. ///
  39. /// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
  40. ///
  41. /// \return
  42. ///     return __m128i dst.
  43. /// \param __X
  44. ///     A 128-bit vector of [2 x i64]
  45. /// \param __Y
  46. ///     A 128-bit vector of [2 x i64]
  47. /// \param __Z
  48. ///     A 128-bit vector of [2 x i64]
  49. ///
  50. /// \code{.operation}
  51. /// FOR j := 0 to 1
  52. ///     i := j*64
  53. ///     tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
  54. ///     dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
  55. /// ENDFOR
  56. /// dst[MAX:128] := 0
  57. /// \endcode
  58. static __inline__ __m128i __DEFAULT_FN_ATTRS128
  59. _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  60.   return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
  61.                                                 (__v2di)__Z);
  62. }
  63.  
  64. /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
  65. /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
  66. /// unsigned integer from the intermediate result with the corresponding
  67. /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
  68. ///
  69. /// \headerfile <immintrin.h>
  70. ///
  71. /// \code
  72. /// __m256i
  73. /// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
  74. /// \endcode
  75. ///
  76. /// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
  77. ///
  78. /// \return
  79. ///     return __m256i dst.
  80. /// \param __X
  81. ///     A 256-bit vector of [4 x i64]
  82. /// \param __Y
  83. ///     A 256-bit vector of [4 x i64]
  84. /// \param __Z
  85. ///     A 256-bit vector of [4 x i64]
  86. ///
  87. /// \code{.operation}
  88. /// FOR j := 0 to 3
  89. ///     i := j*64
  90. ///     tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
  91. ///     dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
  92. /// ENDFOR
  93. /// dst[MAX:256] := 0
  94. /// \endcode
  95. static __inline__ __m256i __DEFAULT_FN_ATTRS256
  96. _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  97.   return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
  98.                                                 (__v4di)__Z);
  99. }
  100.  
  101. /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
  102. /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
  103. /// unsigned integer from the intermediate result with the corresponding
  104. /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
  105. ///
  106. /// \headerfile <immintrin.h>
  107. ///
  108. /// \code
  109. /// __m128i
  110. /// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
  111. /// \endcode
  112. ///
  113. /// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
  114. ///
  115. /// \return
  116. ///     return __m128i dst.
  117. /// \param __X
  118. ///     A 128-bit vector of [2 x i64]
  119. /// \param __Y
  120. ///     A 128-bit vector of [2 x i64]
  121. /// \param __Z
  122. ///     A 128-bit vector of [2 x i64]
  123. ///
  124. /// \code{.operation}
  125. /// FOR j := 0 to 1
  126. ///     i := j*64
  127. ///     tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
  128. ///     dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
  129. /// ENDFOR
  130. /// dst[MAX:128] := 0
  131. /// \endcode
  132. static __inline__ __m128i __DEFAULT_FN_ATTRS128
  133. _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
  134.   return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
  135.                                                 (__v2di)__Z);
  136. }
  137.  
  138. /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
  139. /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
  140. /// unsigned integer from the intermediate result with the corresponding
  141. /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
  142. ///
  143. /// \headerfile <immintrin.h>
  144. ///
  145. /// \code
  146. /// __m256i
  147. /// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
  148. /// \endcode
  149. ///
  150. /// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
  151. ///
  152. /// \return
  153. ///     return __m256i dst.
  154. /// \param __X
  155. ///     A 256-bit vector of [4 x i64]
  156. /// \param __Y
  157. ///     A 256-bit vector of [4 x i64]
  158. /// \param __Z
  159. ///     A 256-bit vector of [4 x i64]
  160. ///
  161. /// \code{.operation}
  162. /// FOR j := 0 to 3
  163. ///     i := j*64
  164. ///     tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
  165. ///     dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
  166. /// ENDFOR
  167. /// dst[MAX:256] := 0
  168. /// \endcode
  169. static __inline__ __m256i __DEFAULT_FN_ATTRS256
  170. _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
  171.   return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
  172.                                                 (__v4di)__Z);
  173. }
  174. #undef __DEFAULT_FN_ATTRS128
  175. #undef __DEFAULT_FN_ATTRS256
  176.  
  177. #endif // __AVXIFMAINTRIN_H
  178.