Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __X86INTRIN_H
  11. #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
  12. #endif
  13.  
  14. #ifndef __FMA4INTRIN_H
  15. #define __FMA4INTRIN_H
  16.  
  17. #include <pmmintrin.h>
  18.  
  /* Default attribute sets for the intrinsics defined in this file. Both
   * request __always_inline__, __nodebug__ and the "fma4" target; they differ
   * only in the __min_vector_width__ argument (128 vs. 256). */
  #define __DEFAULT_FN_ATTRS128                                                \
    __attribute__((__always_inline__, __nodebug__, __target__("fma4"),         \
                   __min_vector_width__(128)))
  #define __DEFAULT_FN_ATTRS256                                                \
    __attribute__((__always_inline__, __nodebug__, __target__("fma4"),         \
                   __min_vector_width__(256)))
  22.  
  23. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  24. _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
  25. {
  26.   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
  27. }
  28.  
  29. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  30. _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
  31. {
  32.   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
  33. }
  34.  
  35. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  36. _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
  37. {
  38.   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
  39. }
  40.  
  41. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  42. _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
  43. {
  44.   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
  45. }
  46.  
  47. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  48. _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
  49. {
  50.   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
  51. }
  52.  
  53. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  54. _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
  55. {
  56.   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
  57. }
  58.  
  59. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  60. _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
  61. {
  62.   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
  63. }
  64.  
  65. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  66. _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
  67. {
  68.   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
  69. }
  70.  
  71. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  72. _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
  73. {
  74.   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
  75. }
  76.  
  77. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  78. _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
  79. {
  80.   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
  81. }
  82.  
  83. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  84. _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
  85. {
  86.   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
  87. }
  88.  
  89. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  90. _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
  91. {
  92.   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
  93. }
  94.  
  95. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  96. _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
  97. {
  98.   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
  99. }
  100.  
  101. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  102. _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
  103. {
  104.   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
  105. }
  106.  
  107. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  108. _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
  109. {
  110.   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
  111. }
  112.  
  113. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  114. _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
  115. {
  116.   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
  117. }
  118.  
  119. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  120. _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
  121. {
  122.   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
  123. }
  124.  
  125. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  126. _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
  127. {
  128.   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
  129. }
  130.  
  131. static __inline__ __m128 __DEFAULT_FN_ATTRS128
  132. _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
  133. {
  134.   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
  135. }
  136.  
  137. static __inline__ __m128d __DEFAULT_FN_ATTRS128
  138. _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
  139. {
  140.   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
  141. }
  142.  
  143. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  144. _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
  145. {
  146.   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
  147. }
  148.  
  149. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  150. _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
  151. {
  152.   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
  153. }
  154.  
  155. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  156. _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
  157. {
  158.   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
  159. }
  160.  
  161. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  162. _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
  163. {
  164.   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
  165. }
  166.  
  167. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  168. _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
  169. {
  170.   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
  171. }
  172.  
  173. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  174. _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
  175. {
  176.   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
  177. }
  178.  
  179. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  180. _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
  181. {
  182.   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
  183. }
  184.  
  185. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  186. _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
  187. {
  188.   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
  189. }
  190.  
  191. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  192. _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
  193. {
  194.   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
  195. }
  196.  
  197. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  198. _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
  199. {
  200.   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
  201. }
  202.  
  203. static __inline__ __m256 __DEFAULT_FN_ATTRS256
  204. _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
  205. {
  206.   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
  207. }
  208.  
  209. static __inline__ __m256d __DEFAULT_FN_ATTRS256
  210. _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
  211. {
  212.   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
  213. }
  214.  
  /* The attribute helper macros are private to this header; remove them so
   * they do not leak into code that includes it. */
  #undef __DEFAULT_FN_ATTRS256
  #undef __DEFAULT_FN_ATTRS128
  217.  
  218. #endif /* __FMA4INTRIN_H */
  219.