
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R)))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R)))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R)))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R)))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
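
/* Usage sketch (illustrative, not part of the upstream header): the exp2a23
 * intrinsics approximate 2^x per lane with a relative error below 2^-23.
 * Assumes an AVX512ER-capable CPU (Xeon Phi "Knights Landing"/"Knights
 * Mill") and compilation with -mavx512er; variable names are hypothetical.
 *
 *   #include <immintrin.h>
 *
 *   __m512d x = _mm512_set1_pd(3.0);
 *   __m512d r = _mm512_exp2a23_pd(x);              // every lane ~ 8.0
 *   __m512d z = _mm512_maskz_exp2a23_pd(0x0F, x);  // lanes 4..7 zeroed
 */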

/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(S), (__mmask8)(M), \
                                          (int)(R)))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(M), (int)(R)))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(S), (__mmask16)(M), \
                                         (int)(R)))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(M), (int)(R)))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
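
/* Usage sketch (illustrative, not part of the upstream header): the rsqrt28
 * intrinsics approximate 1/sqrt(x) per lane with a relative error below
 * 2^-28, e.g. for vector normalization. Assumes AVX512ER hardware and
 * -mavx512er; variable names are hypothetical.
 *
 *   __m512 v     = _mm512_set1_ps(4.0f);
 *   __m512 rsqrt = _mm512_rsqrt28_ps(v);   // every lane ~ 0.5f
 */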

#define _mm_rsqrt28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(S), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(S), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
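
/* Usage sketch (illustrative, not part of the upstream header): the scalar
 * forms take two operands; only element 0 is computed from B, while the
 * upper element(s) of the result are copied from A. Variable names are
 * hypothetical.
 *
 *   __m128d a = _mm_set_pd(7.0, 7.0);   // supplies the upper lane
 *   __m128d b = _mm_set_sd(16.0);       // element 0 input
 *   __m128d r = _mm_rsqrt28_sd(a, b);   // r[0] ~ 0.25, r[1] == 7.0
 */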

/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
                                        (int)(R)))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(M), (int)(R)))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(S), (__mmask16)(M), \
                                       (int)(R)))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(M), (int)(R)))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
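
/* Usage sketch (illustrative, not part of the upstream header): the rcp28
 * intrinsics approximate 1/x per lane with a relative error below 2^-28.
 * The _round_ variants take an explicit suppress-all-exceptions flag;
 * variable names are hypothetical.
 *
 *   __m512 d   = _mm512_set1_ps(8.0f);
 *   __m512 inv = _mm512_rcp28_round_ps(d, _MM_FROUND_NO_EXC); // ~ 0.125f
 */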

#define _mm_rcp28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(S), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(S), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
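
/* Usage sketch (illustrative, not part of the upstream header): masked
 * scalar reciprocal. The maskz forms compute 1/b[0] into element 0 when
 * mask bit 0 is set (and write 0.0 when it is clear); the upper elements
 * are copied from A. Variable names are hypothetical.
 *
 *   __m128 a = _mm_set1_ps(1.0f);
 *   __m128 b = _mm_set_ss(4.0f);
 *   __m128 r = _mm_maskz_rcp28_ss(0x1, a, b);  // r[0] ~ 0.25f, r[1..3] = 1.0f
 */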

#endif /* __AVX512ERINTRIN_H */