Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9. #ifndef __CLANG_HIP_MATH_H__
  10. #define __CLANG_HIP_MATH_H__
  11.  
  12. #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
  13. #error "This file is for HIP and OpenMP AMDGCN device compilation only."
  14. #endif
  15.  
  16. #if !defined(__HIPCC_RTC__)
  17. #if defined(__cplusplus)
  18. #include <algorithm>
  19. #endif
  20. #include <limits.h>
  21. #include <stdint.h>
  22. #ifdef __OPENMP_AMDGCN__
  23. #include <omp.h>
  24. #endif
  25. #endif // !defined(__HIPCC_RTC__)
  26.  
  27. #pragma push_macro("__DEVICE__")
  28.  
  29. #ifdef __OPENMP_AMDGCN__
  30. #define __DEVICE__ static inline __attribute__((always_inline, nothrow))
  31. #else
  32. #define __DEVICE__ static __device__ inline __attribute__((always_inline))
  33. #endif
  34.  
  35. // A few functions return bool type starting only in C++11.
  36. #pragma push_macro("__RETURN_TYPE")
  37. #ifdef __OPENMP_AMDGCN__
  38. #define __RETURN_TYPE int
  39. #else
  40. #if defined(__cplusplus)
  41. #define __RETURN_TYPE bool
  42. #else
  43. #define __RETURN_TYPE int
  44. #endif
  45. #endif // __OPENMP_AMDGCN__
  46.  
  47. #if defined (__cplusplus) && __cplusplus < 201103L
  48. // emulate static_assert on type sizes
  49. template<bool>
  50. struct __compare_result{};
  51. template<>
  52. struct __compare_result<true> {
  53.   static const __device__ bool valid;
  54. };
  55.  
  56. __DEVICE__
  57. void __suppress_unused_warning(bool b){};
  58. template <unsigned int S, unsigned int T>
  59. __DEVICE__ void __static_assert_equal_size() {
  60.   __suppress_unused_warning(__compare_result<S == T>::valid);
  61. }
  62.  
  63. #define __static_assert_type_size_equal(A, B) \
  64.   __static_assert_equal_size<A,B>()
  65.  
  66. #else
  67. #define __static_assert_type_size_equal(A,B) \
  68.   static_assert((A) == (B), "")
  69.  
  70. #endif
  71.  
  72. __DEVICE__
  73. uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  74.   uint64_t __r = 0;
  75.   while (*__tagp != '\0') {
  76.     char __tmp = *__tagp;
  77.  
  78.     if (__tmp >= '0' && __tmp <= '7')
  79.       __r = (__r * 8u) + __tmp - '0';
  80.     else
  81.       return 0;
  82.  
  83.     ++__tagp;
  84.   }
  85.  
  86.   return __r;
  87. }
  88.  
  89. __DEVICE__
  90. uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  91.   uint64_t __r = 0;
  92.   while (*__tagp != '\0') {
  93.     char __tmp = *__tagp;
  94.  
  95.     if (__tmp >= '0' && __tmp <= '9')
  96.       __r = (__r * 10u) + __tmp - '0';
  97.     else
  98.       return 0;
  99.  
  100.     ++__tagp;
  101.   }
  102.  
  103.   return __r;
  104. }
  105.  
  106. __DEVICE__
  107. uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  108.   uint64_t __r = 0;
  109.   while (*__tagp != '\0') {
  110.     char __tmp = *__tagp;
  111.  
  112.     if (__tmp >= '0' && __tmp <= '9')
  113.       __r = (__r * 16u) + __tmp - '0';
  114.     else if (__tmp >= 'a' && __tmp <= 'f')
  115.       __r = (__r * 16u) + __tmp - 'a' + 10;
  116.     else if (__tmp >= 'A' && __tmp <= 'F')
  117.       __r = (__r * 16u) + __tmp - 'A' + 10;
  118.     else
  119.       return 0;
  120.  
  121.     ++__tagp;
  122.   }
  123.  
  124.   return __r;
  125. }
  126.  
  127. __DEVICE__
  128. uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  129.   if (*__tagp == '0') {
  130.     ++__tagp;
  131.  
  132.     if (*__tagp == 'x' || *__tagp == 'X')
  133.       return __make_mantissa_base16(__tagp);
  134.     else
  135.       return __make_mantissa_base8(__tagp);
  136.   }
  137.  
  138.   return __make_mantissa_base10(__tagp);
  139. }
  140.  
  141. // BEGIN FLOAT
  142. #if defined(__cplusplus)
  143. __DEVICE__
  144. int abs(int __x) {
  145.   int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  146.   return (__x ^ __sgn) - __sgn;
  147. }
  148. __DEVICE__
  149. long labs(long __x) {
  150.   long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  151.   return (__x ^ __sgn) - __sgn;
  152. }
  153. __DEVICE__
  154. long long llabs(long long __x) {
  155.   long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  156.   return (__x ^ __sgn) - __sgn;
  157. }
  158. #endif
  159.  
  160. __DEVICE__
  161. float acosf(float __x) { return __ocml_acos_f32(__x); }
  162.  
  163. __DEVICE__
  164. float acoshf(float __x) { return __ocml_acosh_f32(__x); }
  165.  
  166. __DEVICE__
  167. float asinf(float __x) { return __ocml_asin_f32(__x); }
  168.  
  169. __DEVICE__
  170. float asinhf(float __x) { return __ocml_asinh_f32(__x); }
  171.  
  172. __DEVICE__
  173. float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
  174.  
  175. __DEVICE__
  176. float atanf(float __x) { return __ocml_atan_f32(__x); }
  177.  
  178. __DEVICE__
  179. float atanhf(float __x) { return __ocml_atanh_f32(__x); }
  180.  
  181. __DEVICE__
  182. float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
  183.  
  184. __DEVICE__
  185. float ceilf(float __x) { return __ocml_ceil_f32(__x); }
  186.  
  187. __DEVICE__
  188. float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
  189.  
  190. __DEVICE__
  191. float cosf(float __x) { return __ocml_cos_f32(__x); }
  192.  
  193. __DEVICE__
  194. float coshf(float __x) { return __ocml_cosh_f32(__x); }
  195.  
  196. __DEVICE__
  197. float cospif(float __x) { return __ocml_cospi_f32(__x); }
  198.  
  199. __DEVICE__
  200. float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
  201.  
  202. __DEVICE__
  203. float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
  204.  
  205. __DEVICE__
  206. float erfcf(float __x) { return __ocml_erfc_f32(__x); }
  207.  
  208. __DEVICE__
  209. float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
  210.  
  211. __DEVICE__
  212. float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
  213.  
  214. __DEVICE__
  215. float erff(float __x) { return __ocml_erf_f32(__x); }
  216.  
  217. __DEVICE__
  218. float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
  219.  
  220. __DEVICE__
  221. float exp10f(float __x) { return __ocml_exp10_f32(__x); }
  222.  
  223. __DEVICE__
  224. float exp2f(float __x) { return __ocml_exp2_f32(__x); }
  225.  
  226. __DEVICE__
  227. float expf(float __x) { return __ocml_exp_f32(__x); }
  228.  
  229. __DEVICE__
  230. float expm1f(float __x) { return __ocml_expm1_f32(__x); }
  231.  
  232. __DEVICE__
  233. float fabsf(float __x) { return __builtin_fabsf(__x); }
  234.  
  235. __DEVICE__
  236. float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
  237.  
  238. __DEVICE__
  239. float fdividef(float __x, float __y) { return __x / __y; }
  240.  
  241. __DEVICE__
  242. float floorf(float __x) { return __ocml_floor_f32(__x); }
  243.  
  244. __DEVICE__
  245. float fmaf(float __x, float __y, float __z) {
  246.   return __ocml_fma_f32(__x, __y, __z);
  247. }
  248.  
  249. __DEVICE__
  250. float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
  251.  
  252. __DEVICE__
  253. float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
  254.  
  255. __DEVICE__
  256. float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
  257.  
  258. __DEVICE__
  259. float frexpf(float __x, int *__nptr) {
  260.   int __tmp;
  261. #ifdef __OPENMP_AMDGCN__
  262. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  263. #endif
  264.   float __r =
  265.       __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
  266.   *__nptr = __tmp;
  267.  
  268.   return __r;
  269. }
  270.  
  271. __DEVICE__
  272. float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
  273.  
  274. __DEVICE__
  275. int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
  276.  
  277. __DEVICE__
  278. __RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
  279.  
  280. __DEVICE__
  281. __RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
  282.  
  283. __DEVICE__
  284. __RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
  285.  
  286. __DEVICE__
  287. float j0f(float __x) { return __ocml_j0_f32(__x); }
  288.  
  289. __DEVICE__
  290. float j1f(float __x) { return __ocml_j1_f32(__x); }
  291.  
  292. __DEVICE__
  293. float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
  294.                                 // and the Miller & Brown algorithm
  295.   //       for linear recurrences to get O(log n) steps, but it's unclear if
  296.   //       it'd be beneficial in this case.
  297.   if (__n == 0)
  298.     return j0f(__x);
  299.   if (__n == 1)
  300.     return j1f(__x);
  301.  
  302.   float __x0 = j0f(__x);
  303.   float __x1 = j1f(__x);
  304.   for (int __i = 1; __i < __n; ++__i) {
  305.     float __x2 = (2 * __i) / __x * __x1 - __x0;
  306.     __x0 = __x1;
  307.     __x1 = __x2;
  308.   }
  309.  
  310.   return __x1;
  311. }
  312.  
  313. __DEVICE__
  314. float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
  315.  
  316. __DEVICE__
  317. float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
  318.  
  319. __DEVICE__
  320. long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
  321.  
  322. __DEVICE__
  323. long long int llroundf(float __x) { return __ocml_round_f32(__x); }
  324.  
  325. __DEVICE__
  326. float log10f(float __x) { return __ocml_log10_f32(__x); }
  327.  
  328. __DEVICE__
  329. float log1pf(float __x) { return __ocml_log1p_f32(__x); }
  330.  
  331. __DEVICE__
  332. float log2f(float __x) { return __ocml_log2_f32(__x); }
  333.  
  334. __DEVICE__
  335. float logbf(float __x) { return __ocml_logb_f32(__x); }
  336.  
  337. __DEVICE__
  338. float logf(float __x) { return __ocml_log_f32(__x); }
  339.  
  340. __DEVICE__
  341. long int lrintf(float __x) { return __ocml_rint_f32(__x); }
  342.  
  343. __DEVICE__
  344. long int lroundf(float __x) { return __ocml_round_f32(__x); }
  345.  
  346. __DEVICE__
  347. float modff(float __x, float *__iptr) {
  348.   float __tmp;
  349. #ifdef __OPENMP_AMDGCN__
  350. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  351. #endif
  352.   float __r =
  353.       __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  354.   *__iptr = __tmp;
  355.   return __r;
  356. }
  357.  
  358. __DEVICE__
  359. float nanf(const char *__tagp __attribute__((nonnull))) {
  360.   union {
  361.     float val;
  362.     struct ieee_float {
  363.       unsigned int mantissa : 22;
  364.       unsigned int quiet : 1;
  365.       unsigned int exponent : 8;
  366.       unsigned int sign : 1;
  367.     } bits;
  368.   } __tmp;
  369.   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
  370.  
  371.   __tmp.bits.sign = 0u;
  372.   __tmp.bits.exponent = ~0u;
  373.   __tmp.bits.quiet = 1u;
  374.   __tmp.bits.mantissa = __make_mantissa(__tagp);
  375.  
  376.   return __tmp.val;
  377. }
  378.  
  379. __DEVICE__
  380. float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
  381.  
  382. __DEVICE__
  383. float nextafterf(float __x, float __y) {
  384.   return __ocml_nextafter_f32(__x, __y);
  385. }
  386.  
  387. __DEVICE__
  388. float norm3df(float __x, float __y, float __z) {
  389.   return __ocml_len3_f32(__x, __y, __z);
  390. }
  391.  
  392. __DEVICE__
  393. float norm4df(float __x, float __y, float __z, float __w) {
  394.   return __ocml_len4_f32(__x, __y, __z, __w);
  395. }
  396.  
  397. __DEVICE__
  398. float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
  399.  
  400. __DEVICE__
  401. float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
  402.  
  403. __DEVICE__
  404. float normf(int __dim,
  405.             const float *__a) { // TODO: placeholder until OCML adds support.
  406.   float __r = 0;
  407.   while (__dim--) {
  408.     __r += __a[0] * __a[0];
  409.     ++__a;
  410.   }
  411.  
  412.   return __ocml_sqrt_f32(__r);
  413. }
  414.  
  415. __DEVICE__
  416. float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
  417.  
  418. __DEVICE__
  419. float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
  420.  
  421. __DEVICE__
  422. float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
  423.  
  424. __DEVICE__
  425. float remainderf(float __x, float __y) {
  426.   return __ocml_remainder_f32(__x, __y);
  427. }
  428.  
  429. __DEVICE__
  430. float remquof(float __x, float __y, int *__quo) {
  431.   int __tmp;
  432. #ifdef __OPENMP_AMDGCN__
  433. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  434. #endif
  435.   float __r = __ocml_remquo_f32(
  436.       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  437.   *__quo = __tmp;
  438.  
  439.   return __r;
  440. }
  441.  
  442. __DEVICE__
  443. float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
  444.  
  445. __DEVICE__
  446. float rintf(float __x) { return __ocml_rint_f32(__x); }
  447.  
  448. __DEVICE__
  449. float rnorm3df(float __x, float __y, float __z) {
  450.   return __ocml_rlen3_f32(__x, __y, __z);
  451. }
  452.  
  453. __DEVICE__
  454. float rnorm4df(float __x, float __y, float __z, float __w) {
  455.   return __ocml_rlen4_f32(__x, __y, __z, __w);
  456. }
  457.  
  458. __DEVICE__
  459. float rnormf(int __dim,
  460.              const float *__a) { // TODO: placeholder until OCML adds support.
  461.   float __r = 0;
  462.   while (__dim--) {
  463.     __r += __a[0] * __a[0];
  464.     ++__a;
  465.   }
  466.  
  467.   return __ocml_rsqrt_f32(__r);
  468. }
  469.  
  470. __DEVICE__
  471. float roundf(float __x) { return __ocml_round_f32(__x); }
  472.  
  473. __DEVICE__
  474. float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
  475.  
  476. __DEVICE__
  477. float scalblnf(float __x, long int __n) {
  478.   return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
  479.                          : __ocml_scalb_f32(__x, __n);
  480. }
  481.  
  482. __DEVICE__
  483. float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
  484.  
  485. __DEVICE__
  486. __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
  487.  
  488. __DEVICE__
  489. void sincosf(float __x, float *__sinptr, float *__cosptr) {
  490.   float __tmp;
  491. #ifdef __OPENMP_AMDGCN__
  492. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  493. #endif
  494.   *__sinptr =
  495.       __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  496.   *__cosptr = __tmp;
  497. }
  498.  
  499. __DEVICE__
  500. void sincospif(float __x, float *__sinptr, float *__cosptr) {
  501.   float __tmp;
  502. #ifdef __OPENMP_AMDGCN__
  503. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  504. #endif
  505.   *__sinptr = __ocml_sincospi_f32(
  506.       __x, (__attribute__((address_space(5))) float *)&__tmp);
  507.   *__cosptr = __tmp;
  508. }
  509.  
  510. __DEVICE__
  511. float sinf(float __x) { return __ocml_sin_f32(__x); }
  512.  
  513. __DEVICE__
  514. float sinhf(float __x) { return __ocml_sinh_f32(__x); }
  515.  
  516. __DEVICE__
  517. float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
  518.  
  519. __DEVICE__
  520. float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
  521.  
  522. __DEVICE__
  523. float tanf(float __x) { return __ocml_tan_f32(__x); }
  524.  
  525. __DEVICE__
  526. float tanhf(float __x) { return __ocml_tanh_f32(__x); }
  527.  
  528. __DEVICE__
  529. float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
  530.  
  531. __DEVICE__
  532. float truncf(float __x) { return __ocml_trunc_f32(__x); }
  533.  
  534. __DEVICE__
  535. float y0f(float __x) { return __ocml_y0_f32(__x); }
  536.  
  537. __DEVICE__
  538. float y1f(float __x) { return __ocml_y1_f32(__x); }
  539.  
  540. __DEVICE__
  541. float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
  542.                                 // and the Miller & Brown algorithm
  543.   //       for linear recurrences to get O(log n) steps, but it's unclear if
  544.   //       it'd be beneficial in this case. Placeholder until OCML adds
  545.   //       support.
  546.   if (__n == 0)
  547.     return y0f(__x);
  548.   if (__n == 1)
  549.     return y1f(__x);
  550.  
  551.   float __x0 = y0f(__x);
  552.   float __x1 = y1f(__x);
  553.   for (int __i = 1; __i < __n; ++__i) {
  554.     float __x2 = (2 * __i) / __x * __x1 - __x0;
  555.     __x0 = __x1;
  556.     __x1 = __x2;
  557.   }
  558.  
  559.   return __x1;
  560. }
  561.  
  562. // BEGIN INTRINSICS
  563.  
  564. __DEVICE__
  565. float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
  566.  
  567. __DEVICE__
  568. float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
  569.  
  570. __DEVICE__
  571. float __expf(float __x) { return __ocml_native_exp_f32(__x); }
  572.  
  573. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  574. __DEVICE__
  575. float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
  576. __DEVICE__
  577. float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
  578. __DEVICE__
  579. float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
  580. __DEVICE__
  581. float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
  582. #else
  583. __DEVICE__
  584. float __fadd_rn(float __x, float __y) { return __x + __y; }
  585. #endif
  586.  
  587. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  588. __DEVICE__
  589. float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
  590. __DEVICE__
  591. float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
  592. __DEVICE__
  593. float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
  594. __DEVICE__
  595. float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
  596. #else
  597. __DEVICE__
  598. float __fdiv_rn(float __x, float __y) { return __x / __y; }
  599. #endif
  600.  
  601. __DEVICE__
  602. float __fdividef(float __x, float __y) { return __x / __y; }
  603.  
  604. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  605. __DEVICE__
  606. float __fmaf_rd(float __x, float __y, float __z) {
  607.   return __ocml_fma_rtn_f32(__x, __y, __z);
  608. }
  609. __DEVICE__
  610. float __fmaf_rn(float __x, float __y, float __z) {
  611.   return __ocml_fma_rte_f32(__x, __y, __z);
  612. }
  613. __DEVICE__
  614. float __fmaf_ru(float __x, float __y, float __z) {
  615.   return __ocml_fma_rtp_f32(__x, __y, __z);
  616. }
  617. __DEVICE__
  618. float __fmaf_rz(float __x, float __y, float __z) {
  619.   return __ocml_fma_rtz_f32(__x, __y, __z);
  620. }
  621. #else
  622. __DEVICE__
  623. float __fmaf_rn(float __x, float __y, float __z) {
  624.   return __ocml_fma_f32(__x, __y, __z);
  625. }
  626. #endif
  627.  
  628. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  629. __DEVICE__
  630. float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
  631. __DEVICE__
  632. float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
  633. __DEVICE__
  634. float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
  635. __DEVICE__
  636. float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
  637. #else
  638. __DEVICE__
  639. float __fmul_rn(float __x, float __y) { return __x * __y; }
  640. #endif
  641.  
  642. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  643. __DEVICE__
  644. float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
  645. __DEVICE__
  646. float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
  647. __DEVICE__
  648. float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
  649. __DEVICE__
  650. float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
  651. #else
  652. __DEVICE__
  653. float __frcp_rn(float __x) { return 1.0f / __x; }
  654. #endif
  655.  
  656. __DEVICE__
  657. float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
  658.  
  659. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  660. __DEVICE__
  661. float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
  662. __DEVICE__
  663. float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
  664. __DEVICE__
  665. float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
  666. __DEVICE__
  667. float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
  668. #else
  669. __DEVICE__
  670. float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
  671. #endif
  672.  
  673. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  674. __DEVICE__
  675. float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
  676. __DEVICE__
  677. float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
  678. __DEVICE__
  679. float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
  680. __DEVICE__
  681. float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
  682. #else
  683. __DEVICE__
  684. float __fsub_rn(float __x, float __y) { return __x - __y; }
  685. #endif
  686.  
  687. __DEVICE__
  688. float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
  689.  
  690. __DEVICE__
  691. float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
  692.  
  693. __DEVICE__
  694. float __logf(float __x) { return __ocml_native_log_f32(__x); }
  695.  
  696. __DEVICE__
  697. float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
  698.  
  699. __DEVICE__
  700. float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
  701.  
  702. __DEVICE__
  703. void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  704.   *__sinptr = __ocml_native_sin_f32(__x);
  705.   *__cosptr = __ocml_native_cos_f32(__x);
  706. }
  707.  
  708. __DEVICE__
  709. float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
  710.  
  711. __DEVICE__
  712. float __tanf(float __x) { return __ocml_tan_f32(__x); }
  713. // END INTRINSICS
  714. // END FLOAT
  715.  
  716. // BEGIN DOUBLE
  717. __DEVICE__
  718. double acos(double __x) { return __ocml_acos_f64(__x); }
  719.  
  720. __DEVICE__
  721. double acosh(double __x) { return __ocml_acosh_f64(__x); }
  722.  
  723. __DEVICE__
  724. double asin(double __x) { return __ocml_asin_f64(__x); }
  725.  
  726. __DEVICE__
  727. double asinh(double __x) { return __ocml_asinh_f64(__x); }
  728.  
  729. __DEVICE__
  730. double atan(double __x) { return __ocml_atan_f64(__x); }
  731.  
  732. __DEVICE__
  733. double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
  734.  
  735. __DEVICE__
  736. double atanh(double __x) { return __ocml_atanh_f64(__x); }
  737.  
  738. __DEVICE__
  739. double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
  740.  
  741. __DEVICE__
  742. double ceil(double __x) { return __ocml_ceil_f64(__x); }
  743.  
  744. __DEVICE__
  745. double copysign(double __x, double __y) {
  746.   return __ocml_copysign_f64(__x, __y);
  747. }
  748.  
  749. __DEVICE__
  750. double cos(double __x) { return __ocml_cos_f64(__x); }
  751.  
  752. __DEVICE__
  753. double cosh(double __x) { return __ocml_cosh_f64(__x); }
  754.  
  755. __DEVICE__
  756. double cospi(double __x) { return __ocml_cospi_f64(__x); }
  757.  
  758. __DEVICE__
  759. double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
  760.  
  761. __DEVICE__
  762. double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
  763.  
  764. __DEVICE__
  765. double erf(double __x) { return __ocml_erf_f64(__x); }
  766.  
  767. __DEVICE__
  768. double erfc(double __x) { return __ocml_erfc_f64(__x); }
  769.  
  770. __DEVICE__
  771. double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
  772.  
  773. __DEVICE__
  774. double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
  775.  
  776. __DEVICE__
  777. double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
  778.  
  779. __DEVICE__
  780. double exp(double __x) { return __ocml_exp_f64(__x); }
  781.  
  782. __DEVICE__
  783. double exp10(double __x) { return __ocml_exp10_f64(__x); }
  784.  
  785. __DEVICE__
  786. double exp2(double __x) { return __ocml_exp2_f64(__x); }
  787.  
  788. __DEVICE__
  789. double expm1(double __x) { return __ocml_expm1_f64(__x); }
  790.  
  791. __DEVICE__
  792. double fabs(double __x) { return __builtin_fabs(__x); }
  793.  
  794. __DEVICE__
  795. double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
  796.  
  797. __DEVICE__
  798. double floor(double __x) { return __ocml_floor_f64(__x); }
  799.  
  800. __DEVICE__
  801. double fma(double __x, double __y, double __z) {
  802.   return __ocml_fma_f64(__x, __y, __z);
  803. }
  804.  
  805. __DEVICE__
  806. double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
  807.  
  808. __DEVICE__
  809. double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
  810.  
  811. __DEVICE__
  812. double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
  813.  
  814. __DEVICE__
  815. double frexp(double __x, int *__nptr) {
  816.   int __tmp;
  817. #ifdef __OPENMP_AMDGCN__
  818. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  819. #endif
  820.   double __r =
  821.       __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
  822.   *__nptr = __tmp;
  823.   return __r;
  824. }
  825.  
  826. __DEVICE__
  827. double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
  828.  
  829. __DEVICE__
  830. int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
  831.  
  832. __DEVICE__
  833. __RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
  834.  
  835. __DEVICE__
  836. __RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
  837.  
  838. __DEVICE__
  839. __RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
  840.  
  841. __DEVICE__
  842. double j0(double __x) { return __ocml_j0_f64(__x); }
  843.  
  844. __DEVICE__
  845. double j1(double __x) { return __ocml_j1_f64(__x); }
  846.  
  847. __DEVICE__
  848. double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
  849.                                  // and the Miller & Brown algorithm
  850.   //       for linear recurrences to get O(log n) steps, but it's unclear if
  851.   //       it'd be beneficial in this case. Placeholder until OCML adds
  852.   //       support.
  853.   if (__n == 0)
  854.     return j0(__x);
  855.   if (__n == 1)
  856.     return j1(__x);
  857.  
  858.   double __x0 = j0(__x);
  859.   double __x1 = j1(__x);
  860.   for (int __i = 1; __i < __n; ++__i) {
  861.     double __x2 = (2 * __i) / __x * __x1 - __x0;
  862.     __x0 = __x1;
  863.     __x1 = __x2;
  864.   }
  865.   return __x1;
  866. }
  867.  
  868. __DEVICE__
  869. double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
  870.  
  871. __DEVICE__
  872. double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
  873.  
  874. __DEVICE__
  875. long long int llrint(double __x) { return __ocml_rint_f64(__x); }
  876.  
  877. __DEVICE__
  878. long long int llround(double __x) { return __ocml_round_f64(__x); }
  879.  
  880. __DEVICE__
  881. double log(double __x) { return __ocml_log_f64(__x); }
  882.  
  883. __DEVICE__
  884. double log10(double __x) { return __ocml_log10_f64(__x); }
  885.  
  886. __DEVICE__
  887. double log1p(double __x) { return __ocml_log1p_f64(__x); }
  888.  
  889. __DEVICE__
  890. double log2(double __x) { return __ocml_log2_f64(__x); }
  891.  
  892. __DEVICE__
  893. double logb(double __x) { return __ocml_logb_f64(__x); }
  894.  
  895. __DEVICE__
  896. long int lrint(double __x) { return __ocml_rint_f64(__x); }
  897.  
  898. __DEVICE__
  899. long int lround(double __x) { return __ocml_round_f64(__x); }
  900.  
  901. __DEVICE__
  902. double modf(double __x, double *__iptr) {
  903.   double __tmp;
  904. #ifdef __OPENMP_AMDGCN__
  905. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  906. #endif
  907.   double __r =
  908.       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
  909.   *__iptr = __tmp;
  910.  
  911.   return __r;
  912. }
  913.  
  914. __DEVICE__
  915. double nan(const char *__tagp) {
  916. #if !_WIN32
  917.   union {
  918.     double val;
  919.     struct ieee_double {
  920.       uint64_t mantissa : 51;
  921.       uint32_t quiet : 1;
  922.       uint32_t exponent : 11;
  923.       uint32_t sign : 1;
  924.     } bits;
  925.   } __tmp;
  926.   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
  927.  
  928.   __tmp.bits.sign = 0u;
  929.   __tmp.bits.exponent = ~0u;
  930.   __tmp.bits.quiet = 1u;
  931.   __tmp.bits.mantissa = __make_mantissa(__tagp);
  932.  
  933.   return __tmp.val;
  934. #else
  935.   __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  936.   uint64_t __val = __make_mantissa(__tagp);
  937.   __val |= 0xFFF << 51;
  938.   return *reinterpret_cast<double *>(&__val);
  939. #endif
  940. }
  941.  
  942. __DEVICE__
  943. double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
  944.  
  945. __DEVICE__
  946. double nextafter(double __x, double __y) {
  947.   return __ocml_nextafter_f64(__x, __y);
  948. }
  949.  
  950. __DEVICE__
  951. double norm(int __dim,
  952.             const double *__a) { // TODO: placeholder until OCML adds support.
  953.   double __r = 0;
  954.   while (__dim--) {
  955.     __r += __a[0] * __a[0];
  956.     ++__a;
  957.   }
  958.  
  959.   return __ocml_sqrt_f64(__r);
  960. }
  961.  
  962. __DEVICE__
  963. double norm3d(double __x, double __y, double __z) {
  964.   return __ocml_len3_f64(__x, __y, __z);
  965. }
  966.  
  967. __DEVICE__
  968. double norm4d(double __x, double __y, double __z, double __w) {
  969.   return __ocml_len4_f64(__x, __y, __z, __w);
  970. }
  971.  
  972. __DEVICE__
  973. double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
  974.  
  975. __DEVICE__
  976. double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
  977.  
  978. __DEVICE__
  979. double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
  980.  
  981. __DEVICE__
  982. double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
  983.  
  984. __DEVICE__
  985. double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
  986.  
  987. __DEVICE__
  988. double remainder(double __x, double __y) {
  989.   return __ocml_remainder_f64(__x, __y);
  990. }
  991.  
  992. __DEVICE__
  993. double remquo(double __x, double __y, int *__quo) {
  994.   int __tmp;
  995. #ifdef __OPENMP_AMDGCN__
  996. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  997. #endif
  998.   double __r = __ocml_remquo_f64(
  999.       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  1000.   *__quo = __tmp;
  1001.  
  1002.   return __r;
  1003. }
  1004.  
  1005. __DEVICE__
  1006. double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
  1007.  
  1008. __DEVICE__
  1009. double rint(double __x) { return __ocml_rint_f64(__x); }
  1010.  
  1011. __DEVICE__
  1012. double rnorm(int __dim,
  1013.              const double *__a) { // TODO: placeholder until OCML adds support.
  1014.   double __r = 0;
  1015.   while (__dim--) {
  1016.     __r += __a[0] * __a[0];
  1017.     ++__a;
  1018.   }
  1019.  
  1020.   return __ocml_rsqrt_f64(__r);
  1021. }
  1022.  
  1023. __DEVICE__
  1024. double rnorm3d(double __x, double __y, double __z) {
  1025.   return __ocml_rlen3_f64(__x, __y, __z);
  1026. }
  1027.  
  1028. __DEVICE__
  1029. double rnorm4d(double __x, double __y, double __z, double __w) {
  1030.   return __ocml_rlen4_f64(__x, __y, __z, __w);
  1031. }
  1032.  
  1033. __DEVICE__
  1034. double round(double __x) { return __ocml_round_f64(__x); }
  1035.  
  1036. __DEVICE__
  1037. double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
  1038.  
  1039. __DEVICE__
  1040. double scalbln(double __x, long int __n) {
  1041.   return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
  1042.                          : __ocml_scalb_f64(__x, __n);
  1043. }
  1044. __DEVICE__
  1045. double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
  1046.  
  1047. __DEVICE__
  1048. __RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
  1049.  
  1050. __DEVICE__
  1051. double sin(double __x) { return __ocml_sin_f64(__x); }
  1052.  
  1053. __DEVICE__
  1054. void sincos(double __x, double *__sinptr, double *__cosptr) {
  1055.   double __tmp;
  1056. #ifdef __OPENMP_AMDGCN__
  1057. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  1058. #endif
  1059.   *__sinptr = __ocml_sincos_f64(
  1060.       __x, (__attribute__((address_space(5))) double *)&__tmp);
  1061.   *__cosptr = __tmp;
  1062. }
  1063.  
  1064. __DEVICE__
  1065. void sincospi(double __x, double *__sinptr, double *__cosptr) {
  1066.   double __tmp;
  1067. #ifdef __OPENMP_AMDGCN__
  1068. #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
  1069. #endif
  1070.   *__sinptr = __ocml_sincospi_f64(
  1071.       __x, (__attribute__((address_space(5))) double *)&__tmp);
  1072.   *__cosptr = __tmp;
  1073. }
  1074.  
  1075. __DEVICE__
  1076. double sinh(double __x) { return __ocml_sinh_f64(__x); }
  1077.  
  1078. __DEVICE__
  1079. double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
  1080.  
  1081. __DEVICE__
  1082. double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
  1083.  
  1084. __DEVICE__
  1085. double tan(double __x) { return __ocml_tan_f64(__x); }
  1086.  
  1087. __DEVICE__
  1088. double tanh(double __x) { return __ocml_tanh_f64(__x); }
  1089.  
  1090. __DEVICE__
  1091. double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
  1092.  
  1093. __DEVICE__
  1094. double trunc(double __x) { return __ocml_trunc_f64(__x); }
  1095.  
  1096. __DEVICE__
  1097. double y0(double __x) { return __ocml_y0_f64(__x); }
  1098.  
  1099. __DEVICE__
  1100. double y1(double __x) { return __ocml_y1_f64(__x); }
  1101.  
  1102. __DEVICE__
  1103. double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
  1104.                                  // and the Miller & Brown algorithm
  1105.   //       for linear recurrences to get O(log n) steps, but it's unclear if
  1106.   //       it'd be beneficial in this case. Placeholder until OCML adds
  1107.   //       support.
  1108.   if (__n == 0)
  1109.     return y0(__x);
  1110.   if (__n == 1)
  1111.     return y1(__x);
  1112.  
  1113.   double __x0 = y0(__x);
  1114.   double __x1 = y1(__x);
  1115.   for (int __i = 1; __i < __n; ++__i) {
  1116.     double __x2 = (2 * __i) / __x * __x1 - __x0;
  1117.     __x0 = __x1;
  1118.     __x1 = __x2;
  1119.   }
  1120.  
  1121.   return __x1;
  1122. }
  1123.  
  1124. // BEGIN INTRINSICS
  1125. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1126. __DEVICE__
  1127. double __dadd_rd(double __x, double __y) {
  1128.   return __ocml_add_rtn_f64(__x, __y);
  1129. }
  1130. __DEVICE__
  1131. double __dadd_rn(double __x, double __y) {
  1132.   return __ocml_add_rte_f64(__x, __y);
  1133. }
  1134. __DEVICE__
  1135. double __dadd_ru(double __x, double __y) {
  1136.   return __ocml_add_rtp_f64(__x, __y);
  1137. }
  1138. __DEVICE__
  1139. double __dadd_rz(double __x, double __y) {
  1140.   return __ocml_add_rtz_f64(__x, __y);
  1141. }
  1142. #else
  1143. __DEVICE__
  1144. double __dadd_rn(double __x, double __y) { return __x + __y; }
  1145. #endif
  1146.  
  1147. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1148. __DEVICE__
  1149. double __ddiv_rd(double __x, double __y) {
  1150.   return __ocml_div_rtn_f64(__x, __y);
  1151. }
  1152. __DEVICE__
  1153. double __ddiv_rn(double __x, double __y) {
  1154.   return __ocml_div_rte_f64(__x, __y);
  1155. }
  1156. __DEVICE__
  1157. double __ddiv_ru(double __x, double __y) {
  1158.   return __ocml_div_rtp_f64(__x, __y);
  1159. }
  1160. __DEVICE__
  1161. double __ddiv_rz(double __x, double __y) {
  1162.   return __ocml_div_rtz_f64(__x, __y);
  1163. }
  1164. #else
  1165. __DEVICE__
  1166. double __ddiv_rn(double __x, double __y) { return __x / __y; }
  1167. #endif
  1168.  
  1169. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1170. __DEVICE__
  1171. double __dmul_rd(double __x, double __y) {
  1172.   return __ocml_mul_rtn_f64(__x, __y);
  1173. }
  1174. __DEVICE__
  1175. double __dmul_rn(double __x, double __y) {
  1176.   return __ocml_mul_rte_f64(__x, __y);
  1177. }
  1178. __DEVICE__
  1179. double __dmul_ru(double __x, double __y) {
  1180.   return __ocml_mul_rtp_f64(__x, __y);
  1181. }
  1182. __DEVICE__
  1183. double __dmul_rz(double __x, double __y) {
  1184.   return __ocml_mul_rtz_f64(__x, __y);
  1185. }
  1186. #else
  1187. __DEVICE__
  1188. double __dmul_rn(double __x, double __y) { return __x * __y; }
  1189. #endif
  1190.  
  1191. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1192. __DEVICE__
  1193. double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
  1194. __DEVICE__
  1195. double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
  1196. __DEVICE__
  1197. double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
  1198. __DEVICE__
  1199. double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
  1200. #else
  1201. __DEVICE__
  1202. double __drcp_rn(double __x) { return 1.0 / __x; }
  1203. #endif
  1204.  
  1205. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1206. __DEVICE__
  1207. double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
  1208. __DEVICE__
  1209. double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
  1210. __DEVICE__
  1211. double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
  1212. __DEVICE__
  1213. double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
  1214. #else
  1215. __DEVICE__
  1216. double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
  1217. #endif
  1218.  
  1219. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1220. __DEVICE__
  1221. double __dsub_rd(double __x, double __y) {
  1222.   return __ocml_sub_rtn_f64(__x, __y);
  1223. }
  1224. __DEVICE__
  1225. double __dsub_rn(double __x, double __y) {
  1226.   return __ocml_sub_rte_f64(__x, __y);
  1227. }
  1228. __DEVICE__
  1229. double __dsub_ru(double __x, double __y) {
  1230.   return __ocml_sub_rtp_f64(__x, __y);
  1231. }
  1232. __DEVICE__
  1233. double __dsub_rz(double __x, double __y) {
  1234.   return __ocml_sub_rtz_f64(__x, __y);
  1235. }
  1236. #else
  1237. __DEVICE__
  1238. double __dsub_rn(double __x, double __y) { return __x - __y; }
  1239. #endif
  1240.  
  1241. #if defined OCML_BASIC_ROUNDED_OPERATIONS
  1242. __DEVICE__
  1243. double __fma_rd(double __x, double __y, double __z) {
  1244.   return __ocml_fma_rtn_f64(__x, __y, __z);
  1245. }
  1246. __DEVICE__
  1247. double __fma_rn(double __x, double __y, double __z) {
  1248.   return __ocml_fma_rte_f64(__x, __y, __z);
  1249. }
  1250. __DEVICE__
  1251. double __fma_ru(double __x, double __y, double __z) {
  1252.   return __ocml_fma_rtp_f64(__x, __y, __z);
  1253. }
  1254. __DEVICE__
  1255. double __fma_rz(double __x, double __y, double __z) {
  1256.   return __ocml_fma_rtz_f64(__x, __y, __z);
  1257. }
  1258. #else
  1259. __DEVICE__
  1260. double __fma_rn(double __x, double __y, double __z) {
  1261.   return __ocml_fma_f64(__x, __y, __z);
  1262. }
  1263. #endif
  1264. // END INTRINSICS
  1265. // END DOUBLE
  1266.  
  1267. // C only macros
  1268. #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
  1269. #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
  1270. #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
  1271. #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
  1272. #define signbit(__x)                                                           \
  1273.   _Generic((__x), float : __signbitf, double : __signbit)(__x)
  1274. #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
  1275.  
  1276. #if defined(__cplusplus)
  1277. template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
  1278.   return (__arg1 < __arg2) ? __arg1 : __arg2;
  1279. }
  1280.  
  1281. template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
  1282.   return (__arg1 > __arg2) ? __arg1 : __arg2;
  1283. }
  1284.  
  1285. __DEVICE__ int min(int __arg1, int __arg2) {
  1286.   return (__arg1 < __arg2) ? __arg1 : __arg2;
  1287. }
  1288. __DEVICE__ int max(int __arg1, int __arg2) {
  1289.   return (__arg1 > __arg2) ? __arg1 : __arg2;
  1290. }
  1291.  
  1292. __DEVICE__
  1293. float max(float __x, float __y) { return fmaxf(__x, __y); }
  1294.  
  1295. __DEVICE__
  1296. double max(double __x, double __y) { return fmax(__x, __y); }
  1297.  
  1298. __DEVICE__
  1299. float min(float __x, float __y) { return fminf(__x, __y); }
  1300.  
  1301. __DEVICE__
  1302. double min(double __x, double __y) { return fmin(__x, __y); }
  1303.  
  1304. #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
  1305. __host__ inline static int min(int __arg1, int __arg2) {
  1306.   return std::min(__arg1, __arg2);
  1307. }
  1308.  
  1309. __host__ inline static int max(int __arg1, int __arg2) {
  1310.   return std::max(__arg1, __arg2);
  1311. }
  1312. #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
  1313. #endif
  1314.  
  1315. #pragma pop_macro("__DEVICE__")
  1316. #pragma pop_macro("__RETURN_TYPE")
  1317.  
  1318. #endif // __CLANG_HIP_MATH_H__
  1319.