Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __CLANG_CUDA_COMPLEX_BUILTINS
  11. #define __CLANG_CUDA_COMPLEX_BUILTINS
  12.  
  13. // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3.  These are
  14. // libgcc functions that clang assumes are available when compiling c99 complex
  15. // operations.  (These implementations come from libc++, and have been modified
  16. // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
  17.  
  18. #pragma push_macro("__DEVICE__")
  19. #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
  20. #pragma omp declare target
  21. #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
  22. #else
  23. #define __DEVICE__ __device__ inline
  24. #endif
  25.  
  26. // To make the algorithms available for C and C++ in CUDA and OpenMP we select
  27. // different but equivalent function versions. TODO: For OpenMP we currently
  28. // select the native builtins as the overload support for templates is lacking.
  29. #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
  30. #define _ISNANd std::isnan
  31. #define _ISNANf std::isnan
  32. #define _ISINFd std::isinf
  33. #define _ISINFf std::isinf
  34. #define _ISFINITEd std::isfinite
  35. #define _ISFINITEf std::isfinite
  36. #define _COPYSIGNd std::copysign
  37. #define _COPYSIGNf std::copysign
  38. #define _SCALBNd std::scalbn
  39. #define _SCALBNf std::scalbn
  40. #define _ABSd std::abs
  41. #define _ABSf std::abs
  42. #define _LOGBd std::logb
  43. #define _LOGBf std::logb
  44. // Rather than pulling in std::max from algorithm everytime, use available ::max.
  45. #define _fmaxd max
  46. #define _fmaxf max
  47. #else
  48. #ifdef __AMDGCN__
  49. #define _ISNANd __ocml_isnan_f64
  50. #define _ISNANf __ocml_isnan_f32
  51. #define _ISINFd __ocml_isinf_f64
  52. #define _ISINFf __ocml_isinf_f32
  53. #define _ISFINITEd __ocml_isfinite_f64
  54. #define _ISFINITEf __ocml_isfinite_f32
  55. #define _COPYSIGNd __ocml_copysign_f64
  56. #define _COPYSIGNf __ocml_copysign_f32
  57. #define _SCALBNd __ocml_scalbn_f64
  58. #define _SCALBNf __ocml_scalbn_f32
  59. #define _ABSd __ocml_fabs_f64
  60. #define _ABSf __ocml_fabs_f32
  61. #define _LOGBd __ocml_logb_f64
  62. #define _LOGBf __ocml_logb_f32
  63. #define _fmaxd __ocml_fmax_f64
  64. #define _fmaxf __ocml_fmax_f32
  65. #else
  66. #define _ISNANd __nv_isnand
  67. #define _ISNANf __nv_isnanf
  68. #define _ISINFd __nv_isinfd
  69. #define _ISINFf __nv_isinff
  70. #define _ISFINITEd __nv_isfinited
  71. #define _ISFINITEf __nv_finitef
  72. #define _COPYSIGNd __nv_copysign
  73. #define _COPYSIGNf __nv_copysignf
  74. #define _SCALBNd __nv_scalbn
  75. #define _SCALBNf __nv_scalbnf
  76. #define _ABSd __nv_fabs
  77. #define _ABSf __nv_fabsf
  78. #define _LOGBd __nv_logb
  79. #define _LOGBf __nv_logbf
  80. #define _fmaxd __nv_fmax
  81. #define _fmaxf __nv_fmaxf
  82. #endif
  83. #endif
  84.  
  85. #if defined(__cplusplus)
  86. extern "C" {
  87. #endif
  88.  
  89. __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
  90.                                     double __d) {
  91.   double __ac = __a * __c;
  92.   double __bd = __b * __d;
  93.   double __ad = __a * __d;
  94.   double __bc = __b * __c;
  95.   double _Complex z;
  96.   __real__(z) = __ac - __bd;
  97.   __imag__(z) = __ad + __bc;
  98.   if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
  99.     int __recalc = 0;
  100.     if (_ISINFd(__a) || _ISINFd(__b)) {
  101.       __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
  102.       __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
  103.       if (_ISNANd(__c))
  104.         __c = _COPYSIGNd(0, __c);
  105.       if (_ISNANd(__d))
  106.         __d = _COPYSIGNd(0, __d);
  107.       __recalc = 1;
  108.     }
  109.     if (_ISINFd(__c) || _ISINFd(__d)) {
  110.       __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
  111.       __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
  112.       if (_ISNANd(__a))
  113.         __a = _COPYSIGNd(0, __a);
  114.       if (_ISNANd(__b))
  115.         __b = _COPYSIGNd(0, __b);
  116.       __recalc = 1;
  117.     }
  118.     if (!__recalc &&
  119.         (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
  120.       if (_ISNANd(__a))
  121.         __a = _COPYSIGNd(0, __a);
  122.       if (_ISNANd(__b))
  123.         __b = _COPYSIGNd(0, __b);
  124.       if (_ISNANd(__c))
  125.         __c = _COPYSIGNd(0, __c);
  126.       if (_ISNANd(__d))
  127.         __d = _COPYSIGNd(0, __d);
  128.       __recalc = 1;
  129.     }
  130.     if (__recalc) {
  131.       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
  132.       // a device overload (and isn't constexpr before C++11, naturally).
  133.       __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
  134.       __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
  135.     }
  136.   }
  137.   return z;
  138. }
  139.  
  140. __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
  141.   float __ac = __a * __c;
  142.   float __bd = __b * __d;
  143.   float __ad = __a * __d;
  144.   float __bc = __b * __c;
  145.   float _Complex z;
  146.   __real__(z) = __ac - __bd;
  147.   __imag__(z) = __ad + __bc;
  148.   if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
  149.     int __recalc = 0;
  150.     if (_ISINFf(__a) || _ISINFf(__b)) {
  151.       __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
  152.       __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
  153.       if (_ISNANf(__c))
  154.         __c = _COPYSIGNf(0, __c);
  155.       if (_ISNANf(__d))
  156.         __d = _COPYSIGNf(0, __d);
  157.       __recalc = 1;
  158.     }
  159.     if (_ISINFf(__c) || _ISINFf(__d)) {
  160.       __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
  161.       __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
  162.       if (_ISNANf(__a))
  163.         __a = _COPYSIGNf(0, __a);
  164.       if (_ISNANf(__b))
  165.         __b = _COPYSIGNf(0, __b);
  166.       __recalc = 1;
  167.     }
  168.     if (!__recalc &&
  169.         (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
  170.       if (_ISNANf(__a))
  171.         __a = _COPYSIGNf(0, __a);
  172.       if (_ISNANf(__b))
  173.         __b = _COPYSIGNf(0, __b);
  174.       if (_ISNANf(__c))
  175.         __c = _COPYSIGNf(0, __c);
  176.       if (_ISNANf(__d))
  177.         __d = _COPYSIGNf(0, __d);
  178.       __recalc = 1;
  179.     }
  180.     if (__recalc) {
  181.       __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
  182.       __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
  183.     }
  184.   }
  185.   return z;
  186. }
  187.  
  188. __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
  189.                                     double __d) {
  190.   int __ilogbw = 0;
  191.   // Can't use std::max, because that's defined in <algorithm>, and we don't
  192.   // want to pull that in for every compile.  The CUDA headers define
  193.   // ::max(float, float) and ::max(double, double), which is sufficient for us.
  194.   double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
  195.   if (_ISFINITEd(__logbw)) {
  196.     __ilogbw = (int)__logbw;
  197.     __c = _SCALBNd(__c, -__ilogbw);
  198.     __d = _SCALBNd(__d, -__ilogbw);
  199.   }
  200.   double __denom = __c * __c + __d * __d;
  201.   double _Complex z;
  202.   __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
  203.   __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
  204.   if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
  205.     if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
  206.       __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
  207.       __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
  208.     } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
  209.                _ISFINITEd(__d)) {
  210.       __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
  211.       __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
  212.       __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
  213.       __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
  214.     } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
  215.                _ISFINITEd(__b)) {
  216.       __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
  217.       __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
  218.       __real__(z) = 0.0 * (__a * __c + __b * __d);
  219.       __imag__(z) = 0.0 * (__b * __c - __a * __d);
  220.     }
  221.   }
  222.   return z;
  223. }
  224.  
  225. __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
  226.   int __ilogbw = 0;
  227.   float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
  228.   if (_ISFINITEf(__logbw)) {
  229.     __ilogbw = (int)__logbw;
  230.     __c = _SCALBNf(__c, -__ilogbw);
  231.     __d = _SCALBNf(__d, -__ilogbw);
  232.   }
  233.   float __denom = __c * __c + __d * __d;
  234.   float _Complex z;
  235.   __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
  236.   __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
  237.   if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
  238.     if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
  239.       __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
  240.       __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
  241.     } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
  242.                _ISFINITEf(__d)) {
  243.       __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
  244.       __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
  245.       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
  246.       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
  247.     } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
  248.                _ISFINITEf(__b)) {
  249.       __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
  250.       __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
  251.       __real__(z) = 0 * (__a * __c + __b * __d);
  252.       __imag__(z) = 0 * (__b * __c - __a * __d);
  253.     }
  254.   }
  255.   return z;
  256. }
  257.  
  258. #if defined(__cplusplus)
  259. } // extern "C"
  260. #endif
  261.  
  262. #undef _ISNANd
  263. #undef _ISNANf
  264. #undef _ISINFd
  265. #undef _ISINFf
  266. #undef _COPYSIGNd
  267. #undef _COPYSIGNf
  268. #undef _ISFINITEd
  269. #undef _ISFINITEf
  270. #undef _SCALBNd
  271. #undef _SCALBNf
  272. #undef _ABSd
  273. #undef _ABSf
  274. #undef _LOGBd
  275. #undef _LOGBf
  276. #undef _fmaxd
  277. #undef _fmaxf
  278.  
  279. #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
  280. #pragma omp end declare target
  281. #endif
  282.  
  283. #pragma pop_macro("__DEVICE__")
  284.  
  285. #endif // __CLANG_CUDA_COMPLEX_BUILTINS
  286.