Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | |||
10 | #ifndef __CLANG_HIP_CMATH_H__ |
||
11 | #define __CLANG_HIP_CMATH_H__ |
||
12 | |||
13 | #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) |
||
14 | #error "This file is for HIP and OpenMP AMDGCN device compilation only." |
||
15 | #endif |
||
16 | |||
17 | #if !defined(__HIPCC_RTC__) |
||
18 | #if defined(__cplusplus) |
||
19 | #include <limits> |
||
20 | #include <type_traits> |
||
21 | #include <utility> |
||
22 | #endif |
||
23 | #include <limits.h> |
||
24 | #include <stdint.h> |
||
25 | #endif // !defined(__HIPCC_RTC__) |
||
26 | |||
27 | #pragma push_macro("__DEVICE__") |
||
28 | #pragma push_macro("__CONSTEXPR__") |
||
29 | #ifdef __OPENMP_AMDGCN__ |
||
30 | #define __DEVICE__ static __attribute__((always_inline, nothrow)) |
||
31 | #define __CONSTEXPR__ constexpr |
||
32 | #else |
||
33 | #define __DEVICE__ static __device__ inline __attribute__((always_inline)) |
||
34 | #define __CONSTEXPR__ |
||
35 | #endif // __OPENMP_AMDGCN__ |
||
36 | |||
37 | // Start with functions that cannot be defined by DEF macros below. |
||
38 | #if defined(__cplusplus) |
||
39 | #if defined __OPENMP_AMDGCN__ |
||
40 | __DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } |
||
41 | __DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } |
||
42 | __DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } |
||
43 | #endif |
||
44 | __DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } |
||
45 | __DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } |
||
46 | __DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } |
||
47 | __DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } |
||
48 | __DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { |
||
49 | return ::fmaf(__x, __y, __z); |
||
50 | } |
||
51 | #if !defined(__HIPCC_RTC__) |
||
52 | // The value returned by fpclassify is platform dependent, therefore it is not |
||
53 | // supported by hipRTC. |
||
54 | __DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { |
||
55 | return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, |
||
56 | FP_ZERO, __x); |
||
57 | } |
||
58 | __DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { |
||
59 | return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, |
||
60 | FP_ZERO, __x); |
||
61 | } |
||
62 | #endif // !defined(__HIPCC_RTC__) |
||
63 | |||
64 | __DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { |
||
65 | return ::frexpf(__arg, __exp); |
||
66 | } |
||
67 | |||
68 | #if defined(__OPENMP_AMDGCN__) |
||
69 | // For OpenMP we work around some old system headers that have non-conforming |
||
70 | // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do |
||
71 | // this by providing two versions of these functions, differing only in the |
||
72 | // return type. To avoid conflicting definitions we disable implicit base |
||
73 | // function generation. That means we will end up with two specializations, one |
||
74 | // per type, but only one has a base function defined by the system header. |
||
75 | #pragma omp begin declare variant match( \ |
||
76 | implementation = {extension(disable_implicit_base)}) |
||
77 | |||
78 | // FIXME: We lack an extension to customize the mangling of the variants, e.g., |
||
79 | // add a suffix. This means we would clash with the names of the variants |
||
80 | // (note that we do not create implicit base functions here). To avoid |
||
81 | // this clash we add a new trait to some of them that is always true |
||
82 | // (this is LLVM after all ;)). It will only influence the mangled name |
||
83 | // of the variants inside the inner region and avoid the clash. |
||
84 | #pragma omp begin declare variant match(implementation = {vendor(llvm)}) |
||
85 | |||
86 | __DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); } |
||
87 | __DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); } |
||
88 | __DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); } |
||
89 | __DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); } |
||
90 | __DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); } |
||
91 | __DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); } |
||
92 | |||
93 | #pragma omp end declare variant |
||
94 | #endif // defined(__OPENMP_AMDGCN__) |
||
95 | |||
96 | __DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); } |
||
97 | __DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); } |
||
98 | __DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); } |
||
99 | __DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); } |
||
100 | __DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); } |
||
101 | __DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); } |
||
102 | |||
103 | #if defined(__OPENMP_AMDGCN__) |
||
104 | #pragma omp end declare variant |
||
105 | #endif // defined(__OPENMP_AMDGCN__) |
||
106 | |||
107 | __DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) { |
||
108 | return __builtin_isgreater(__x, __y); |
||
109 | } |
||
110 | __DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) { |
||
111 | return __builtin_isgreater(__x, __y); |
||
112 | } |
||
113 | __DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) { |
||
114 | return __builtin_isgreaterequal(__x, __y); |
||
115 | } |
||
116 | __DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) { |
||
117 | return __builtin_isgreaterequal(__x, __y); |
||
118 | } |
||
119 | __DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) { |
||
120 | return __builtin_isless(__x, __y); |
||
121 | } |
||
122 | __DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) { |
||
123 | return __builtin_isless(__x, __y); |
||
124 | } |
||
125 | __DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) { |
||
126 | return __builtin_islessequal(__x, __y); |
||
127 | } |
||
128 | __DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) { |
||
129 | return __builtin_islessequal(__x, __y); |
||
130 | } |
||
131 | __DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) { |
||
132 | return __builtin_islessgreater(__x, __y); |
||
133 | } |
||
134 | __DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) { |
||
135 | return __builtin_islessgreater(__x, __y); |
||
136 | } |
||
137 | __DEVICE__ __CONSTEXPR__ bool isnormal(float __x) { |
||
138 | return __builtin_isnormal(__x); |
||
139 | } |
||
140 | __DEVICE__ __CONSTEXPR__ bool isnormal(double __x) { |
||
141 | return __builtin_isnormal(__x); |
||
142 | } |
||
143 | __DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) { |
||
144 | return __builtin_isunordered(__x, __y); |
||
145 | } |
||
146 | __DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) { |
||
147 | return __builtin_isunordered(__x, __y); |
||
148 | } |
||
149 | __DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) { |
||
150 | return ::modff(__x, __iptr); |
||
151 | } |
||
152 | __DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) { |
||
153 | return ::powif(__base, __iexp); |
||
154 | } |
||
155 | __DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) { |
||
156 | return ::powi(__base, __iexp); |
||
157 | } |
||
158 | __DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) { |
||
159 | return ::remquof(__x, __y, __quo); |
||
160 | } |
||
161 | __DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) { |
||
162 | return ::scalblnf(__x, __n); |
||
163 | } |
||
164 | __DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); } |
||
165 | __DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); } |
||
166 | |||
167 | // Notably missing above is nexttoward. We omit it because |
||
168 | // ocml doesn't provide an implementation, and we don't want to be in the |
||
169 | // business of implementing tricky libm functions in this header. |
||
170 | |||
171 | // Other functions. |
||
172 | __DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y, |
||
173 | _Float16 __z) { |
||
174 | return __ocml_fma_f16(__x, __y, __z); |
||
175 | } |
||
176 | __DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) { |
||
177 | return __ocml_pown_f16(__base, __iexp); |
||
178 | } |
||
179 | |||
180 | #ifndef __OPENMP_AMDGCN__ |
||
181 | // BEGIN DEF_FUN and HIP_OVERLOAD |
||
182 | |||
183 | // BEGIN DEF_FUN |
||
184 | |||
185 | #pragma push_macro("__DEF_FUN1") |
||
186 | #pragma push_macro("__DEF_FUN2") |
||
187 | #pragma push_macro("__DEF_FUN2_FI") |
||
188 | |||
189 | // Define cmath functions with float argument and returns __retty. |
||
190 | #define __DEF_FUN1(__retty, __func) \ |
||
191 | __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); } |
||
192 | |||
193 | // Define cmath functions with two float arguments and returns __retty. |
||
194 | #define __DEF_FUN2(__retty, __func) \ |
||
195 | __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \ |
||
196 | return __func##f(__x, __y); \ |
||
197 | } |
||
198 | |||
199 | // Define cmath functions with a float and an int argument and returns __retty. |
||
200 | #define __DEF_FUN2_FI(__retty, __func) \ |
||
201 | __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \ |
||
202 | return __func##f(__x, __y); \ |
||
203 | } |
||
204 | |||
205 | __DEF_FUN1(float, acos) |
||
206 | __DEF_FUN1(float, acosh) |
||
207 | __DEF_FUN1(float, asin) |
||
208 | __DEF_FUN1(float, asinh) |
||
209 | __DEF_FUN1(float, atan) |
||
210 | __DEF_FUN2(float, atan2) |
||
211 | __DEF_FUN1(float, atanh) |
||
212 | __DEF_FUN1(float, cbrt) |
||
213 | __DEF_FUN1(float, ceil) |
||
214 | __DEF_FUN2(float, copysign) |
||
215 | __DEF_FUN1(float, cos) |
||
216 | __DEF_FUN1(float, cosh) |
||
217 | __DEF_FUN1(float, erf) |
||
218 | __DEF_FUN1(float, erfc) |
||
219 | __DEF_FUN1(float, exp) |
||
220 | __DEF_FUN1(float, exp2) |
||
221 | __DEF_FUN1(float, expm1) |
||
222 | __DEF_FUN1(float, fabs) |
||
223 | __DEF_FUN2(float, fdim) |
||
224 | __DEF_FUN1(float, floor) |
||
225 | __DEF_FUN2(float, fmax) |
||
226 | __DEF_FUN2(float, fmin) |
||
227 | __DEF_FUN2(float, fmod) |
||
228 | __DEF_FUN2(float, hypot) |
||
229 | __DEF_FUN1(int, ilogb) |
||
230 | __DEF_FUN2_FI(float, ldexp) |
||
231 | __DEF_FUN1(float, lgamma) |
||
232 | __DEF_FUN1(float, log) |
||
233 | __DEF_FUN1(float, log10) |
||
234 | __DEF_FUN1(float, log1p) |
||
235 | __DEF_FUN1(float, log2) |
||
236 | __DEF_FUN1(float, logb) |
||
237 | __DEF_FUN1(long long, llrint) |
||
238 | __DEF_FUN1(long long, llround) |
||
239 | __DEF_FUN1(long, lrint) |
||
240 | __DEF_FUN1(long, lround) |
||
241 | __DEF_FUN1(float, nearbyint) |
||
242 | __DEF_FUN2(float, nextafter) |
||
243 | __DEF_FUN2(float, pow) |
||
244 | __DEF_FUN2(float, remainder) |
||
245 | __DEF_FUN1(float, rint) |
||
246 | __DEF_FUN1(float, round) |
||
247 | __DEF_FUN2_FI(float, scalbn) |
||
248 | __DEF_FUN1(float, sin) |
||
249 | __DEF_FUN1(float, sinh) |
||
250 | __DEF_FUN1(float, sqrt) |
||
251 | __DEF_FUN1(float, tan) |
||
252 | __DEF_FUN1(float, tanh) |
||
253 | __DEF_FUN1(float, tgamma) |
||
254 | __DEF_FUN1(float, trunc) |
||
255 | |||
256 | #pragma pop_macro("__DEF_FUN1") |
||
257 | #pragma pop_macro("__DEF_FUN2") |
||
258 | #pragma pop_macro("__DEF_FUN2_FI") |
||
259 | |||
260 | // END DEF_FUN |
||
261 | |||
262 | // BEGIN HIP_OVERLOAD |
||
263 | |||
264 | #pragma push_macro("__HIP_OVERLOAD1") |
||
265 | #pragma push_macro("__HIP_OVERLOAD2") |
||
266 | |||
267 | // __hip_enable_if::type is a type function which returns __T if __B is true. |
||
268 | template <bool __B, class __T = void> struct __hip_enable_if {}; |
||
269 | |||
270 | template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; }; |
||
271 | |||
272 | namespace __hip { |
||
273 | template <class _Tp> struct is_integral { |
||
274 | enum { value = 0 }; |
||
275 | }; |
||
276 | template <> struct is_integral<bool> { |
||
277 | enum { value = 1 }; |
||
278 | }; |
||
279 | template <> struct is_integral<char> { |
||
280 | enum { value = 1 }; |
||
281 | }; |
||
282 | template <> struct is_integral<signed char> { |
||
283 | enum { value = 1 }; |
||
284 | }; |
||
285 | template <> struct is_integral<unsigned char> { |
||
286 | enum { value = 1 }; |
||
287 | }; |
||
288 | template <> struct is_integral<wchar_t> { |
||
289 | enum { value = 1 }; |
||
290 | }; |
||
291 | template <> struct is_integral<short> { |
||
292 | enum { value = 1 }; |
||
293 | }; |
||
294 | template <> struct is_integral<unsigned short> { |
||
295 | enum { value = 1 }; |
||
296 | }; |
||
297 | template <> struct is_integral<int> { |
||
298 | enum { value = 1 }; |
||
299 | }; |
||
300 | template <> struct is_integral<unsigned int> { |
||
301 | enum { value = 1 }; |
||
302 | }; |
||
303 | template <> struct is_integral<long> { |
||
304 | enum { value = 1 }; |
||
305 | }; |
||
306 | template <> struct is_integral<unsigned long> { |
||
307 | enum { value = 1 }; |
||
308 | }; |
||
309 | template <> struct is_integral<long long> { |
||
310 | enum { value = 1 }; |
||
311 | }; |
||
312 | template <> struct is_integral<unsigned long long> { |
||
313 | enum { value = 1 }; |
||
314 | }; |
||
315 | |||
316 | // ToDo: specializes is_arithmetic<_Float16> |
||
317 | template <class _Tp> struct is_arithmetic { |
||
318 | enum { value = 0 }; |
||
319 | }; |
||
320 | template <> struct is_arithmetic<bool> { |
||
321 | enum { value = 1 }; |
||
322 | }; |
||
323 | template <> struct is_arithmetic<char> { |
||
324 | enum { value = 1 }; |
||
325 | }; |
||
326 | template <> struct is_arithmetic<signed char> { |
||
327 | enum { value = 1 }; |
||
328 | }; |
||
329 | template <> struct is_arithmetic<unsigned char> { |
||
330 | enum { value = 1 }; |
||
331 | }; |
||
332 | template <> struct is_arithmetic<wchar_t> { |
||
333 | enum { value = 1 }; |
||
334 | }; |
||
335 | template <> struct is_arithmetic<short> { |
||
336 | enum { value = 1 }; |
||
337 | }; |
||
338 | template <> struct is_arithmetic<unsigned short> { |
||
339 | enum { value = 1 }; |
||
340 | }; |
||
341 | template <> struct is_arithmetic<int> { |
||
342 | enum { value = 1 }; |
||
343 | }; |
||
344 | template <> struct is_arithmetic<unsigned int> { |
||
345 | enum { value = 1 }; |
||
346 | }; |
||
347 | template <> struct is_arithmetic<long> { |
||
348 | enum { value = 1 }; |
||
349 | }; |
||
350 | template <> struct is_arithmetic<unsigned long> { |
||
351 | enum { value = 1 }; |
||
352 | }; |
||
353 | template <> struct is_arithmetic<long long> { |
||
354 | enum { value = 1 }; |
||
355 | }; |
||
356 | template <> struct is_arithmetic<unsigned long long> { |
||
357 | enum { value = 1 }; |
||
358 | }; |
||
359 | template <> struct is_arithmetic<float> { |
||
360 | enum { value = 1 }; |
||
361 | }; |
||
362 | template <> struct is_arithmetic<double> { |
||
363 | enum { value = 1 }; |
||
364 | }; |
||
365 | |||
366 | struct true_type { |
||
367 | static const __constant__ bool value = true; |
||
368 | }; |
||
369 | struct false_type { |
||
370 | static const __constant__ bool value = false; |
||
371 | }; |
||
372 | |||
373 | template <typename __T, typename __U> struct is_same : public false_type {}; |
||
374 | template <typename __T> struct is_same<__T, __T> : public true_type {}; |
||
375 | |||
376 | template <typename __T> struct add_rvalue_reference { typedef __T &&type; }; |
||
377 | |||
378 | template <typename __T> typename add_rvalue_reference<__T>::type declval(); |
||
379 | |||
380 | // decltype is only available in C++11 and above. |
||
381 | #if __cplusplus >= 201103L |
||
382 | // __hip_promote |
||
383 | template <class _Tp> struct __numeric_type { |
||
384 | static void __test(...); |
||
385 | static _Float16 __test(_Float16); |
||
386 | static float __test(float); |
||
387 | static double __test(char); |
||
388 | static double __test(int); |
||
389 | static double __test(unsigned); |
||
390 | static double __test(long); |
||
391 | static double __test(unsigned long); |
||
392 | static double __test(long long); |
||
393 | static double __test(unsigned long long); |
||
394 | static double __test(double); |
||
395 | // No support for long double, use double instead. |
||
396 | static double __test(long double); |
||
397 | |||
398 | typedef decltype(__test(declval<_Tp>())) type; |
||
399 | static const bool value = !is_same<type, void>::value; |
||
400 | }; |
||
401 | |||
402 | template <> struct __numeric_type<void> { static const bool value = true; }; |
||
403 | |||
404 | template <class _A1, class _A2 = void, class _A3 = void, |
||
405 | bool = __numeric_type<_A1>::value &&__numeric_type<_A2>::value |
||
406 | &&__numeric_type<_A3>::value> |
||
407 | class __promote_imp { |
||
408 | public: |
||
409 | static const bool value = false; |
||
410 | }; |
||
411 | |||
412 | template <class _A1, class _A2, class _A3> |
||
413 | class __promote_imp<_A1, _A2, _A3, true> { |
||
414 | private: |
||
415 | typedef typename __promote_imp<_A1>::type __type1; |
||
416 | typedef typename __promote_imp<_A2>::type __type2; |
||
417 | typedef typename __promote_imp<_A3>::type __type3; |
||
418 | |||
419 | public: |
||
420 | typedef decltype(__type1() + __type2() + __type3()) type; |
||
421 | static const bool value = true; |
||
422 | }; |
||
423 | |||
424 | template <class _A1, class _A2> class __promote_imp<_A1, _A2, void, true> { |
||
425 | private: |
||
426 | typedef typename __promote_imp<_A1>::type __type1; |
||
427 | typedef typename __promote_imp<_A2>::type __type2; |
||
428 | |||
429 | public: |
||
430 | typedef decltype(__type1() + __type2()) type; |
||
431 | static const bool value = true; |
||
432 | }; |
||
433 | |||
434 | template <class _A1> class __promote_imp<_A1, void, void, true> { |
||
435 | public: |
||
436 | typedef typename __numeric_type<_A1>::type type; |
||
437 | static const bool value = true; |
||
438 | }; |
||
439 | |||
440 | template <class _A1, class _A2 = void, class _A3 = void> |
||
441 | class __promote : public __promote_imp<_A1, _A2, _A3> {}; |
||
442 | #endif //__cplusplus >= 201103L |
||
443 | } // namespace __hip |
||
444 | |||
445 | // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to |
||
446 | // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with |
||
447 | // floor(double). |
||
448 | #define __HIP_OVERLOAD1(__retty, __fn) \ |
||
449 | template <typename __T> \ |
||
450 | __DEVICE__ __CONSTEXPR__ \ |
||
451 | typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \ |
||
452 | __fn(__T __x) { \ |
||
453 | return ::__fn((double)__x); \ |
||
454 | } |
||
455 | |||
456 | // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double |
||
457 | // or integer argument to avoid compilation error due to ambibuity. e.g. |
||
458 | // max(5.0f, 6.0) is resolved with max(double, double). |
||
459 | #if __cplusplus >= 201103L |
||
460 | #define __HIP_OVERLOAD2(__retty, __fn) \ |
||
461 | template <typename __T1, typename __T2> \ |
||
462 | __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \ |
||
463 | __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \ |
||
464 | typename __hip::__promote<__T1, __T2>::type>::type \ |
||
465 | __fn(__T1 __x, __T2 __y) { \ |
||
466 | typedef typename __hip::__promote<__T1, __T2>::type __result_type; \ |
||
467 | return __fn((__result_type)__x, (__result_type)__y); \ |
||
468 | } |
||
469 | #else |
||
470 | #define __HIP_OVERLOAD2(__retty, __fn) \ |
||
471 | template <typename __T1, typename __T2> \ |
||
472 | __DEVICE__ __CONSTEXPR__ \ |
||
473 | typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ |
||
474 | __hip::is_arithmetic<__T2>::value, \ |
||
475 | __retty>::type \ |
||
476 | __fn(__T1 __x, __T2 __y) { \ |
||
477 | return __fn((double)__x, (double)__y); \ |
||
478 | } |
||
479 | #endif |
||
480 | |||
481 | __HIP_OVERLOAD1(double, acos) |
||
482 | __HIP_OVERLOAD1(double, acosh) |
||
483 | __HIP_OVERLOAD1(double, asin) |
||
484 | __HIP_OVERLOAD1(double, asinh) |
||
485 | __HIP_OVERLOAD1(double, atan) |
||
486 | __HIP_OVERLOAD2(double, atan2) |
||
487 | __HIP_OVERLOAD1(double, atanh) |
||
488 | __HIP_OVERLOAD1(double, cbrt) |
||
489 | __HIP_OVERLOAD1(double, ceil) |
||
490 | __HIP_OVERLOAD2(double, copysign) |
||
491 | __HIP_OVERLOAD1(double, cos) |
||
492 | __HIP_OVERLOAD1(double, cosh) |
||
493 | __HIP_OVERLOAD1(double, erf) |
||
494 | __HIP_OVERLOAD1(double, erfc) |
||
495 | __HIP_OVERLOAD1(double, exp) |
||
496 | __HIP_OVERLOAD1(double, exp2) |
||
497 | __HIP_OVERLOAD1(double, expm1) |
||
498 | __HIP_OVERLOAD1(double, fabs) |
||
499 | __HIP_OVERLOAD2(double, fdim) |
||
500 | __HIP_OVERLOAD1(double, floor) |
||
501 | __HIP_OVERLOAD2(double, fmax) |
||
502 | __HIP_OVERLOAD2(double, fmin) |
||
503 | __HIP_OVERLOAD2(double, fmod) |
||
504 | #if !defined(__HIPCC_RTC__) |
||
505 | __HIP_OVERLOAD1(int, fpclassify) |
||
506 | #endif // !defined(__HIPCC_RTC__) |
||
507 | __HIP_OVERLOAD2(double, hypot) |
||
508 | __HIP_OVERLOAD1(int, ilogb) |
||
509 | __HIP_OVERLOAD1(bool, isfinite) |
||
510 | __HIP_OVERLOAD2(bool, isgreater) |
||
511 | __HIP_OVERLOAD2(bool, isgreaterequal) |
||
512 | __HIP_OVERLOAD1(bool, isinf) |
||
513 | __HIP_OVERLOAD2(bool, isless) |
||
514 | __HIP_OVERLOAD2(bool, islessequal) |
||
515 | __HIP_OVERLOAD2(bool, islessgreater) |
||
516 | __HIP_OVERLOAD1(bool, isnan) |
||
517 | __HIP_OVERLOAD1(bool, isnormal) |
||
518 | __HIP_OVERLOAD2(bool, isunordered) |
||
519 | __HIP_OVERLOAD1(double, lgamma) |
||
520 | __HIP_OVERLOAD1(double, log) |
||
521 | __HIP_OVERLOAD1(double, log10) |
||
522 | __HIP_OVERLOAD1(double, log1p) |
||
523 | __HIP_OVERLOAD1(double, log2) |
||
524 | __HIP_OVERLOAD1(double, logb) |
||
525 | __HIP_OVERLOAD1(long long, llrint) |
||
526 | __HIP_OVERLOAD1(long long, llround) |
||
527 | __HIP_OVERLOAD1(long, lrint) |
||
528 | __HIP_OVERLOAD1(long, lround) |
||
529 | __HIP_OVERLOAD1(double, nearbyint) |
||
530 | __HIP_OVERLOAD2(double, nextafter) |
||
531 | __HIP_OVERLOAD2(double, pow) |
||
532 | __HIP_OVERLOAD2(double, remainder) |
||
533 | __HIP_OVERLOAD1(double, rint) |
||
534 | __HIP_OVERLOAD1(double, round) |
||
535 | __HIP_OVERLOAD1(bool, signbit) |
||
536 | __HIP_OVERLOAD1(double, sin) |
||
537 | __HIP_OVERLOAD1(double, sinh) |
||
538 | __HIP_OVERLOAD1(double, sqrt) |
||
539 | __HIP_OVERLOAD1(double, tan) |
||
540 | __HIP_OVERLOAD1(double, tanh) |
||
541 | __HIP_OVERLOAD1(double, tgamma) |
||
542 | __HIP_OVERLOAD1(double, trunc) |
||
543 | |||
544 | // Overload these but don't add them to std, they are not part of cmath. |
||
545 | __HIP_OVERLOAD2(double, max) |
||
546 | __HIP_OVERLOAD2(double, min) |
||
547 | |||
548 | // Additional Overloads that don't quite match HIP_OVERLOAD. |
||
549 | #if __cplusplus >= 201103L |
||
550 | template <typename __T1, typename __T2, typename __T3> |
||
551 | __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< |
||
552 | __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && |
||
553 | __hip::is_arithmetic<__T3>::value, |
||
554 | typename __hip::__promote<__T1, __T2, __T3>::type>::type |
||
555 | fma(__T1 __x, __T2 __y, __T3 __z) { |
||
556 | typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type; |
||
557 | return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z); |
||
558 | } |
||
559 | #else |
||
560 | template <typename __T1, typename __T2, typename __T3> |
||
561 | __DEVICE__ __CONSTEXPR__ |
||
562 | typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && |
||
563 | __hip::is_arithmetic<__T2>::value && |
||
564 | __hip::is_arithmetic<__T3>::value, |
||
565 | double>::type |
||
566 | fma(__T1 __x, __T2 __y, __T3 __z) { |
||
567 | return ::fma((double)__x, (double)__y, (double)__z); |
||
568 | } |
||
569 | #endif |
||
570 | |||
571 | template <typename __T> |
||
572 | __DEVICE__ __CONSTEXPR__ |
||
573 | typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type |
||
574 | frexp(__T __x, int *__exp) { |
||
575 | return ::frexp((double)__x, __exp); |
||
576 | } |
||
577 | |||
578 | template <typename __T> |
||
579 | __DEVICE__ __CONSTEXPR__ |
||
580 | typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type |
||
581 | ldexp(__T __x, int __exp) { |
||
582 | return ::ldexp((double)__x, __exp); |
||
583 | } |
||
584 | |||
585 | template <typename __T> |
||
586 | __DEVICE__ __CONSTEXPR__ |
||
587 | typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type |
||
588 | modf(__T __x, double *__exp) { |
||
589 | return ::modf((double)__x, __exp); |
||
590 | } |
||
591 | |||
592 | #if __cplusplus >= 201103L |
||
593 | template <typename __T1, typename __T2> |
||
594 | __DEVICE__ __CONSTEXPR__ |
||
595 | typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && |
||
596 | __hip::is_arithmetic<__T2>::value, |
||
597 | typename __hip::__promote<__T1, __T2>::type>::type |
||
598 | remquo(__T1 __x, __T2 __y, int *__quo) { |
||
599 | typedef typename __hip::__promote<__T1, __T2>::type __result_type; |
||
600 | return ::remquo((__result_type)__x, (__result_type)__y, __quo); |
||
601 | } |
||
602 | #else |
||
603 | template <typename __T1, typename __T2> |
||
604 | __DEVICE__ __CONSTEXPR__ |
||
605 | typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && |
||
606 | __hip::is_arithmetic<__T2>::value, |
||
607 | double>::type |
||
608 | remquo(__T1 __x, __T2 __y, int *__quo) { |
||
609 | return ::remquo((double)__x, (double)__y, __quo); |
||
610 | } |
||
611 | #endif |
||
612 | |||
613 | template <typename __T> |
||
614 | __DEVICE__ __CONSTEXPR__ |
||
615 | typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type |
||
616 | scalbln(__T __x, long int __exp) { |
||
617 | return ::scalbln((double)__x, __exp); |
||
618 | } |
||
619 | |||
620 | template <typename __T> |
||
621 | __DEVICE__ __CONSTEXPR__ |
||
622 | typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type |
||
623 | scalbn(__T __x, int __exp) { |
||
624 | return ::scalbn((double)__x, __exp); |
||
625 | } |
||
626 | |||
627 | #pragma pop_macro("__HIP_OVERLOAD1") |
||
628 | #pragma pop_macro("__HIP_OVERLOAD2") |
||
629 | |||
630 | // END HIP_OVERLOAD |
||
631 | |||
632 | // END DEF_FUN and HIP_OVERLOAD |
||
633 | |||
634 | #endif // ifndef __OPENMP_AMDGCN__ |
||
635 | #endif // defined(__cplusplus) |
||
636 | |||
637 | #ifndef __OPENMP_AMDGCN__ |
||
638 | // Define these overloads inside the namespace our standard library uses. |
||
639 | #if !defined(__HIPCC_RTC__) |
||
640 | #ifdef _LIBCPP_BEGIN_NAMESPACE_STD |
||
641 | _LIBCPP_BEGIN_NAMESPACE_STD |
||
642 | #else |
||
643 | namespace std { |
||
644 | #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION |
||
645 | _GLIBCXX_BEGIN_NAMESPACE_VERSION |
||
646 | #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION |
||
647 | #endif // _LIBCPP_BEGIN_NAMESPACE_STD |
||
648 | |||
649 | // Pull the new overloads we defined above into namespace std. |
||
650 | // using ::abs; - This may be considered for C++. |
||
651 | using ::acos; |
||
652 | using ::acosh; |
||
653 | using ::asin; |
||
654 | using ::asinh; |
||
655 | using ::atan; |
||
656 | using ::atan2; |
||
657 | using ::atanh; |
||
658 | using ::cbrt; |
||
659 | using ::ceil; |
||
660 | using ::copysign; |
||
661 | using ::cos; |
||
662 | using ::cosh; |
||
663 | using ::erf; |
||
664 | using ::erfc; |
||
665 | using ::exp; |
||
666 | using ::exp2; |
||
667 | using ::expm1; |
||
668 | using ::fabs; |
||
669 | using ::fdim; |
||
670 | using ::floor; |
||
671 | using ::fma; |
||
672 | using ::fmax; |
||
673 | using ::fmin; |
||
674 | using ::fmod; |
||
675 | using ::fpclassify; |
||
676 | using ::frexp; |
||
677 | using ::hypot; |
||
678 | using ::ilogb; |
||
679 | using ::isfinite; |
||
680 | using ::isgreater; |
||
681 | using ::isgreaterequal; |
||
682 | using ::isless; |
||
683 | using ::islessequal; |
||
684 | using ::islessgreater; |
||
685 | using ::isnormal; |
||
686 | using ::isunordered; |
||
687 | using ::ldexp; |
||
688 | using ::lgamma; |
||
689 | using ::llrint; |
||
690 | using ::llround; |
||
691 | using ::log; |
||
692 | using ::log10; |
||
693 | using ::log1p; |
||
694 | using ::log2; |
||
695 | using ::logb; |
||
696 | using ::lrint; |
||
697 | using ::lround; |
||
698 | using ::modf; |
||
699 | // using ::nan; - This may be considered for C++. |
||
700 | // using ::nanf; - This may be considered for C++. |
||
701 | // using ::nanl; - This is not yet defined. |
||
702 | using ::nearbyint; |
||
703 | using ::nextafter; |
||
704 | // using ::nexttoward; - Omit this since we do not have a definition. |
||
705 | using ::pow; |
||
706 | using ::remainder; |
||
707 | using ::remquo; |
||
708 | using ::rint; |
||
709 | using ::round; |
||
710 | using ::scalbln; |
||
711 | using ::scalbn; |
||
712 | using ::signbit; |
||
713 | using ::sin; |
||
714 | using ::sinh; |
||
715 | using ::sqrt; |
||
716 | using ::tan; |
||
717 | using ::tanh; |
||
718 | using ::tgamma; |
||
719 | using ::trunc; |
||
720 | |||
721 | // Well this is fun: We need to pull these symbols in for libc++, but we can't |
||
722 | // pull them in with libstdc++, because its ::isinf and ::isnan are different |
||
723 | // than its std::isinf and std::isnan. |
||
724 | #ifndef __GLIBCXX__ |
||
725 | using ::isinf; |
||
726 | using ::isnan; |
||
727 | #endif |
||
728 | |||
729 | // Finally, pull the "foobarf" functions that HIP defines into std. |
||
730 | using ::acosf; |
||
731 | using ::acoshf; |
||
732 | using ::asinf; |
||
733 | using ::asinhf; |
||
734 | using ::atan2f; |
||
735 | using ::atanf; |
||
736 | using ::atanhf; |
||
737 | using ::cbrtf; |
||
738 | using ::ceilf; |
||
739 | using ::copysignf; |
||
740 | using ::cosf; |
||
741 | using ::coshf; |
||
742 | using ::erfcf; |
||
743 | using ::erff; |
||
744 | using ::exp2f; |
||
745 | using ::expf; |
||
746 | using ::expm1f; |
||
747 | using ::fabsf; |
||
748 | using ::fdimf; |
||
749 | using ::floorf; |
||
750 | using ::fmaf; |
||
751 | using ::fmaxf; |
||
752 | using ::fminf; |
||
753 | using ::fmodf; |
||
754 | using ::frexpf; |
||
755 | using ::hypotf; |
||
756 | using ::ilogbf; |
||
757 | using ::ldexpf; |
||
758 | using ::lgammaf; |
||
759 | using ::llrintf; |
||
760 | using ::llroundf; |
||
761 | using ::log10f; |
||
762 | using ::log1pf; |
||
763 | using ::log2f; |
||
764 | using ::logbf; |
||
765 | using ::logf; |
||
766 | using ::lrintf; |
||
767 | using ::lroundf; |
||
768 | using ::modff; |
||
769 | using ::nearbyintf; |
||
770 | using ::nextafterf; |
||
771 | // using ::nexttowardf; - Omit this since we do not have a definition. |
||
772 | using ::powf; |
||
773 | using ::remainderf; |
||
774 | using ::remquof; |
||
775 | using ::rintf; |
||
776 | using ::roundf; |
||
777 | using ::scalblnf; |
||
778 | using ::scalbnf; |
||
779 | using ::sinf; |
||
780 | using ::sinhf; |
||
781 | using ::sqrtf; |
||
782 | using ::tanf; |
||
783 | using ::tanhf; |
||
784 | using ::tgammaf; |
||
785 | using ::truncf; |
||
786 | |||
787 | #ifdef _LIBCPP_END_NAMESPACE_STD |
||
788 | _LIBCPP_END_NAMESPACE_STD |
||
789 | #else |
||
790 | #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION |
||
791 | _GLIBCXX_END_NAMESPACE_VERSION |
||
792 | #endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION |
||
793 | } // namespace std |
||
794 | #endif // _LIBCPP_END_NAMESPACE_STD |
||
795 | #endif // !defined(__HIPCC_RTC__) |
||
796 | |||
797 | // Define device-side math functions from <ymath.h> on MSVC. |
||
798 | #if !defined(__HIPCC_RTC__) |
||
799 | #if defined(_MSC_VER) |
||
800 | |||
801 | // Before VS2019, `<ymath.h>` is also included in `<limits>` and other headers. |
||
802 | // But, from VS2019, it's only included in `<complex>`. Need to include |
||
803 | // `<ymath.h>` here to ensure C functions declared there won't be markded as |
||
804 | // `__host__` and `__device__` through `<complex>` wrapper. |
||
805 | #include <ymath.h> |
||
806 | |||
807 | #if defined(__cplusplus) |
||
808 | extern "C" { |
||
809 | #endif // defined(__cplusplus) |
||
810 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x, |
||
811 | double y) { |
||
812 | return cosh(x) * y; |
||
813 | } |
||
814 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x, |
||
815 | float y) { |
||
816 | return coshf(x) * y; |
||
817 | } |
||
818 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) { |
||
819 | return fpclassify(*p); |
||
820 | } |
||
821 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) { |
||
822 | return fpclassify(*p); |
||
823 | } |
||
824 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x, |
||
825 | double y) { |
||
826 | return sinh(x) * y; |
||
827 | } |
||
828 | __DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x, |
||
829 | float y) { |
||
830 | return sinhf(x) * y; |
||
831 | } |
||
832 | #if defined(__cplusplus) |
||
833 | } |
||
834 | #endif // defined(__cplusplus) |
||
835 | #endif // defined(_MSC_VER) |
||
836 | #endif // !defined(__HIPCC_RTC__) |
||
837 | #endif // ifndef __OPENMP_AMDGCN__ |
||
838 | |||
839 | #pragma pop_macro("__DEVICE__") |
||
840 | #pragma pop_macro("__CONSTEXPR__") |
||
841 | |||
842 | #endif // __CLANG_HIP_CMATH_H__ |