/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_CMATH_H__
#define __CLANG_CUDA_CMATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

#ifndef __OPENMP_NVPTX__
#include <limits>
#endif

// CUDA lets us use various std math functions on the device side.  This file
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements.  Then this file implements those functions, after
// their implementations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation.  Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.
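//
// As a minimal sketch of the pattern (illustration only, not part of this
// header):
//
//   __device__ float sin(float __x);  // declared in the global namespace...
//   namespace std { using ::sin; }    // ...then pulled into namespace std
//
// Device code can then call either ::sin(x) or std::sin(x), no matter where
// the host's standard library declared its own sin().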

#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}

// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)

// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
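// Concretely, such a system header declares the non-conforming
//   int isnan(float);
// where conforming C++ expects
//   bool isnan(float);
// The variant block below supplies the int-returning versions; the
// bool-returning versions follow after it.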
#if defined(__OPENMP_NVPTX__)
#pragma omp begin declare variant match(                                       \
    implementation = {extension(disable_implicit_base)})

// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
//        add a suffix. This means we would clash with the names of the variants
//        (note that we do not create implicit base functions here). To avoid
//        this clash we add a new trait to some of them that is always true
//        (this is LLVM after all ;)). It will only influence the mangled name
//        of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})

__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }

#pragma omp end declare variant

#endif

__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for macOS.  __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }

#if defined(__OPENMP_NVPTX__)
#pragma omp end declare variant
#endif

#endif

__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }

// There was a redefinition error for this overload in CUDA mode.
// We restrict it to OpenMP mode for now, as that is where it is actually
// needed anyway.
#ifdef __OPENMP_NVPTX__
__DEVICE__ float remquo(float __n, float __d, int *__q) {
  return ::remquof(__n, __d, __q);
}
#endif

// Notably missing above is nexttoward.  We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

#ifndef __OPENMP_NVPTX__

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h.  We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std.  These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.
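//
// To illustrate (1): without an integral overload, a call such as
//
//   __device__ void __f() { double __d = sin(0); }   // __f is hypothetical
//
// is ambiguous and fails to compile, because the int argument 0 converts
// equally well to float and to double.  The templates below accept the
// integral argument directly and forward it to the double-precision function.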

// We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.
template <bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};
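
// __clang_cuda_enable_if works like std::enable_if: the primary template has
// no ::type member, so SFINAE removes any overload whose condition is false.
// A minimal usage sketch (__twice is a hypothetical function, not part of
// this header):
//
//   template <typename __T>
//   typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
//                                   double>::type
//   __twice(__T __x) { return 2.0 * (double)__x; }
//
// __twice(3) instantiates with __T = int; for __twice(3.0f) there is no
// ::type member, so the overload drops out of overload resolution.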

// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns __retty.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                             \
          std::numeric_limits<__T2>::is_specialized,                           \
      __retty>::type                                                           \
  __fn(__T1 __x, __T2 __y) {                                                   \
    return __fn((double)__x, (double)__y);                                     \
  }
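
// For example, __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) expands to
// roughly
//
//   template <typename __T>
//   __DEVICE__ typename __clang_cuda_enable_if<
//       std::numeric_limits<__T>::is_integer, double>::type
//   sin(__T __x) { return ::sin((double)__x); }
//
// so sin(0) now resolves to this template instead of being ambiguous between
// sin(float) and sin(double).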

__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc)

#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2

// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}

// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>).  Thus the ugliness below.
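//
// As a sketch (exact spellings vary across stdlib versions): under libc++,
// _LIBCPP_BEGIN_NAMESPACE_STD expands to something like
//
//   namespace std { inline namespace __1 {
//
// while for libstdc++ we open "namespace std {" ourselves and, when versioned
// namespaces are enabled, _GLIBCXX_BEGIN_NAMESPACE_VERSION opens the inline
// version namespace as well.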
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// from its std::isinf and std::isnan.
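// (In libstdc++, ::isinf and ::isnan are the C library functions returning
// int, while std::isinf and std::isnan are the C++ overloads returning bool,
// so a using-declaration here would introduce the wrong signatures.)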
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

#endif // __OPENMP_NVPTX__

#undef __DEVICE__

#endif // __CLANG_CUDA_CMATH_H__