Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2
 *
3
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
 * See https://llvm.org/LICENSE.txt for license information.
5
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
 *
7
 *===-----------------------------------------------------------------------===
8
 */
9
#ifndef __CLANG_HIP_MATH_H__
10
#define __CLANG_HIP_MATH_H__
11
 
12
#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14
#endif
15
 
16
#if !defined(__HIPCC_RTC__)
17
#if defined(__cplusplus)
18
#include <algorithm>
19
#endif
20
#include <limits.h>
21
#include <stdint.h>
22
#ifdef __OPENMP_AMDGCN__
23
#include <omp.h>
24
#endif
25
#endif // !defined(__HIPCC_RTC__)
26
 
27
#pragma push_macro("__DEVICE__")
28
 
29
#ifdef __OPENMP_AMDGCN__
30
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
31
#else
32
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
33
#endif
34
 
35
// A few functions return bool type starting only in C++11.
36
#pragma push_macro("__RETURN_TYPE")
37
#ifdef __OPENMP_AMDGCN__
38
#define __RETURN_TYPE int
39
#else
40
#if defined(__cplusplus)
41
#define __RETURN_TYPE bool
42
#else
43
#define __RETURN_TYPE int
44
#endif
45
#endif // __OPENMP_AMDGCN__
46
 
47
#if defined (__cplusplus) && __cplusplus < 201103L
48
// emulate static_assert on type sizes
49
template<bool>
50
struct __compare_result{};
51
template<>
52
struct __compare_result<true> {
53
  static const __device__ bool valid;
54
};
55
 
56
__DEVICE__
57
void __suppress_unused_warning(bool b){};
58
template <unsigned int S, unsigned int T>
59
__DEVICE__ void __static_assert_equal_size() {
60
  __suppress_unused_warning(__compare_result<S == T>::valid);
61
}
62
 
63
#define __static_assert_type_size_equal(A, B) \
64
  __static_assert_equal_size<A,B>()
65
 
66
#else
67
#define __static_assert_type_size_equal(A,B) \
68
  static_assert((A) == (B), "")
69
 
70
#endif
71
 
72
__DEVICE__
73
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
74
  uint64_t __r = 0;
75
  while (*__tagp != '\0') {
76
    char __tmp = *__tagp;
77
 
78
    if (__tmp >= '0' && __tmp <= '7')
79
      __r = (__r * 8u) + __tmp - '0';
80
    else
81
      return 0;
82
 
83
    ++__tagp;
84
  }
85
 
86
  return __r;
87
}
88
 
89
__DEVICE__
90
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
91
  uint64_t __r = 0;
92
  while (*__tagp != '\0') {
93
    char __tmp = *__tagp;
94
 
95
    if (__tmp >= '0' && __tmp <= '9')
96
      __r = (__r * 10u) + __tmp - '0';
97
    else
98
      return 0;
99
 
100
    ++__tagp;
101
  }
102
 
103
  return __r;
104
}
105
 
106
__DEVICE__
107
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
108
  uint64_t __r = 0;
109
  while (*__tagp != '\0') {
110
    char __tmp = *__tagp;
111
 
112
    if (__tmp >= '0' && __tmp <= '9')
113
      __r = (__r * 16u) + __tmp - '0';
114
    else if (__tmp >= 'a' && __tmp <= 'f')
115
      __r = (__r * 16u) + __tmp - 'a' + 10;
116
    else if (__tmp >= 'A' && __tmp <= 'F')
117
      __r = (__r * 16u) + __tmp - 'A' + 10;
118
    else
119
      return 0;
120
 
121
    ++__tagp;
122
  }
123
 
124
  return __r;
125
}
126
 
127
__DEVICE__
128
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
129
  if (*__tagp == '0') {
130
    ++__tagp;
131
 
132
    if (*__tagp == 'x' || *__tagp == 'X')
133
      return __make_mantissa_base16(__tagp);
134
    else
135
      return __make_mantissa_base8(__tagp);
136
  }
137
 
138
  return __make_mantissa_base10(__tagp);
139
}
140
 
141
// BEGIN FLOAT
142
#if defined(__cplusplus)
143
__DEVICE__
144
int abs(int __x) {
145
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
146
  return (__x ^ __sgn) - __sgn;
147
}
148
__DEVICE__
149
long labs(long __x) {
150
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
151
  return (__x ^ __sgn) - __sgn;
152
}
153
__DEVICE__
154
long long llabs(long long __x) {
155
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
156
  return (__x ^ __sgn) - __sgn;
157
}
158
#endif
159
 
160
__DEVICE__
161
float acosf(float __x) { return __ocml_acos_f32(__x); }
162
 
163
__DEVICE__
164
float acoshf(float __x) { return __ocml_acosh_f32(__x); }
165
 
166
__DEVICE__
167
float asinf(float __x) { return __ocml_asin_f32(__x); }
168
 
169
__DEVICE__
170
float asinhf(float __x) { return __ocml_asinh_f32(__x); }
171
 
172
__DEVICE__
173
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
174
 
175
__DEVICE__
176
float atanf(float __x) { return __ocml_atan_f32(__x); }
177
 
178
__DEVICE__
179
float atanhf(float __x) { return __ocml_atanh_f32(__x); }
180
 
181
__DEVICE__
182
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
183
 
184
__DEVICE__
185
float ceilf(float __x) { return __ocml_ceil_f32(__x); }
186
 
187
__DEVICE__
188
float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
189
 
190
__DEVICE__
191
float cosf(float __x) { return __ocml_cos_f32(__x); }
192
 
193
__DEVICE__
194
float coshf(float __x) { return __ocml_cosh_f32(__x); }
195
 
196
__DEVICE__
197
float cospif(float __x) { return __ocml_cospi_f32(__x); }
198
 
199
__DEVICE__
200
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
201
 
202
__DEVICE__
203
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
204
 
205
__DEVICE__
206
float erfcf(float __x) { return __ocml_erfc_f32(__x); }
207
 
208
__DEVICE__
209
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
210
 
211
__DEVICE__
212
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
213
 
214
__DEVICE__
215
float erff(float __x) { return __ocml_erf_f32(__x); }
216
 
217
__DEVICE__
218
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
219
 
220
__DEVICE__
221
float exp10f(float __x) { return __ocml_exp10_f32(__x); }
222
 
223
__DEVICE__
224
float exp2f(float __x) { return __ocml_exp2_f32(__x); }
225
 
226
__DEVICE__
227
float expf(float __x) { return __ocml_exp_f32(__x); }
228
 
229
__DEVICE__
230
float expm1f(float __x) { return __ocml_expm1_f32(__x); }
231
 
232
__DEVICE__
233
float fabsf(float __x) { return __builtin_fabsf(__x); }
234
 
235
__DEVICE__
236
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
237
 
238
__DEVICE__
239
float fdividef(float __x, float __y) { return __x / __y; }
240
 
241
__DEVICE__
242
float floorf(float __x) { return __ocml_floor_f32(__x); }
243
 
244
__DEVICE__
245
float fmaf(float __x, float __y, float __z) {
246
  return __ocml_fma_f32(__x, __y, __z);
247
}
248
 
249
__DEVICE__
250
float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
251
 
252
__DEVICE__
253
float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
254
 
255
__DEVICE__
256
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
257
 
258
__DEVICE__
259
float frexpf(float __x, int *__nptr) {
260
  int __tmp;
261
#ifdef __OPENMP_AMDGCN__
262
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
263
#endif
264
  float __r =
265
      __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
266
  *__nptr = __tmp;
267
 
268
  return __r;
269
}
270
 
271
__DEVICE__
272
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
273
 
274
__DEVICE__
275
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
276
 
277
__DEVICE__
278
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
279
 
280
__DEVICE__
281
__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
282
 
283
__DEVICE__
284
__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
285
 
286
__DEVICE__
287
float j0f(float __x) { return __ocml_j0_f32(__x); }
288
 
289
__DEVICE__
290
float j1f(float __x) { return __ocml_j1_f32(__x); }
291
 
292
__DEVICE__
293
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
294
                                // and the Miller & Brown algorithm
295
  //       for linear recurrences to get O(log n) steps, but it's unclear if
296
  //       it'd be beneficial in this case.
297
  if (__n == 0)
298
    return j0f(__x);
299
  if (__n == 1)
300
    return j1f(__x);
301
 
302
  float __x0 = j0f(__x);
303
  float __x1 = j1f(__x);
304
  for (int __i = 1; __i < __n; ++__i) {
305
    float __x2 = (2 * __i) / __x * __x1 - __x0;
306
    __x0 = __x1;
307
    __x1 = __x2;
308
  }
309
 
310
  return __x1;
311
}
312
 
313
__DEVICE__
314
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
315
 
316
__DEVICE__
317
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
318
 
319
__DEVICE__
320
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
321
 
322
__DEVICE__
323
long long int llroundf(float __x) { return __ocml_round_f32(__x); }
324
 
325
__DEVICE__
326
float log10f(float __x) { return __ocml_log10_f32(__x); }
327
 
328
__DEVICE__
329
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
330
 
331
__DEVICE__
332
float log2f(float __x) { return __ocml_log2_f32(__x); }
333
 
334
__DEVICE__
335
float logbf(float __x) { return __ocml_logb_f32(__x); }
336
 
337
__DEVICE__
338
float logf(float __x) { return __ocml_log_f32(__x); }
339
 
340
__DEVICE__
341
long int lrintf(float __x) { return __ocml_rint_f32(__x); }
342
 
343
__DEVICE__
344
long int lroundf(float __x) { return __ocml_round_f32(__x); }
345
 
346
__DEVICE__
347
float modff(float __x, float *__iptr) {
348
  float __tmp;
349
#ifdef __OPENMP_AMDGCN__
350
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
351
#endif
352
  float __r =
353
      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
354
  *__iptr = __tmp;
355
  return __r;
356
}
357
 
358
__DEVICE__
359
float nanf(const char *__tagp __attribute__((nonnull))) {
360
  union {
361
    float val;
362
    struct ieee_float {
363
      unsigned int mantissa : 22;
364
      unsigned int quiet : 1;
365
      unsigned int exponent : 8;
366
      unsigned int sign : 1;
367
    } bits;
368
  } __tmp;
369
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
370
 
371
  __tmp.bits.sign = 0u;
372
  __tmp.bits.exponent = ~0u;
373
  __tmp.bits.quiet = 1u;
374
  __tmp.bits.mantissa = __make_mantissa(__tagp);
375
 
376
  return __tmp.val;
377
}
378
 
379
__DEVICE__
380
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
381
 
382
__DEVICE__
383
float nextafterf(float __x, float __y) {
384
  return __ocml_nextafter_f32(__x, __y);
385
}
386
 
387
__DEVICE__
388
float norm3df(float __x, float __y, float __z) {
389
  return __ocml_len3_f32(__x, __y, __z);
390
}
391
 
392
__DEVICE__
393
float norm4df(float __x, float __y, float __z, float __w) {
394
  return __ocml_len4_f32(__x, __y, __z, __w);
395
}
396
 
397
__DEVICE__
398
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
399
 
400
__DEVICE__
401
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
402
 
403
__DEVICE__
404
float normf(int __dim,
405
            const float *__a) { // TODO: placeholder until OCML adds support.
406
  float __r = 0;
407
  while (__dim--) {
408
    __r += __a[0] * __a[0];
409
    ++__a;
410
  }
411
 
412
  return __ocml_sqrt_f32(__r);
413
}
414
 
415
__DEVICE__
416
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
417
 
418
__DEVICE__
419
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
420
 
421
__DEVICE__
422
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
423
 
424
__DEVICE__
425
float remainderf(float __x, float __y) {
426
  return __ocml_remainder_f32(__x, __y);
427
}
428
 
429
__DEVICE__
430
float remquof(float __x, float __y, int *__quo) {
431
  int __tmp;
432
#ifdef __OPENMP_AMDGCN__
433
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
434
#endif
435
  float __r = __ocml_remquo_f32(
436
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
437
  *__quo = __tmp;
438
 
439
  return __r;
440
}
441
 
442
__DEVICE__
443
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
444
 
445
__DEVICE__
446
float rintf(float __x) { return __ocml_rint_f32(__x); }
447
 
448
__DEVICE__
449
float rnorm3df(float __x, float __y, float __z) {
450
  return __ocml_rlen3_f32(__x, __y, __z);
451
}
452
 
453
__DEVICE__
454
float rnorm4df(float __x, float __y, float __z, float __w) {
455
  return __ocml_rlen4_f32(__x, __y, __z, __w);
456
}
457
 
458
__DEVICE__
459
float rnormf(int __dim,
460
             const float *__a) { // TODO: placeholder until OCML adds support.
461
  float __r = 0;
462
  while (__dim--) {
463
    __r += __a[0] * __a[0];
464
    ++__a;
465
  }
466
 
467
  return __ocml_rsqrt_f32(__r);
468
}
469
 
470
__DEVICE__
471
float roundf(float __x) { return __ocml_round_f32(__x); }
472
 
473
__DEVICE__
474
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
475
 
476
__DEVICE__
477
float scalblnf(float __x, long int __n) {
478
  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
479
                         : __ocml_scalb_f32(__x, __n);
480
}
481
 
482
__DEVICE__
483
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
484
 
485
__DEVICE__
486
__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
487
 
488
__DEVICE__
489
void sincosf(float __x, float *__sinptr, float *__cosptr) {
490
  float __tmp;
491
#ifdef __OPENMP_AMDGCN__
492
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
493
#endif
494
  *__sinptr =
495
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
496
  *__cosptr = __tmp;
497
}
498
 
499
__DEVICE__
500
void sincospif(float __x, float *__sinptr, float *__cosptr) {
501
  float __tmp;
502
#ifdef __OPENMP_AMDGCN__
503
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
504
#endif
505
  *__sinptr = __ocml_sincospi_f32(
506
      __x, (__attribute__((address_space(5))) float *)&__tmp);
507
  *__cosptr = __tmp;
508
}
509
 
510
__DEVICE__
511
float sinf(float __x) { return __ocml_sin_f32(__x); }
512
 
513
__DEVICE__
514
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
515
 
516
__DEVICE__
517
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
518
 
519
__DEVICE__
520
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
521
 
522
__DEVICE__
523
float tanf(float __x) { return __ocml_tan_f32(__x); }
524
 
525
__DEVICE__
526
float tanhf(float __x) { return __ocml_tanh_f32(__x); }
527
 
528
__DEVICE__
529
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
530
 
531
__DEVICE__
532
float truncf(float __x) { return __ocml_trunc_f32(__x); }
533
 
534
__DEVICE__
535
float y0f(float __x) { return __ocml_y0_f32(__x); }
536
 
537
__DEVICE__
538
float y1f(float __x) { return __ocml_y1_f32(__x); }
539
 
540
__DEVICE__
541
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
542
                                // and the Miller & Brown algorithm
543
  //       for linear recurrences to get O(log n) steps, but it's unclear if
544
  //       it'd be beneficial in this case. Placeholder until OCML adds
545
  //       support.
546
  if (__n == 0)
547
    return y0f(__x);
548
  if (__n == 1)
549
    return y1f(__x);
550
 
551
  float __x0 = y0f(__x);
552
  float __x1 = y1f(__x);
553
  for (int __i = 1; __i < __n; ++__i) {
554
    float __x2 = (2 * __i) / __x * __x1 - __x0;
555
    __x0 = __x1;
556
    __x1 = __x2;
557
  }
558
 
559
  return __x1;
560
}
561
 
562
// BEGIN INTRINSICS
563
 
564
__DEVICE__
565
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
566
 
567
__DEVICE__
568
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
569
 
570
__DEVICE__
571
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
572
 
573
#if defined OCML_BASIC_ROUNDED_OPERATIONS
574
__DEVICE__
575
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
576
__DEVICE__
577
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
578
__DEVICE__
579
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
580
__DEVICE__
581
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
582
#else
583
__DEVICE__
584
float __fadd_rn(float __x, float __y) { return __x + __y; }
585
#endif
586
 
587
#if defined OCML_BASIC_ROUNDED_OPERATIONS
588
__DEVICE__
589
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
590
__DEVICE__
591
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
592
__DEVICE__
593
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
594
__DEVICE__
595
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
596
#else
597
__DEVICE__
598
float __fdiv_rn(float __x, float __y) { return __x / __y; }
599
#endif
600
 
601
__DEVICE__
602
float __fdividef(float __x, float __y) { return __x / __y; }
603
 
604
#if defined OCML_BASIC_ROUNDED_OPERATIONS
605
__DEVICE__
606
float __fmaf_rd(float __x, float __y, float __z) {
607
  return __ocml_fma_rtn_f32(__x, __y, __z);
608
}
609
__DEVICE__
610
float __fmaf_rn(float __x, float __y, float __z) {
611
  return __ocml_fma_rte_f32(__x, __y, __z);
612
}
613
__DEVICE__
614
float __fmaf_ru(float __x, float __y, float __z) {
615
  return __ocml_fma_rtp_f32(__x, __y, __z);
616
}
617
__DEVICE__
618
float __fmaf_rz(float __x, float __y, float __z) {
619
  return __ocml_fma_rtz_f32(__x, __y, __z);
620
}
621
#else
622
__DEVICE__
623
float __fmaf_rn(float __x, float __y, float __z) {
624
  return __ocml_fma_f32(__x, __y, __z);
625
}
626
#endif
627
 
628
#if defined OCML_BASIC_ROUNDED_OPERATIONS
629
__DEVICE__
630
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
631
__DEVICE__
632
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
633
__DEVICE__
634
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
635
__DEVICE__
636
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
637
#else
638
__DEVICE__
639
float __fmul_rn(float __x, float __y) { return __x * __y; }
640
#endif
641
 
642
#if defined OCML_BASIC_ROUNDED_OPERATIONS
643
__DEVICE__
644
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
645
__DEVICE__
646
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
647
__DEVICE__
648
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
649
__DEVICE__
650
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
651
#else
652
__DEVICE__
653
float __frcp_rn(float __x) { return 1.0f / __x; }
654
#endif
655
 
656
__DEVICE__
657
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
658
 
659
#if defined OCML_BASIC_ROUNDED_OPERATIONS
660
__DEVICE__
661
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
662
__DEVICE__
663
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
664
__DEVICE__
665
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
666
__DEVICE__
667
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
668
#else
669
__DEVICE__
670
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
671
#endif
672
 
673
#if defined OCML_BASIC_ROUNDED_OPERATIONS
674
__DEVICE__
675
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
676
__DEVICE__
677
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
678
__DEVICE__
679
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
680
__DEVICE__
681
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
682
#else
683
__DEVICE__
684
float __fsub_rn(float __x, float __y) { return __x - __y; }
685
#endif
686
 
687
__DEVICE__
688
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
689
 
690
__DEVICE__
691
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
692
 
693
__DEVICE__
694
float __logf(float __x) { return __ocml_native_log_f32(__x); }
695
 
696
__DEVICE__
697
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
698
 
699
__DEVICE__
700
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
701
 
702
__DEVICE__
703
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
704
  *__sinptr = __ocml_native_sin_f32(__x);
705
  *__cosptr = __ocml_native_cos_f32(__x);
706
}
707
 
708
__DEVICE__
709
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
710
 
711
__DEVICE__
712
float __tanf(float __x) { return __ocml_tan_f32(__x); }
713
// END INTRINSICS
714
// END FLOAT
715
 
716
// BEGIN DOUBLE
717
__DEVICE__
718
double acos(double __x) { return __ocml_acos_f64(__x); }
719
 
720
__DEVICE__
721
double acosh(double __x) { return __ocml_acosh_f64(__x); }
722
 
723
__DEVICE__
724
double asin(double __x) { return __ocml_asin_f64(__x); }
725
 
726
__DEVICE__
727
double asinh(double __x) { return __ocml_asinh_f64(__x); }
728
 
729
__DEVICE__
730
double atan(double __x) { return __ocml_atan_f64(__x); }
731
 
732
__DEVICE__
733
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
734
 
735
__DEVICE__
736
double atanh(double __x) { return __ocml_atanh_f64(__x); }
737
 
738
__DEVICE__
739
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
740
 
741
__DEVICE__
742
double ceil(double __x) { return __ocml_ceil_f64(__x); }
743
 
744
__DEVICE__
745
double copysign(double __x, double __y) {
746
  return __ocml_copysign_f64(__x, __y);
747
}
748
 
749
__DEVICE__
750
double cos(double __x) { return __ocml_cos_f64(__x); }
751
 
752
__DEVICE__
753
double cosh(double __x) { return __ocml_cosh_f64(__x); }
754
 
755
__DEVICE__
756
double cospi(double __x) { return __ocml_cospi_f64(__x); }
757
 
758
__DEVICE__
759
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
760
 
761
__DEVICE__
762
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
763
 
764
__DEVICE__
765
double erf(double __x) { return __ocml_erf_f64(__x); }
766
 
767
__DEVICE__
768
double erfc(double __x) { return __ocml_erfc_f64(__x); }
769
 
770
__DEVICE__
771
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
772
 
773
__DEVICE__
774
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
775
 
776
__DEVICE__
777
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
778
 
779
__DEVICE__
780
double exp(double __x) { return __ocml_exp_f64(__x); }
781
 
782
__DEVICE__
783
double exp10(double __x) { return __ocml_exp10_f64(__x); }
784
 
785
__DEVICE__
786
double exp2(double __x) { return __ocml_exp2_f64(__x); }
787
 
788
__DEVICE__
789
double expm1(double __x) { return __ocml_expm1_f64(__x); }
790
 
791
__DEVICE__
792
double fabs(double __x) { return __builtin_fabs(__x); }
793
 
794
__DEVICE__
795
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
796
 
797
__DEVICE__
798
double floor(double __x) { return __ocml_floor_f64(__x); }
799
 
800
__DEVICE__
801
double fma(double __x, double __y, double __z) {
802
  return __ocml_fma_f64(__x, __y, __z);
803
}
804
 
805
__DEVICE__
806
double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
807
 
808
__DEVICE__
809
double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
810
 
811
__DEVICE__
812
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
813
 
814
__DEVICE__
815
double frexp(double __x, int *__nptr) {
816
  int __tmp;
817
#ifdef __OPENMP_AMDGCN__
818
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
819
#endif
820
  double __r =
821
      __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
822
  *__nptr = __tmp;
823
  return __r;
824
}
825
 
826
__DEVICE__
827
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
828
 
829
__DEVICE__
830
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
831
 
832
__DEVICE__
833
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
834
 
835
__DEVICE__
836
__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
837
 
838
__DEVICE__
839
__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
840
 
841
__DEVICE__
842
double j0(double __x) { return __ocml_j0_f64(__x); }
843
 
844
__DEVICE__
845
double j1(double __x) { return __ocml_j1_f64(__x); }
846
 
847
__DEVICE__
848
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
849
                                 // and the Miller & Brown algorithm
850
  //       for linear recurrences to get O(log n) steps, but it's unclear if
851
  //       it'd be beneficial in this case. Placeholder until OCML adds
852
  //       support.
853
  if (__n == 0)
854
    return j0(__x);
855
  if (__n == 1)
856
    return j1(__x);
857
 
858
  double __x0 = j0(__x);
859
  double __x1 = j1(__x);
860
  for (int __i = 1; __i < __n; ++__i) {
861
    double __x2 = (2 * __i) / __x * __x1 - __x0;
862
    __x0 = __x1;
863
    __x1 = __x2;
864
  }
865
  return __x1;
866
}
867
 
868
__DEVICE__
869
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
870
 
871
__DEVICE__
872
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
873
 
874
__DEVICE__
875
long long int llrint(double __x) { return __ocml_rint_f64(__x); }
876
 
877
__DEVICE__
878
long long int llround(double __x) { return __ocml_round_f64(__x); }
879
 
880
__DEVICE__
881
double log(double __x) { return __ocml_log_f64(__x); }
882
 
883
__DEVICE__
884
double log10(double __x) { return __ocml_log10_f64(__x); }
885
 
886
__DEVICE__
887
double log1p(double __x) { return __ocml_log1p_f64(__x); }
888
 
889
__DEVICE__
890
double log2(double __x) { return __ocml_log2_f64(__x); }
891
 
892
__DEVICE__
893
double logb(double __x) { return __ocml_logb_f64(__x); }
894
 
895
__DEVICE__
896
long int lrint(double __x) { return __ocml_rint_f64(__x); }
897
 
898
__DEVICE__
899
long int lround(double __x) { return __ocml_round_f64(__x); }
900
 
901
__DEVICE__
902
double modf(double __x, double *__iptr) {
903
  double __tmp;
904
#ifdef __OPENMP_AMDGCN__
905
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
906
#endif
907
  double __r =
908
      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
909
  *__iptr = __tmp;
910
 
911
  return __r;
912
}
913
 
914
__DEVICE__
915
double nan(const char *__tagp) {
916
#if !_WIN32
917
  union {
918
    double val;
919
    struct ieee_double {
920
      uint64_t mantissa : 51;
921
      uint32_t quiet : 1;
922
      uint32_t exponent : 11;
923
      uint32_t sign : 1;
924
    } bits;
925
  } __tmp;
926
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
927
 
928
  __tmp.bits.sign = 0u;
929
  __tmp.bits.exponent = ~0u;
930
  __tmp.bits.quiet = 1u;
931
  __tmp.bits.mantissa = __make_mantissa(__tagp);
932
 
933
  return __tmp.val;
934
#else
935
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
936
  uint64_t __val = __make_mantissa(__tagp);
937
  __val |= 0xFFF << 51;
938
  return *reinterpret_cast<double *>(&__val);
939
#endif
940
}
941
 
942
__DEVICE__
943
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
944
 
945
__DEVICE__
946
double nextafter(double __x, double __y) {
947
  return __ocml_nextafter_f64(__x, __y);
948
}
949
 
950
__DEVICE__
951
double norm(int __dim,
952
            const double *__a) { // TODO: placeholder until OCML adds support.
953
  double __r = 0;
954
  while (__dim--) {
955
    __r += __a[0] * __a[0];
956
    ++__a;
957
  }
958
 
959
  return __ocml_sqrt_f64(__r);
960
}
961
 
962
__DEVICE__
963
double norm3d(double __x, double __y, double __z) {
964
  return __ocml_len3_f64(__x, __y, __z);
965
}
966
 
967
__DEVICE__
968
double norm4d(double __x, double __y, double __z, double __w) {
969
  return __ocml_len4_f64(__x, __y, __z, __w);
970
}
971
 
972
__DEVICE__
973
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
974
 
975
__DEVICE__
976
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
977
 
978
__DEVICE__
979
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
980
 
981
__DEVICE__
982
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
983
 
984
__DEVICE__
985
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
986
 
987
__DEVICE__
988
double remainder(double __x, double __y) {
989
  return __ocml_remainder_f64(__x, __y);
990
}
991
 
992
__DEVICE__
993
double remquo(double __x, double __y, int *__quo) {
994
  int __tmp;
995
#ifdef __OPENMP_AMDGCN__
996
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
997
#endif
998
  double __r = __ocml_remquo_f64(
999
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
1000
  *__quo = __tmp;
1001
 
1002
  return __r;
1003
}
1004
 
1005
__DEVICE__
1006
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
1007
 
1008
__DEVICE__
1009
double rint(double __x) { return __ocml_rint_f64(__x); }
1010
 
1011
__DEVICE__
1012
double rnorm(int __dim,
1013
             const double *__a) { // TODO: placeholder until OCML adds support.
1014
  double __r = 0;
1015
  while (__dim--) {
1016
    __r += __a[0] * __a[0];
1017
    ++__a;
1018
  }
1019
 
1020
  return __ocml_rsqrt_f64(__r);
1021
}
1022
 
1023
__DEVICE__
1024
double rnorm3d(double __x, double __y, double __z) {
1025
  return __ocml_rlen3_f64(__x, __y, __z);
1026
}
1027
 
1028
__DEVICE__
1029
double rnorm4d(double __x, double __y, double __z, double __w) {
1030
  return __ocml_rlen4_f64(__x, __y, __z, __w);
1031
}
1032
 
1033
__DEVICE__
1034
double round(double __x) { return __ocml_round_f64(__x); }
1035
 
1036
__DEVICE__
1037
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1038
 
1039
__DEVICE__
1040
double scalbln(double __x, long int __n) {
1041
  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
1042
                         : __ocml_scalb_f64(__x, __n);
1043
}
1044
__DEVICE__
1045
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
1046
 
1047
__DEVICE__
1048
__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
1049
 
1050
__DEVICE__
1051
double sin(double __x) { return __ocml_sin_f64(__x); }
1052
 
1053
__DEVICE__
1054
void sincos(double __x, double *__sinptr, double *__cosptr) {
1055
  double __tmp;
1056
#ifdef __OPENMP_AMDGCN__
1057
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1058
#endif
1059
  *__sinptr = __ocml_sincos_f64(
1060
      __x, (__attribute__((address_space(5))) double *)&__tmp);
1061
  *__cosptr = __tmp;
1062
}
1063
 
1064
__DEVICE__
1065
void sincospi(double __x, double *__sinptr, double *__cosptr) {
1066
  double __tmp;
1067
#ifdef __OPENMP_AMDGCN__
1068
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1069
#endif
1070
  *__sinptr = __ocml_sincospi_f64(
1071
      __x, (__attribute__((address_space(5))) double *)&__tmp);
1072
  *__cosptr = __tmp;
1073
}
1074
 
1075
__DEVICE__
1076
double sinh(double __x) { return __ocml_sinh_f64(__x); }
1077
 
1078
__DEVICE__
1079
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1080
 
1081
__DEVICE__
1082
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
1083
 
1084
__DEVICE__
1085
double tan(double __x) { return __ocml_tan_f64(__x); }
1086
 
1087
__DEVICE__
1088
double tanh(double __x) { return __ocml_tanh_f64(__x); }
1089
 
1090
__DEVICE__
1091
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1092
 
1093
__DEVICE__
1094
double trunc(double __x) { return __ocml_trunc_f64(__x); }
1095
 
1096
__DEVICE__
1097
double y0(double __x) { return __ocml_y0_f64(__x); }
1098
 
1099
__DEVICE__
1100
double y1(double __x) { return __ocml_y1_f64(__x); }
1101
 
1102
__DEVICE__
1103
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
1104
                                 // and the Miller & Brown algorithm
1105
  //       for linear recurrences to get O(log n) steps, but it's unclear if
1106
  //       it'd be beneficial in this case. Placeholder until OCML adds
1107
  //       support.
1108
  if (__n == 0)
1109
    return y0(__x);
1110
  if (__n == 1)
1111
    return y1(__x);
1112
 
1113
  double __x0 = y0(__x);
1114
  double __x1 = y1(__x);
1115
  for (int __i = 1; __i < __n; ++__i) {
1116
    double __x2 = (2 * __i) / __x * __x1 - __x0;
1117
    __x0 = __x1;
1118
    __x1 = __x2;
1119
  }
1120
 
1121
  return __x1;
1122
}
1123
 
1124
// BEGIN INTRINSICS
1125
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1126
__DEVICE__
1127
double __dadd_rd(double __x, double __y) {
1128
  return __ocml_add_rtn_f64(__x, __y);
1129
}
1130
__DEVICE__
1131
double __dadd_rn(double __x, double __y) {
1132
  return __ocml_add_rte_f64(__x, __y);
1133
}
1134
__DEVICE__
1135
double __dadd_ru(double __x, double __y) {
1136
  return __ocml_add_rtp_f64(__x, __y);
1137
}
1138
__DEVICE__
1139
double __dadd_rz(double __x, double __y) {
1140
  return __ocml_add_rtz_f64(__x, __y);
1141
}
1142
#else
1143
__DEVICE__
1144
double __dadd_rn(double __x, double __y) { return __x + __y; }
1145
#endif
1146
 
1147
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1148
__DEVICE__
1149
double __ddiv_rd(double __x, double __y) {
1150
  return __ocml_div_rtn_f64(__x, __y);
1151
}
1152
__DEVICE__
1153
double __ddiv_rn(double __x, double __y) {
1154
  return __ocml_div_rte_f64(__x, __y);
1155
}
1156
__DEVICE__
1157
double __ddiv_ru(double __x, double __y) {
1158
  return __ocml_div_rtp_f64(__x, __y);
1159
}
1160
__DEVICE__
1161
double __ddiv_rz(double __x, double __y) {
1162
  return __ocml_div_rtz_f64(__x, __y);
1163
}
1164
#else
1165
__DEVICE__
1166
double __ddiv_rn(double __x, double __y) { return __x / __y; }
1167
#endif
1168
 
1169
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1170
__DEVICE__
1171
double __dmul_rd(double __x, double __y) {
1172
  return __ocml_mul_rtn_f64(__x, __y);
1173
}
1174
__DEVICE__
1175
double __dmul_rn(double __x, double __y) {
1176
  return __ocml_mul_rte_f64(__x, __y);
1177
}
1178
__DEVICE__
1179
double __dmul_ru(double __x, double __y) {
1180
  return __ocml_mul_rtp_f64(__x, __y);
1181
}
1182
__DEVICE__
1183
double __dmul_rz(double __x, double __y) {
1184
  return __ocml_mul_rtz_f64(__x, __y);
1185
}
1186
#else
1187
__DEVICE__
1188
double __dmul_rn(double __x, double __y) { return __x * __y; }
1189
#endif
1190
 
1191
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1192
__DEVICE__
1193
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
1194
__DEVICE__
1195
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1196
__DEVICE__
1197
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1198
__DEVICE__
1199
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1200
#else
1201
__DEVICE__
1202
double __drcp_rn(double __x) { return 1.0 / __x; }
1203
#endif
1204
 
1205
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1206
__DEVICE__
1207
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1208
__DEVICE__
1209
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1210
__DEVICE__
1211
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1212
__DEVICE__
1213
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1214
#else
1215
__DEVICE__
1216
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
1217
#endif
1218
 
1219
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1220
__DEVICE__
1221
double __dsub_rd(double __x, double __y) {
1222
  return __ocml_sub_rtn_f64(__x, __y);
1223
}
1224
__DEVICE__
1225
double __dsub_rn(double __x, double __y) {
1226
  return __ocml_sub_rte_f64(__x, __y);
1227
}
1228
__DEVICE__
1229
double __dsub_ru(double __x, double __y) {
1230
  return __ocml_sub_rtp_f64(__x, __y);
1231
}
1232
__DEVICE__
1233
double __dsub_rz(double __x, double __y) {
1234
  return __ocml_sub_rtz_f64(__x, __y);
1235
}
1236
#else
1237
__DEVICE__
1238
double __dsub_rn(double __x, double __y) { return __x - __y; }
1239
#endif
1240
 
1241
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1242
__DEVICE__
1243
double __fma_rd(double __x, double __y, double __z) {
1244
  return __ocml_fma_rtn_f64(__x, __y, __z);
1245
}
1246
__DEVICE__
1247
double __fma_rn(double __x, double __y, double __z) {
1248
  return __ocml_fma_rte_f64(__x, __y, __z);
1249
}
1250
__DEVICE__
1251
double __fma_ru(double __x, double __y, double __z) {
1252
  return __ocml_fma_rtp_f64(__x, __y, __z);
1253
}
1254
__DEVICE__
1255
double __fma_rz(double __x, double __y, double __z) {
1256
  return __ocml_fma_rtz_f64(__x, __y, __z);
1257
}
1258
#else
1259
__DEVICE__
1260
double __fma_rn(double __x, double __y, double __z) {
1261
  return __ocml_fma_f64(__x, __y, __z);
1262
}
1263
#endif
1264
// END INTRINSICS
1265
// END DOUBLE
1266
 
1267
// C only macros
1268
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1269
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1270
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1271
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1272
#define signbit(__x)                                                           \
1273
  _Generic((__x), float : __signbitf, double : __signbit)(__x)
1274
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1275
 
1276
#if defined(__cplusplus)
1277
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1278
  return (__arg1 < __arg2) ? __arg1 : __arg2;
1279
}
1280
 
1281
template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1282
  return (__arg1 > __arg2) ? __arg1 : __arg2;
1283
}
1284
 
1285
__DEVICE__ int min(int __arg1, int __arg2) {
1286
  return (__arg1 < __arg2) ? __arg1 : __arg2;
1287
}
1288
__DEVICE__ int max(int __arg1, int __arg2) {
1289
  return (__arg1 > __arg2) ? __arg1 : __arg2;
1290
}
1291
 
1292
__DEVICE__
1293
float max(float __x, float __y) { return fmaxf(__x, __y); }
1294
 
1295
__DEVICE__
1296
double max(double __x, double __y) { return fmax(__x, __y); }
1297
 
1298
__DEVICE__
1299
float min(float __x, float __y) { return fminf(__x, __y); }
1300
 
1301
__DEVICE__
1302
double min(double __x, double __y) { return fmin(__x, __y); }
1303
 
1304
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1305
__host__ inline static int min(int __arg1, int __arg2) {
1306
  return std::min(__arg1, __arg2);
1307
}
1308
 
1309
__host__ inline static int max(int __arg1, int __arg2) {
1310
  return std::max(__arg1, __arg2);
1311
}
1312
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1313
#endif
1314
 
1315
#pragma pop_macro("__DEVICE__")
1316
#pragma pop_macro("__RETURN_TYPE")
1317
 
1318
#endif // __CLANG_HIP_MATH_H__