WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – //llvm-build/x86_64/lib/clang/16/include/__clang_cuda_complex_builtins.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9
		10	#ifndef __CLANG_CUDA_COMPLEX_BUILTINS
		11	#define __CLANG_CUDA_COMPLEX_BUILTINS
		12
		13	// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
		14	// libgcc functions that clang assumes are available when compiling c99 complex
		15	// operations. (These implementations come from libc++, and have been modified
		16	// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
		17
		18	#pragma push_macro("__DEVICE__")
		19	#if defined(__OPENMP_NVPTX__) \|\| defined(__OPENMP_AMDGCN__)
		20	#pragma omp declare target
		21	#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
		22	#else
		23	#define __DEVICE__ __device__ inline
		24	#endif
		25
		26	// To make the algorithms available for C and C++ in CUDA and OpenMP we select
		27	// different but equivalent function versions. TODO: For OpenMP we currently
		28	// select the native builtins as the overload support for templates is lacking.
		29	#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
		30	#define _ISNANd std::isnan
		31	#define _ISNANf std::isnan
		32	#define _ISINFd std::isinf
		33	#define _ISINFf std::isinf
		34	#define _ISFINITEd std::isfinite
		35	#define _ISFINITEf std::isfinite
		36	#define _COPYSIGNd std::copysign
		37	#define _COPYSIGNf std::copysign
		38	#define _SCALBNd std::scalbn
		39	#define _SCALBNf std::scalbn
		40	#define _ABSd std::abs
		41	#define _ABSf std::abs
		42	#define _LOGBd std::logb
		43	#define _LOGBf std::logb
		44	// Rather than pulling in std::max from algorithm everytime, use available ::max.
		45	#define _fmaxd max
		46	#define _fmaxf max
		47	#else
		48	#ifdef __AMDGCN__
		49	#define _ISNANd __ocml_isnan_f64
		50	#define _ISNANf __ocml_isnan_f32
		51	#define _ISINFd __ocml_isinf_f64
		52	#define _ISINFf __ocml_isinf_f32
		53	#define _ISFINITEd __ocml_isfinite_f64
		54	#define _ISFINITEf __ocml_isfinite_f32
		55	#define _COPYSIGNd __ocml_copysign_f64
		56	#define _COPYSIGNf __ocml_copysign_f32
		57	#define _SCALBNd __ocml_scalbn_f64
		58	#define _SCALBNf __ocml_scalbn_f32
		59	#define _ABSd __ocml_fabs_f64
		60	#define _ABSf __ocml_fabs_f32
		61	#define _LOGBd __ocml_logb_f64
		62	#define _LOGBf __ocml_logb_f32
		63	#define _fmaxd __ocml_fmax_f64
		64	#define _fmaxf __ocml_fmax_f32
		65	#else
		66	#define _ISNANd __nv_isnand
		67	#define _ISNANf __nv_isnanf
		68	#define _ISINFd __nv_isinfd
		69	#define _ISINFf __nv_isinff
		70	#define _ISFINITEd __nv_isfinited
		71	#define _ISFINITEf __nv_finitef
		72	#define _COPYSIGNd __nv_copysign
		73	#define _COPYSIGNf __nv_copysignf
		74	#define _SCALBNd __nv_scalbn
		75	#define _SCALBNf __nv_scalbnf
		76	#define _ABSd __nv_fabs
		77	#define _ABSf __nv_fabsf
		78	#define _LOGBd __nv_logb
		79	#define _LOGBf __nv_logbf
		80	#define _fmaxd __nv_fmax
		81	#define _fmaxf __nv_fmaxf
		82	#endif
		83	#endif
		84
		85	#if defined(__cplusplus)
		86	extern "C" {
		87	#endif
		88
		89	__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
		90	double __d) {
		91	double __ac = __a * __c;
		92	double __bd = __b * __d;
		93	double __ad = __a * __d;
		94	double __bc = __b * __c;
		95	double _Complex z;
		96	__real__(z) = __ac - __bd;
		97	__imag__(z) = __ad + __bc;
		98	if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
		99	int __recalc = 0;
		100	if (_ISINFd(__a) \|\| _ISINFd(__b)) {
		101	__a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
		102	__b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
		103	if (_ISNANd(__c))
		104	__c = _COPYSIGNd(0, __c);
		105	if (_ISNANd(__d))
		106	__d = _COPYSIGNd(0, __d);
		107	__recalc = 1;
		108	}
		109	if (_ISINFd(__c) \|\| _ISINFd(__d)) {
		110	__c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
		111	__d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
		112	if (_ISNANd(__a))
		113	__a = _COPYSIGNd(0, __a);
		114	if (_ISNANd(__b))
		115	__b = _COPYSIGNd(0, __b);
		116	__recalc = 1;
		117	}
		118	if (!__recalc &&
		119	(_ISINFd(__ac) \|\| _ISINFd(__bd) \|\| _ISINFd(__ad) \|\| _ISINFd(__bc))) {
		120	if (_ISNANd(__a))
		121	__a = _COPYSIGNd(0, __a);
		122	if (_ISNANd(__b))
		123	__b = _COPYSIGNd(0, __b);
		124	if (_ISNANd(__c))
		125	__c = _COPYSIGNd(0, __c);
		126	if (_ISNANd(__d))
		127	__d = _COPYSIGNd(0, __d);
		128	__recalc = 1;
		129	}
		130	if (__recalc) {
		131	// Can't use std::numeric_limits<double>::infinity() -- that doesn't have
		132	// a device overload (and isn't constexpr before C++11, naturally).
		133	__real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
		134	__imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
		135	}
		136	}
		137	return z;
		138	}
		139
		140	__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
		141	float __ac = __a * __c;
		142	float __bd = __b * __d;
		143	float __ad = __a * __d;
		144	float __bc = __b * __c;
		145	float _Complex z;
		146	__real__(z) = __ac - __bd;
		147	__imag__(z) = __ad + __bc;
		148	if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
		149	int __recalc = 0;
		150	if (_ISINFf(__a) \|\| _ISINFf(__b)) {
		151	__a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
		152	__b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
		153	if (_ISNANf(__c))
		154	__c = _COPYSIGNf(0, __c);
		155	if (_ISNANf(__d))
		156	__d = _COPYSIGNf(0, __d);
		157	__recalc = 1;
		158	}
		159	if (_ISINFf(__c) \|\| _ISINFf(__d)) {
		160	__c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
		161	__d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
		162	if (_ISNANf(__a))
		163	__a = _COPYSIGNf(0, __a);
		164	if (_ISNANf(__b))
		165	__b = _COPYSIGNf(0, __b);
		166	__recalc = 1;
		167	}
		168	if (!__recalc &&
		169	(_ISINFf(__ac) \|\| _ISINFf(__bd) \|\| _ISINFf(__ad) \|\| _ISINFf(__bc))) {
		170	if (_ISNANf(__a))
		171	__a = _COPYSIGNf(0, __a);
		172	if (_ISNANf(__b))
		173	__b = _COPYSIGNf(0, __b);
		174	if (_ISNANf(__c))
		175	__c = _COPYSIGNf(0, __c);
		176	if (_ISNANf(__d))
		177	__d = _COPYSIGNf(0, __d);
		178	__recalc = 1;
		179	}
		180	if (__recalc) {
		181	__real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
		182	__imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
		183	}
		184	}
		185	return z;
		186	}
		187
		188	__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
		189	double __d) {
		190	int __ilogbw = 0;
		191	// Can't use std::max, because that's defined in <algorithm>, and we don't
		192	// want to pull that in for every compile. The CUDA headers define
		193	// ::max(float, float) and ::max(double, double), which is sufficient for us.
		194	double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
		195	if (_ISFINITEd(__logbw)) {
		196	__ilogbw = (int)__logbw;
		197	__c = _SCALBNd(__c, -__ilogbw);
		198	__d = _SCALBNd(__d, -__ilogbw);
		199	}
		200	double __denom = __c * __c + __d * __d;
		201	double _Complex z;
		202	__real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
		203	__imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
		204	if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
		205	if ((__denom == 0.0) && (!_ISNANd(__a) \|\| !_ISNANd(__b))) {
		206	__real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
		207	__imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
		208	} else if ((_ISINFd(__a) \|\| _ISINFd(__b)) && _ISFINITEd(__c) &&
		209	_ISFINITEd(__d)) {
		210	__a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
		211	__b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
		212	__real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
		213	__imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
		214	} else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
		215	_ISFINITEd(__b)) {
		216	__c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
		217	__d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
		218	__real__(z) = 0.0 * (__a * __c + __b * __d);
		219	__imag__(z) = 0.0 * (__b * __c - __a * __d);
		220	}
		221	}
		222	return z;
		223	}
		224
		225	__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
		226	int __ilogbw = 0;
		227	float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
		228	if (_ISFINITEf(__logbw)) {
		229	__ilogbw = (int)__logbw;
		230	__c = _SCALBNf(__c, -__ilogbw);
		231	__d = _SCALBNf(__d, -__ilogbw);
		232	}
		233	float __denom = __c * __c + __d * __d;
		234	float _Complex z;
		235	__real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
		236	__imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
		237	if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
		238	if ((__denom == 0) && (!_ISNANf(__a) \|\| !_ISNANf(__b))) {
		239	__real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
		240	__imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
		241	} else if ((_ISINFf(__a) \|\| _ISINFf(__b)) && _ISFINITEf(__c) &&
		242	_ISFINITEf(__d)) {
		243	__a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
		244	__b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
		245	__real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
		246	__imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
		247	} else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
		248	_ISFINITEf(__b)) {
		249	__c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
		250	__d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
		251	__real__(z) = 0 * (__a * __c + __b * __d);
		252	__imag__(z) = 0 * (__b * __c - __a * __d);
		253	}
		254	}
		255	return z;
		256	}
		257
		258	#if defined(__cplusplus)
		259	} // extern "C"
		260	#endif
		261
		262	#undef _ISNANd
		263	#undef _ISNANf
		264	#undef _ISINFd
		265	#undef _ISINFf
		266	#undef _COPYSIGNd
		267	#undef _COPYSIGNf
		268	#undef _ISFINITEd
		269	#undef _ISFINITEf
		270	#undef _SCALBNd
		271	#undef _SCALBNf
		272	#undef _ABSd
		273	#undef _ABSf
		274	#undef _LOGBd
		275	#undef _LOGBf
		276	#undef _fmaxd
		277	#undef _fmaxf
		278
		279	#if defined(__OPENMP_NVPTX__) \|\| defined(__OPENMP_AMDGCN__)
		280	#pragma omp end declare target
		281	#endif
		282
		283	#pragma pop_macro("__DEVICE__")
		284
		285	#endif // __CLANG_CUDA_COMPLEX_BUILTINS

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite//llvm-build/x86_64/lib/clang/16/include/__clang_cuda_complex_builtins.h – Rev 14