WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – /llvm-build/x86_64/lib/clang/16/include/avxifmaintrin.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9
		10	#ifndef __IMMINTRIN_H
		11	#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
		12	#endif
		13
		14	#ifndef __AVXIFMAINTRIN_H
		15	#define __AVXIFMAINTRIN_H
		16
		17	/* Define the default attributes for the functions in this file. */
		18	#define __DEFAULT_FN_ATTRS128 \
		19	__attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \
		20	__min_vector_width__(128)))
		21	#define __DEFAULT_FN_ATTRS256 \
		22	__attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \
		23	__min_vector_width__(256)))
		24
// The intrinsics below must be emitted with the VEX encoding.
		26
		27	/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
		28	/// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
		29	/// unsigned integer from the intermediate result with the corresponding
		30	/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
		31	///
		32	/// \headerfile <immintrin.h>
		33	///
		34	/// \code
		35	/// __m128i
		36	/// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
		37	/// \endcode
		38	///
		39	/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
		40	///
		41	/// \return
		42	/// return __m128i dst.
		43	/// \param __X
		44	/// A 128-bit vector of [2 x i64]
		45	/// \param __Y
		46	/// A 128-bit vector of [2 x i64]
		47	/// \param __Z
		48	/// A 128-bit vector of [2 x i64]
		49	///
		50	/// \code{.operation}
		51	/// FOR j := 0 to 1
		52	/// i := j*64
		53	/// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
		54	/// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
		55	/// ENDFOR
		56	/// dst[MAX:128] := 0
		57	/// \endcode
		58	static __inline__ __m128i __DEFAULT_FN_ATTRS128
		59	_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
		60	return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
		61	(__v2di)__Z);
		62	}
		63
		64	/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
		65	/// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
		66	/// unsigned integer from the intermediate result with the corresponding
		67	/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
		68	///
		69	/// \headerfile <immintrin.h>
		70	///
		71	/// \code
		72	/// __m256i
		73	/// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
		74	/// \endcode
		75	///
		76	/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
		77	///
		78	/// \return
		79	/// return __m256i dst.
		80	/// \param __X
		81	/// A 256-bit vector of [4 x i64]
		82	/// \param __Y
		83	/// A 256-bit vector of [4 x i64]
		84	/// \param __Z
		85	/// A 256-bit vector of [4 x i64]
		86	///
		87	/// \code{.operation}
		88	/// FOR j := 0 to 3
		89	/// i := j*64
		90	/// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
		91	/// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
		92	/// ENDFOR
		93	/// dst[MAX:256] := 0
		94	/// \endcode
		95	static __inline__ __m256i __DEFAULT_FN_ATTRS256
		96	_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
		97	return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
		98	(__v4di)__Z);
		99	}
		100
		101	/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
		102	/// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
		103	/// unsigned integer from the intermediate result with the corresponding
		104	/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
		105	///
		106	/// \headerfile <immintrin.h>
		107	///
		108	/// \code
		109	/// __m128i
		110	/// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
		111	/// \endcode
		112	///
		113	/// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
		114	///
		115	/// \return
		116	/// return __m128i dst.
		117	/// \param __X
		118	/// A 128-bit vector of [2 x i64]
		119	/// \param __Y
		120	/// A 128-bit vector of [2 x i64]
		121	/// \param __Z
		122	/// A 128-bit vector of [2 x i64]
		123	///
		124	/// \code{.operation}
		125	/// FOR j := 0 to 1
		126	/// i := j*64
		127	/// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
		128	/// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
		129	/// ENDFOR
		130	/// dst[MAX:128] := 0
		131	/// \endcode
		132	static __inline__ __m128i __DEFAULT_FN_ATTRS128
		133	_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
		134	return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
		135	(__v2di)__Z);
		136	}
		137
		138	/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
		139	/// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
		140	/// unsigned integer from the intermediate result with the corresponding
		141	/// unsigned 64-bit integer in \a __X, and store the results in \a dst.
		142	///
		143	/// \headerfile <immintrin.h>
		144	///
		145	/// \code
		146	/// __m256i
		147	/// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
		148	/// \endcode
		149	///
		150	/// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
		151	///
		152	/// \return
		153	/// return __m256i dst.
		154	/// \param __X
		155	/// A 256-bit vector of [4 x i64]
		156	/// \param __Y
		157	/// A 256-bit vector of [4 x i64]
		158	/// \param __Z
		159	/// A 256-bit vector of [4 x i64]
		160	///
		161	/// \code{.operation}
		162	/// FOR j := 0 to 3
		163	/// i := j*64
		164	/// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
		165	/// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
		166	/// ENDFOR
		167	/// dst[MAX:256] := 0
		168	/// \endcode
		169	static __inline__ __m256i __DEFAULT_FN_ATTRS256
		170	_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
		171	return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
		172	(__v4di)__Z);
		173	}
		174	#undef __DEFAULT_FN_ATTRS128
		175	#undef __DEFAULT_FN_ATTRS256
		176
		177	#endif // __AVXIFMAINTRIN_H

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite/llvm-build/x86_64/lib/clang/16/include/avxifmaintrin.h – Rev 14