WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – /llvm-build/x86_64/lib/clang/16/include/pmmintrin.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9
		10	#ifndef __PMMINTRIN_H
		11	#define __PMMINTRIN_H
		12
		13	#if !defined(__i386__) && !defined(__x86_64__)
		14	#error "This header is only meant to be used on x86 and x64 architecture"
		15	#endif
		16
		17	#include <emmintrin.h>
		18
		19	/* Default attributes for the functions in this file; undefined again at the end of the header. */
		20	#define __DEFAULT_FN_ATTRS \
		21	__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
		22
		23	/// Loads data from an unaligned memory location to elements in a 128-bit
		24	/// vector.
		25	///
		26	/// If the address of the data is not 16-byte aligned, the instruction may
		27	/// read two adjacent aligned blocks of memory to retrieve the requested
		28	/// data.
		29	///
		30	/// \headerfile <x86intrin.h>
		31	///
		32	/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
		33	///
		34	/// \param __p
		35	/// A pointer to the 128-bit integer data to load; need not be 16-byte aligned.
		36	/// \returns A 128-bit integer vector containing the loaded values.
		37	static __inline__ __m128i __DEFAULT_FN_ATTRS
		38	_mm_lddqu_si128(__m128i_u const *__p)
		39	{
		40	return (__m128i)__builtin_ia32_lddqu((char const *)__p);
		41	}
		42
		43	/// Subtracts the even-indexed values and adds the odd-indexed values of
		44	/// two 128-bit vectors of [4 x float].
		45	///
		46	/// \headerfile <x86intrin.h>
		47	///
		48	/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
		49	///
		50	/// \param __a
		51	/// A 128-bit vector of [4 x float] containing the left source operand.
		52	/// \param __b
		53	/// A 128-bit vector of [4 x float] containing the right source operand.
		54	/// \returns A 128-bit vector of [4 x float] containing the differences
		55	/// (elements 0 and 2) and sums (elements 1 and 3) of both operands.
		56	static __inline__ __m128 __DEFAULT_FN_ATTRS
		57	_mm_addsub_ps(__m128 __a, __m128 __b)
		58	{
		59	return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
		60	}
		61
		62	/// Horizontally adds the adjacent pairs of values contained in two
		63	/// 128-bit vectors of [4 x float].
		64	///
		65	/// \headerfile <x86intrin.h>
		66	///
		67	/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
		68	///
		69	/// \param __a
		70	/// A 128-bit vector of [4 x float] containing one of the source operands.
		71	/// The horizontal sums of the values are stored in the lower 64 bits of the
		72	/// destination.
		73	/// \param __b
		74	/// A 128-bit vector of [4 x float] containing one of the source operands.
		75	/// The horizontal sums of the values are stored in the upper 64 bits of the
		76	/// destination.
		77	/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
		78	/// both operands.
		79	static __inline__ __m128 __DEFAULT_FN_ATTRS
		80	_mm_hadd_ps(__m128 __a, __m128 __b)
		81	{
		82	return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
		83	}
		84
		85	/// Horizontally subtracts the adjacent pairs of values contained in two
		86	/// 128-bit vectors of [4 x float].
		87	///
		88	/// \headerfile <x86intrin.h>
		89	///
		90	/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
		91	///
		92	/// \param __a
		93	/// A 128-bit vector of [4 x float] containing one of the source operands.
		94	/// The horizontal differences between the values are stored in the lower
		95	/// 64 bits of the destination.
		96	/// \param __b
		97	/// A 128-bit vector of [4 x float] containing one of the source operands.
		98	/// The horizontal differences between the values are stored in the upper
		99	/// 64 bits of the destination.
		100	/// \returns A 128-bit vector of [4 x float] containing the horizontal
		101	/// differences of both operands.
		102	static __inline__ __m128 __DEFAULT_FN_ATTRS
		103	_mm_hsub_ps(__m128 __a, __m128 __b)
		104	{
		105	return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
		106	}
		107
		108	/// Moves and duplicates odd-indexed values (elements 1 and 3) from a
		109	/// 128-bit vector of [4 x float] to float values stored in a 128-bit
		110	/// vector of [4 x float].
		111	///
		112	/// \headerfile <x86intrin.h>
		113	///
		114	/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
		115	///
		116	/// \param __a
		117	/// A 128-bit vector of [4 x float]. \n
		118	/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
		119	/// the destination. \n
		120	/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
		121	/// destination.
		122	/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
		123	/// values.
		124	static __inline__ __m128 __DEFAULT_FN_ATTRS
		125	_mm_movehdup_ps(__m128 __a)
		126	{
		127	return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
		128	}
		129
		130	/// Moves and duplicates even-indexed values (elements 0 and 2) from a 128-bit
		131	/// vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
		132	///
		133	/// \headerfile <x86intrin.h>
		134	///
		135	/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
		136	///
		137	/// \param __a
		138	/// A 128-bit vector of [4 x float]. \n
		139	/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
		140	/// the destination. \n
		141	/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
		142	/// destination.
		143	/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
		144	/// values.
		145	static __inline__ __m128 __DEFAULT_FN_ATTRS
		146	_mm_moveldup_ps(__m128 __a)
		147	{
		148	return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
		149	}
		150
		151	/// Subtracts the even-indexed values and adds the odd-indexed values of
		152	/// two 128-bit vectors of [2 x double].
		153	///
		154	/// \headerfile <x86intrin.h>
		155	///
		156	/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
		157	///
		158	/// \param __a
		159	/// A 128-bit vector of [2 x double] containing the left source operand.
		160	/// \param __b
		161	/// A 128-bit vector of [2 x double] containing the right source operand.
		162	/// \returns A 128-bit vector of [2 x double] containing the difference
		163	/// (element 0) and sum (element 1) of both operands.
		164	static __inline__ __m128d __DEFAULT_FN_ATTRS
		165	_mm_addsub_pd(__m128d __a, __m128d __b)
		166	{
		167	return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
		168	}
		169
		170	/// Horizontally adds the pairs of values contained in two 128-bit
		171	/// vectors of [2 x double].
		172	///
		173	/// \headerfile <x86intrin.h>
		174	///
		175	/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
		176	///
		177	/// \param __a
		178	/// A 128-bit vector of [2 x double] containing one of the source operands.
		179	/// The horizontal sum of the values is stored in bits [63:0] of the
		180	/// destination.
		181	/// \param __b
		182	/// A 128-bit vector of [2 x double] containing one of the source operands.
		183	/// The horizontal sum of the values is stored in bits [127:64] of the
		184	/// destination.
		185	/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
		186	/// both operands.
		187	static __inline__ __m128d __DEFAULT_FN_ATTRS
		188	_mm_hadd_pd(__m128d __a, __m128d __b)
		189	{
		190	return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
		191	}
		192
		193	/// Horizontally subtracts the pairs of values contained in two 128-bit
		194	/// vectors of [2 x double].
		195	///
		196	/// \headerfile <x86intrin.h>
		197	///
		198	/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
		199	///
		200	/// \param __a
		201	/// A 128-bit vector of [2 x double] containing one of the source operands.
		202	/// The horizontal difference of the values is stored in bits [63:0] of
		203	/// the destination.
		204	/// \param __b
		205	/// A 128-bit vector of [2 x double] containing one of the source operands.
		206	/// The horizontal difference of the values is stored in bits [127:64] of
		207	/// the destination.
		208	/// \returns A 128-bit vector of [2 x double] containing the horizontal
		209	/// differences of both operands.
		210	static __inline__ __m128d __DEFAULT_FN_ATTRS
		211	_mm_hsub_pd(__m128d __a, __m128d __b)
		212	{
		213	return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
		214	}
		215
		216	/// Loads one double-precision value and duplicates it into both elements
		217	/// of a 128-bit vector of [2 x double]. This macro expands to _mm_load1_pd.
		218	///
		219	/// \headerfile <x86intrin.h>
		220	///
		221	/// \code
		222	/// __m128d _mm_loaddup_pd(double const *dp);
		223	/// \endcode
		224	///
		225	/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
		226	///
		227	/// \param dp
		228	/// A pointer to a double-precision value to be moved and duplicated.
		229	/// \returns A 128-bit vector of [2 x double] containing the moved and
		230	/// duplicated values.
		231	#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
		232
		233	/// Moves and duplicates the double-precision value in the lower bits of
		234	/// a 128-bit vector of [2 x double] to double-precision values stored in a
		235	/// 128-bit vector of [2 x double].
		236	///
		237	/// \headerfile <x86intrin.h>
		238	///
		239	/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
		240	///
		241	/// \param __a
		242	/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
		243	/// [127:64] and [63:0] of the destination.
		244	/// \returns A 128-bit vector of [2 x double] whose two elements both hold
		245	/// the lower element of \a __a.
		246	static __inline__ __m128d __DEFAULT_FN_ATTRS
		247	_mm_movedup_pd(__m128d __a)
		248	{
		249	return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
		250	}
		251
		252	/// Establishes a linear address memory range to be monitored and puts
		253	/// the processor in the monitor event pending state. Data stored in the
		254	/// monitored address range causes the processor to exit the pending state.
		255	///
		256	/// \headerfile <x86intrin.h>
		257	///
		258	/// This intrinsic corresponds to the <c> MONITOR </c> instruction.
		259	///
		260	/// \param __p
		261	/// The address of the memory range to be monitored. The size of the range
		262	/// is determined by CPUID function 0000_0005h.
		263	/// \param __extensions
		264	/// Optional extensions for the monitoring state.
		265	/// \param __hints
		266	/// Optional hints for the monitoring state.
		267	static __inline__ void __DEFAULT_FN_ATTRS
		268	_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
		269	{
		270	__builtin_ia32_monitor(__p, __extensions, __hints);
		271	}
		272
		273	/// Used with the MONITOR instruction (see _mm_monitor) to wait while the
		274	/// processor is in the monitor event pending state. Data stored in the
		275	/// monitored address range causes the processor to exit the pending state.
		276	///
		277	/// \headerfile <x86intrin.h>
		278	///
		279	/// This intrinsic corresponds to the <c> MWAIT </c> instruction.
		280	///
		281	/// \param __extensions
		282	/// Optional extensions for the monitoring state, which may vary by
		283	/// processor.
		284	/// \param __hints
		285	/// Optional hints for the monitoring state, which may vary by processor.
		286	static __inline__ void __DEFAULT_FN_ATTRS
		287	_mm_mwait(unsigned __extensions, unsigned __hints)
		288	{
		289	__builtin_ia32_mwait(__extensions, __hints);
		290	}
		291
		292	#undef __DEFAULT_FN_ATTRS
		293
		294	#endif /* __PMMINTRIN_H */

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite/llvm-build/x86_64/lib/clang/16/include/pmmintrin.h – Rev 14