WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – //llvm-build/x86_64/lib/clang/16/include/tmmintrin.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9
		10	#ifndef __TMMINTRIN_H
		11	#define __TMMINTRIN_H
		12
		13	#if !defined(__i386__) && !defined(__x86_64__)
		14	#error "This header is only meant to be used on x86 and x64 architecture"
		15	#endif
		16
		17	#include <pmmintrin.h>
		18
		19	/* Default attributes for the intrinsics in this file: always inlined, no debug info, SSSE3 target (the _MMX variant also enables MMX), minimum vector width 64. */
		20	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
		21	#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
		22
		23	/// Computes the absolute value of each of the eight packed 8-bit signed
		24	/// integers in \a __a and returns them as 8-bit unsigned integers in the
		25	/// corresponding elements of the result.
		26	///
		27	/// \headerfile <x86intrin.h>
		28	///
		29	/// This intrinsic corresponds to the \c PABSB instruction.
		30	///
		31	/// \param __a
		32	/// A 64-bit vector of [8 x i8] holding the signed source values.
		33	/// \returns A 64-bit integer vector containing the absolute value of each
		34	/// element of \a __a.
		35	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		36	_mm_abs_pi8(__m64 __a)
		37	{
		38	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
		39	}
		40
		41	/// Computes the absolute value of each of the sixteen packed 8-bit signed
		42	/// integers in \a __a and returns them as 8-bit unsigned integers in the
		43	/// corresponding elements of the result.
		44	///
		45	/// \headerfile <x86intrin.h>
		46	///
		47	/// This intrinsic corresponds to the \c VPABSB instruction.
		48	///
		49	/// \param __a
		50	/// A 128-bit vector of [16 x i8] holding the signed source values.
		51	/// \returns A 128-bit integer vector containing the absolute value of each
		52	/// element of \a __a.
		53	static __inline__ __m128i __DEFAULT_FN_ATTRS
		54	_mm_abs_epi8(__m128i __a)
		55	{
		56	return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
		57	}
		58
		59	/// Computes the absolute value of each of the four packed 16-bit signed
		60	/// integers in \a __a and returns them as 16-bit unsigned integers in the
		61	/// corresponding elements of the result.
		62	///
		63	/// \headerfile <x86intrin.h>
		64	///
		65	/// This intrinsic corresponds to the \c PABSW instruction.
		66	///
		67	/// \param __a
		68	/// A 64-bit vector of [4 x i16] holding the signed source values.
		69	/// \returns A 64-bit integer vector containing the absolute value of each
		70	/// element of \a __a.
		71	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		72	_mm_abs_pi16(__m64 __a)
		73	{
		74	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
		75	}
		76
		77	/// Computes the absolute value of each of the eight packed 16-bit signed
		78	/// integers in \a __a and returns them as 16-bit unsigned integers in the
		79	/// corresponding elements of the result.
		80	///
		81	/// \headerfile <x86intrin.h>
		82	///
		83	/// This intrinsic corresponds to the \c VPABSW instruction.
		84	///
		85	/// \param __a
		86	/// A 128-bit vector of [8 x i16] holding the signed source values.
		87	/// \returns A 128-bit integer vector containing the absolute value of each
		88	/// element of \a __a.
		89	static __inline__ __m128i __DEFAULT_FN_ATTRS
		90	_mm_abs_epi16(__m128i __a)
		91	{
		92	return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
		93	}
		94
		95	/// Computes the absolute value of each of the two packed 32-bit signed
		96	/// integers in \a __a and returns them as 32-bit unsigned integers in the
		97	/// corresponding elements of the result.
		98	///
		99	/// \headerfile <x86intrin.h>
		100	///
		101	/// This intrinsic corresponds to the \c PABSD instruction.
		102	///
		103	/// \param __a
		104	/// A 64-bit vector of [2 x i32] holding the signed source values.
		105	/// \returns A 64-bit integer vector containing the absolute value of each
		106	/// element of \a __a.
		107	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		108	_mm_abs_pi32(__m64 __a)
		109	{
		110	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
		111	}
		112
		113	/// Computes the absolute value of each of the four packed 32-bit signed
		114	/// integers in \a __a and returns them as 32-bit unsigned integers in the
		115	/// corresponding elements of the result.
		116	///
		117	/// \headerfile <x86intrin.h>
		118	///
		119	/// This intrinsic corresponds to the \c VPABSD instruction.
		120	///
		121	/// \param __a
		122	/// A 128-bit vector of [4 x i32] holding the signed source values.
		123	/// \returns A 128-bit integer vector containing the absolute value of each
		124	/// element of \a __a.
		125	static __inline__ __m128i __DEFAULT_FN_ATTRS
		126	_mm_abs_epi32(__m128i __a)
		127	{
		128	return (__m128i)__builtin_elementwise_abs((__v4si)__a);
		129	}
		130
		131	/// Concatenates the two 128-bit integer vector operands, and
		132	/// right-shifts the concatenated value by the number of bytes specified in
		133	/// the immediate operand.
		134	///
		135	/// \headerfile <x86intrin.h>
		136	///
		137	/// \code
		138	/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
		139	/// \endcode
		140	///
		141	/// This intrinsic corresponds to the \c PALIGNR instruction.
		142	///
		143	/// \param a
		144	/// A 128-bit vector of [16 x i8] containing the first source operand.
		145	/// \param b
		146	/// A 128-bit vector of [16 x i8] containing the second source operand.
		147	/// \param n
		148	/// An immediate (constant) operand giving the number of bytes by which the concatenated value is right-shifted.
		149	/// \returns A 128-bit integer vector containing the concatenated right-shifted
		150	/// value.
		151	#define _mm_alignr_epi8(a, b, n) \
		152	((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
		153	(__v16qi)(__m128i)(b), (n)))
		154
		155	/// Concatenates the two 64-bit integer vector operands, and right-shifts
		156	/// the concatenated value by the number of bytes given in the immediate operand.
		157	///
		158	/// \headerfile <x86intrin.h>
		159	///
		160	/// \code
		161	/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
		162	/// \endcode
		163	///
		164	/// This intrinsic corresponds to the \c PALIGNR instruction.
		165	///
		166	/// \param a
		167	/// A 64-bit vector of [8 x i8] containing the first source operand.
		168	/// \param b
		169	/// A 64-bit vector of [8 x i8] containing the second source operand.
		170	/// \param n
		171	/// An immediate (constant) operand giving the number of bytes by which the concatenated value is right-shifted.
		172	/// \returns A 64-bit integer vector containing the concatenated right-shifted
		173	/// value.
		174	#define _mm_alignr_pi8(a, b, n) \
		175	((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
		176
		177	/// Horizontally adds the adjacent pairs of 16-bit values contained in two
		178	/// packed 128-bit vectors of [8 x i16].
		179	///
		180	/// \headerfile <x86intrin.h>
		181	///
		182	/// This intrinsic corresponds to the \c VPHADDW instruction.
		183	///
		184	/// \param __a
		185	/// A 128-bit vector of [8 x i16] containing the first source operand. The
		186	/// horizontal sums of its adjacent pairs are stored in the lower half of
		187	/// the destination.
		188	/// \param __b
		189	/// A 128-bit vector of [8 x i16] containing the second source operand. The
		190	/// horizontal sums of its adjacent pairs are stored in the upper half of
		191	/// the destination.
		192	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
		193	/// both operands.
		194	static __inline__ __m128i __DEFAULT_FN_ATTRS
		195	_mm_hadd_epi16(__m128i __a, __m128i __b)
		196	{
		197	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
		198	}
		199
		200	/// Horizontally adds the adjacent pairs of 32-bit values contained in two
		201	/// packed 128-bit vectors of [4 x i32].
		202	///
		203	/// \headerfile <x86intrin.h>
		204	///
		205	/// This intrinsic corresponds to the \c VPHADDD instruction.
		206	///
		207	/// \param __a
		208	/// A 128-bit vector of [4 x i32] containing the first source operand. The
		209	/// horizontal sums of its adjacent pairs are stored in the lower half of
		210	/// the destination.
		211	/// \param __b
		212	/// A 128-bit vector of [4 x i32] containing the second source operand. The
		213	/// horizontal sums of its adjacent pairs are stored in the upper half of
		214	/// the destination.
		215	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
		216	/// both operands.
		217	static __inline__ __m128i __DEFAULT_FN_ATTRS
		218	_mm_hadd_epi32(__m128i __a, __m128i __b)
		219	{
		220	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
		221	}
		222
		223	/// Horizontally adds the adjacent pairs of 16-bit values contained in two
		224	/// packed 64-bit vectors of [4 x i16].
		225	///
		226	/// \headerfile <x86intrin.h>
		227	///
		228	/// This intrinsic corresponds to the \c PHADDW instruction.
		229	///
		230	/// \param __a
		231	/// A 64-bit vector of [4 x i16] containing the first source operand. The
		232	/// horizontal sums of its adjacent pairs are stored in the lower half of
		233	/// the destination.
		234	/// \param __b
		235	/// A 64-bit vector of [4 x i16] containing the second source operand. The
		236	/// horizontal sums of its adjacent pairs are stored in the upper half of
		237	/// the destination.
		238	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
		239	/// operands.
		240	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		241	_mm_hadd_pi16(__m64 __a, __m64 __b)
		242	{
		243	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
		244	}
		245
		246	/// Horizontally adds the adjacent pairs of 32-bit values contained in two
		247	/// packed 64-bit vectors of [2 x i32].
		248	///
		249	/// \headerfile <x86intrin.h>
		250	///
		251	/// This intrinsic corresponds to the \c PHADDD instruction.
		252	///
		253	/// \param __a
		254	/// A 64-bit vector of [2 x i32] containing the first source operand. The
		255	/// horizontal sum of its two values is stored in the lower half of the
		256	/// destination.
		257	/// \param __b
		258	/// A 64-bit vector of [2 x i32] containing the second source operand. The
		259	/// horizontal sum of its two values is stored in the upper half of the
		260	/// destination.
		261	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
		262	/// operands.
		263	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		264	_mm_hadd_pi32(__m64 __a, __m64 __b)
		265	{
		266	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
		267	}
		268
		269	/// Horizontally adds the adjacent pairs of values contained in 2 packed
		270	/// 128-bit vectors of [8 x i16], using signed saturation: sums above
		271	/// 0x7FFF (32767) become 0x7FFF, and sums below 0x8000 (-32768 as a signed
		272	/// 16-bit value) become 0x8000.
		273	///
		274	/// \headerfile <x86intrin.h>
		275	///
		276	/// This intrinsic corresponds to the \c VPHADDSW instruction.
		277	///
		278	/// \param __a
		279	/// A 128-bit vector of [8 x i16] containing the first source operand. The
		280	/// saturated sums of its adjacent pairs are stored in the lower half of
		281	/// the destination.
		282	/// \param __b
		283	/// A 128-bit vector of [8 x i16] containing the second source operand. The
		284	/// saturated sums of its adjacent pairs are stored in the upper half of
		285	/// the destination.
		286	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
		287	/// sums of both operands.
		288	static __inline__ __m128i __DEFAULT_FN_ATTRS
		289	_mm_hadds_epi16(__m128i __a, __m128i __b)
		290	{
		291	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
		292	}
		293
		294	/// Horizontally adds the adjacent pairs of values contained in 2 packed
		295	/// 64-bit vectors of [4 x i16], using signed saturation: sums above
		296	/// 0x7FFF (32767) become 0x7FFF, and sums below 0x8000 (-32768 as a signed
		297	/// 16-bit value) become 0x8000.
		298	///
		299	/// \headerfile <x86intrin.h>
		300	///
		301	/// This intrinsic corresponds to the \c PHADDSW instruction.
		302	///
		303	/// \param __a
		304	/// A 64-bit vector of [4 x i16] containing the first source operand. The
		305	/// saturated sums of its adjacent pairs are stored in the lower half of
		306	/// the destination.
		307	/// \param __b
		308	/// A 64-bit vector of [4 x i16] containing the second source operand. The
		309	/// saturated sums of its adjacent pairs are stored in the upper half of
		310	/// the destination.
		311	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
		312	/// sums of both operands.
		313	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		314	_mm_hadds_pi16(__m64 __a, __m64 __b)
		315	{
		316	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
		317	}
		318
		319	/// Horizontally subtracts the adjacent pairs of 16-bit values contained in
		320	/// two packed 128-bit vectors of [8 x i16].
		321	///
		322	/// \headerfile <x86intrin.h>
		323	///
		324	/// This intrinsic corresponds to the \c VPHSUBW instruction.
		325	///
		326	/// \param __a
		327	/// A 128-bit vector of [8 x i16] containing the first source operand. The
		328	/// horizontal differences of its adjacent pairs are stored in the lower
		329	/// half of the destination.
		330	/// \param __b
		331	/// A 128-bit vector of [8 x i16] containing the second source operand. The
		332	/// horizontal differences of its adjacent pairs are stored in the upper
		333	/// half of the destination.
		334	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
		335	/// of both operands.
		336	static __inline__ __m128i __DEFAULT_FN_ATTRS
		337	_mm_hsub_epi16(__m128i __a, __m128i __b)
		338	{
		339	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
		340	}
		341
		342	/// Horizontally subtracts the adjacent pairs of 32-bit values contained in
		343	/// two packed 128-bit vectors of [4 x i32].
		344	///
		345	/// \headerfile <x86intrin.h>
		346	///
		347	/// This intrinsic corresponds to the \c VPHSUBD instruction.
		348	///
		349	/// \param __a
		350	/// A 128-bit vector of [4 x i32] containing the first source operand. The
		351	/// horizontal differences of its adjacent pairs are stored in the lower
		352	/// half of the destination.
		353	/// \param __b
		354	/// A 128-bit vector of [4 x i32] containing the second source operand. The
		355	/// horizontal differences of its adjacent pairs are stored in the upper
		356	/// half of the destination.
		357	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
		358	/// of both operands.
		359	static __inline__ __m128i __DEFAULT_FN_ATTRS
		360	_mm_hsub_epi32(__m128i __a, __m128i __b)
		361	{
		362	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
		363	}
		364
		365	/// Horizontally subtracts the adjacent pairs of 16-bit values contained in
		366	/// two packed 64-bit vectors of [4 x i16].
		367	///
		368	/// \headerfile <x86intrin.h>
		369	///
		370	/// This intrinsic corresponds to the \c PHSUBW instruction.
		371	///
		372	/// \param __a
		373	/// A 64-bit vector of [4 x i16] containing the first source operand. The
		374	/// horizontal differences of its adjacent pairs are stored in the lower
		375	/// half of the destination.
		376	/// \param __b
		377	/// A 64-bit vector of [4 x i16] containing the second source operand. The
		378	/// horizontal differences of its adjacent pairs are stored in the upper
		379	/// half of the destination.
		380	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
		381	/// of both operands.
		382	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		383	_mm_hsub_pi16(__m64 __a, __m64 __b)
		384	{
		385	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
		386	}
		387
		388	/// Horizontally subtracts the adjacent pairs of 32-bit values contained in
		389	/// two packed 64-bit vectors of [2 x i32].
		390	///
		391	/// \headerfile <x86intrin.h>
		392	///
		393	/// This intrinsic corresponds to the \c PHSUBD instruction.
		394	///
		395	/// \param __a
		396	/// A 64-bit vector of [2 x i32] containing the first source operand. The
		397	/// horizontal difference of its two values is stored in the lower half of
		398	/// the destination.
		399	/// \param __b
		400	/// A 64-bit vector of [2 x i32] containing the second source operand. The
		401	/// horizontal difference of its two values is stored in the upper half of
		402	/// the destination.
		403	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
		404	/// of both operands.
		405	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		406	_mm_hsub_pi32(__m64 __a, __m64 __b)
		407	{
		408	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
		409	}
		410
		411	/// Horizontally subtracts the adjacent pairs of values contained in 2
		412	/// packed 128-bit vectors of [8 x i16], using signed saturation:
		413	/// differences above 0x7FFF (32767) become 0x7FFF, and differences below
		414	/// 0x8000 (-32768 as a signed 16-bit value) become 0x8000.
		415	///
		416	/// \headerfile <x86intrin.h>
		417	///
		418	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
		419	///
		420	/// \param __a
		421	/// A 128-bit vector of [8 x i16] containing the first source operand. The
		422	/// saturated differences of its adjacent pairs are stored in the lower
		423	/// half of the destination.
		424	/// \param __b
		425	/// A 128-bit vector of [8 x i16] containing the second source operand. The
		426	/// saturated differences of its adjacent pairs are stored in the upper
		427	/// half of the destination.
		428	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
		429	/// differences of both operands.
		430	static __inline__ __m128i __DEFAULT_FN_ATTRS
		431	_mm_hsubs_epi16(__m128i __a, __m128i __b)
		432	{
		433	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
		434	}
		435
		436	/// Horizontally subtracts the adjacent pairs of values contained in 2
		437	/// packed 64-bit vectors of [4 x i16], using signed saturation:
		438	/// differences above 0x7FFF (32767) become 0x7FFF, and differences below
		439	/// 0x8000 (-32768 as a signed 16-bit value) become 0x8000.
		440	///
		441	/// \headerfile <x86intrin.h>
		442	///
		443	/// This intrinsic corresponds to the \c PHSUBSW instruction.
		444	///
		445	/// \param __a
		446	/// A 64-bit vector of [4 x i16] containing the first source operand. The
		447	/// saturated differences of its adjacent pairs are stored in the lower
		448	/// half of the destination.
		449	/// \param __b
		450	/// A 64-bit vector of [4 x i16] containing the second source operand. The
		451	/// saturated differences of its adjacent pairs are stored in the upper
		452	/// half of the destination.
		453	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
		454	/// differences of both operands.
		455	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		456	_mm_hsubs_pi16(__m64 __a, __m64 __b)
		457	{
		458	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
		459	}
		460
		461	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
		462	/// values contained in the first source operand and packed 8-bit signed
		463	/// integer values contained in the second source operand, adds pairs of
		464	/// contiguous products with signed saturation, and writes the 16-bit sums to
		465	/// the corresponding bits in the destination.
		466	///
		467	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
		468	/// both operands are multiplied, and the saturated sum of the two products
		469	/// is written to bits [15:0] of the destination.
		470	///
		471	/// \headerfile <x86intrin.h>
		472	///
		473	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
		474	///
		475	/// \param __a
		476	/// A 128-bit integer vector containing the first source operand, read as packed 8-bit unsigned integers.
		477	/// \param __b
		478	/// A 128-bit integer vector containing the second source operand, read as packed 8-bit signed integers.
		479	/// \returns A 128-bit integer vector containing the 16-bit saturated sums of
		480	/// products of both operands: \n
		481	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
		482	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
		483	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
		484	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
		485	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
		486	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
		487	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
		488	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
		489	static __inline__ __m128i __DEFAULT_FN_ATTRS
		490	_mm_maddubs_epi16(__m128i __a, __m128i __b)
		491	{
		492	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
		493	}
		494
		495	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
		496	/// values contained in the first source operand and packed 8-bit signed
		497	/// integer values contained in the second source operand, adds pairs of
		498	/// contiguous products with signed saturation, and writes the 16-bit sums to
		499	/// the corresponding bits in the destination.
		500	///
		501	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
		502	/// both operands are multiplied, and the saturated sum of the two products
		503	/// is written to bits [15:0] of the destination.
		504	///
		505	/// \headerfile <x86intrin.h>
		506	///
		507	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
		508	///
		509	/// \param __a
		510	/// A 64-bit integer vector containing the first source operand, read as packed 8-bit unsigned integers.
		511	/// \param __b
		512	/// A 64-bit integer vector containing the second source operand, read as packed 8-bit signed integers.
		513	/// \returns A 64-bit integer vector containing the 16-bit saturated sums of
		514	/// products of both operands: \n
		515	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
		516	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
		517	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
		518	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
		519	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		520	_mm_maddubs_pi16(__m64 __a, __m64 __b)
		521	{
		522	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
		523	}
		524
		525	/// Multiplies corresponding packed 16-bit signed integer values, keeps the
		526	/// 18 most significant bits of each 32-bit product by right-shifting, rounds
		527	/// that value by adding 1, and writes bits [16:1] of it to the destination.
		528	///
		529	/// \headerfile <x86intrin.h>
		530	///
		531	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
		532	///
		533	/// \param __a
		534	/// A 128-bit vector of [8 x i16] containing the first source operand.
		535	/// \param __b
		536	/// A 128-bit vector of [8 x i16] containing the second source operand.
		537	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
		538	/// products of both operands.
		539	static __inline__ __m128i __DEFAULT_FN_ATTRS
		540	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
		541	{
		542	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
		543	}
		544
		545	/// Multiplies corresponding packed 16-bit signed integer values, keeps the
		546	/// 18 most significant bits of each 32-bit product by right-shifting, rounds
		547	/// that value by adding 1, and writes bits [16:1] of it to the destination.
		548	///
		549	/// \headerfile <x86intrin.h>
		550	///
		551	/// This intrinsic corresponds to the \c PMULHRSW instruction.
		552	///
		553	/// \param __a
		554	/// A 64-bit vector of [4 x i16] containing the first source operand.
		555	/// \param __b
		556	/// A 64-bit vector of [4 x i16] containing the second source operand.
		557	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
		558	/// products of both operands.
		559	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		560	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
		561	{
		562	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
		563	}
		564
		565	/// Builds a 128-bit result one byte at a time: each destination byte is
		566	/// either copied from a selected byte of the first source operand or
		567	/// cleared, as specified by the matching byte of the second source operand.
		568	///
		569	/// \headerfile <x86intrin.h>
		570	///
		571	/// This intrinsic corresponds to the \c VPSHUFB instruction.
		572	///
		573	/// \param __a
		574	/// A 128-bit integer vector containing the bytes available to be copied.
		575	/// \param __b
		576	/// A 128-bit integer vector containing control bytes corresponding to
		577	/// positions in the destination:
		578	/// Bit 7: \n
		579	/// 1: Clear the corresponding byte in the destination. \n
		580	/// 0: Copy the selected source byte to the corresponding byte in the
		581	/// destination. \n
		582	/// Bits [6:4] Reserved. \n
		583	/// Bits [3:0] select the source byte to be copied.
		584	/// \returns A 128-bit integer vector containing the copied or cleared values.
		585	static __inline__ __m128i __DEFAULT_FN_ATTRS
		586	_mm_shuffle_epi8(__m128i __a, __m128i __b)
		587	{
		588	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
		589	}
		590
		591	/// Builds a 64-bit result one byte at a time: each destination byte is
		592	/// either copied from a selected byte of the first source operand or
		593	/// cleared, as specified by the matching byte of the second source operand.
		594	///
		595	/// \headerfile <x86intrin.h>
		596	///
		597	/// This intrinsic corresponds to the \c PSHUFB instruction.
		598	///
		599	/// \param __a
		600	/// A 64-bit integer vector containing the bytes available to be copied.
		601	/// \param __b
		602	/// A 64-bit integer vector containing control bytes corresponding to
		603	/// positions in the destination:
		604	/// Bit 7: \n
		605	/// 1: Clear the corresponding byte in the destination. \n
		606	/// 0: Copy the selected source byte to the corresponding byte in the
		607	/// destination. \n
		608	/// Bits [2:0] select the source byte to be copied (the source has only 8 bytes).
		609	/// \returns A 64-bit integer vector containing the copied or cleared values.
		610	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		611	_mm_shuffle_pi8(__m64 __a, __m64 __b)
		612	{
		613	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
		614	}
		615
		616	/// For each 8-bit integer in the first source operand, performs one of
		617	/// the following actions as specified by the second source operand.
		618	///
		619	/// If the byte in the second source is negative, calculate the two's
		620	/// complement of the corresponding byte in the first source, and write that
		621	/// value to the destination. If the byte in the second source is positive,
		622	/// copy the corresponding byte from the first source to the destination. If
		623	/// the byte in the second source is zero, clear the corresponding byte in
		624	/// the destination.
		625	///
		626	/// \headerfile <x86intrin.h>
		627	///
		628	/// This intrinsic corresponds to the \c VPSIGNB instruction.
		629	///
		630	/// \param __a
		631	/// A 128-bit integer vector containing the bytes to be copied, negated, or cleared.
		632	/// \param __b
		633	/// A 128-bit integer vector containing control bytes corresponding to
		634	/// positions in the destination.
		635	/// \returns A 128-bit integer vector containing the resultant values.
		636	static __inline__ __m128i __DEFAULT_FN_ATTRS
		637	_mm_sign_epi8(__m128i __a, __m128i __b)
		638	{
		639	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
		640	}
		641
		642	/// For each 16-bit integer in the first source operand, performs one of
		643	/// the following actions as specified by the second source operand.
		644	///
		645	/// If the word in the second source is negative, calculate the two's
		646	/// complement of the corresponding word in the first source, and write that
		647	/// value to the destination. If the word in the second source is positive,
		648	/// copy the corresponding word from the first source to the destination. If
		649	/// the word in the second source is zero, clear the corresponding word in
		650	/// the destination.
		651	///
		652	/// \headerfile <x86intrin.h>
		653	///
		654	/// This intrinsic corresponds to the \c VPSIGNW instruction.
		655	///
		656	/// \param __a
		657	/// A 128-bit integer vector containing the words to be copied, negated, or cleared.
		658	/// \param __b
		659	/// A 128-bit integer vector containing control words corresponding to
		660	/// positions in the destination.
		661	/// \returns A 128-bit integer vector containing the resultant values.
		662	static __inline__ __m128i __DEFAULT_FN_ATTRS
		663	_mm_sign_epi16(__m128i __a, __m128i __b)
		664	{
		665	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
		666	}
		667
		668	/// For each 32-bit integer in the first source operand, performs one of
		669	/// the following actions as specified by the second source operand.
		670	///
		671	/// If the doubleword in the second source is negative, calculate the two's
		672	/// complement of the corresponding doubleword in the first source, and
		673	/// write that value to the destination. If the doubleword in the second
		674	/// source is positive, copy the corresponding doubleword from the first
		675	/// source to the destination. If the doubleword in the second source is
		676	/// zero, clear the corresponding doubleword in the destination.
		677	///
		678	/// \headerfile <x86intrin.h>
		679	///
		680	/// This intrinsic corresponds to the \c VPSIGND instruction.
		681	///
		682	/// \param __a
		683	/// A 128-bit integer vector containing the doublewords to be copied, negated, or cleared.
		684	/// \param __b
		685	/// A 128-bit integer vector containing control doublewords corresponding to
		686	/// positions in the destination.
		687	/// \returns A 128-bit integer vector containing the resultant values.
		688	static __inline__ __m128i __DEFAULT_FN_ATTRS
		689	_mm_sign_epi32(__m128i __a, __m128i __b)
		690	{
		691	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
		692	}
		693
		694	/// For each 8-bit integer in the first source operand, performs one of
		695	/// the following actions as specified by the second source operand.
		696	///
		697	/// If the byte in the second source is negative, calculate the two's
		698	/// complement of the corresponding byte in the first source, and write that
		699	/// value to the destination. If the byte in the second source is positive,
		700	/// copy the corresponding byte from the first source to the destination. If
		701	/// the byte in the second source is zero, clear the corresponding byte in
		702	/// the destination.
		703	///
		704	/// \headerfile <x86intrin.h>
		705	///
		706	/// This intrinsic corresponds to the \c PSIGNB instruction.
		707	///
		708	/// \param __a
		709	/// A 64-bit integer vector containing the bytes to be copied, negated, or cleared.
		710	/// \param __b
		711	/// A 64-bit integer vector containing control bytes corresponding to
		712	/// positions in the destination.
		713	/// \returns A 64-bit integer vector containing the resultant values.
		714	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		715	_mm_sign_pi8(__m64 __a, __m64 __b)
		716	{
		717	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
		718	}
		719
		720	/// For each 16-bit integer in the first source operand, performs one of
		721	/// the following actions as specified by the second source operand.
		722	///
		723	/// If the word in the second source is negative, calculate the two's
		724	/// complement of the corresponding word in the first source, and write that
		725	/// value to the destination. If the word in the second source is positive,
		726	/// copy the corresponding word from the first source to the destination. If
		727	/// the word in the second source is zero, clear the corresponding word in
		728	/// the destination.
		729	///
		730	/// \headerfile <x86intrin.h>
		731	///
		732	/// This intrinsic corresponds to the \c PSIGNW instruction.
		733	///
		734	/// \param __a
		735	/// A 64-bit integer vector containing the words to be copied, negated, or cleared.
		736	/// \param __b
		737	/// A 64-bit integer vector containing control words corresponding to
		738	/// positions in the destination.
		739	/// \returns A 64-bit integer vector containing the resultant values.
		740	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		741	_mm_sign_pi16(__m64 __a, __m64 __b)
		742	{
		743	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
		744	}
		745
		746	/// For each 32-bit integer in the first source operand, performs one of
		747	/// the following actions as specified by the second source operand.
		748	///
		749	/// If the doubleword in the second source is negative, calculate the two's
		750	/// complement of the corresponding doubleword in the first source, and
		751	/// write that value to the destination. If the doubleword in the second
		752	/// source is positive, copy the corresponding doubleword from the first
		753	/// source to the destination. If the doubleword in the second source is
		754	/// zero, clear the corresponding doubleword in the destination.
		755	///
		756	/// \headerfile <x86intrin.h>
		757	///
		758	/// This intrinsic corresponds to the \c PSIGND instruction.
		759	///
		760	/// \param __a
		761	/// A 64-bit integer vector containing the doublewords to be copied, negated, or cleared.
		762	/// \param __b
		763	/// A 64-bit integer vector containing two control doublewords corresponding
		764	/// to positions in the destination.
		765	/// \returns A 64-bit integer vector containing the resultant values.
		766	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
		767	_mm_sign_pi32(__m64 __a, __m64 __b)
		768	{
		769	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
		770	}
		771
		772	#undef __DEFAULT_FN_ATTRS
		773	#undef __DEFAULT_FN_ATTRS_MMX
		774
		775	#endif /* __TMMINTRIN_H */

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite//llvm-build/x86_64/lib/clang/16/include/tmmintrin.h – Rev 14