WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – //llvm-build/x86_64/lib/clang/16/include/mmintrin.h

Rev	Author	Line No.	Line
14	pmbaty	1	/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
		2	*
		3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	* See https://llvm.org/LICENSE.txt for license information.
		5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	*
		7	*===-----------------------------------------------------------------------===
		8	*/
		9
		10	#ifndef __MMINTRIN_H
		11	#define __MMINTRIN_H
		12
		13	#if !defined(__i386__) && !defined(__x86_64__)
		14	#error "This header is only meant to be used on x86 and x64 architecture"
		15	#endif
		16
		17	typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
		18
		19	typedef long long __v1di __attribute__((__vector_size__(8)));
		20	typedef int __v2si __attribute__((__vector_size__(8)));
		21	typedef short __v4hi __attribute__((__vector_size__(8)));
		22	typedef char __v8qi __attribute__((__vector_size__(8)));
		23
		24	/* Define the default attributes for the functions in this file. */
		25	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
		26
		27	/// Clears the MMX state by setting the state of the x87 stack registers
		28	/// to empty.
		29	///
		30	/// \headerfile <x86intrin.h>
		31	///
		32	/// This intrinsic corresponds to the <c> EMMS </c> instruction.
		33	///
		34	static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
		35	_mm_empty(void)
		36	{
		37	__builtin_ia32_emms();
		38	}
		39
		40	/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
		41	/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
		42	///
		43	/// \headerfile <x86intrin.h>
		44	///
		45	/// This intrinsic corresponds to the <c> MOVD </c> instruction.
		46	///
		47	/// \param __i
		48	/// A 32-bit integer value.
		49	/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
		50	/// parameter. The upper 32 bits are set to 0.
		51	static __inline__ __m64 __DEFAULT_FN_ATTRS
		52	_mm_cvtsi32_si64(int __i)
		53	{
		54	return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
		55	}
		56
		57	/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
		58	/// signed integer.
		59	///
		60	/// \headerfile <x86intrin.h>
		61	///
		62	/// This intrinsic corresponds to the <c> MOVD </c> instruction.
		63	///
		64	/// \param __m
		65	/// A 64-bit integer vector.
		66	/// \returns A 32-bit signed integer value containing the lower 32 bits of the
		67	/// parameter.
		68	static __inline__ int __DEFAULT_FN_ATTRS
		69	_mm_cvtsi64_si32(__m64 __m)
		70	{
		71	return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
		72	}
		73
		74	/// Casts a 64-bit signed integer value into a 64-bit integer vector.
		75	///
		76	/// \headerfile <x86intrin.h>
		77	///
		78	/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
		79	///
		80	/// \param __i
		81	/// A 64-bit signed integer.
		82	/// \returns A 64-bit integer vector containing the same bitwise pattern as the
		83	/// parameter.
		84	static __inline__ __m64 __DEFAULT_FN_ATTRS
		85	_mm_cvtsi64_m64(long long __i)
		86	{
		87	return (__m64)__i;
		88	}
		89
		90	/// Casts a 64-bit integer vector into a 64-bit signed integer value.
		91	///
		92	/// \headerfile <x86intrin.h>
		93	///
		94	/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
		95	///
		96	/// \param __m
		97	/// A 64-bit integer vector.
		98	/// \returns A 64-bit signed integer containing the same bitwise pattern as the
		99	/// parameter.
		100	static __inline__ long long __DEFAULT_FN_ATTRS
		101	_mm_cvtm64_si64(__m64 __m)
		102	{
		103	return (long long)__m;
		104	}
		105
		106	/// Converts 16-bit signed integers from both 64-bit integer vector
		107	/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
		108	/// a 64-bit integer vector of [8 x i8] as the result. Values greater
		109	/// than 127 (0x7F) are saturated to 0x7F. Values less than -128 (0x80)
		110	/// are saturated to 0x80.
		111	///
		112	/// \headerfile <x86intrin.h>
		113	///
		114	/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
		115	///
		116	/// \param __m1
		117	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
		118	/// 16-bit signed integer and is converted to an 8-bit signed integer with
		119	/// saturation. Values greater than 127 (0x7F) are saturated to 0x7F.
		120	/// Values less than -128 (0x80) are saturated to 0x80. The converted
		121	/// [4 x i8] values are written to the lower 32 bits of the result.
		122	/// \param __m2
		123	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
		124	/// 16-bit signed integer and is converted to an 8-bit signed integer with
		125	/// saturation. Values greater than 127 (0x7F) are saturated to 0x7F.
		126	/// Values less than -128 (0x80) are saturated to 0x80. The converted
		127	/// [4 x i8] values are written to the upper 32 bits of the result.
		128	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
		129	/// values.
		130	static __inline__ __m64 __DEFAULT_FN_ATTRS
		131	_mm_packs_pi16(__m64 __m1, __m64 __m2)
		132	{
		133	return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
		134	}
		135
		136	/// Converts 32-bit signed integers from both 64-bit integer vector
		137	/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
		138	/// a 64-bit integer vector of [4 x i16] as the result. Values greater
		139	/// than 32767 (0x7FFF) are saturated to 0x7FFF. Values less than -32768
		140	/// (0x8000) are saturated to 0x8000.
		141	///
		142	/// \headerfile <x86intrin.h>
		143	///
		144	/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
		145	///
		146	/// \param __m1
		147	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
		148	/// 32-bit signed integer and is converted to a 16-bit signed integer with
		149	/// saturation. Values greater than 32767 (0x7FFF) are saturated to 0x7FFF.
		150	/// Values less than -32768 (0x8000) are saturated to 0x8000. The converted
		151	/// [2 x i16] values are written to the lower 32 bits of the result.
		152	/// \param __m2
		153	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
		154	/// 32-bit signed integer and is converted to a 16-bit signed integer with
		155	/// saturation. Values greater than 32767 (0x7FFF) are saturated to 0x7FFF.
		156	/// Values less than -32768 (0x8000) are saturated to 0x8000. The converted
		157	/// [2 x i16] values are written to the upper 32 bits of the result.
		158	/// \returns A 64-bit integer vector of [4 x i16] containing the converted
		159	/// values.
		160	static __inline__ __m64 __DEFAULT_FN_ATTRS
		161	_mm_packs_pi32(__m64 __m1, __m64 __m2)
		162	{
		163	return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
		164	}
		165
		166	/// Converts 16-bit signed integers from both 64-bit integer vector
		167	/// parameters of [4 x i16] into 8-bit unsigned integer values, and
		168	/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
		169	/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
		170	/// to 0.
		171	///
		172	/// \headerfile <x86intrin.h>
		173	///
		174	/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
		175	///
		176	/// \param __m1
		177	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
		178	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
		179	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
		180	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
		181	/// the lower 32 bits of the result.
		182	/// \param __m2
		183	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
		184	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
		185	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
		186	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
		187	/// the upper 32 bits of the result.
		188	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
		189	/// values.
		190	static __inline__ __m64 __DEFAULT_FN_ATTRS
		191	_mm_packs_pu16(__m64 __m1, __m64 __m2)
		192	{
		193	return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
		194	}
		195
		196	/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
		197	/// and interleaves them into a 64-bit integer vector of [8 x i8].
		198	///
		199	/// \headerfile <x86intrin.h>
		200	///
		201	/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
		202	///
		203	/// \param __m1
		204	/// A 64-bit integer vector of [8 x i8]. \n
		205	/// Bits [39:32] are written to bits [7:0] of the result. \n
		206	/// Bits [47:40] are written to bits [23:16] of the result. \n
		207	/// Bits [55:48] are written to bits [39:32] of the result. \n
		208	/// Bits [63:56] are written to bits [55:48] of the result.
		209	/// \param __m2
		210	/// A 64-bit integer vector of [8 x i8]. \n
		211	/// Bits [39:32] are written to bits [15:8] of the result. \n
		212	/// Bits [47:40] are written to bits [31:24] of the result. \n
		213	/// Bits [55:48] are written to bits [47:40] of the result. \n
		214	/// Bits [63:56] are written to bits [63:56] of the result.
		215	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
		216	/// values.
		217	static __inline__ __m64 __DEFAULT_FN_ATTRS
		218	_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
		219	{
		220	return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
		221	}
		222
		223	/// Unpacks the upper 32 bits from two 64-bit integer vectors of
		224	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
		225	///
		226	/// \headerfile <x86intrin.h>
		227	///
		228	/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
		229	///
		230	/// \param __m1
		231	/// A 64-bit integer vector of [4 x i16]. \n
		232	/// Bits [47:32] are written to bits [15:0] of the result. \n
		233	/// Bits [63:48] are written to bits [47:32] of the result.
		234	/// \param __m2
		235	/// A 64-bit integer vector of [4 x i16]. \n
		236	/// Bits [47:32] are written to bits [31:16] of the result. \n
		237	/// Bits [63:48] are written to bits [63:48] of the result.
		238	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
		239	/// values.
		240	static __inline__ __m64 __DEFAULT_FN_ATTRS
		241	_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
		242	{
		243	return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
		244	}
		245
		246	/// Unpacks the upper 32 bits from two 64-bit integer vectors of
		247	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
		248	///
		249	/// \headerfile <x86intrin.h>
		250	///
		251	/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
		252	///
		253	/// \param __m1
		254	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
		255	/// the lower 32 bits of the result.
		256	/// \param __m2
		257	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
		258	/// the upper 32 bits of the result.
		259	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
		260	/// values.
		261	static __inline__ __m64 __DEFAULT_FN_ATTRS
		262	_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
		263	{
		264	return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
		265	}
		266
		267	/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
		268	/// and interleaves them into a 64-bit integer vector of [8 x i8].
		269	///
		270	/// \headerfile <x86intrin.h>
		271	///
		272	/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
		273	///
		274	/// \param __m1
		275	/// A 64-bit integer vector of [8 x i8]. \n
		276	/// Bits [7:0] are written to bits [7:0] of the result. \n
		277	/// Bits [15:8] are written to bits [23:16] of the result. \n
		278	/// Bits [23:16] are written to bits [39:32] of the result. \n
		279	/// Bits [31:24] are written to bits [55:48] of the result.
		280	/// \param __m2
		281	/// A 64-bit integer vector of [8 x i8]. \n
		282	/// Bits [7:0] are written to bits [15:8] of the result. \n
		283	/// Bits [15:8] are written to bits [31:24] of the result. \n
		284	/// Bits [23:16] are written to bits [47:40] of the result. \n
		285	/// Bits [31:24] are written to bits [63:56] of the result.
		286	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
		287	/// values.
		288	static __inline__ __m64 __DEFAULT_FN_ATTRS
		289	_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
		290	{
		291	return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
		292	}
		293
		294	/// Unpacks the lower 32 bits from two 64-bit integer vectors of
		295	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
		296	///
		297	/// \headerfile <x86intrin.h>
		298	///
		299	/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
		300	///
		301	/// \param __m1
		302	/// A 64-bit integer vector of [4 x i16]. \n
		303	/// Bits [15:0] are written to bits [15:0] of the result. \n
		304	/// Bits [31:16] are written to bits [47:32] of the result.
		305	/// \param __m2
		306	/// A 64-bit integer vector of [4 x i16]. \n
		307	/// Bits [15:0] are written to bits [31:16] of the result. \n
		308	/// Bits [31:16] are written to bits [63:48] of the result.
		309	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
		310	/// values.
		311	static __inline__ __m64 __DEFAULT_FN_ATTRS
		312	_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
		313	{
		314	return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
		315	}
		316
		317	/// Unpacks the lower 32 bits from two 64-bit integer vectors of
		318	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
		319	///
		320	/// \headerfile <x86intrin.h>
		321	///
		322	/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
		323	///
		324	/// \param __m1
		325	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
		326	/// the lower 32 bits of the result.
		327	/// \param __m2
		328	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
		329	/// the upper 32 bits of the result.
		330	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
		331	/// values.
		332	static __inline__ __m64 __DEFAULT_FN_ATTRS
		333	_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
		334	{
		335	return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
		336	}
		337
		338	/// Adds each 8-bit integer element of the first 64-bit integer vector
		339	/// of [8 x i8] to the corresponding 8-bit integer element of the second
		340	/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
		341	/// packed into a 64-bit integer vector of [8 x i8].
		342	///
		343	/// \headerfile <x86intrin.h>
		344	///
		345	/// This intrinsic corresponds to the <c> PADDB </c> instruction.
		346	///
		347	/// \param __m1
		348	/// A 64-bit integer vector of [8 x i8].
		349	/// \param __m2
		350	/// A 64-bit integer vector of [8 x i8].
		351	/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
		352	/// parameters.
		353	static __inline__ __m64 __DEFAULT_FN_ATTRS
		354	_mm_add_pi8(__m64 __m1, __m64 __m2)
		355	{
		356	return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
		357	}
		358
		359	/// Adds each 16-bit integer element of the first 64-bit integer vector
		360	/// of [4 x i16] to the corresponding 16-bit integer element of the second
		361	/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
		362	/// packed into a 64-bit integer vector of [4 x i16].
		363	///
		364	/// \headerfile <x86intrin.h>
		365	///
		366	/// This intrinsic corresponds to the <c> PADDW </c> instruction.
		367	///
		368	/// \param __m1
		369	/// A 64-bit integer vector of [4 x i16].
		370	/// \param __m2
		371	/// A 64-bit integer vector of [4 x i16].
		372	/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
		373	/// parameters.
		374	static __inline__ __m64 __DEFAULT_FN_ATTRS
		375	_mm_add_pi16(__m64 __m1, __m64 __m2)
		376	{
		377	return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
		378	}
		379
		380	/// Adds each 32-bit integer element of the first 64-bit integer vector
		381	/// of [2 x i32] to the corresponding 32-bit integer element of the second
		382	/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
		383	/// packed into a 64-bit integer vector of [2 x i32].
		384	///
		385	/// \headerfile <x86intrin.h>
		386	///
		387	/// This intrinsic corresponds to the <c> PADDD </c> instruction.
		388	///
		389	/// \param __m1
		390	/// A 64-bit integer vector of [2 x i32].
		391	/// \param __m2
		392	/// A 64-bit integer vector of [2 x i32].
		393	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
		394	/// parameters.
		395	static __inline__ __m64 __DEFAULT_FN_ATTRS
		396	_mm_add_pi32(__m64 __m1, __m64 __m2)
		397	{
		398	return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
		399	}
		400
		401	/// Adds each 8-bit signed integer element of the first 64-bit integer
		402	/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
		403	/// the second 64-bit integer vector of [8 x i8]. Sums greater than 127
		404	/// (0x7F) are saturated to 0x7F. Sums less than -128 (0x80) are saturated to
		405	/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
		406	///
		407	/// \headerfile <x86intrin.h>
		408	///
		409	/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
		410	///
		411	/// \param __m1
		412	/// A 64-bit integer vector of [8 x i8].
		413	/// \param __m2
		414	/// A 64-bit integer vector of [8 x i8].
		415	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
		416	/// of both parameters.
		417	static __inline__ __m64 __DEFAULT_FN_ATTRS
		418	_mm_adds_pi8(__m64 __m1, __m64 __m2)
		419	{
		420	return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
		421	}
		422
		423	/// Adds each 16-bit signed integer element of the first 64-bit integer
		424	/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
		425	/// the second 64-bit integer vector of [4 x i16]. Sums greater than 32767
		426	/// (0x7FFF) are saturated to 0x7FFF. Sums less than -32768 (0x8000) are
		427	/// saturated to 0x8000. The results are packed into a 64-bit integer vector
		428	/// of [4 x i16].
		429	///
		430	/// \headerfile <x86intrin.h>
		431	///
		432	/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
		433	///
		434	/// \param __m1
		435	/// A 64-bit integer vector of [4 x i16].
		436	/// \param __m2
		437	/// A 64-bit integer vector of [4 x i16].
		438	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
		439	/// of both parameters.
		440	static __inline__ __m64 __DEFAULT_FN_ATTRS
		441	_mm_adds_pi16(__m64 __m1, __m64 __m2)
		442	{
		443	return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
		444	}
		445
		446	/// Adds each 8-bit unsigned integer element of the first 64-bit integer
		447	/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
		448	/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
		449	/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
		450	/// [8 x i8].
		451	///
		452	/// \headerfile <x86intrin.h>
		453	///
		454	/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
		455	///
		456	/// \param __m1
		457	/// A 64-bit integer vector of [8 x i8].
		458	/// \param __m2
		459	/// A 64-bit integer vector of [8 x i8].
		460	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
		461	/// unsigned sums of both parameters.
		462	static __inline__ __m64 __DEFAULT_FN_ATTRS
		463	_mm_adds_pu8(__m64 __m1, __m64 __m2)
		464	{
		465	return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
		466	}
		467
		468	/// Adds each 16-bit unsigned integer element of the first 64-bit integer
		469	/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
		470	/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
		471	/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
		472	/// integer vector of [4 x i16].
		473	///
		474	/// \headerfile <x86intrin.h>
		475	///
		476	/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
		477	///
		478	/// \param __m1
		479	/// A 64-bit integer vector of [4 x i16].
		480	/// \param __m2
		481	/// A 64-bit integer vector of [4 x i16].
		482	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
		483	/// unsigned sums of both parameters.
		484	static __inline__ __m64 __DEFAULT_FN_ATTRS
		485	_mm_adds_pu16(__m64 __m1, __m64 __m2)
		486	{
		487	return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
		488	}
		489
		490	/// Subtracts each 8-bit integer element of the second 64-bit integer
		491	/// vector of [8 x i8] from the corresponding 8-bit integer element of the
		492	/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
		493	/// are packed into a 64-bit integer vector of [8 x i8].
		494	///
		495	/// \headerfile <x86intrin.h>
		496	///
		497	/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
		498	///
		499	/// \param __m1
		500	/// A 64-bit integer vector of [8 x i8] containing the minuends.
		501	/// \param __m2
		502	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
		503	/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
		504	/// both parameters.
		505	static __inline__ __m64 __DEFAULT_FN_ATTRS
		506	_mm_sub_pi8(__m64 __m1, __m64 __m2)
		507	{
		508	return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
		509	}
		510
		511	/// Subtracts each 16-bit integer element of the second 64-bit integer
		512	/// vector of [4 x i16] from the corresponding 16-bit integer element of the
		513	/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
		514	/// results are packed into a 64-bit integer vector of [4 x i16].
		515	///
		516	/// \headerfile <x86intrin.h>
		517	///
		518	/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
		519	///
		520	/// \param __m1
		521	/// A 64-bit integer vector of [4 x i16] containing the minuends.
		522	/// \param __m2
		523	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
		524	/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
		525	/// both parameters.
		526	static __inline__ __m64 __DEFAULT_FN_ATTRS
		527	_mm_sub_pi16(__m64 __m1, __m64 __m2)
		528	{
		529	return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
		530	}
		531
		532	/// Subtracts each 32-bit integer element of the second 64-bit integer
		533	/// vector of [2 x i32] from the corresponding 32-bit integer element of the
		534	/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
		535	/// results are packed into a 64-bit integer vector of [2 x i32].
		536	///
		537	/// \headerfile <x86intrin.h>
		538	///
		539	/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
		540	///
		541	/// \param __m1
		542	/// A 64-bit integer vector of [2 x i32] containing the minuends.
		543	/// \param __m2
		544	/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
		545	/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
		546	/// both parameters.
		547	static __inline__ __m64 __DEFAULT_FN_ATTRS
		548	_mm_sub_pi32(__m64 __m1, __m64 __m2)
		549	{
		550	return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
		551	}
		552
		553	/// Subtracts each 8-bit signed integer element of the second 64-bit
		554	/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
		555	/// element of the first 64-bit integer vector of [8 x i8]. Results greater
		556	/// than 127 (0x7F) are saturated to 0x7F. Results less than -128 (0x80)
		557	/// are saturated to 0x80. The results are packed into a 64-bit integer
		558	/// vector of [8 x i8].
		559	///
		560	/// \headerfile <x86intrin.h>
		561	///
		562	/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
		563	///
		564	/// \param __m1
		565	/// A 64-bit integer vector of [8 x i8] containing the minuends.
		566	/// \param __m2
		567	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
		568	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
		569	/// differences of both parameters.
		570	static __inline__ __m64 __DEFAULT_FN_ATTRS
		571	_mm_subs_pi8(__m64 __m1, __m64 __m2)
		572	{
		573	return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
		574	}
		575
		576	/// Subtracts each 16-bit signed integer element of the second 64-bit
		577	/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
		578	/// element of the first 64-bit integer vector of [4 x i16]. Results greater
		579	/// than 32767 (0x7FFF) are saturated to 0x7FFF. Results less than -32768
		580	/// (0x8000) are saturated to 0x8000. The results are packed into a 64-bit
		581	/// integer vector of [4 x i16].
		582	///
		583	/// \headerfile <x86intrin.h>
		584	///
		585	/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
		586	///
		587	/// \param __m1
		588	/// A 64-bit integer vector of [4 x i16] containing the minuends.
		589	/// \param __m2
		590	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
		591	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
		592	/// differences of both parameters.
		593	static __inline__ __m64 __DEFAULT_FN_ATTRS
		594	_mm_subs_pi16(__m64 __m1, __m64 __m2)
		595	{
		596	return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
		597	}
		598
		599	/// Subtracts each 8-bit unsigned integer element of the second 64-bit
		600	/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
		601	/// element of the first 64-bit integer vector of [8 x i8].
		602	///
		603	/// If an element of the first vector is less than the corresponding element
		604	/// of the second vector, the result is saturated to 0. The results are
		605	/// packed into a 64-bit integer vector of [8 x i8].
		606	///
		607	/// \headerfile <x86intrin.h>
		608	///
		609	/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
		610	///
		611	/// \param __m1
		612	/// A 64-bit integer vector of [8 x i8] containing the minuends.
		613	/// \param __m2
		614	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
		615	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
		616	/// differences of both parameters.
		617	static __inline__ __m64 __DEFAULT_FN_ATTRS
		618	_mm_subs_pu8(__m64 __m1, __m64 __m2)
		619	{
		620	return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
		621	}
		622
		623	/// Subtracts each 16-bit unsigned integer element of the second 64-bit
		624	/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
		625	/// integer element of the first 64-bit integer vector of [4 x i16].
		626	///
		627	/// If an element of the first vector is less than the corresponding element
		628	/// of the second vector, the result is saturated to 0. The results are
		629	/// packed into a 64-bit integer vector of [4 x i16].
		630	///
		631	/// \headerfile <x86intrin.h>
		632	///
		633	/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
		634	///
		635	/// \param __m1
		636	/// A 64-bit integer vector of [4 x i16] containing the minuends.
		637	/// \param __m2
		638	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
		639	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
		640	/// differences of both parameters.
		641	static __inline__ __m64 __DEFAULT_FN_ATTRS
		642	_mm_subs_pu16(__m64 __m1, __m64 __m2)
		643	{
		644	return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
		645	}
		646
		647	/// Multiplies each 16-bit signed integer element of the first 64-bit
		648	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
		649	/// element of the second 64-bit integer vector of [4 x i16] to get four
		650	/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
		651	/// The lower 32 bits of these two sums are packed into a 64-bit integer
		652	/// vector of [2 x i32].
		653	///
		654	/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
		655	/// of both parameters are multiplied, and the sum of both results is written
		656	/// to bits [31:0] of the result.
		657	///
		658	/// \headerfile <x86intrin.h>
		659	///
		660	/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
		661	///
		662	/// \param __m1
		663	/// A 64-bit integer vector of [4 x i16].
		664	/// \param __m2
		665	/// A 64-bit integer vector of [4 x i16].
		666	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
		667	/// products of both parameters.
		668	static __inline__ __m64 __DEFAULT_FN_ATTRS
		669	_mm_madd_pi16(__m64 __m1, __m64 __m2)
		670	{
		671	return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
		672	}
		673
		674	/// Multiplies each 16-bit signed integer element of the first 64-bit
		675	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
		676	/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
		677	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
		678	///
		679	/// \headerfile <x86intrin.h>
		680	///
		681	/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
		682	///
		683	/// \param __m1
		684	/// A 64-bit integer vector of [4 x i16].
		685	/// \param __m2
		686	/// A 64-bit integer vector of [4 x i16].
		687	/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
		688	/// of the products of both parameters.
		689	static __inline__ __m64 __DEFAULT_FN_ATTRS
		690	_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
		691	{
		692	return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
		693	}
		694
		695	/// Multiplies each 16-bit signed integer element of the first 64-bit
		696	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
		697	/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
		698	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
		699	///
		700	/// \headerfile <x86intrin.h>
		701	///
		702	/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
		703	///
		704	/// \param __m1
		705	/// A 64-bit integer vector of [4 x i16].
		706	/// \param __m2
		707	/// A 64-bit integer vector of [4 x i16].
		708	/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
		709	/// of the products of both parameters.
		710	static __inline__ __m64 __DEFAULT_FN_ATTRS
		711	_mm_mullo_pi16(__m64 __m1, __m64 __m2)
		712	{
		713	return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
		714	}
		715
		716	/// Left-shifts each 16-bit signed integer element of the first
		717	/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
		718	/// of bits specified by the second parameter, which is a 64-bit integer. The
		719	/// lower 16 bits of the results are packed into a 64-bit integer vector of
		720	/// [4 x i16].
		721	///
		722	/// \headerfile <x86intrin.h>
		723	///
		724	/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
		725	///
		726	/// \param __m
		727	/// A 64-bit integer vector of [4 x i16].
		728	/// \param __count
		729	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		730	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
		731	/// values. If \a __count is greater or equal to 16, the result is set to all
		732	/// 0.
		733	static __inline__ __m64 __DEFAULT_FN_ATTRS
		734	_mm_sll_pi16(__m64 __m, __m64 __count)
		735	{
		736	return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
		737	}
		738
		739	/// Left-shifts each 16-bit signed integer element of a 64-bit integer
		740	/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
		741	/// The lower 16 bits of the results are packed into a 64-bit integer vector
		742	/// of [4 x i16].
		743	///
		744	/// \headerfile <x86intrin.h>
		745	///
		746	/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
		747	///
		748	/// \param __m
		749	/// A 64-bit integer vector of [4 x i16].
		750	/// \param __count
		751	/// A 32-bit integer value.
		752	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
		753	/// values. If \a __count is greater or equal to 16, the result is set to all
		754	/// 0.
		755	static __inline__ __m64 __DEFAULT_FN_ATTRS
		756	_mm_slli_pi16(__m64 __m, int __count)
		757	{
		758	return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
		759	}
		760
		761	/// Left-shifts each 32-bit signed integer element of the first
		762	/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
		763	/// of bits specified by the second parameter, which is a 64-bit integer. The
		764	/// lower 32 bits of the results are packed into a 64-bit integer vector of
		765	/// [2 x i32].
		766	///
		767	/// \headerfile <x86intrin.h>
		768	///
		769	/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
		770	///
		771	/// \param __m
		772	/// A 64-bit integer vector of [2 x i32].
		773	/// \param __count
		774	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		775	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
		776	/// values. If \a __count is greater or equal to 32, the result is set to all
		777	/// 0.
		778	static __inline__ __m64 __DEFAULT_FN_ATTRS
		779	_mm_sll_pi32(__m64 __m, __m64 __count)
		780	{
		781	return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
		782	}
		783
		784	/// Left-shifts each 32-bit signed integer element of a 64-bit integer
		785	/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
		786	/// The lower 32 bits of the results are packed into a 64-bit integer vector
		787	/// of [2 x i32].
		788	///
		789	/// \headerfile <x86intrin.h>
		790	///
		791	/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
		792	///
		793	/// \param __m
		794	/// A 64-bit integer vector of [2 x i32].
		795	/// \param __count
		796	/// A 32-bit integer value.
		797	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
		798	/// values. If \a __count is greater or equal to 32, the result is set to all
		799	/// 0.
		800	static __inline__ __m64 __DEFAULT_FN_ATTRS
		801	_mm_slli_pi32(__m64 __m, int __count)
		802	{
		803	return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
		804	}
		805
		806	/// Left-shifts the first 64-bit integer parameter by the number of bits
		807	/// specified by the second 64-bit integer parameter. The lower 64 bits of
		808	/// result are returned.
		809	///
		810	/// \headerfile <x86intrin.h>
		811	///
		812	/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
		813	///
		814	/// \param __m
		815	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		816	/// \param __count
		817	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		818	/// \returns A 64-bit integer vector containing the left-shifted value. If
		819	/// \a __count is greater or equal to 64, the result is set to 0.
		820	static __inline__ __m64 __DEFAULT_FN_ATTRS
		821	_mm_sll_si64(__m64 __m, __m64 __count)
		822	{
		823	return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
		824	}
		825
		826	/// Left-shifts the first parameter, which is a 64-bit integer, by the
		827	/// number of bits specified by the second parameter, which is a 32-bit
		828	/// integer. The lower 64 bits of result are returned.
		829	///
		830	/// \headerfile <x86intrin.h>
		831	///
		832	/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
		833	///
		834	/// \param __m
		835	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		836	/// \param __count
		837	/// A 32-bit integer value.
		838	/// \returns A 64-bit integer vector containing the left-shifted value. If
		839	/// \a __count is greater or equal to 64, the result is set to 0.
		840	static __inline__ __m64 __DEFAULT_FN_ATTRS
		841	_mm_slli_si64(__m64 __m, int __count)
		842	{
		843	return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
		844	}
		845
		846	/// Right-shifts each 16-bit integer element of the first parameter,
		847	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
		848	/// specified by the second parameter, which is a 64-bit integer.
		849	///
		850	/// High-order bits are filled with the sign bit of the initial value of each
		851	/// 16-bit element. The 16-bit results are packed into a 64-bit integer
		852	/// vector of [4 x i16].
		853	///
		854	/// \headerfile <x86intrin.h>
		855	///
		856	/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
		857	///
		858	/// \param __m
		859	/// A 64-bit integer vector of [4 x i16].
		860	/// \param __count
		861	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		862	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
		863	/// values.
		864	static __inline__ __m64 __DEFAULT_FN_ATTRS
		865	_mm_sra_pi16(__m64 __m, __m64 __count)
		866	{
		867	return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
		868	}
		869
		870	/// Right-shifts each 16-bit integer element of a 64-bit integer vector
		871	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
		872	///
		873	/// High-order bits are filled with the sign bit of the initial value of each
		874	/// 16-bit element. The 16-bit results are packed into a 64-bit integer
		875	/// vector of [4 x i16].
		876	///
		877	/// \headerfile <x86intrin.h>
		878	///
		879	/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
		880	///
		881	/// \param __m
		882	/// A 64-bit integer vector of [4 x i16].
		883	/// \param __count
		884	/// A 32-bit integer value.
		885	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
		886	/// values.
		887	static __inline__ __m64 __DEFAULT_FN_ATTRS
		888	_mm_srai_pi16(__m64 __m, int __count)
		889	{
		890	return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
		891	}
		892
		893	/// Right-shifts each 32-bit integer element of the first parameter,
		894	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
		895	/// specified by the second parameter, which is a 64-bit integer.
		896	///
		897	/// High-order bits are filled with the sign bit of the initial value of each
		898	/// 32-bit element. The 32-bit results are packed into a 64-bit integer
		899	/// vector of [2 x i32].
		900	///
		901	/// \headerfile <x86intrin.h>
		902	///
		903	/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
		904	///
		905	/// \param __m
		906	/// A 64-bit integer vector of [2 x i32].
		907	/// \param __count
		908	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		909	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
		910	/// values.
		911	static __inline__ __m64 __DEFAULT_FN_ATTRS
		912	_mm_sra_pi32(__m64 __m, __m64 __count)
		913	{
		914	return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
		915	}
		916
		917	/// Right-shifts each 32-bit integer element of a 64-bit integer vector
		918	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
		919	///
		920	/// High-order bits are filled with the sign bit of the initial value of each
		921	/// 32-bit element. The 32-bit results are packed into a 64-bit integer
		922	/// vector of [2 x i32].
		923	///
		924	/// \headerfile <x86intrin.h>
		925	///
		926	/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
		927	///
		928	/// \param __m
		929	/// A 64-bit integer vector of [2 x i32].
		930	/// \param __count
		931	/// A 32-bit integer value.
		932	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
		933	/// values.
		934	static __inline__ __m64 __DEFAULT_FN_ATTRS
		935	_mm_srai_pi32(__m64 __m, int __count)
		936	{
		937	return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
		938	}
		939
		940	/// Right-shifts each 16-bit integer element of the first parameter,
		941	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
		942	/// specified by the second parameter, which is a 64-bit integer.
		943	///
		944	/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
		945	/// integer vector of [4 x i16].
		946	///
		947	/// \headerfile <x86intrin.h>
		948	///
		949	/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
		950	///
		951	/// \param __m
		952	/// A 64-bit integer vector of [4 x i16].
		953	/// \param __count
		954	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		955	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
		956	/// values.
		957	static __inline__ __m64 __DEFAULT_FN_ATTRS
		958	_mm_srl_pi16(__m64 __m, __m64 __count)
		959	{
		960	return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
		961	}
		962
		963	/// Right-shifts each 16-bit integer element of a 64-bit integer vector
		964	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
		965	///
		966	/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
		967	/// integer vector of [4 x i16].
		968	///
		969	/// \headerfile <x86intrin.h>
		970	///
		971	/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
		972	///
		973	/// \param __m
		974	/// A 64-bit integer vector of [4 x i16].
		975	/// \param __count
		976	/// A 32-bit integer value.
		977	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
		978	/// values.
		979	static __inline__ __m64 __DEFAULT_FN_ATTRS
		980	_mm_srli_pi16(__m64 __m, int __count)
		981	{
		982	return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
		983	}
		984
		985	/// Right-shifts each 32-bit integer element of the first parameter,
		986	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
		987	/// specified by the second parameter, which is a 64-bit integer.
		988	///
		989	/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
		990	/// integer vector of [2 x i32].
		991	///
		992	/// \headerfile <x86intrin.h>
		993	///
		994	/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
		995	///
		996	/// \param __m
		997	/// A 64-bit integer vector of [2 x i32].
		998	/// \param __count
		999	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		1000	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
		1001	/// values.
		1002	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1003	_mm_srl_pi32(__m64 __m, __m64 __count)
		1004	{
		1005	return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
		1006	}
		1007
		1008	/// Right-shifts each 32-bit integer element of a 64-bit integer vector
		1009	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
		1010	///
		1011	/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
		1012	/// integer vector of [2 x i32].
		1013	///
		1014	/// \headerfile <x86intrin.h>
		1015	///
		1016	/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
		1017	///
		1018	/// \param __m
		1019	/// A 64-bit integer vector of [2 x i32].
		1020	/// \param __count
		1021	/// A 32-bit integer value.
		1022	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
		1023	/// values.
		1024	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1025	_mm_srli_pi32(__m64 __m, int __count)
		1026	{
		1027	return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
		1028	}
		1029
		1030	/// Right-shifts the first 64-bit integer parameter by the number of bits
		1031	/// specified by the second 64-bit integer parameter.
		1032	///
		1033	/// High-order bits are cleared.
		1034	///
		1035	/// \headerfile <x86intrin.h>
		1036	///
		1037	/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
		1038	///
		1039	/// \param __m
		1040	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		1041	/// \param __count
		1042	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		1043	/// \returns A 64-bit integer vector containing the right-shifted value.
		1044	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1045	_mm_srl_si64(__m64 __m, __m64 __count)
		1046	{
		1047	return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
		1048	}
		1049
		1050	/// Right-shifts the first parameter, which is a 64-bit integer, by the
		1051	/// number of bits specified by the second parameter, which is a 32-bit
		1052	/// integer.
		1053	///
		1054	/// High-order bits are cleared.
		1055	///
		1056	/// \headerfile <x86intrin.h>
		1057	///
		1058	/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
		1059	///
		1060	/// \param __m
		1061	/// A 64-bit integer vector interpreted as a single 64-bit integer.
		1062	/// \param __count
		1063	/// A 32-bit integer value.
		1064	/// \returns A 64-bit integer vector containing the right-shifted value.
		1065	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1066	_mm_srli_si64(__m64 __m, int __count)
		1067	{
		1068	return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
		1069	}
		1070
		1071	/// Performs a bitwise AND of two 64-bit integer vectors.
		1072	///
		1073	/// \headerfile <x86intrin.h>
		1074	///
		1075	/// This intrinsic corresponds to the <c> PAND </c> instruction.
		1076	///
		1077	/// \param __m1
		1078	/// A 64-bit integer vector.
		1079	/// \param __m2
		1080	/// A 64-bit integer vector.
		1081	/// \returns A 64-bit integer vector containing the bitwise AND of both
		1082	/// parameters.
		1083	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1084	_mm_and_si64(__m64 __m1, __m64 __m2)
		1085	{
		1086	return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
		1087	}
		1088
		1089	/// Performs a bitwise NOT of the first 64-bit integer vector, and then
		1090	/// performs a bitwise AND of the intermediate result and the second 64-bit
		1091	/// integer vector.
		1092	///
		1093	/// \headerfile <x86intrin.h>
		1094	///
		1095	/// This intrinsic corresponds to the <c> PANDN </c> instruction.
		1096	///
		1097	/// \param __m1
		1098	/// A 64-bit integer vector. The one's complement of this parameter is used
		1099	/// in the bitwise AND.
		1100	/// \param __m2
		1101	/// A 64-bit integer vector.
		1102	/// \returns A 64-bit integer vector containing the bitwise AND of the second
		1103	/// parameter and the one's complement of the first parameter.
		1104	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1105	_mm_andnot_si64(__m64 __m1, __m64 __m2)
		1106	{
		1107	return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
		1108	}
		1109
		1110	/// Performs a bitwise OR of two 64-bit integer vectors.
		1111	///
		1112	/// \headerfile <x86intrin.h>
		1113	///
		1114	/// This intrinsic corresponds to the <c> POR </c> instruction.
		1115	///
		1116	/// \param __m1
		1117	/// A 64-bit integer vector.
		1118	/// \param __m2
		1119	/// A 64-bit integer vector.
		1120	/// \returns A 64-bit integer vector containing the bitwise OR of both
		1121	/// parameters.
		1122	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1123	_mm_or_si64(__m64 __m1, __m64 __m2)
		1124	{
		1125	return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
		1126	}
		1127
		1128	/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
		1129	///
		1130	/// \headerfile <x86intrin.h>
		1131	///
		1132	/// This intrinsic corresponds to the <c> PXOR </c> instruction.
		1133	///
		1134	/// \param __m1
		1135	/// A 64-bit integer vector.
		1136	/// \param __m2
		1137	/// A 64-bit integer vector.
		1138	/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
		1139	/// parameters.
		1140	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1141	_mm_xor_si64(__m64 __m1, __m64 __m2)
		1142	{
		1143	return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
		1144	}
		1145
		1146	/// Compares the 8-bit integer elements of two 64-bit integer vectors of
		1147	/// [8 x i8] to determine if the element of the first vector is equal to the
		1148	/// corresponding element of the second vector.
		1149	///
		1150	/// The comparison yields 0 for false, 0xFF for true.
		1151	///
		1152	/// \headerfile <x86intrin.h>
		1153	///
		1154	/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
		1155	///
		1156	/// \param __m1
		1157	/// A 64-bit integer vector of [8 x i8].
		1158	/// \param __m2
		1159	/// A 64-bit integer vector of [8 x i8].
		1160	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
		1161	/// results.
		1162	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1163	_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
		1164	{
		1165	return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
		1166	}
		1167
		1168	/// Compares the 16-bit integer elements of two 64-bit integer vectors of
		1169	/// [4 x i16] to determine if the element of the first vector is equal to the
		1170	/// corresponding element of the second vector.
		1171	///
		1172	/// The comparison yields 0 for false, 0xFFFF for true.
		1173	///
		1174	/// \headerfile <x86intrin.h>
		1175	///
		1176	/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
		1177	///
		1178	/// \param __m1
		1179	/// A 64-bit integer vector of [4 x i16].
		1180	/// \param __m2
		1181	/// A 64-bit integer vector of [4 x i16].
		1182	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
		1183	/// results.
		1184	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1185	_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
		1186	{
		1187	return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
		1188	}
		1189
		1190	/// Compares the 32-bit integer elements of two 64-bit integer vectors of
		1191	/// [2 x i32] to determine if the element of the first vector is equal to the
		1192	/// corresponding element of the second vector.
		1193	///
		1194	/// The comparison yields 0 for false, 0xFFFFFFFF for true.
		1195	///
		1196	/// \headerfile <x86intrin.h>
		1197	///
		1198	/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
		1199	///
		1200	/// \param __m1
		1201	/// A 64-bit integer vector of [2 x i32].
		1202	/// \param __m2
		1203	/// A 64-bit integer vector of [2 x i32].
		1204	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
		1205	/// results.
		1206	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1207	_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
		1208	{
		1209	return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
		1210	}
		1211
		1212	/// Compares the 8-bit integer elements of two 64-bit integer vectors of
		1213	/// [8 x i8] to determine if the element of the first vector is greater than
		1214	/// the corresponding element of the second vector.
		1215	///
		1216	/// The comparison yields 0 for false, 0xFF for true.
		1217	///
		1218	/// \headerfile <x86intrin.h>
		1219	///
		1220	/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
		1221	///
		1222	/// \param __m1
		1223	/// A 64-bit integer vector of [8 x i8].
		1224	/// \param __m2
		1225	/// A 64-bit integer vector of [8 x i8].
		1226	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
		1227	/// results.
		1228	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1229	_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
		1230	{
		1231	return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
		1232	}
		1233
		1234	/// Compares the 16-bit integer elements of two 64-bit integer vectors of
		1235	/// [4 x i16] to determine if the element of the first vector is greater than
		1236	/// the corresponding element of the second vector.
		1237	///
		1238	/// The comparison yields 0 for false, 0xFFFF for true.
		1239	///
		1240	/// \headerfile <x86intrin.h>
		1241	///
		1242	/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
		1243	///
		1244	/// \param __m1
		1245	/// A 64-bit integer vector of [4 x i16].
		1246	/// \param __m2
		1247	/// A 64-bit integer vector of [4 x i16].
		1248	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
		1249	/// results.
		1250	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1251	_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
		1252	{
		1253	return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
		1254	}
		1255
		1256	/// Compares the 32-bit integer elements of two 64-bit integer vectors of
		1257	/// [2 x i32] to determine if the element of the first vector is greater than
		1258	/// the corresponding element of the second vector.
		1259	///
		1260	/// The comparison yields 0 for false, 0xFFFFFFFF for true.
		1261	///
		1262	/// \headerfile <x86intrin.h>
		1263	///
		1264	/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
		1265	///
		1266	/// \param __m1
		1267	/// A 64-bit integer vector of [2 x i32].
		1268	/// \param __m2
		1269	/// A 64-bit integer vector of [2 x i32].
		1270	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
		1271	/// results.
		1272	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1273	_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
		1274	{
		1275	return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
		1276	}
		1277
		1278	/// Constructs a 64-bit integer vector initialized to zero.
		1279	///
		1280	/// \headerfile <x86intrin.h>
		1281	///
		1282	/// This intrinsic corresponds to the <c> PXOR </c> instruction.
		1283	///
		1284	/// \returns An initialized 64-bit integer vector with all elements set to zero.
		1285	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1286	_mm_setzero_si64(void)
		1287	{
		1288	return __extension__ (__m64){ 0LL };
		1289	}
		1290
		1291	/// Constructs a 64-bit integer vector initialized with the specified
		1292	/// 32-bit integer values.
		1293	///
		1294	/// \headerfile <x86intrin.h>
		1295	///
		1296	/// This intrinsic is a utility function and does not correspond to a specific
		1297	/// instruction.
		1298	///
		1299	/// \param __i1
		1300	/// A 32-bit integer value used to initialize the upper 32 bits of the
		1301	/// result.
		1302	/// \param __i0
		1303	/// A 32-bit integer value used to initialize the lower 32 bits of the
		1304	/// result.
		1305	/// \returns An initialized 64-bit integer vector.
		1306	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1307	_mm_set_pi32(int __i1, int __i0)
		1308	{
		1309	return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
		1310	}
		1311
		1312	/// Constructs a 64-bit integer vector initialized with the specified
		1313	/// 16-bit integer values.
		1314	///
		1315	/// \headerfile <x86intrin.h>
		1316	///
		1317	/// This intrinsic is a utility function and does not correspond to a specific
		1318	/// instruction.
		1319	///
		1320	/// \param __s3
		1321	/// A 16-bit integer value used to initialize bits [63:48] of the result.
		1322	/// \param __s2
		1323	/// A 16-bit integer value used to initialize bits [47:32] of the result.
		1324	/// \param __s1
		1325	/// A 16-bit integer value used to initialize bits [31:16] of the result.
		1326	/// \param __s0
		1327	/// A 16-bit integer value used to initialize bits [15:0] of the result.
		1328	/// \returns An initialized 64-bit integer vector.
		1329	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1330	_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
		1331	{
		1332	return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
		1333	}
		1334
		1335	/// Constructs a 64-bit integer vector initialized with the specified
		1336	/// 8-bit integer values.
		1337	///
		1338	/// \headerfile <x86intrin.h>
		1339	///
		1340	/// This intrinsic is a utility function and does not correspond to a specific
		1341	/// instruction.
		1342	///
		1343	/// \param __b7
		1344	/// An 8-bit integer value used to initialize bits [63:56] of the result.
		1345	/// \param __b6
		1346	/// An 8-bit integer value used to initialize bits [55:48] of the result.
		1347	/// \param __b5
		1348	/// An 8-bit integer value used to initialize bits [47:40] of the result.
		1349	/// \param __b4
		1350	/// An 8-bit integer value used to initialize bits [39:32] of the result.
		1351	/// \param __b3
		1352	/// An 8-bit integer value used to initialize bits [31:24] of the result.
		1353	/// \param __b2
		1354	/// An 8-bit integer value used to initialize bits [23:16] of the result.
		1355	/// \param __b1
		1356	/// An 8-bit integer value used to initialize bits [15:8] of the result.
		1357	/// \param __b0
		1358	/// An 8-bit integer value used to initialize bits [7:0] of the result.
		1359	/// \returns An initialized 64-bit integer vector.
		1360	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1361	_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
		1362	char __b1, char __b0)
		1363	{
		1364	return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
		1365	__b4, __b5, __b6, __b7);
		1366	}
		1367
		1368	/// Constructs a 64-bit integer vector of [2 x i32], with each of the
		1369	/// 32-bit integer vector elements set to the specified 32-bit integer
		1370	/// value.
		1371	///
		1372	/// \headerfile <x86intrin.h>
		1373	///
		1374	/// This intrinsic is a utility function and does not correspond to a specific
		1375	/// instruction.
		1376	///
		1377	/// \param __i
		1378	/// A 32-bit integer value used to initialize each vector element of the
		1379	/// result.
		1380	/// \returns An initialized 64-bit integer vector of [2 x i32].
		1381	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1382	_mm_set1_pi32(int __i)
		1383	{
		1384	return _mm_set_pi32(__i, __i);
		1385	}
		1386
		1387	/// Constructs a 64-bit integer vector of [4 x i16], with each of the
		1388	/// 16-bit integer vector elements set to the specified 16-bit integer
		1389	/// value.
		1390	///
		1391	/// \headerfile <x86intrin.h>
		1392	///
		1393	/// This intrinsic is a utility function and does not correspond to a specific
		1394	/// instruction.
		1395	///
		1396	/// \param __w
		1397	/// A 16-bit integer value used to initialize each vector element of the
		1398	/// result.
		1399	/// \returns An initialized 64-bit integer vector of [4 x i16].
		1400	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1401	_mm_set1_pi16(short __w)
		1402	{
		1403	return _mm_set_pi16(__w, __w, __w, __w);
		1404	}
		1405
		1406	/// Constructs a 64-bit integer vector of [8 x i8], with each of the
		1407	/// 8-bit integer vector elements set to the specified 8-bit integer value.
		1408	///
		1409	/// \headerfile <x86intrin.h>
		1410	///
		1411	/// This intrinsic is a utility function and does not correspond to a specific
		1412	/// instruction.
		1413	///
		1414	/// \param __b
		1415	/// An 8-bit integer value used to initialize each vector element of the
		1416	/// result.
		1417	/// \returns An initialized 64-bit integer vector of [8 x i8].
		1418	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1419	_mm_set1_pi8(char __b)
		1420	{
		1421	return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
		1422	}
		1423
		1424	/// Constructs a 64-bit integer vector, initialized in reverse order with
		1425	/// the specified 32-bit integer values.
		1426	///
		1427	/// \headerfile <x86intrin.h>
		1428	///
		1429	/// This intrinsic is a utility function and does not correspond to a specific
		1430	/// instruction.
		1431	///
		1432	/// \param __i0
		1433	/// A 32-bit integer value used to initialize the lower 32 bits of the
		1434	/// result.
		1435	/// \param __i1
		1436	/// A 32-bit integer value used to initialize the upper 32 bits of the
		1437	/// result.
		1438	/// \returns An initialized 64-bit integer vector.
		1439	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1440	_mm_setr_pi32(int __i0, int __i1)
		1441	{
		1442	return _mm_set_pi32(__i1, __i0);
		1443	}
		1444
		1445	/// Constructs a 64-bit integer vector, initialized in reverse order with
		1446	/// the specified 16-bit integer values.
		1447	///
		1448	/// \headerfile <x86intrin.h>
		1449	///
		1450	/// This intrinsic is a utility function and does not correspond to a specific
		1451	/// instruction.
		1452	///
		1453	/// \param __w0
		1454	/// A 16-bit integer value used to initialize bits [15:0] of the result.
		1455	/// \param __w1
		1456	/// A 16-bit integer value used to initialize bits [31:16] of the result.
		1457	/// \param __w2
		1458	/// A 16-bit integer value used to initialize bits [47:32] of the result.
		1459	/// \param __w3
		1460	/// A 16-bit integer value used to initialize bits [63:48] of the result.
		1461	/// \returns An initialized 64-bit integer vector.
		1462	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1463	_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
		1464	{
		1465	return _mm_set_pi16(__w3, __w2, __w1, __w0);
		1466	}
		1467
		1468	/// Constructs a 64-bit integer vector, initialized in reverse order with
		1469	/// the specified 8-bit integer values.
		1470	///
		1471	/// \headerfile <x86intrin.h>
		1472	///
		1473	/// This intrinsic is a utility function and does not correspond to a specific
		1474	/// instruction.
		1475	///
		1476	/// \param __b0
		1477	/// An 8-bit integer value used to initialize bits [7:0] of the result.
		1478	/// \param __b1
		1479	/// An 8-bit integer value used to initialize bits [15:8] of the result.
		1480	/// \param __b2
		1481	/// An 8-bit integer value used to initialize bits [23:16] of the result.
		1482	/// \param __b3
		1483	/// An 8-bit integer value used to initialize bits [31:24] of the result.
		1484	/// \param __b4
		1485	/// An 8-bit integer value used to initialize bits [39:32] of the result.
		1486	/// \param __b5
		1487	/// An 8-bit integer value used to initialize bits [47:40] of the result.
		1488	/// \param __b6
		1489	/// An 8-bit integer value used to initialize bits [55:48] of the result.
		1490	/// \param __b7
		1491	/// An 8-bit integer value used to initialize bits [63:56] of the result.
		1492	/// \returns An initialized 64-bit integer vector.
		1493	static __inline__ __m64 __DEFAULT_FN_ATTRS
		1494	_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
		1495	char __b6, char __b7)
		1496	{
		1497	return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
		1498	}
		1499
		1500	#undef __DEFAULT_FN_ATTRS
		1501
		1502	/* Aliases for compatibility. */
		1503	#define _m_empty _mm_empty
		1504	#define _m_from_int _mm_cvtsi32_si64
		1505	#define _m_from_int64 _mm_cvtsi64_m64
		1506	#define _m_to_int _mm_cvtsi64_si32
		1507	#define _m_to_int64 _mm_cvtm64_si64
		1508	#define _m_packsswb _mm_packs_pi16
		1509	#define _m_packssdw _mm_packs_pi32
		1510	#define _m_packuswb _mm_packs_pu16
		1511	#define _m_punpckhbw _mm_unpackhi_pi8
		1512	#define _m_punpckhwd _mm_unpackhi_pi16
		1513	#define _m_punpckhdq _mm_unpackhi_pi32
		1514	#define _m_punpcklbw _mm_unpacklo_pi8
		1515	#define _m_punpcklwd _mm_unpacklo_pi16
		1516	#define _m_punpckldq _mm_unpacklo_pi32
		1517	#define _m_paddb _mm_add_pi8
		1518	#define _m_paddw _mm_add_pi16
		1519	#define _m_paddd _mm_add_pi32
		1520	#define _m_paddsb _mm_adds_pi8
		1521	#define _m_paddsw _mm_adds_pi16
		1522	#define _m_paddusb _mm_adds_pu8
		1523	#define _m_paddusw _mm_adds_pu16
		1524	#define _m_psubb _mm_sub_pi8
		1525	#define _m_psubw _mm_sub_pi16
		1526	#define _m_psubd _mm_sub_pi32
		1527	#define _m_psubsb _mm_subs_pi8
		1528	#define _m_psubsw _mm_subs_pi16
		1529	#define _m_psubusb _mm_subs_pu8
		1530	#define _m_psubusw _mm_subs_pu16
		1531	#define _m_pmaddwd _mm_madd_pi16
		1532	#define _m_pmulhw _mm_mulhi_pi16
		1533	#define _m_pmullw _mm_mullo_pi16
		1534	#define _m_psllw _mm_sll_pi16
		1535	#define _m_psllwi _mm_slli_pi16
		1536	#define _m_pslld _mm_sll_pi32
		1537	#define _m_pslldi _mm_slli_pi32
		1538	#define _m_psllq _mm_sll_si64
		1539	#define _m_psllqi _mm_slli_si64
		1540	#define _m_psraw _mm_sra_pi16
		1541	#define _m_psrawi _mm_srai_pi16
		1542	#define _m_psrad _mm_sra_pi32
		1543	#define _m_psradi _mm_srai_pi32
		1544	#define _m_psrlw _mm_srl_pi16
		1545	#define _m_psrlwi _mm_srli_pi16
		1546	#define _m_psrld _mm_srl_pi32
		1547	#define _m_psrldi _mm_srli_pi32
		1548	#define _m_psrlq _mm_srl_si64
		1549	#define _m_psrlqi _mm_srli_si64
		1550	#define _m_pand _mm_and_si64
		1551	#define _m_pandn _mm_andnot_si64
		1552	#define _m_por _mm_or_si64
		1553	#define _m_pxor _mm_xor_si64
		1554	#define _m_pcmpeqb _mm_cmpeq_pi8
		1555	#define _m_pcmpeqw _mm_cmpeq_pi16
		1556	#define _m_pcmpeqd _mm_cmpeq_pi32
		1557	#define _m_pcmpgtb _mm_cmpgt_pi8
		1558	#define _m_pcmpgtw _mm_cmpgt_pi16
		1559	#define _m_pcmpgtd _mm_cmpgt_pi32
		1560
		1561	#endif /* __MMINTRIN_H */
		1562

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite//llvm-build/x86_64/lib/clang/16/include/mmintrin.h – Rev 14