/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#error                                                                         \
    "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif
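/* Editorial note (not part of the upstream header): a minimal sketch of the
   GNU C rewrite suggested above, for a hypothetical caller that adds four
   32-bit integers element-wise:

     typedef int __v4si_example __attribute__((__vector_size__(16)));

     static inline __v4si_example
     add4_example(__v4si_example __a, __v4si_example __b) {
       return __a + __b;   // portable replacement for _mm_add_epi32(__a, __b)
     }

   GCC and Clang lower generic vector arithmetic to the best instructions on
   each target, which is why such rewrites are preferred over the
   compatibility intrinsics defined below.  */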

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>
#include <tmmintrin.h>

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_pd(__m128d __A, int __rounding) {
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v2df)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v2df)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v2df)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v2df)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v2df)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128d)__r;
}
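/* Usage sketch (illustrative only, assuming the SSE2 compatibility layer
   reached through the include chain above provides _mm_set_pd):

     __m128d __v = _mm_set_pd(2.5, -1.5);   // elements {-1.5, 2.5}
     __m128d __f = _mm_round_pd(__v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
     // __f holds {-2.0, 2.0}; _MM_FROUND_NO_EXC suppresses inexact exceptions
     // by saving, clearing, and restoring the FPSCR enable bits as above.  */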

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
  __B = _mm_round_pd(__B, __rounding);
  __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
  return (__m128d)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ps(__m128 __A, int __rounding) {
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v4sf)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v4sf)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v4sf)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v4sf)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v4sf)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
  __B = _mm_round_ps(__B, __rounding);
  __v4sf __r = (__v4sf)__A;
  __r[0] = ((__v4sf)__B)[0];
  return (__m128)__r;
}

#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
  __v16qi __result = (__v16qi)__A;

  __result[__N & 0xf] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
  __v4si __result = (__v4si)__A;

  __result[__N & 3] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
  __v2di __result = (__v2di)__A;

  __result[__N & 1] = __D;

  return (__m128i)__result;
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
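/* Usage sketch (illustrative only): _mm_extract_ps returns the raw bit
   pattern of the selected float as an int, matching the x86 semantics,
   while the integer insert/extract pairs round-trip values directly:

     __m128 __v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);   // element 0 is 1.0f
     int __bits = _mm_extract_ps(__v, 0);               // 0x3f800000, not 1
     __m128i __w = _mm_insert_epi32(_mm_setzero_si128(), 42, 2);
     // _mm_extract_epi32(__w, 2) == 42
*/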

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qi __charmask = vec_splats((signed char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  };
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}
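/* Usage sketch (illustrative only): each low bit of __imm8 selects the
   corresponding 32-bit lane from __B (1) or __A (0), which is what the
   permute control table above encodes; for __imm8 == 0x5 lanes 0 and 2
   come from __B:

     __m128 __a = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
     __m128 __b = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);
     __m128 __c = _mm_blend_ps(__a, __b, 0x5);   // lanes {4.0f, 1.0f, 6.0f, 3.0f}
*/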

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}

#ifdef _ARCH_PWR8
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
#endif

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testz_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
  return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testnzc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
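/* Usage sketch (illustrative only): _mm_testz_si128 returns 1 when
   (__A & __B) is all zeros and _mm_testc_si128 returns 1 when (~__A & __B)
   is all zeros, mirroring the ZF/CF results of the x86 PTEST instruction:

     __m128i __m = _mm_set1_epi32(0x0000ffff);
     __m128i __v = _mm_set1_epi32(0x00ff0000);
     // _mm_testz_si128(__m, __v) == 1 because no bit is set in both operands.
*/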

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
}
#endif
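/* Usage sketch (illustrative only): _mm_mullo_epi32 keeps the low 32 bits
   of each 32x32 product, while _mm_mul_epi32 produces two full 64-bit
   products from the even-numbered 32-bit elements:

     __m128i __x = _mm_set1_epi32(0x10000);      // 65536 in every lane
     __m128i __lo = _mm_mullo_epi32(__x, __x);   // each lane: 0 (low 32 bits of 2^32)
     __m128i __wide = _mm_mul_epi32(__x, __x);   // two 64-bit lanes of 0x100000000
*/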

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi16(__m128i __A) {
  return (__m128i)vec_unpackh((__v16qi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi32(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi32(__m128i __A) {
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}
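/* Usage sketch (illustrative only): the _mm_cvtepi* forms sign-extend and
   the _mm_cvtepu* forms zero-extend the low elements of the source vector:

     __m128i __b = _mm_set1_epi8((char)0xff);   // every byte is -1 / 0xff
     __m128i __s = _mm_cvtepi8_epi32(__b);      // four 32-bit lanes of -1
     __m128i __u = _mm_cvtepu8_epi32(__b);      // four 32-bit lanes of 255
*/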

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else  /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
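/* Usage sketch (illustrative only): the minimum value lands in halfword 0
   of the result and its index in halfword 1 (bits [18:16] of the vector):

     __m128i __v = _mm_set_epi16(9, 8, 7, 3, 6, 5, 4, 10);
     __m128i __m = _mm_minpos_epu16(__v);
     // _mm_extract_epi16(__m, 0) == 3 and _mm_extract_epi16(__m, 1) == 4
*/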

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
}
#endif

#else
#include_next <smmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* SMMINTRIN_H_ */