/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathvector.inl

Abstract:

    XNA math library for Windows and Xbox 360: Vector functions
--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__

#if defined(_XM_NO_INTRINSICS_)
#define XMISNAN(x)  ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x)  ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
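
// Note: these tests rely on the IEEE-754 single precision layout, where
// 0x7F800000 masks the exponent bits and 0x007FFFFF the mantissa. An all-ones
// exponent denotes NaN when the mantissa is non-zero, and +/-infinity when the
// mantissa is zero; e.g. the quiet NaN pattern 0x7FC00000 satisfies XMISNAN.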

/****************************************************************************
 *
 * General Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Assignment operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
XMFINLINE XMVECTOR XMVectorSet
(
    FLOAT x,
    FLOAT y,
    FLOAT z,
    FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
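
// Note the reversed argument order above: XMVectorSet takes components as
// x,y,z,w while _mm_set_ps fills lanes from the highest one down. For example:
//     XMVECTOR v = XMVectorSet( 1.0f, 2.0f, 3.0f, 4.0f ); // v = <1,2,3,4>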

//------------------------------------------------------------------------------
// Initialize a vector with four integer values
XMFINLINE XMVECTOR XMVectorSetInt
(
    UINT x,
    UINT y,
    UINT z,
    UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set_epi32( w, z, y, x );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
XMFINLINE XMVECTOR XMVectorReplicate
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
    CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    FLOAT Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
XMFINLINE XMVECTOR XMVectorReplicateInt
(
    UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_set1_epi32( Value );
    return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
    CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    UINT Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask)
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the x component of the vector
XMFINLINE XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[0];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the y component of the vector
XMFINLINE XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[1];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the z component of the vector
XMFINLINE XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[2];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the w component of the vector
XMFINLINE XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[3];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = 1.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x34000000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x80000000U;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32( 0x80000000 );
    return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
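
// Illustrative use of the sign mask (not in the original source): since the
// mask has only bit 31 set in each lane, XORing a vector with it flips every
// sign. A sketch, assuming the xnamath bitwise helper XMVectorXorInt:
//     XMVECTOR vNegated = XMVectorXorInt( V, XMVectorSplatSignMask() );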

//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
{
    XMASSERT( i <= 3 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(V);
#else
    return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[2];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
    XMASSERT( f != 0 );
    XMASSERT( i <  4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store the X component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    return tmp.u[i];
#else
    return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
    XMASSERT( x != 0 );
    XMASSERT( i <  4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    *x = tmp.u[i];
#else
    *x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store the X component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Set a single indexed floating point component
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[0] = x;
    return vResult;
#else
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
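
// The shuffle/move/shuffle sequences above are the standard SSE idiom for
// writing a single lane: _mm_move_ss can only replace lane 0, so the target
// lane is first rotated into lane 0, overwritten from the scalar, then rotated
// back into place, avoiding a round trip through memory.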

//------------------------------------------------------------------------------

// Sets a component of a vector to a floating point value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = *f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = *f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_f32[0] = *x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = *y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = *z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[0] = x;
    return vResult;
#else
    __m128i vTemp = _mm_cvtsi32_si128(x);
    XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = *x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_u32[0] = *x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
    XMVECTOR vResult = _mm_move_ss(V,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = *y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = *z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorPermute
// operations.  Visualize the two vectors V1 and V2 given
// in a permute as arranged back to back in a linear fashion,
// such that they form an array of 8 floating point values.
// The four integers specified in XMVectorPermuteControl
// will serve as indices into the array to select components
// from the two vectors.  ElementIndex0 is used to select
// an element from the vectors to be placed in the first
// component of the resulting vector, ElementIndex1 is used
// to select an element for the second component, etc.

XMFINLINE XMVECTOR XMVectorPermuteControl
(
    UINT     ElementIndex0,
    UINT     ElementIndex1,
    UINT     ElementIndex2,
    UINT     ElementIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vControl;
    static CONST UINT ControlElement[] = {
                    XM_PERMUTE_0X,
                    XM_PERMUTE_0Y,
                    XM_PERMUTE_0Z,
                    XM_PERMUTE_0W,
                    XM_PERMUTE_1X,
                    XM_PERMUTE_1Y,
                    XM_PERMUTE_1Z,
                    XM_PERMUTE_1W
                };
    XMASSERT(ElementIndex0 < 8);
    XMASSERT(ElementIndex1 < 8);
    XMASSERT(ElementIndex2 < 8);
    XMASSERT(ElementIndex3 < 8);

    vControl.u[0] = ControlElement[ElementIndex0];
    vControl.u[1] = ControlElement[ElementIndex1];
    vControl.u[2] = ControlElement[ElementIndex2];
    vControl.u[3] = ControlElement[ElementIndex3];
    return vControl.v;
#else
#endif
}

//------------------------------------------------------------------------------

// Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
// entries into a single 16 byte vector and return it. Index 0-15 = V1,
// 16-31 = V2
XMFINLINE XMVECTOR XMVectorPermute
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
    const BYTE *aByte[2];
    XMVECTOR Result;
    UINT i, uIndex, VectorIndex;
    const BYTE *pControl;
    BYTE *pWork;

    // Indices must be in range from 0 to 31
    XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0);

    // 0-15 = V1, 16-31 = V2
    aByte[0] = (const BYTE*)(&V1);
    aByte[1] = (const BYTE*)(&V2);
    i = 16;
    pControl = (const BYTE *)(&Control);
    pWork = (BYTE *)(&Result);
    do {
        // Get the byte to map from
        uIndex = pControl[0];
        ++pControl;
        VectorIndex = (uIndex>>4)&1;
        uIndex &= 0x0F;
#if defined(_XM_X86_) || defined(_XM_X64_)
        uIndex ^= 3; // Swap byte ordering on little endian machines
#endif
        pWork[0] = aByte[VectorIndex][uIndex];
        ++pWork;
    } while (--i);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_PREFAST_) || defined(XMDEBUG)
    // Indices must be in range from 0 to 31
    static const XMVECTORI32 PermuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
    XMVECTOR vAssert = _mm_and_ps(Control,PermuteTest);
    __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
    XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
#endif
    // Store the vectors onto local memory on the stack
    XMVECTOR Array[2];
    Array[0] = V1;
    Array[1] = V2;
    // Output vector, on the stack
    XMVECTORU8 vResult;
    // Get pointer to the two vectors on the stack
    const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
    // Store the Control vector on the stack to access the bytes
    // don't use Control, it can cause a register variable to spill on the stack.
    XMVECTORU8 vControl;
    vControl.v = Control;   // Write to memory
    UINT i = 0;
    do {
        UINT ComponentIndex = vControl.u[i] & 0x1FU;
        ComponentIndex ^= 3; // Swap byte ordering
        vResult.u[i] = pInput[ComponentIndex];
    } while (++i<16);
    return vResult;
#else // _XM_SSE_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
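
// Illustrative usage (not in the original source): with the XM_PERMUTE_*
// constants from xnamath.h, indices 0-3 select components of V1 and 4-7
// components of V2, so
//     XMVECTOR vCtrl = XMVectorPermuteControl( 0, 4, 1, 5 );
//     XMVECTOR vR    = XMVectorPermute( V1, V2, vCtrl );
// yields vR = < V1.x, V2.x, V1.y, V2.y >, matching XMVectorMergeXY below.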

//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorSelect
// operations.  The four integers specified in XMVectorSelectControl
// serve as indices to select between components in two vectors.
// The first index controls selection for the first component of
// the vectors involved in a select operation, the second index
// controls selection for the second component etc.  A value of
// zero for an index causes the corresponding component from the first
// vector to be selected whereas a one causes the component from the
// second vector to be selected instead.

XMFINLINE XMVECTOR XMVectorSelectControl
(
    UINT VectorIndex0,
    UINT VectorIndex1,
    UINT VectorIndex2,
    UINT VectorIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // x=Index0,y=Index1,z=Index2,w=Index3
    __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
    // Any non-zero entries become 0xFFFFFFFF else 0
    vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
    return reinterpret_cast<__m128 *>(&vTemp)[0];
#else
    XMVECTOR    ControlVector;
    CONST UINT  ControlElement[] =
                {
                    XM_SELECT_0,
                    XM_SELECT_1
                };

    XMASSERT(VectorIndex0 < 2);
    XMASSERT(VectorIndex1 < 2);
    XMASSERT(VectorIndex2 < 2);
    XMASSERT(VectorIndex3 < 2);

    ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
    ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
    ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
    ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];

    return ControlVector;

#endif
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSelect
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
    Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
    Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
    Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
    XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
    return _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
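
// Illustrative usage (not in the original source): per-component selection,
//     XMVECTOR vCtrl = XMVectorSelectControl( 0, 1, 0, 1 );
//     XMVECTOR vR    = XMVectorSelect( V1, V2, vCtrl );
// gives vR = < V1.x, V2.y, V1.z, V2.w >. Because each result bit is
// (V1 & ~Control) | (V2 & Control), the all-or-nothing masks produced by the
// comparison functions below can also be passed directly as Control.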

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeXY
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0];
    Result.vector4_u32[1] = V2.vector4_u32[0];
    Result.vector4_u32[2] = V1.vector4_u32[1];
    Result.vector4_u32[3] = V2.vector4_u32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpacklo_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeZW
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[2];
    Result.vector4_u32[1] = V2.vector4_u32[2];
    Result.vector4_u32[2] = V1.vector4_u32[3];
    Result.vector4_u32[3] = V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpackhi_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;

    Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpeq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqualR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
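
// Illustrative usage (not in the original source, assuming the XMComparison*
// helpers defined in xnamath.h for testing CR6 masks):
//     UINT cr;
//     XMVectorEqualR( &cr, V1, V2 );
//     if ( XMComparisonAllTrue( cr ) )  { /* every component compared equal */ }
//     if ( XMComparisonAnyFalse( cr ) ) { /* at least one component differed */ }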
1508
 
1509
//------------------------------------------------------------------------------
1510
// Treat the components of the vectors as unsigned integers and
1511
// compare individual bits between the two.  This is useful for
1512
// comparing control vectors and result vectors returned from
1513
// other comparison operations.
1514
 
1515
XMFINLINE XMVECTOR XMVectorEqualInt
1516
(
1517
    FXMVECTOR V1, 
1518
    FXMVECTOR V2
1519
)
1520
{
1521
#if defined(_XM_NO_INTRINSICS_)
1522
 
1523
    XMVECTOR Control;
1524
 
1525
    Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
1526
    Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
1527
    Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
1528
    Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;
1529
 
1530
    return Control;
1531
 
1532
#elif defined(_XM_SSE_INTRINSICS_)
1533
	__m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1534
    return reinterpret_cast<__m128 *>(&V)[0];
1535
#else // _XM_VMX128_INTRINSICS_
1536
#endif // _XM_VMX128_INTRINSICS_
1537
}
1538
 
1539
//------------------------------------------------------------------------------
1540
 
1541
XMFINLINE XMVECTOR XMVectorEqualIntR
1542
(
1543
    UINT*    pCR,
1544
    FXMVECTOR V1, 
1545
    FXMVECTOR V2
1546
)
1547
{
1548
#if defined(_XM_NO_INTRINSICS_)
1549
 
1550
    XMVECTOR Control;
1551
 
1552
    XMASSERT(pCR);
1553
 
1554
    Control = XMVectorEqualInt(V1, V2);
1555
 
1556
    *pCR = 0;
1557
 
1558
    if (XMVector4EqualInt(Control, XMVectorTrueInt()))
1559
    {
1560
        // All elements are equal
1561
        *pCR |= XM_CRMASK_CR6TRUE;
1562
    }
1563
    else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
1564
    {
1565
        // All elements are not equal
1566
        *pCR |= XM_CRMASK_CR6FALSE;
1567
    }
1568
 
1569
    return Control;
1570
 
1571
#elif defined(_XM_SSE_INTRINSICS_)
1572
    XMASSERT(pCR);
1573
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1574
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
1575
    UINT CR = 0;
1576
    if (iTemp==0x0F)
1577
    {
1578
        CR = XM_CRMASK_CR6TRUE;
1579
    }
1580
    else if (!iTemp)
1581
    {
1582
        CR = XM_CRMASK_CR6FALSE;
1583
    }
1584
    *pCR = CR;
1585
    return reinterpret_cast<__m128 *>(&V)[0];
1586
#else // _XM_VMX128_INTRINSICS_
1587
#endif // _XM_VMX128_INTRINSICS_
1588
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)

    FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
    XMVECTOR Control;

    fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
    fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
    fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
    fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];

    fDeltax = fabsf(fDeltax);
    fDeltay = fabsf(fDeltay);
    fDeltaz = fabsf(fDeltaz);
    fDeltaw = fabsf(fDeltaw);

    Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
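
// Note on the SSE path above: with no SSE absolute-value instruction available,
// |d| is computed from the identity |d| == max(d, 0 - d), which holds for every
// finite float (e.g. d = -3: max(-3, 3) == 3). XMVectorAbs below uses the same
// trick.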

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpneq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpgt_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpge_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater or equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLess
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmplt_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmple_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorInBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    return vTemp1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorInBoundsR
(
    UINT*    pCR,
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR != 0 );

    ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    CR = 0;

    if (ux&uy&uz&uw)
    {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR != 0 );
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);

    UINT CR = 0;
    if (_mm_movemask_ps(vTemp1)==0xf) {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    return vTemp1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorIsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a NOT on the zero-mantissa test so it is true for NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If any are NaN, the signs are true after the merge above
    return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
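
// Note on the test above: an IEEE-754 single is NaN exactly when its exponent
// bits are all ones (0x7F800000 after masking) AND its mantissa bits are not all
// zero (an all-zero mantissa with that exponent is +/-INF instead). For example,
// 0x7FC00000 (the standard quiet NaN) passes, while 0x7F800000 (+INF) does not.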

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorIsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the signs are true.
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Rounding and clamping operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMin
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
    Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
    Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
    Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_min_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMax
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
    Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
    Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
    Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_max_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorRound
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Result;
    XMVECTOR       Bias;
    CONST XMVECTOR Zero = XMVectorZero();
    CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
    CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);

    Bias = XMVectorLess(V, Zero);
    Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
    Result = XMVectorAdd(V, Bias);
    Result = XMVectorTruncate(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23, as well as NaN and INF, have no fractional bits)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding
    __m128i vInt = _mm_cvtps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
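
// Note on the masking above (a worked example): a 32-bit float carries 23
// mantissa bits, so any |x| >= 2^23 (8388608.0f) is already an integer and must
// pass through unchanged; _mm_cvtps_epi32 on such out-of-range values (or on
// NaN/INF) would instead produce the 0x80000000 "integer indefinite". E.g. for
// {2.5f, 1e9f, ...} the 2.5f lane is rounded to nearest (even) while the 1e9f
// lane is returned as-is.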

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTruncate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.vector4_f32[0] = (FLOAT)((INT)V.vector4_f32[0]);
    Result.vector4_f32[1] = (FLOAT)((INT)V.vector4_f32[1]);
    Result.vector4_f32[2] = (FLOAT)((INT)V.vector4_f32[2]);
    Result.vector4_f32[3] = (FLOAT)((INT)V.vector4_f32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23, as well as NaN and INF, have no fractional bits)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding with truncation
    __m128i vInt = _mm_cvttps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorFloor
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR vResult = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
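
// Note on the SSE path above: _mm_cvtps_epi32 rounds to nearest (even), so
// subtracting g_XMOneHalfMinusEpsilon (a value just under 0.5) first turns that
// into an approximation of floor: e.g. 1.0 - 0.4999999 = 0.5000001 rounds to 1,
// and -0.5 - 0.4999999 = -0.9999999 rounds to -1 == floorf(-0.5f). Unlike
// XMVectorRound above, this path does not mask off values >= 2^23, NaN, or INF.
// XMVectorCeiling below is the mirror image, adding the same constant.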

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCeiling
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorClamp
(
    FXMVECTOR V,
    FXMVECTOR Min,
    FXMVECTOR Max
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    XMASSERT(XMVector4LessOrEqual(Min, Max));

    Result = XMVectorMax(Min, V);
    Result = XMVectorMin(Max, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult;
    XMASSERT(XMVector4LessOrEqual(Min, Max));
    vResult = _mm_max_ps(Min,V);
    vResult = _mm_min_ps(vResult,Max);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSaturate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    CONST XMVECTOR Zero = XMVectorZero();

    return XMVectorClamp(V, Zero, g_XMOne.v);

#elif defined(_XM_SSE_INTRINSICS_)
    // Set <0 to 0
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    // Set >1 to 1
    return _mm_min_ps(vResult,g_XMOne);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Bitwise logical operations
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAndInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_and_ps(V1,V2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAndCInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorOrInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNorInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
    Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
    Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
    Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i Result;
    Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    Result = _mm_andnot_si128( Result,g_XMNegOneMask);
    return reinterpret_cast<__m128 *>(&Result)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorXorInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNegate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = -V.vector4_f32[0];
    Result.vector4_f32[1] = -V.vector4_f32[1];
    Result.vector4_f32[2] = -V.vector4_f32[2];
    Result.vector4_f32[3] = -V.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Z;

    Z = _mm_setzero_ps();

    return _mm_sub_ps( Z, V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
    Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
    Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
    Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_add_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAddAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Mask;
    XMVECTOR       Offset;
    XMVECTOR       Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Add the given angles together.  If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorAdd(V1, V2);

    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_add_ps(V1,V2);
    // Less than -Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Subtract 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
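
// Worked example: XM_PI is approximately 3.14159265f, so adding the angles
// 3.0f and 1.0f gives 4.0f, which is >= Pi; the code above subtracts 2*Pi and
// returns roughly -2.2831853f, back inside the half-open range [-Pi, Pi).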

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
    Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
    Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
    Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sub_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSubtractAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Mask;
    XMVECTOR       Offset;
    XMVECTOR       Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Subtract the given angles.  If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorSubtract(V1, V2);

    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_sub_ps(V1,V2);
    // Less than -Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Subtract 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMultiply
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        V1.vector4_f32[0] * V2.vector4_f32[0],
        V1.vector4_f32[1] * V2.vector4_f32[1],
        V1.vector4_f32[2] * V2.vector4_f32[2],
        V1.vector4_f32[3] * V2.vector4_f32[3]
    };
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_mul_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMultiplyAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
        (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
        (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
        (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_mul_ps( V1, V2 );
    return _mm_add_ps(vResult, V3 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR vResult = {
        V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
        V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
        V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
        V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R = _mm_mul_ps( V1, V2 );
    return _mm_sub_ps( V3, R );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorScale
(
    FXMVECTOR V,
    FLOAT    ScaleFactor
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        V.vector4_f32[0] * ScaleFactor,
        V.vector4_f32[1] * ScaleFactor,
        V.vector4_f32[2] * ScaleFactor,
        V.vector4_f32[3] * ScaleFactor
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
    return _mm_mul_ps(vResult,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (XMISINF(V.vector4_f32[i]))
        {
            Result.vector4_f32[i] = (V.vector4_f32[i] < 0.0f) ? -0.0f : 0.0f;
        }
        else if (V.vector4_u32[i] == 0x80000000U)
        {
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_u32[i] == 0)
        {
            Result.vector4_u32[i] = 0x7F800000;
        }
        else
        {
            Result.vector4_f32[i] = 1.0f / V.vector4_f32[i];
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_rcp_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
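
// Note on the scalar path above (and in XMVectorReciprocalSqrtEst below):
// 0x7F800000 and 0xFF800000 are the bit patterns of +INF and -INF, written
// through the vector4_u32 union member because no float literal spells an
// infinity here. The zero tests compare bit patterns (0 for +0.0f, 0x80000000
// for -0.0f) because ordinary float == cannot tell the two zeroes apart.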

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVectorReciprocalEst(V);
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_div_ps(g_XMOne,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return an estimated square root
XMFINLINE XMVECTOR XMVectorSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Select;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSqrt
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Zero;
    XMVECTOR VEqualsInfinity, VEqualsZero;
    XMVECTOR Select;
    XMVECTOR Result;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    Result = XMVectorReciprocalSqrt(V);
    Zero = XMVectorZero();
    VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (V.vector4_u32[i] == 0)
        {
            Result.vector4_u32[i] = 0x7F800000;
        }
        else if (V.vector4_u32[i] == 0x80000000U)
        {
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_f32[i] < 0.0f)
        {
            Result.vector4_u32[i] = 0x7FFFFFFF;
        }
        else if (XMISINF(V.vector4_f32[i]))
        {
            Result.vector4_f32[i] = 0.0f;
        }
        else
        {
            Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_rsqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalSqrt
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return XMVectorReciprocalSqrtEst(V);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_sqrt_ps(V);
    vResult = _mm_div_ps(g_XMOne,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorExpEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
    Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
    Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
    Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setr_ps(
        powf(2.0f,XMVectorGetX(V)),
        powf(2.0f,XMVectorGetY(V)),
        powf(2.0f,XMVectorGetZ(V)),
        powf(2.0f,XMVectorGetW(V)));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorExp
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR               E, S;
    XMVECTOR               R, R2, R3, R4;
    XMVECTOR               V0, V1;
    XMVECTOR               C0X, C0Y, C0Z, C0W;
    XMVECTOR               C1X, C1Y, C1Z, C1W;
    XMVECTOR               Result;
    static CONST XMVECTOR  C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTOR  C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    R = XMVectorFloor(V);
    E = XMVectorExpEst(R);
    R = XMVectorSubtract(V, R);
    R2 = XMVectorMultiply(R, R);
    R3 = XMVectorMultiply(R, R2);
    R4 = XMVectorMultiply(R2, R2);

    C0X = XMVectorSplatX(C0);
    C0Y = XMVectorSplatY(C0);
    C0Z = XMVectorSplatZ(C0);
    C0W = XMVectorSplatW(C0);

    C1X = XMVectorSplatX(C1);
    C1Y = XMVectorSplatY(C1);
    C1Z = XMVectorSplatZ(C1);
    C1W = XMVectorSplatW(C1);

    V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
    V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
    V0 = XMVectorMultiplyAdd(R3, C0W, V0);

    V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
    V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
    V1 = XMVectorMultiplyAdd(R3, C1W, V1);

    S = XMVectorMultiplyAdd(R4, V1, V0);

    S = XMVectorReciprocal(S);
    Result = XMVectorMultiply(E, S);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    // Get the integer of the input
    XMVECTOR R = XMVectorFloor(V);
    // Get the exponent estimate
    XMVECTOR E = XMVectorExpEst(R);
    // Get the fractional only
    R = _mm_sub_ps(V,R);
    // Get R^2
    XMVECTOR R2 = _mm_mul_ps(R,R);
    // And R^3
    XMVECTOR R3 = _mm_mul_ps(R,R2);

    XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
    V0 = _mm_mul_ps(V0,R);
    XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V0 = _mm_add_ps(V0,vConstants);

    XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
    V1 = _mm_mul_ps(V1,R);
    vConstants = _mm_load_ps1(&C1.f[0]);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V1 = _mm_add_ps(V1,vConstants);
    // R2 = R^4
    R2 = _mm_mul_ps(R2,R2);
    R2 = _mm_mul_ps(R2,V1);
    R2 = _mm_add_ps(R2,V0);
    E = _mm_div_ps(E,R2);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
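
// Note on the approximation above: writing V = floor(V) + R with 0 <= R < 1,
// 2^V = 2^floor(V) * 2^R. E holds the exact 2^floor(V), while the degree-7
// polynomial in R (the C0/C1 coefficients are (-ln 2)^n / n!) is the Taylor
// series of 2^-R, so the final divide E / S recovers E * 2^R.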

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLogEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    FLOAT fScale = (1.0f / logf(2.0f));
    XMVECTOR Result;

    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
    XMVECTOR vResult = _mm_setr_ps(
        logf(XMVectorGetX(V)),
        logf(XMVectorGetY(V)),
        logf(XMVectorGetZ(V)),
        logf(XMVectorGetW(V)));
    vResult = _mm_mul_ps(vResult,vScale);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorLog
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fScale = (1.0f / logf(2.0f));
    XMVECTOR Result;

    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
    XMVECTOR vResult = _mm_setr_ps(
        logf(XMVectorGetX(V)),
        logf(XMVectorGetY(V)),
        logf(XMVectorGetZ(V)),
        logf(XMVectorGetW(V)));
    vResult = _mm_mul_ps(vResult,vScale);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
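
// Note: both XMVectorLogEst and XMVectorLog compute the base-2 logarithm through
// the change-of-base identity log2(x) = ln(x) / ln(2), which is why each lane is
// logf(lane) scaled by 1/logf(2.0f); e.g. log2(8) = 2.0794415f / 0.6931472f = 3.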

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorPowEst
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
    Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
    Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
    Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setr_ps(
        powf(XMVectorGetX(V1),XMVectorGetX(V2)),
        powf(XMVectorGetY(V1),XMVectorGetY(V2)),
        powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
        powf(XMVectorGetW(V1),XMVectorGetW(V2)));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorPow
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    return XMVectorPowEst(V1, V2);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAbs
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        fabsf(V.vector4_f32[0]),
        fabsf(V.vector4_f32[1]),
        fabsf(V.vector4_f32[2]),
        fabsf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setzero_ps();
    vResult = _mm_sub_ps(vResult,V);
    vResult = _mm_max_ps(vResult,V);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMod
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR Quotient;
    XMVECTOR Result;

    // V1 % V2 = V1 - V2 * truncate(V1 / V2)
    Reciprocal = XMVectorReciprocal(V2);
    Quotient = XMVectorMultiply(V1, Reciprocal);
    Quotient = XMVectorTruncate(Quotient);
    Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_div_ps(V1, V2);
    vResult = XMVectorTruncate(vResult);
    vResult = _mm_mul_ps(vResult,V2);
    vResult = _mm_sub_ps(V1,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
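
// Worked example of the identity above: for V1 lane = 7.5f and V2 lane = 2.0f,
// truncate(7.5 / 2.0) = truncate(3.75) = 3, and 7.5 - 2.0 * 3 = 1.5, matching
// fmodf(7.5f, 2.0f); like fmodf, the result keeps the sign of V1.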

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorModAngles
(
    FXMVECTOR Angles
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR Result;

    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
    V = XMVectorRound(V);
    Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
    // Use the inline XMVectorRound due to the complexity of rounding
    vResult = XMVectorRound(vResult);
    vResult = _mm_mul_ps(vResult,g_XMTwoPi);
    vResult = _mm_sub_ps(Angles,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
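
// Worked example of the wrap above: Angles = 7.0f gives 7.0 / (2*Pi) ~= 1.1141,
// which rounds to 1, so the result is 7.0 - 2*Pi * 1 ~= 0.7168147f, i.e. the
// same angle expressed inside [-Pi, Pi).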
3180
 
3181
//------------------------------------------------------------------------------
3182
 
3183
XMINLINE XMVECTOR XMVectorSin
3184
(
3185
    FXMVECTOR V
3186
)
3187
{
3188
 
3189
#if defined(_XM_NO_INTRINSICS_)
3190
 
3191
    XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
3192
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3193
    XMVECTOR Result;
3194
 
3195
    V1 = XMVectorModAngles(V);
3196
 
3197
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - 
3198
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3199
    V2  = XMVectorMultiply(V1, V1);
3200
    V3  = XMVectorMultiply(V2, V1);
3201
    V5  = XMVectorMultiply(V3, V2);
    V7  = XMVectorMultiply(V5, V2);
    V9  = XMVectorMultiply(V7, V2);
    V11 = XMVectorMultiply(V9, V2);
    V13 = XMVectorMultiply(V11, V2);
    V15 = XMVectorMultiply(V13, V2);
    V17 = XMVectorMultiply(V15, V2);
    V19 = XMVectorMultiply(V17, V2);
    V21 = XMVectorMultiply(V19, V2);
    V23 = XMVectorMultiply(V21, V2);

    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    Result = XMVectorMultiplyAdd(S1, V3, V1);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);
    Result = XMVectorMultiplyAdd(S4, V9, Result);
    Result = XMVectorMultiplyAdd(S5, V11, Result);
    Result = XMVectorMultiplyAdd(S6, V13, Result);
    Result = XMVectorMultiplyAdd(S7, V15, Result);
    Result = XMVectorMultiplyAdd(S8, V17, Result);
    Result = XMVectorMultiplyAdd(S9, V19, Result);
    Result = XMVectorMultiplyAdd(S10, V21, Result);
    Result = XMVectorMultiplyAdd(S11, V23, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR vResult = XMVectorModAngles(V);
    // Each one is V raised to the "num" power
    // V2 = V1^2
    XMVECTOR V2  = _mm_mul_ps(vResult,vResult);
    // V1^3
    XMVECTOR vPower = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^5
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^7
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^9
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^11
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^13
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^15
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^17
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^19
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^21
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^23
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
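
// Added usage sketch (not part of the original source): a quick spot check of
// the 11-term series above against known values.
//
//     XMVECTOR Angles = XMVectorSet(0.0f, XM_PI / 6.0f, XM_PI / 4.0f, XM_PI / 2.0f);
//     XMVECTOR S = XMVectorSin(Angles);
//     // Componentwise: ~0.0f, ~0.5f, ~0.70710678f, ~1.0f, to within the
//     // accuracy of the Taylor expansion evaluated above.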

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorCos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Result;

    V1 = XMVectorModAngles(V);

    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V1, V1);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);
    V8 = XMVectorMultiply(V4, V4);
    V10 = XMVectorMultiply(V6, V4);
    V12 = XMVectorMultiply(V6, V6);
    V14 = XMVectorMultiply(V8, V6);
    V16 = XMVectorMultiply(V8, V8);
    V18 = XMVectorMultiply(V10, V8);
    V20 = XMVectorMultiply(V10, V10);
    V22 = XMVectorMultiply(V12, V10);

    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);
    Result = XMVectorMultiplyAdd(C4, V8, Result);
    Result = XMVectorMultiplyAdd(C5, V10, Result);
    Result = XMVectorMultiplyAdd(C6, V12, Result);
    Result = XMVectorMultiplyAdd(C7, V14, Result);
    Result = XMVectorMultiplyAdd(C8, V16, Result);
    Result = XMVectorMultiplyAdd(C9, V18, Result);
    Result = XMVectorMultiplyAdd(C10, V20, Result);
    Result = XMVectorMultiplyAdd(C11, V22, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR V2 = XMVectorModAngles(V);
    // Each one is V raised to the "num" power
    // V2 = V1^2
    V2  = _mm_mul_ps(V2,V2);
    // V^2
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,V2);
    XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);

    // V^4
    XMVECTOR vPower = _mm_mul_ps(V2,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^6
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^8
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^10
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^12
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^14
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^16
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^18
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^20
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^22
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
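
// Added usage sketch (not part of the original source):
//
//     XMVECTOR Angles = XMVectorSet(0.0f, XM_PI / 3.0f, XM_PI / 2.0f, XM_PI);
//     XMVECTOR C = XMVectorCos(Angles);
//     // Componentwise: ~1.0f, ~0.5f, ~0.0f, ~-1.0f within series accuracy;
//     // XMVectorModAngles makes inputs outside [-Pi, Pi) valid as well.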

//------------------------------------------------------------------------------

XMINLINE VOID XMVectorSinCos
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR  V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    Sin = XMVectorMultiplyAdd(S1, V3, V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    S1  = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    S2  = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    S3  = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    S4  = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    S5  = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    S6  = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    S7  = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    S8  = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    S9  = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);

    C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);

    S1 = _mm_mul_ps(S1,V3);
    Sin = _mm_add_ps(S1,V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    Cos = _mm_mul_ps(C1,V2);
    Cos = _mm_add_ps(Cos,g_XMOne);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
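
// Added usage sketch (not part of the original source): both results come
// from one shared set of powers of V, so this is cheaper than separate calls
// to XMVectorSin and XMVectorCos.
//
//     XMVECTOR S, C;
//     XMVectorSinCos(&S, &C, XMVectorReplicate(XM_PIDIV4));
//     // Every lane of S and C is ~0.70710678f, and S*S + C*C ~= 1 per lane.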

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorTan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    UINT     i;
    static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    for (i = 0; i < 4; i++)
    {
        VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
    }

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    VBIsEven = XMVectorAndInt(VB, Mask.v);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne.v, VCNearZero);

    R0 = XMVectorNegate(N);
    R1 = XMVectorReciprocal(D);
    R0 = XMVectorReciprocal(R0);
    R1 = XMVectorMultiply(N, R1);
    R0 = XMVectorMultiply(D, R0);

    VIsZero = XMVectorEqual(V, Zero);

    Result = XMVectorSelect(R0, R1, VBIsEven);

    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    VBIsEven = XMVectorAndInt(VB,Mask);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne, VCNearZero);
    R0 = XMVectorNegate(N);
    R1 = _mm_div_ps(N,D);
    R0 = _mm_div_ps(D,R0);
    VIsZero = XMVectorEqual(V, Zero);
    Result = XMVectorSelect(R0, R1, VBIsEven);
    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
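
// Added worked note (not part of the original source): the Cody and Waite
// reduction above writes V = VA * (Pi/2) + VC with |VC| <= Pi/4, where C0 and
// C1 together form a split, extra-precision representation of Pi/2. The
// rational polynomial N/D then approximates tan(VC); when the quotient VA is
// odd, the identity tan(x + Pi/2) = -1/tan(x) applies, which is why R0 =
// D / (-N) is selected for odd VB and R1 = N / D for even VB.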

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorSinH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V, Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = _mm_sub_ps(E1, E2);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
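
// Added worked derivation (not part of the original source), assuming
// XMVectorExp is the base-2 exponential, as its 1/ln(2) scale factor here
// suggests:
//
//     E1 = 2^(V/ln2 - 1) = e^V / 2
//     E2 = 2^(-V/ln2 - 1) = e^-V / 2
//     E1 - E2 = (e^V - e^-V) / 2 = sinh(V)
//
// The g_XMNegativeOne offset folds the divide-by-two into the exponent.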

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorCosH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);

    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = XMVectorAdd(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorTanH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    E = XMVectorMultiply(V, Scale.v);
    E = XMVectorExp(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocal(E);

    Result = XMVectorSubtract(g_XMOne.v, E);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExp(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocal(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
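
// Added worked derivation (not part of the original source), again assuming
// XMVectorExp is base-2: with Scale = 2/ln(2),
//
//     E = 2^(2V/ln2) = e^(2V)
//     E = E * 0.5 + 0.5 = (e^(2V) + 1) / 2
//     1 - 1/E = 1 - 2/(e^(2V) + 1) = (e^(2V) - 1)/(e^(2V) + 1) = tanh(V)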

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorASin
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1, vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2, vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3, vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_mul_ps(R0,vAbsV);
    R0 = _mm_add_ps(R0,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_mul_ps(R1,vAbsV);
    R1 = _mm_add_ps(R1,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_mul_ps(R2,vAbsV);
    R2 = _mm_add_ps(R2,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_mul_ps(R3,vAbsV);
    R3 = _mm_add_ps(R3,vConstants);

    // V3 = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants, vAbsV);
    // Mul by V^3
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Merge the results
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V^2 retaining sign)
    vConstants = _mm_mul_ps(vAbsV, V);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
    // Do NOT use rsqrt/mul. This needs the precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R0 = _mm_add_ps(R0,R1);
    return R0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
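
// Added usage sketch (not part of the original source):
//
//     XMVECTOR V = XMVectorSet(-1.0f, -0.5f, 0.5f, 1.0f);
//     XMVECTOR Angles = XMVectorASin(V);
//     // Expected componentwise: ~-Pi/2, ~-Pi/6, ~Pi/6, ~Pi/2. The
//     // OnePlusEpsilon bias above keeps the sqrt argument positive so that
//     // inputs of exactly +/-1.0 do not produce a NaN.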

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorACos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    Result = XMVectorSubtract(g_XMHalfPi.v, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
    // Uses only 6 registers for good code on x86 targets
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Perform the series in precision groups to
    // retain precision across 20 bits. (3 bits of imprecision due to operations)
    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);
    R0 = _mm_mul_ps(R0,vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1,vConstants);
    R1 = _mm_mul_ps(R1, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_add_ps(R1,vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2,vConstants);
    R2 = _mm_mul_ps(R2, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_add_ps(R2,vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3,vConstants);
    R3 = _mm_mul_ps(R3, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_add_ps(R3,vConstants);

    // vConstants = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Add the pair of values together here to retain
    // as much precision as possible
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V*abs(V))
    vConstants = _mm_mul_ps(V,vAbsV);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    // Epsilon exists to allow 1.0 as an answer
    vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
    // Use sqrt instead of rsqrt for precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R1 = _mm_add_ps(R1,R0);
    vConstants = _mm_sub_ps(g_XMHalfPi,R1);
    return vConstants;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
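
// Added usage sketch (not part of the original source), using the identity
// the routine itself relies on:
//
//     XMVECTOR V = XMVectorReplicate(0.5f);
//     XMVECTOR A = XMVectorACos(V);   // each lane ~= Pi/3
//     // Equivalently, XMVectorACos(V) ~= XMVectorSubtract(g_XMHalfPi.v, XMVectorASin(V)).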

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute inverse tangent.

    XMVECTOR N, D;
    XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
    XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
    XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
    XMVECTOR Zero;
    XMVECTOR NegativeHalfPi;
    XMVECTOR Angle1, Angle2;
    XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
    XMVECTOR NegativeResult, Result;
    XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
    static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    Zero = XMVectorZero();

    P0 = XMVectorSplatX(ATanConstants0);
    P1 = XMVectorSplatY(ATanConstants0);
    P2 = XMVectorSplatZ(ATanConstants0);
    P3 = XMVectorSplatW(ATanConstants0);

    Q0 = XMVectorSplatX(ATanConstants1);
    Q1 = XMVectorSplatY(ATanConstants1);
    Q2 = XMVectorSplatZ(ATanConstants1);
    Q3 = XMVectorSplatW(ATanConstants1);

    Sqrt3 = XMVectorSplatX(ATanConstants2);
    Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
    TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
    Epsilon = XMVectorSplatW(ATanConstants2);

    HalfPi = XMVectorSplatX(ATanConstants3);
    OneThirdPi = XMVectorSplatY(ATanConstants3);
    OneSixthPi = XMVectorSplatZ(ATanConstants3);
    MaxV = XMVectorSplatW(ATanConstants3);

    VF = XMVectorAbs(V);
    ReciprocalF = XMVectorReciprocal(VF);

    F_GT_One = XMVectorGreater(VF, g_XMOne.v);

    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);

    F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);

    FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
    FA = XMVectorAdd(FA, g_XMNegativeOne.v);
    FB = XMVectorAdd(VF, Sqrt3);
    FB = XMVectorReciprocal(FB);
    FA = XMVectorMultiply(FA, FB);

    VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
    Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);

    AbsF = XMVectorAbs(VF);
    AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);

    G = XMVectorMultiply(VF, VF);

    D = XMVectorAdd(G, Q3);
    D = XMVectorMultiplyAdd(D, G, Q2);
    D = XMVectorMultiplyAdd(D, G, Q1);
    D = XMVectorMultiplyAdd(D, G, Q0);
    D = XMVectorReciprocal(D);

    N = XMVectorMultiplyAdd(P3, G, P2);
    N = XMVectorMultiplyAdd(N, G, P1);
    N = XMVectorMultiplyAdd(N, G, P0);
    N = XMVectorMultiply(N, G);
    Result = XMVectorMultiply(N, D);

    Result = XMVectorMultiplyAdd(Result, VF, VF);

    Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);

    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, F_GT_One);

    Result = XMVectorAdd(Result, Angle1);

    V_LT_Zero = XMVectorLess(V, Zero);
    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);

    MinV = XMVectorNegate(MaxV);
    NegativeHalfPi = XMVectorNegate(HalfPi);
    V_GT_MaxV = XMVectorGreater(V, MaxV);
    V_LT_MinV = XMVectorLess(V, MinV);
    Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
    Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    XMVECTOR VF = XMVectorAbs(V);
    XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
    XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
    XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    // Pi/3
    XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
    // Pi/6
    XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
    Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);

    // sqrt(3)-1
    XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
    FA = _mm_mul_ps(FA,VF);
    FA = _mm_add_ps(FA,VF);
    FA = _mm_add_ps(FA,g_XMNegativeOne);
    // sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
    vConstants = _mm_add_ps(vConstants,VF);
    FA = _mm_div_ps(FA,vConstants);

    // 2-sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
    // >2-sqrt(3)?
    vConstants = _mm_cmpgt_ps(VF,vConstants);
    VF = XMVectorSelect(VF, FA, vConstants);
    Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);

    XMVECTOR AbsF = XMVectorAbs(VF);

    XMVECTOR G = _mm_mul_ps(VF,VF);
    XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
    D = _mm_add_ps(D,G);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
    D = _mm_add_ps(D,vConstants);

    XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    XMVECTOR Result = _mm_div_ps(N,D);

    Result = _mm_mul_ps(Result,VF);
    Result = _mm_add_ps(Result,VF);
    // Epsilon
    vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
    vConstants = _mm_cmpge_ps(vConstants,AbsF);
    Result = XMVectorSelect(Result,VF,vConstants);

    XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
    Result = _mm_add_ps(Result,Angle1);

    Zero = _mm_cmpge_ps(Zero,V);
    NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,Zero);

    XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
    XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
    // Negate HalfPi
    HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
    MaxV = _mm_cmple_ps(MaxV,V);
    MinV = _mm_cmpge_ps(MinV,V);
    Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
    // HalfPi now holds -Pi/2
    Result = XMVectorSelect(Result,HalfPi,MinV);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
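
// Added worked note (not part of the original source): the Cody and Waite
// argument reduction above maps any input into |x| <= 2 - sqrt(3): values
// with |V| > 1 are replaced by 1/|V| (with Pi/2 added and the result
// negated), and remaining values above 2 - sqrt(3) go through
//
//     FA = (VF*(sqrt(3) - 1) + VF - 1) / (VF + sqrt(3))
//        = (sqrt(3)*VF - 1) / (VF + sqrt(3))   -- i.e. tan(x - Pi/6)
//
// which is why OneSixthPi (or OneThirdPi when the reciprocal path was taken)
// is added back through Angle1.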

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan2
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:

    //     Y == 0 and X is Negative         -> Pi with the sign of Y
    //     Y == 0 and X is Positive         -> 0 with the sign of Y
    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
    //     X == -Infinity and Finite Y > 0  -> Pi with the sign of Y
    //     X == +Infinity and Finite Y > 0  -> 0 with the sign of Y
    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
    //     TODO: Return Y / X if the result underflows

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    FiniteYGreaterZero = XMVectorGreater(Y, Zero);
    FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
    R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
    Result = XMVectorSelect(R6, R7, XEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocal(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATan(V);

    Result = XMVectorSelect(Result, R0, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask if Y>0 && Y!=INF
    XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
    // Get the sign of (Y&0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    R1 = _mm_or_ps(R1,YSign);
    R1 = XMVectorSelect(R1,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
    // At this point, any entry that's still all ones will get the result
    // from XMVectorATan(); the others keep their precomputed failsafe value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATan(vConstants);
    // Discard entries that have been declared void
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
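
// Added usage sketch (not part of the original source), showing the quadrant
// handling the special-case table above describes:
//
//     XMVECTOR Y = XMVectorSet(1.0f,  1.0f, -1.0f,  0.0f);
//     XMVECTOR X = XMVectorSet(1.0f, -1.0f, -1.0f, -2.0f);
//     XMVECTOR A = XMVectorATan2(Y, X);
//     // Expected componentwise: ~Pi/4, ~3*Pi/4, ~-3*Pi/4, and Pi (Y == 0
//     // with negative X returns Pi with the sign of Y, here +0, so +Pi).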

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V5, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR Result;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V5 = XMVectorMultiply(V3, V2);
    V7 = XMVectorMultiply(V5, V2);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    Result = XMVectorMultiplyAdd(S1, V3, V);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR V3 = _mm_mul_ps(V2,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V3);
    vResult = _mm_add_ps(vResult,V);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    // V^5
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
    // V^7
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V4, V6;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Result;

    V2 = XMVectorMultiply(V, V);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Result = XMVectorMultiplyAdd(C1, V2, C0);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get V^2
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    // Get V^4
    XMVECTOR V4 = _mm_mul_ps(V2, V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
    // It's really V^6
    V4 = _mm_mul_ps(V4,V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMVectorSinCosEst
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR  V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);

    C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
4720
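
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Computes estimated sine and cosine of four angles in one call;
// inputs are assumed to already lie in the [-PI, PI) range the polynomial
// expects:
//
//     XMVECTOR angles = XMVectorSet(0.0f, XM_PIDIV4, XM_PIDIV2, -XM_PIDIV4);
//     XMVECTOR s, c;
//     XMVectorSinCosEst(&s, &c, angles); // s ~= sin(angles), c ~= cos(angles)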
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
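
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). XMVectorTanEst first subtracts the nearest multiple of PI from
// each component, so arguments outside (-PI/2, PI/2) are handled, though
// accuracy naturally degrades near odd multiples of PI/2 where tan()
// diverges:
//
//     XMVECTOR v = XMVectorSet(0.0f, XM_PIDIV4, -XM_PIDIV4, 1.0f);
//     XMVECTOR t = XMVectorTanEst(v); // t.y ~= 1.0f, t.z ~= -1.0f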
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V,Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_sub_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorAdd(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    E = XMVectorMultiply(V, Scale);
    E = XMVectorExpEst(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocalEst(E);

    Result = XMVectorSubtract(g_XMOne.v, E);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExpEst(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocalEst(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
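
// Worked identity behind the code above (editor's note; XMVectorExpEst is
// the library's base-2 exponential, hence Scale = 2/ln(2) so that
// exp2(V * 2/ln 2) = e^(2V)). With E = e^(2V), the sequence computes:
//
//     E*0.5 + 0.5          = (e^(2V) + 1) / 2
//     1 / (E*0.5 + 0.5)    = 2 / (e^(2V) + 1)
//     1 - 2 / (e^(2V) + 1) = (e^(2V) - 1) / (e^(2V) + 1) = tanh(V)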
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorASinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps;
    XMVECTOR Result;

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // Since this is an estimate, rsqrt is okay
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 retaining sign
    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
    D = _mm_mul_ps(D,vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorACosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps, HalfPi;
    XMVECTOR Result;

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    Result = XMVectorSubtract(HalfPi, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Calc D
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // SqrtD = sqrt(D) estimated, where D = (1+eps) - abs(V)
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 while retaining sign
    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
    D = _mm_mul_ps(D, vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
    vResult = _mm_sub_ps(vConstants,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2S2, N, D;
    XMVECTOR S0, S1, S2;
    XMVECTOR HalfPi;
    XMVECTOR Result;

    S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
    S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
    HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);

    AbsV = XMVectorAbs(V);

    V2S2 = XMVectorMultiplyAdd(V, V, S2);
    N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
    D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
    N = XMVectorMultiply(N, V);
    D = XMVectorReciprocalEst(D);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
    vResult = _mm_mul_ps(vResult,vAbsV);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vResult = _mm_mul_ps(vResult,V);

    XMVECTOR D = _mm_mul_ps(V,V);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
    D = _mm_add_ps(D,vConstants);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    D = _mm_add_ps(D,vConstants);
    vResult = _mm_div_ps(vResult,D);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATan2Est
(
    FXMVECTOR Y, 
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    FiniteYGreaterZero = XMVectorGreater(Y, Zero);
    FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
    R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
    Result = XMVectorSelect(R6, R7, XEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocalEst(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATanEst(V);

    Result = XMVectorSelect(Result, R0, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask if Y>0 && Y!=INF
    XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
    // Get the sign of Y (Y&0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    R1 = _mm_or_ps(R1,YSign);
    R1 = XMVectorSelect(R1,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
    // At this point, any entry that's zero will get the result
    // from XMVectorATanEst(), otherwise, return the failsafe value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF, are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATanEst(vConstants);
    // Discard entries that have been declared void
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
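
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). A common use is converting a 2D direction into a heading angle;
// note the (Y, X) argument order matches CRT atan2f:
//
//     XMVECTOR dir     = XMVectorSet(1.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR heading = XMVectorATan2Est(XMVectorSplatY(dir),
//                                         XMVectorSplatX(dir));
//     // heading ~= XM_PIDIV4 in every component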
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerp
(
    FXMVECTOR V0, 
    FXMVECTOR V1, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Scale;
    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + t * (V1 - V0)
    Scale = XMVectorReplicate(t);
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, Scale, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L, S;
    XMVECTOR Result;

    L = _mm_sub_ps( V1, V0 );

    S = _mm_set_ps1( t );

    Result = _mm_mul_ps( L, S );

    return _mm_add_ps( Result, V0 );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
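
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Blends two positions with a scalar parameter; t is not clamped,
// so values outside [0, 1] extrapolate along the same line:
//
//     XMVECTOR a   = XMVectorSet( 0.0f,  0.0f,  0.0f, 0.0f);
//     XMVECTOR b   = XMVectorSet(10.0f, 20.0f, 30.0f, 0.0f);
//     XMVECTOR mid = XMVectorLerp(a, b, 0.5f); // (5, 10, 15, 0)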
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerpV
(
    FXMVECTOR V0, 
    FXMVECTOR V1, 
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + T * (V1 - V0)
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, T, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Length;
    XMVECTOR Result;

    Length = _mm_sub_ps( V1, V0 );

    Result = _mm_mul_ps( Length, T );

    return _mm_add_ps( Result, V0 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermite
(
    FXMVECTOR Position0, 
    FXMVECTOR Tangent0, 
    FXMVECTOR Position1, 
    CXMVECTOR Tangent1, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    FLOAT    t2;
    FLOAT    t3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
    T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
    P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
    T1 = XMVectorReplicate(t3 - t2);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
    XMVECTOR T1 = _mm_set_ps1(t3 - t2);

    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(P1, Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(T1, Tangent1);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
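
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Evaluates a cubic Hermite segment; at t = 0 the result is
// Position0 and at t = 1 it is Position1, with the tangents controlling
// the departure and arrival directions:
//
//     XMVECTOR p = XMVectorHermite(P0, T0, P1, T1, 0.25f);
//
// where P0, T0, P1, T1 are caller-provided endpoint and tangent vectors.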
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermiteV
(
    FXMVECTOR Position0, 
    FXMVECTOR Tangent0, 
    FXMVECTOR Position1, 
    CXMVECTOR Tangent1, 
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    XMVECTOR T2;
    XMVECTOR T3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    T2 = XMVectorMultiply(T, T);
    T3 = XMVectorMultiply(T , T2);

    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Mul by the constants against t^2
    T2 = _mm_mul_ps(T2,CatMulT2);
    // Mul by the constants against t^3
    T3 = _mm_mul_ps(T3,CatMulT3);
    // T3 now has the pre-result.
    T3 = _mm_add_ps(T3,T2);
    // I need to add t.y only
    T2 = _mm_and_ps(T,g_XMMaskY);
    T3 = _mm_add_ps(T3,T2);
    // Add 1.0f to x
    T3 = _mm_add_ps(T3,g_XMIdentityR0);
    // Now, I have the constants created
    // Mul the x constant to Position0
    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,Position0);
    // Mul the y constant to Tangent0
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
    T2 = _mm_mul_ps(T2,Tangent0);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the z constant to Position1
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
    T2 = _mm_mul_ps(T2,Position1);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the w constant to Tangent1
    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
    T3 = _mm_mul_ps(T3,Tangent1);
    vResult = _mm_add_ps(vResult,T3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRom
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR Position3, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR P1;
    XMVECTOR P2;
    XMVECTOR P3;
    XMVECTOR Result;
    FLOAT    t2;
    FLOAT    t3;

    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
    //           (t^3 - t^2) * Position3) * 0.5
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
    P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    P3 = XMVectorReplicate((t3 - t2) * 0.5f);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(P2, Position2, Result);
    Result = XMVectorMultiplyAdd(P3, Position3, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);

    P0 = _mm_mul_ps(P0, Position0);
    P1 = _mm_mul_ps(P1, Position1);
    P2 = _mm_mul_ps(P2, Position2);
    P3 = _mm_mul_ps(P3, Position3);
    P0 = _mm_add_ps(P0,P1);
    P2 = _mm_add_ps(P2,P3);
    P0 = _mm_add_ps(P0,P2);
    return P0;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
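
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Interpolates between Position1 (t = 0) and Position2 (t = 1);
// Position0 and Position3 only shape the tangents, which makes this
// convenient for sliding a window over a list of waypoints:
//
//     for (FLOAT t = 0.0f; t <= 1.0f; t += 0.1f)
//         p = XMVectorCatmullRom(W[i-1], W[i], W[i+1], W[i+2], t);
//
// with W a caller-provided array of waypoint vectors.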
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRomV
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR Position3, 
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fx = T.vector4_f32[0];
    float fy = T.vector4_f32[1];
    float fz = T.vector4_f32[2];
    float fw = T.vector4_f32[3];
    XMVECTOR vResult = {
        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
    // Cache T^2 and T^3
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Perform the Position0 term
    XMVECTOR vResult = _mm_add_ps(T2,T2);
    vResult = _mm_sub_ps(vResult,T);
    vResult = _mm_sub_ps(vResult,T3);
    vResult = _mm_mul_ps(vResult,Position0);
    // Perform the Position1 term and add
    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,Catmul2);
    vTemp = _mm_mul_ps(vTemp,Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    // Perform the Position2 term and add
    vTemp = _mm_mul_ps(T2,Catmul4);
    vTemp2 = _mm_mul_ps(T3,Catmul3);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,T);
    vTemp = _mm_mul_ps(vTemp,Position2);
    vResult = _mm_add_ps(vResult,vTemp);
    // Position3 is the last term
    T3 = _mm_sub_ps(T3,T2);
    T3 = _mm_mul_ps(T3,Position3);
    vResult = _mm_add_ps(vResult,T3);
    // Multiply by 0.5f and exit
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentric
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    FLOAT    f, 
    FLOAT    g
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR ScaleF;
    XMVECTOR ScaleG;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    ScaleF = XMVectorReplicate(f);

    P20 = XMVectorSubtract(Position2, Position0);
    ScaleG = XMVectorReplicate(g);

    Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
    Result = XMVectorMultiplyAdd(P20, ScaleG, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR SF = _mm_set_ps1(f);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    XMVECTOR SG = _mm_set_ps1(g);
    R1 = _mm_mul_ps(R1,SF);
    R2 = _mm_mul_ps(R2,SG);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
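
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). With f and g as barycentric weights, the result lies inside the
// triangle exactly when f >= 0, g >= 0 and f + g <= 1; (f, g) = (0, 0)
// returns Position0:
//
//     XMVECTOR centroid =
//         XMVectorBaryCentric(A, B, C, 1.0f / 3.0f, 1.0f / 3.0f);
//
// where A, B, C are caller-provided triangle corners.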
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentricV
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR F, 
    CXMVECTOR G
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    P20 = XMVectorSubtract(Position2, Position0);

    Result = XMVectorMultiplyAdd(P10, F, Position0);
    Result = XMVectorMultiplyAdd(P20, G, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    R1 = _mm_mul_ps(R1,F);
    R2 = _mm_mul_ps(R2,G);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
/****************************************************************************
 *
 * 2D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
#endif
}
 

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] == V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] != V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
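
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). The *R variants return a CR6-style mask instead of a BOOL, so a
// single comparison can answer "all equal", "none equal", or "mixed":
//
//     UINT cr = XMVector2EqualR(v1, v2);
//     if (XMComparisonAllTrue(cr))  { /* both x and y matched */ }
//     if (XMComparisonAllFalse(cr)) { /* neither matched */ }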
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && 
        (V1.vector4_u32[1] == V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && 
        (V1.vector4_u32[1] != V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    return ((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] > V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] <= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] >= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] < V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest == 3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x or y are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x or y are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
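
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Because the dot product is splatted across all four lanes, it
// can feed straight into further vector math, e.g. the unsigned angle
// between two pre-normalized 2D vectors n1 and n2:
//
//     XMVECTOR cosAngle = XMVector2Dot(n1, n2);
//     XMVECTOR angle    = XMVectorACosEst(cosAngle);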
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
    XMVECTOR vResult = { 
        fCross,
        fCross,
        fCross,
        fCross
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap x and y
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
    // Perform the muls
    vResult = _mm_mul_ps(vResult,V1);
    // Splat y
    XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
    // Sub the values
    vResult = _mm_sub_ss(vResult,vTemp);
    // Splat the cross product
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
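
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). The 2D "cross product" is the z component of the 3D cross of
// (x, y, 0) vectors; its sign gives the winding of the turn from V1 to V2:
//
//     FLOAT z = XMVectorGetX(XMVector2Cross(V1, V2));
//     // z > 0: V2 is counter-clockwise from V1; z < 0: clockwise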
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthSq
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector2Dot(V, V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else
    return XMVector2Dot(V, V);
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrt(Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
6335
//------------------------------------------------------------------------------
6336
// XMVector2NormalizeEst uses a reciprocal estimate and
6337
// returns QNaN on zero and infinite vectors.
6338
 
6339
XMFINLINE XMVECTOR XMVector2NormalizeEst
6340
(
6341
    FXMVECTOR V
6342
)
6343
{
6344
#if defined(_XM_NO_INTRINSICS_)
6345
 
6346
    XMVECTOR Result;
6347
    Result = XMVector2ReciprocalLength(V);
6348
    Result = XMVectorMultiply(V, Result);
6349
    return Result;
6350
 
6351
#elif defined(_XM_SSE_INTRINSICS_)
6352
    // Perform the dot product on x and y
6353
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6354
    // vTemp has y splatted
6355
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6356
    // x+y
6357
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6358
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
6359
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6360
    vLengthSq = _mm_mul_ps(vLengthSq,V);
6361
	return vLengthSq;
6362
#else // _XM_VMX128_INTRINSICS_
6363
#endif // _XM_VMX128_INTRINSICS_
6364
}
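
// Example (illustrative): XMVector2NormalizeEst(XMVectorSet(3.0f, 4.0f, 0.0f,
// 0.0f)) yields approximately (0.6, 0.8, 0, 0); because _mm_rsqrt_ss is an
// estimate, expect only about 12 bits of precision. As noted above, zero and
// infinite inputs produce QNaN -- use XMVector2Normalize when those cases
// must be handled.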

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR LengthSq;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR Result;

    LengthSq = XMVector2LengthSq(V);
    Zero = XMVectorZero();
    Result = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Result = XMVectorSelect(LengthSq, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
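
// How the failsafe above works: vLengthSq becomes a per-lane mask that is all
// ones wherever the squared length is not infinity, so the final _mm_and_ps
// forces the result to zero for infinite-length inputs. Note that a
// zero-length input reaches the divide as 0/0 and therefore yields QNaN on
// this path, whereas the _XM_NO_INTRINSICS_ path selects LengthSq (zero)
// instead.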

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector2ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector2ClampLengthV(V, ClampMin, ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
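
// Usage sketch (illustrative; Velocity and MaxSpeed are hypothetical caller
// variables): clamping a 2D velocity to a speed range, e.g.
//     Velocity = XMVector2ClampLength(Velocity, 0.0f, MaxSpeed);
// Inputs whose length already lies within [LengthMin, LengthMax] are returned
// unchanged -- see the select on Control in XMVector2ClampLengthV below.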

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector2LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Normal = XMVectorMultiply(V, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Normal = _mm_mul_ps(V, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
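
// Note: Length is computed above as LengthSq * rsqrt(LengthSq), using the
// identity x / sqrt(x) == sqrt(x); this reuses the reciprocal square root
// already needed for Normal instead of issuing a separate sqrt.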

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector2Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector2Dot(Incident,Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
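
// Example (illustrative): with Incident = (1, -1) and Normal = (0, 1),
// dot(I, N) = -1, so Result = I - 2*(-1)*N = (1, 1): the vector bounces off
// the surface whose normal is N. Normal is expected to be unit length.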

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector2RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector2RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the refraction of a 2D vector
XMFINLINE XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    float IDotN;
    float RX,RY;
    XMVECTOR vResult;
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    RY = 1.0f-(IDotN*IDotN);
    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
    IDotN = _mm_add_ss(IDotN,vTemp);
    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any terms are <=0, sqrt() will fail, punt to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
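
// Note: the term under the square root is the refraction discriminant; when
// it is <= 0 the incident ray undergoes total internal reflection and there
// is no refracted ray, which the code above expresses by masking the result
// to zero with vMask. RefractionIndex is the ratio of the refractive indices
// of the two media (roughly 0.75 for a ray passing from air into water,
// taking the ratio as n1/n2).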

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = -V.vector4_f32[1];
    Result.vector4_f32[1] = V.vector4_f32[0];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    vResult = _mm_mul_ps(vResult,g_XMNegateX);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
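
// Note: (x, y) -> (-y, x) is a 90-degree counter-clockwise rotation, so the
// result is perpendicular to V with the same length. On the SSE path the
// shuffle/negate also carries z and w through unchanged; the
// _XM_NO_INTRINSICS_ path above leaves z and w of the local Result unwritten.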

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
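
// Note: for unit-length inputs dot(N1, N2) == cos(angle), so the angle is
// recovered with acos. The clamp to [-1, 1] above guards against floating-
// point drift pushing the dot product slightly outside acos's domain.
// Example (illustrative): two unit vectors 60 degrees apart have a dot
// product of 0.5, and acos(0.5) is roughly 1.0472 radians.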

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);

    Dot = XMVector2Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;
    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);
    Dot = XMVector2Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LinePointDistance
(
    FXMVECTOR LinePoint1, 
    FXMVECTOR LinePoint2, 
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance 
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector2Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector2Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
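
// Derivation: PointProjectionScale is the parameter t of the foot of the
// perpendicular, so LineVector * t is the projection of PointVector onto the
// line; subtracting it leaves the component of PointVector perpendicular to
// the line, whose length is the distance. The SSE path folds the reciprocal
// into a single _mm_div_ps, even though its local is still named
// ReciprocalLengthSq.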

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1, 
    FXMVECTOR Line1Point2, 
    FXMVECTOR Line2Point1, 
    CXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        V1;
    XMVECTOR        V2;
    XMVECTOR        V3;
    XMVECTOR        C1;
    XMVECTOR        C2;
    XMVECTOR        Result;
    CONST XMVECTOR  Zero = XMVectorZero();

    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    V3 = XMVectorSubtract(Line1Point1, Line2Point1);

    C1 = XMVector2Cross(V1, V2);
    C2 = XMVector2Cross(V2, V3);

    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale;
        Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
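
// Note: XMVector2Cross returns the scalar perp-dot product replicated to all
// lanes, so C1 is the determinant of the two line directions (near zero when
// the lines are parallel) and C2 tests whether the first line's origin lies
// on the second line. The SSE path builds abs(C1) and abs(C2) with the
// max(-v, v) trick and then blends between the computed intersection and the
// INF (coincident) / QNaN (parallel) failure codes.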

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
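
// Note: XMVector2Transform treats V as (x, y, 0, 1): the result is
// x * M.r[0] + y * M.r[1] + M.r[3], i.e. rows 0 and 1 scaled by the input
// components plus the translation row, with row 2 (the z axis) ignored.
// The full 4D result is returned without a homogeneous divide; use
// XMVector2TransformCoord when the projective divide by w is wanted.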

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX        M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector2TransformStreamNC
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2TransformCoord
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);

    Result = XMVectorMultiply(Result, InverseW);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
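
// Note: unlike XMVector2Transform, this divides the transformed vector by its
// w component, which is what projective (e.g. perspective) matrices require.
// If the transform produces w == 0 the divide yields infinities or NaNs, so
// callers are expected to keep inputs out of that degenerate case.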

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2TransformNormal
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Y, M.r[1]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
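
// Note: normals are direction vectors, so V is treated as (x, y, 0, 0): the
// translation row M.r[3] is not added, in contrast to XMVector2Transform.
// For matrices containing non-uniform scale, transforming a normal correctly
// generally requires the inverse-transpose matrix, which the caller must
// supply.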

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX        M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiply(Y, M.r[1]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 3D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
#endif
}
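
// Note on the mask idiom used throughout the 3D comparisons: _mm_movemask_ps
// packs the sign bit of each lane into bits 0..3 (x, y, z, w). Masking with
// 7 keeps only x, y and z, so (mask & 7) == 7 means the comparison was true
// in all three components of interest while w is ignored.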

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&7;
    UINT CR = 0;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
    UINT CR = 0;
    if (iTemp==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
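
// Note: the R-suffixed comparisons return a control record rather than a
// BOOL: XM_CRMASK_CR6TRUE when the predicate holds in all of x, y, z and
// XM_CRMASK_CR6FALSE when it fails in all three; a mixed result returns 0.
// The record is decoded with helpers such as XMComparisonAllTrue, mirroring
// the PowerPC CR6 condition-register convention the names come from.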

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
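
// Note: the absolute difference above is formed without a bit mask by
// computing max(0 - delta, delta), which is |delta| for finite inputs; the
// per-component test is then |V1 - V2| <= Epsilon on x, y and z.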

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
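
// Note: "in bounds" is the symmetric test -Bounds <= V <= Bounds evaluated
// per component; the SSE path negates Bounds by multiplying with
// g_XMNegativeOne and reuses _mm_cmple_ps with the operands swapped for the
// lower test.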

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x, y or z are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
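
// Note: a float is NaN when its exponent bits are all ones and its mantissa
// is non-zero. Since SSE2 has no integer not-equal compare, the code above
// tests the mantissa for equality with zero and then uses _mm_andnot_si128
// to combine "exponent == 0x7F800000" with "mantissa != 0".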

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.vector4_f32[0] = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.vector4_f32[2]
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.vector4_f32[0] = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
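
// Example (illustrative): for V1 = (1, 2, 3) and V2 = (4, 5, 6) the product
// vector is (4, 10, 18); the shuffle(2,1,2,1) places (y, z) where lane 0 can
// reach them, the two _mm_add_ss steps accumulate 4 + 10 + 18 = 32 in lane 0,
// and the final shuffle splats (32, 32, 32, 32).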

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from left, and return answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
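
// Example (illustrative): XMVector3Cross of the unit axes (1,0,0) and (0,1,0)
// yields (0,0,1). The two shuffle pairs build (y1,z1,x1)*(z2,x2,y2) and
// (z1,x1,y1)*(y2,z2,x2); their difference is the standard cross product, and
// the final _mm_and_ps with g_XMMask3 forces w to zero.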

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthSq
(
    FXMVECTOR V
)
{
    return XMVector3Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
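
// Quick reference (illustrative) for the 3D length family, with v = (3, 0, 4):
//   XMVector3LengthSq(v)         -> 25   ( dot(v, v) )
//   XMVector3Length(v)           -> 5    ( sqrt of the above )
//   XMVector3ReciprocalLength(v) -> 0.2  ( 1 / length )
// Each result is splatted across all four lanes of the returned XMVECTOR;
// the *Est variants trade accuracy for speed via estimate instructions.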

//------------------------------------------------------------------------------
// XMVector3NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal square root estimate
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;

    fLength = sqrtf((V.vector4_f32[0]*V.vector4_f32[0])+(V.vector4_f32[1]*V.vector4_f32[1])+(V.vector4_f32[2]*V.vector4_f32[2]));
    // Prevent divide by zero
    if (fLength) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
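
// Example (illustrative): normalizing a 3-element vector.
//   XMVECTOR v = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//   XMVECTOR n = XMVector3Normalize(v);    // n = (0.6, 0.0, 0.8, 0.0)
// XMVector3NormalizeEst above trades accuracy for speed: _mm_rsqrt_ps has
// roughly 12 bits of precision versus the full sqrt-and-divide used here.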

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector3ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector3ClampLengthV(V,ClampMin,ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
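
// How the clamp works (illustrative): V is decomposed into a unit direction
// (Normal) and a scalar length (Length = LengthSq * RcpLength), the length
// is clamped into [LengthMin, LengthMax] with compare masks and
// XMVectorSelect, and the two parts are remultiplied. When the length is
// already in range, ControlMax and ControlMin are identical (all-false)
// masks, so the final select returns the original V bit-for-bit instead of
// Normal * ClampLength, avoiding any rounding drift.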

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector3Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector3Dot(Incident, Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
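
// Example (illustrative): reflecting an incoming ray off a ground plane.
//   XMVECTOR i = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);  // incident direction
//   XMVECTOR n = XMVectorSet(0.0f,  1.0f, 0.0f, 0.0f);  // unit surface normal
//   XMVECTOR r = XMVector3Reflect(i, n);                // r = (1, 1, 0, 0)
// The formula assumes Normal is unit length; no renormalization is done here.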

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector3RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector3RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        IDotN;
    XMVECTOR        R;
    CONST XMVECTOR  Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
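
// Background (illustrative): this is Snell's law in vector form. With
// n = RefractionIndex (ratio of indices across the interface) and
// k = 1 - n*n * (1 - dot(I,N)^2), a negative k means the incident angle
// exceeds the critical angle, so all light is reflected (total internal
// reflection) and zero is returned; otherwise the refracted direction is
//   n * I - (n * dot(I,N) + sqrt(k)) * N
// which is exactly what both branches above compute.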

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);

    NegativeV = XMVectorSubtract(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = XMVectorAdd(YZYY, Z);
    D = XMVectorSubtract(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);

    Result = XMVectorSelect(R1, R0, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORI32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);

    NegativeV = _mm_sub_ps(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = _mm_add_ps(YZYY, Z);
    D = _mm_sub_ps(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X);
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);

    Dot = XMVector3Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();

    CosAngle = XMVectorMultiply(Dot, L1);

    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);
    Dot = XMVector3Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
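
// Derivation (illustrative): cos(theta) = dot(V1, V2) / (|V1| * |V2|).
// The code multiplies the two reciprocal lengths, scales the dot product,
// and clamps the cosine into [-1, 1] before XMVectorACos so that rounding
// error cannot push it outside the domain of acos (which would yield NaN).
// For already-normalized inputs, XMVector3AngleBetweenNormals above skips
// the two reciprocal-length computations.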

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LinePointDistance
(
    FXMVECTOR LinePoint1, 
    FXMVECTOR LinePoint2, 
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance 
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector3Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector3Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    // Despite the name, this holds the plain squared length here; the
    // reciprocal is folded into the division below.
    XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector3Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
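
// Geometry recap (illustrative), for line points A = LinePoint1,
// B = LinePoint2 and query point P = Point:
//   t = dot(P - A, B - A) / |B - A|^2       (projection parameter)
//   distance = |(P - A) - t * (B - A)|      (perpendicular remainder)
// Note the line is treated as infinite; t is not clamped to [0, 1], so
// this is not a segment-distance routine.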

//------------------------------------------------------------------------------

XMFINLINE VOID XMVector3ComponentsFromNormal
(
    XMVECTOR* pParallel, 
    XMVECTOR* pPerpendicular, 
    FXMVECTOR  V, 
    FXMVECTOR  Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Parallel;
    XMVECTOR Scale;

    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);

    Scale = XMVector3Dot(V, Normal);

    Parallel = XMVectorMultiply(Normal, Scale);

    *pParallel = Parallel;
    *pPerpendicular = XMVectorSubtract(V, Parallel);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);
    XMVECTOR Scale = XMVector3Dot(V, Normal);
    XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
    *pParallel = Parallel;
    *pPerpendicular = _mm_sub_ps(V,Parallel);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Transform a vector using a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3Rotate
(
    FXMVECTOR V, 
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = _mm_and_ps(V,g_XMMask3);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
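
// Example (illustrative, assumes a unit quaternion): rotating a point
// 90 degrees about the Z axis.
//   XMVECTOR q = XMQuaternionRotationAxis(XMVectorSet(0,0,1,0), XM_PIDIV2);
//   XMVECTOR p = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
//   XMVECTOR r = XMVector3Rotate(p, q);   // r ~= (0, 1, 0, 0)
// The w lane of V is masked to zero first, since the conjugate-multiply
// sandwich above treats V as a pure quaternion.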

//------------------------------------------------------------------------------
// Transform a vector using the inverse of a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3InverseRotate
(
    FXMVECTOR V, 
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    A = _mm_and_ps(V,g_XMMask3);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
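
// Row-vector convention (as used throughout this library): the result is
//   x * M.r[0] + y * M.r[1] + z * M.r[2] + M.r[3]
// i.e. V is treated as the point (x, y, z, 1), so the translation row
// M.r[3] is applied, and the full homogeneous result (including w) is
// returned with no divide. See XMVector3TransformCoord below for the
// variant that divides by w.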

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT     i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStreamNC
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformCoord
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);

    Result = XMVectorMultiply(Result, InverseW);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
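
// TransformCoord differs from XMVector3Transform only in the last step:
// the homogeneous result is divided by its own w, projecting back onto the
// w = 1 hyperplane. Illustrative: if V * M = (2, 4, 6, 2), TransformCoord
// returns (1, 2, 3, 1). This is the variant to use for positions pushed
// through a perspective projection matrix.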

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformCoordStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);

        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformNormal
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Z, M.r[2]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
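
// TransformNormal treats V as a direction (w = 0): the translation row
// M.r[3] is never added, so only the upper 3x3 of M affects the result.
// Reminder (general practice, not specific to this header): under
// non-uniform scale, transform normals by the inverse transpose of the
// world matrix to keep them perpendicular to surfaces.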

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformNormalStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        Result = XMVectorMultiply(Z, M.r[2]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVector3Project
(
    FXMVECTOR V, 
    FLOAT    ViewportX, 
    FLOAT    ViewportY, 
    FLOAT    ViewportWidth, 
    FLOAT    ViewportHeight, 
    FLOAT    ViewportMinZ, 
    FLOAT    ViewportMaxZ, 
    CXMMATRIX Projection, 
    CXMMATRIX View, 
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    Result = XMVector3TransformCoord(V, Transform);

    Result = XMVectorMultiplyAdd(Result, Scale, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Result = XMVector3TransformCoord(V, Transform);
    Result = _mm_mul_ps(Result,Scale);
    Result = _mm_add_ps(Result,Offset);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
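
// Viewport mapping (illustrative): after the world-view-projection
// transform and w divide, clip-space x and y lie in [-1, 1] and z in
// [0, 1]. The Scale/Offset pair above remaps them to pixels:
//   screen.x = ViewportX + (x + 1) * ViewportWidth  / 2
//   screen.y = ViewportY + (1 - y) * ViewportHeight / 2   (y flips: screen y grows downward)
//   screen.z = ViewportMinZ + z * (ViewportMaxZ - ViewportMinZ)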

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3ProjectStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    FLOAT           ViewportX, 
    FLOAT           ViewportY, 
    FLOAT           ViewportWidth, 
    FLOAT           ViewportHeight, 
    FLOAT           ViewportMinZ, 
    FLOAT           ViewportMaxZ, 
    CXMMATRIX     Projection, 
    CXMMATRIX     View, 
    CXMMATRIX     World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = XMVectorMultiplyAdd(Result, Scale, Offset);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = _mm_mul_ps(Result,Scale);
        Result = _mm_add_ps(Result,Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }
    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Unproject
(
    FXMVECTOR V, 
    FLOAT    ViewportX, 
    FLOAT    ViewportY, 
    FLOAT    ViewportWidth, 
    FLOAT    ViewportHeight, 
    FLOAT    ViewportMinZ, 
    FLOAT    ViewportMaxZ, 
    CXMMATRIX Projection, 
    CXMMATRIX View, 
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = XMVectorMultiplyAdd(V, Scale, Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = _mm_mul_ps(V,Scale);
    Result = _mm_add_ps(Result,Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
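
// Unproject inverts the viewport mapping used by XMVector3Project (Scale
// here is the reciprocal of the projection scale, and Offset recenters the
// screen coordinates into clip space), then pushes the point through the
// inverse of World * View * Projection. Round-tripping a point through
// Project and then Unproject with the same matrices and viewport recovers
// the original position up to floating-point error.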

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3UnprojectStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    FLOAT           ViewportX, 
    FLOAT           ViewportY, 
    FLOAT           ViewportWidth, 
    FLOAT           ViewportHeight, 
    FLOAT           ViewportMinZ, 
    FLOAT           ViewportMaxZ, 
    CXMMATRIX     Projection, 
    CXMMATRIX     View, 
    CXMMATRIX     World)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        V;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    UINT            i;
    BYTE*           pInputVector = (BYTE*)pInputStream;
    BYTE*           pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        V;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    UINT            i;
    BYTE*           pInputVector = (BYTE*)pInputStream;
    BYTE*           pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 4D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
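
// About the "R" variants (illustrative): they return a comparison record
// rather than a BOOL. XM_CRMASK_CR6TRUE is set when all four lanes compare
// true and XM_CRMASK_CR6FALSE when all four compare false (mirroring the
// PowerPC CR6 flags); a mixed result sets neither. Callers inspect the
// record with helpers such as XMComparisonAllTrue and XMComparisonAnyFalse,
// as the fallback paths in this section do.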

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] && 
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] && 
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2]) &&
            (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
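
// Usage sketch (illustrative): tolerance comparison with a per-component
// epsilon vector; XMVectorReplicate builds a uniform epsilon.
//
//     XMVECTOR a   = XMVectorSet(1.0000f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b   = XMVectorSet(1.0001f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR eps = XMVectorReplicate(1.0e-3f);
//     BOOL     ok  = XMVector4NearEqual(a, b, eps);   // TRUE here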

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] && 
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] && 
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]) ||
            XMISNAN(V.vector4_f32[3]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
    // If any are NaN, the mask is non-zero
    return (_mm_movemask_ps(vTempNan)!=0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]) ||
            XMISINF(V.vector4_f32[3]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the corresponding mask bits are set
    return (_mm_movemask_ps(vTemp) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
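
// The SSE path above performs the horizontal add with two shuffle/add pairs:
// the lane products are folded to (x+z, y+w), those partial sums are added
// into a single x+y+z+w, and the scalar is splatted into all four lanes.
// Usage sketch (illustrative):
//
//     XMVECTOR a = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b = XMVectorSet(4.0f, 3.0f, 2.0f, 1.0f);
//     FLOAT    d = XMVectorGetX(XMVector4Dot(a, b));  // 20.0f in every lane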

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;

    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // V2zwyz * V3wzwy
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
    XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
    vResult = _mm_mul_ps(vResult,vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp2);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
    vResult = _mm_mul_ps(vResult,vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
    // vResult - temp * V1zzyy
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp1);

    // V2yzxy * V3zxyx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
    // vResult + term * V1wwwz
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
    vResult = _mm_add_ps(vResult,vTemp3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
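
// The 4D cross product above yields the vector orthogonal to all three
// inputs: each component is (up to sign) the 3x3 minor of the matrix whose
// rows are V1, V2 and V3, obtained by deleting that component's column --
// the same formal determinant expansion as the 3D cross product, lifted to
// four dimensions. As a consequence, XMVector4Dot(XMVector4Cross(A,B,C), A)
// is zero up to rounding, and likewise for B and C.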

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthSq
(
    FXMVECTOR V
)
{
    return XMVector4Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal square root estimate
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the square root of the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    // Full-precision reciprocal via divide
    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
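
// Precision note: the Est variant maps to _mm_rsqrt_ps, whose relative error
// is bounded by roughly 1.5 * 2^-12, while XMVector4ReciprocalLength takes a
// full-precision _mm_sqrt_ps followed by a divide. Prefer the Est form only
// where throughput matters more than accuracy.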

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
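
// Usage sketch (illustrative): the length is replicated into every lane, so
// any component accessor retrieves it.
//
//     XMVECTOR v   = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//     FLOAT    len = XMVectorGetX(XMVector4Length(v));   // 5.0f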

//------------------------------------------------------------------------------
// XMVector4NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector4NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector4ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal square root estimate
    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Reciprocal mul to perform the normalization
    vResult = _mm_mul_ps(vResult,V);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR LengthSq;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR Result;

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    Result = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Result = XMVectorSelect(LengthSq, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
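
// Note: XMVector4Normalize masks an infinite-length input to zero, while a
// zero-length input yields QNaN in the SSE path (0/0). Usage sketch
// (illustrative):
//
//     XMVECTOR v = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//     XMVECTOR n = XMVector4Normalize(v);   // (0.6f, 0.0f, 0.8f, 0.0f)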

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector4ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector4ClampLengthV(V, ClampMin, ClampMax);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax,ControlMin);
    Result = XMVectorSelect(Result,V,Control);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
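
// XMVector4ClampLengthV expects LengthMin and LengthMax to be replicated
// (splatted) vectors, as the asserts above enforce; the scalar wrapper
// builds them for you. Usage sketch (illustrative):
//
//     XMVECTOR v = XMVectorSet(10.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR c = XMVector4ClampLength(v, 1.0f, 5.0f);  // length becomes 5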

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector4Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector4Dot(Incident,Normal);
    Result = _mm_add_ps(Result,Result);
    Result = _mm_mul_ps(Result,Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
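
// Worked example of Result = Incident - (2 * dot(Incident, Normal)) * Normal
// (illustrative): with Incident = (1,-1,0,0) and unit Normal = (0,1,0,0),
// dot = -1, so Result = (1,-1,0,0) - 2*(-1)*(0,1,0,0) = (1,1,0,0).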

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector4RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector4RefractV(Incident,Normal,Index);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        IDotN;
    XMVECTOR        R;
    CONST XMVECTOR  Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector4Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex, IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
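
// The quantity under the square root, 1 - RI^2 * (1 - dot(I,N)^2), goes
// non-positive exactly when Snell's law has no real solution; both paths
// above return the zero vector in that total-internal-reflection case.
// Incident and Normal are assumed normalized, with RefractionIndex holding
// the replicated ratio of refraction indices.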

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = V.vector4_f32[2];
    Result.vector4_f32[1] = V.vector4_f32[3];
    Result.vector4_f32[2] = -V.vector4_f32[0];
    Result.vector4_f32[3] = -V.vector4_f32[1];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
    vResult = _mm_mul_ps(vResult,FlipZW);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);

    Dot = XMVector4Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);
    Dot = XMVector4Dot(V1, V2);
    L1 = _mm_mul_ps(L1,L2);
    CosAngle = _mm_mul_ps(Dot,L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
    FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
    FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
    FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
    XMVECTOR vResult = {
        fX,
        fY,
        fZ,
        fW
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Splat x,y,z and w
    XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    // Mul by the matrix
    vTempX = _mm_mul_ps(vTempX,M.r[0]);
    vTempY = _mm_mul_ps(vTempY,M.r[1]);
    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
    vTempW = _mm_mul_ps(vTempW,M.r[3]);
    // Add them all together
    vTempX = _mm_add_ps(vTempX,vTempY);
    vTempZ = _mm_add_ps(vTempZ,vTempW);
    vTempX = _mm_add_ps(vTempX,vTempZ);
    return vTempX;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
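
// Usage sketch (illustrative): transforming a point (w = 1) by a translation
// matrix; XNA math uses row vectors, so V is multiplied on the left of M.
//
//     XMMATRIX m = XMMatrixTranslation(1.0f, 2.0f, 3.0f);
//     XMVECTOR p = XMVectorSet(0.0f, 0.0f, 0.0f, 1.0f);
//     XMVECTOR r = XMVector4Transform(p, m);   // (1.0f, 2.0f, 3.0f, 1.0f)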

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector4TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT4* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX       M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR W;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat4((XMFLOAT4*)pInputVector);
        W = XMVectorSplatW(V);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
//        Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);

        Result = XMVectorMultiply(W, M.r[3]);
        Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT i;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    const BYTE* pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
    BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
    for (i = 0; i < VectorCount; i++)
    {
        // Fetch the input vector and splat its components
        XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
        XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
        XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
        XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
        vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
        vTempx = _mm_mul_ps(vTempx,M.r[0]);
        vTempy = _mm_mul_ps(vTempy,M.r[1]);
        vTempz = _mm_mul_ps(vTempz,M.r[2]);
        vTempw = _mm_mul_ps(vTempw,M.r[3]);
        vTempx = _mm_add_ps(vTempx,vTempy);
        vTempw = _mm_add_ps(vTempw,vTempz);
        vTempw = _mm_add_ps(vTempw,vTempx);
        // Store the transformed vector
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
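
// Usage sketch (illustrative; the buffer is hypothetical): transforming a
// tightly packed XMFLOAT4 array in place. In-place use is safe here because
// each vector is fully loaded before its result is stored.
//
//     XMFLOAT4 verts[64];   // filled elsewhere
//     XMMATRIX m = XMMatrixRotationZ(XM_PIDIV2);
//     XMVector4TransformStream(verts, sizeof(XMFLOAT4),
//                              verts, sizeof(XMFLOAT4), 64, m);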

#ifdef __cplusplus

/****************************************************************************
 *
 * XMVECTOR operators
 *
 ****************************************************************************/

#ifndef XM_NO_OPERATOR_OVERLOADS

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
{
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator- (FXMVECTOR V)
{
    return XMVectorNegate(V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator+=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorAdd(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator-=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorSubtract(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorMultiply(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    XMVECTOR InvV = XMVectorReciprocal(V2);
    V1 = XMVectorMultiply(V1, InvV);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR&   V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR&   V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, 1.0f / S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorAdd(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator-
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorSubtract(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorMultiply(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    XMVECTOR InvV = XMVectorReciprocal(V2);
    return XMVectorMultiply(V1, InvV);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR      V,
    CONST FLOAT    S
)
{
    return XMVectorScale(V, S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR      V,
    CONST FLOAT    S
)
{
    return XMVectorScale(V, 1.0f / S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FLOAT          S,
    FXMVECTOR      V
)
{
    return XMVectorScale(V, S);
}

#endif // !XM_NO_OPERATOR_OVERLOADS
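
// With operator overloads enabled, the wrappers above allow natural vector
// expressions; a sketch (illustrative):
//
//     XMVECTOR a = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b = XMVectorSet(4.0f, 3.0f, 2.0f, 1.0f);
//     XMVECTOR c = 2.0f * a + b / 2.0f;   // XMVectorScale / XMVectorAdd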
10993
 
10994
/****************************************************************************
10995
 *
10996
 * XMFLOAT2 operators
10997
 *
10998
 ****************************************************************************/
10999
 
11000
//------------------------------------------------------------------------------
11001
 
11002
XMFINLINE _XMFLOAT2::_XMFLOAT2
11003
(
11004
    CONST FLOAT* pArray
11005
)
11006
{
11007
    x = pArray[0];
11008
    y = pArray[1];
11009
}
11010
 
11011
//------------------------------------------------------------------------------
11012
 
11013
XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
11014
(
11015
    CONST _XMFLOAT2& Float2
11016
)
11017
{
11018
    x = Float2.x;
11019
    y = Float2.y;
11020
    return *this;
11021
}
11022
 
11023
/****************************************************************************
11024
 *
11025
 * XMHALF2 operators
11026
 *
11027
 ****************************************************************************/
11028
 
11029
//------------------------------------------------------------------------------
11030
 
11031
XMFINLINE _XMHALF2::_XMHALF2
11032
(
11033
    CONST HALF* pArray
11034
)
11035
{
11036
    x = pArray[0];
11037
    y = pArray[1];
11038
}
11039
 
11040
//------------------------------------------------------------------------------
11041
 
11042
XMFINLINE _XMHALF2::_XMHALF2
11043
(
11044
    FLOAT _x,
11045
    FLOAT _y
11046
)
11047
{
11048
    x = XMConvertFloatToHalf(_x);
11049
    y = XMConvertFloatToHalf(_y);
11050
}
11051
 
11052
//------------------------------------------------------------------------------
11053
 
11054
XMFINLINE _XMHALF2::_XMHALF2
11055
(
11056
    CONST FLOAT* pArray
11057
)
11058
{
11059
    x = XMConvertFloatToHalf(pArray[0]);
11060
    y = XMConvertFloatToHalf(pArray[1]);
11061
}
11062
 
11063
//------------------------------------------------------------------------------
11064
 
11065
XMFINLINE _XMHALF2& _XMHALF2::operator=
11066
(
11067
    CONST _XMHALF2& Half2
11068
)
11069
{
11070
    x = Half2.x;
11071
    y = Half2.y;
11072
    return *this;
11073
}
11074
 
11075
/****************************************************************************
 *
 * XMSHORTN2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
(
    CONST _XMSHORTN2& ShortN2
)
{
    x = ShortN2.x;
    y = ShortN2.y;
    return *this;
}

/****************************************************************************
 *
 * XMSHORT2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2& _XMSHORT2::operator=
(
    CONST _XMSHORT2& Short2
)
{
    x = Short2.x;
    y = Short2.y;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORTN2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
(
    CONST _XMUSHORTN2& UShortN2
)
{
    x = UShortN2.x;
    y = UShortN2.y;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORT2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
(
    CONST _XMUSHORT2& UShort2
)
{
    x = UShort2.x;
    y = UShort2.y;
    return *this;
}

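// Illustrative usage sketch (editor's addition, not part of the original
// header): the N-suffixed types store normalized values -- [-1, 1] for the
// signed forms, [0, 1] for the unsigned forms -- scaled onto the full
// integer range, while the plain types store the floats as raw integers.
// Assuming the matching xnamath.h load functions:
//
//     XMSHORTN2 n(1.0f, -1.0f);          // packs as x = 32767, y = -32767
//     XMVECTOR vn = XMLoadShortN2(&n);   // x and y come back as 1.0f, -1.0f
//
//     XMSHORT2 s(100.0f, -200.0f);       // packs as x = 100, y = -200
//     XMVECTOR vs = XMLoadShort2(&s);    // x and y come back as 100.0f, -200.0f
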
/****************************************************************************
 *
 * XMFLOAT3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT3::_XMFLOAT3
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
(
    CONST _XMFLOAT3& Float3
)
{
    x = Float3.x;
    y = Float3.y;
    z = Float3.z;
    return *this;
}

/****************************************************************************
 *
 * XMHENDN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3::_XMHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3::_XMHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST _XMHENDN3& HenDN3
)
{
    v = HenDN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMHEND3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3::_XMHEND3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3::_XMHEND3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST _XMHEND3& HenD3
)
{
    v = HenD3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUHENDN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
(
    CONST _XMUHENDN3& UHenDN3
)
{
    v = UHenDN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUHEND3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3::_XMUHEND3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3::_XMUHEND3
(
    CONST FLOAT* pArray
)
{
    XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3& _XMUHEND3::operator=
(
    CONST _XMUHEND3& UHenD3
)
{
    v = UHenD3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3& _XMUHEND3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDHENN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3::_XMDHENN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3::_XMDHENN3
(
    CONST FLOAT* pArray
)
{
    XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3& _XMDHENN3::operator=
(
    CONST _XMDHENN3& DHenN3
)
{
    v = DHenN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3& _XMDHENN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDHEN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3::_XMDHEN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3::_XMDHEN3
(
    CONST FLOAT* pArray
)
{
    XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3& _XMDHEN3::operator=
(
    CONST _XMDHEN3& DHen3
)
{
    v = DHen3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3& _XMDHEN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDHENN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3::_XMUDHENN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3::_XMUDHENN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
(
    CONST _XMUDHENN3& UDHenN3
)
{
    v = UDHenN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDHEN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3::_XMUDHEN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3::_XMUDHEN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
(
    CONST _XMUDHEN3& UDHen3
)
{
    v = UDHen3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMU565 operators
 *
 ****************************************************************************/

XMFINLINE _XMU565::_XMU565
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}

XMFINLINE _XMU565::_XMU565
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMU565::_XMU565
(
    CONST FLOAT *pArray
)
{
    XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMU565& _XMU565::operator=
(
    CONST _XMU565& U565
)
{
    v = U565.v;
    return *this;
}

XMFINLINE _XMU565& _XMU565::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3PK operators
 *
 ****************************************************************************/

XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST _XMFLOAT3PK& float3pk
)
{
    v = float3pk.v;
    return *this;
}

XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3SE operators
 *
 ****************************************************************************/

XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST _XMFLOAT3SE& float3se
)
{
    v = float3se.v;
    return *this;
}

XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

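// Illustrative note (editor's addition, not part of the original header):
// XMFLOAT3PK holds three positive small floats in an 11:11:10 layout
// (6-bit mantissa / 5-bit exponent for x and y, 5-bit mantissa / 5-bit
// exponent for z, as in DXGI_FORMAT_R11G11B10_FLOAT), and XMFLOAT3SE
// stores three 9-bit mantissas under one shared 5-bit exponent
// (R9G9B9E5).  Powers of two survive a round trip exactly:
//
//     XMFLOAT3PK pk(0.5f, 1.0f, 2.0f);    // packed into pk.v
//     XMVECTOR v = XMLoadFloat3PK(&pk);   // recovers (0.5f, 1.0f, 2.0f)
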
/****************************************************************************
 *
 * XMFLOAT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT4::_XMFLOAT4
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
(
    CONST _XMFLOAT4& Float4
)
{
    x = Float4.x;
    y = Float4.y;
    z = Float4.z;
    w = Float4.w;
    return *this;
}

/****************************************************************************
 *
 * XMHALF4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    CONST HALF* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    x = XMConvertFloatToHalf(_x);
    y = XMConvertFloatToHalf(_y);
    z = XMConvertFloatToHalf(_z);
    w = XMConvertFloatToHalf(_w);
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    CONST FLOAT* pArray
)
{
    XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4& _XMHALF4::operator=
(
    CONST _XMHALF4& Half4
)
{
    x = Half4.x;
    y = Half4.y;
    z = Half4.z;
    w = Half4.w;
    return *this;
}

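// Illustrative usage sketch (editor's addition, not part of the original
// header): HALF is a 16-bit float with a 10-bit mantissa, so the float
// constructors above keep only about three decimal digits of precision.
// Small integers convert exactly:
//
//     XMHALF4 h(1.0f, 2.0f, 3.0f, 4.0f);       // each component converted
//     FLOAT f = XMConvertHalfToFloat(h.x);     // 1.0f again, exact here
//     XMVECTOR v = XMLoadHalf4(&h);            // (1.0f, 2.0f, 3.0f, 4.0f)
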
/****************************************************************************
 *
 * XMSHORTN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
(
    CONST _XMSHORTN4& ShortN4
)
{
    x = ShortN4.x;
    y = ShortN4.y;
    z = ShortN4.z;
    w = ShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4& _XMSHORT4::operator=
(
    CONST _XMSHORT4& Short4
)
{
    x = Short4.x;
    y = Short4.y;
    z = Short4.z;
    w = Short4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORTN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
(
    CONST _XMUSHORTN4& UShortN4
)
{
    x = UShortN4.x;
    y = UShortN4.y;
    z = UShortN4.z;
    w = UShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
(
    CONST _XMUSHORT4& UShort4
)
{
    x = UShort4.x;
    y = UShort4.y;
    z = UShort4.z;
    w = UShort4.w;
    return *this;
}

/****************************************************************************
 *
 * XMXDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4::_XMXDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4::_XMXDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST _XMXDECN4& XDecN4
)
{
    v = XDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST _XMXDEC4& XDec4
)
{
    v = XDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST _XMDECN4& DecN4
)
{
    v = DecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST _XMDEC4& Dec4
)
{
    v = Dec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST _XMUDECN4& UDecN4
)
{
    v = UDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST _XMUDEC4& UDec4
)
{
    v = UDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

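// Illustrative note (editor's addition, not part of the original header):
// the Dec family packs x, y, z into 10 bits each plus a 2-bit w in the
// UINT v member; U marks the unsigned forms, N the normalized ones, and
// the X variants pair signed x/y/z with an unsigned 2-bit w.  With the
// unsigned normalized form (the R10G10B10A2_UNORM layout):
//
//     XMUDECN4 d(0.25f, 0.5f, 0.75f, 1.0f);  // quantized into d.v
//     XMVECTOR v = XMLoadUDecN4(&d);         // approximately the inputs
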
/****************************************************************************
 *
 * XMXICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST _XMXICON4& XIcoN4
)
{
    v = XIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST _XMXICO4& XIco4
)
{
    v = XIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST _XMICON4& IcoN4
)
{
    v = IcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST _XMICO4& Ico4
)
{
    v = Ico4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST _XMUICON4& UIcoN4
)
{
    v = UIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST _XMUICO4& UIco4
)
{
    v = UIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMCOLOR4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreColor(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    CONST FLOAT* pArray
)
{
    XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST _XMCOLOR& Color
)
{
    c = Color.c;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST UINT Color
)
{
    c = Color;
    return *this;
}

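// Illustrative usage sketch (editor's addition, not part of the original
// header): XMCOLOR stores a normalized 8:8:8:8 ARGB color in its c
// member, mapping each [0, 1] float onto 0-255:
//
//     XMCOLOR red(1.0f, 0.0f, 0.0f, 1.0f);  // c == 0xFFFF0000 (A, R, G, B)
//     XMVECTOR v = XMLoadColor(&red);       // (1.0f, 0.0f, 0.0f, 1.0f)
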
/****************************************************************************
 *
 * XMBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
(
    CONST _XMBYTEN4& ByteN4
)
{
    x = ByteN4.x;
    y = ByteN4.y;
    z = ByteN4.z;
    w = ByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4& _XMBYTE4::operator=
(
    CONST _XMBYTE4& Byte4
)
{
    x = Byte4.x;
    y = Byte4.y;
    z = Byte4.z;
    w = Byte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
(
    CONST _XMUBYTEN4& UByteN4
)
{
    x = UByteN4.x;
    y = UByteN4.y;
    z = UByteN4.z;
    w = UByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
(
    CONST _XMUBYTE4& UByte4
)
{
    x = UByte4.x;
    y = UByte4.y;
    z = UByte4.z;
    w = UByte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUNIBBLE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST FLOAT *pArray
)
{
    XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST _XMUNIBBLE4& UNibble4
)
{
    v = UNibble4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMU555 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    CONST CHAR *pArray,
    BOOL _w
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    BOOL _w
)
{
    XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    CONST FLOAT *pArray,
    BOOL _w
)
{
    XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray);
    XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555& _XMU555::operator=
(
    CONST _XMU555& U555
)
{
    v = U555.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555& _XMU555::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

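// Illustrative note (editor's addition, not part of the original header):
// XMUNIBBLE4 packs four 4-bit unsigned fields and XMU555 three 5-bit
// fields plus a 1-bit w into the USHORT v member; neither is normalized,
// so the floats are stored as small integers:
//
//     XMU555 p(31.0f, 0.0f, 31.0f, TRUE);  // 5:5:5:1 texel, e.g. magenta
//     USHORT bits = p.v;                   // the raw packed value
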
#endif // __cplusplus

#if defined(_XM_NO_INTRINSICS_)
#undef XMISNAN
#undef XMISINF
#endif

#endif // __XNAMATHVECTOR_INL__