/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathvector.inl

Abstract:

    XNA math library for Windows and Xbox 360: Vector functions
--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__

#if defined(_XM_NO_INTRINSICS_)
#define XMISNAN(x)  ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x)  ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
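
// Note: these tests rely on the IEEE-754 single precision layout, where
// 0x7F800000 masks the exponent bits and 0x007FFFFF the mantissa. An all-ones
// exponent denotes NaN when the mantissa is non-zero, and +/-infinity when the
// mantissa is zero; e.g. the quiet NaN pattern 0x7FC00000 satisfies XMISNAN.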

/****************************************************************************
 *
 * General Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Assignment operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
XMFINLINE XMVECTOR XMVectorSet
(
    FLOAT x,
    FLOAT y,
    FLOAT z,
    FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
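
// Note the reversed argument order above: XMVectorSet takes components as
// x,y,z,w while _mm_set_ps fills lanes from the highest one down. For example:
//     XMVECTOR v = XMVectorSet( 1.0f, 2.0f, 3.0f, 4.0f ); // v = <1,2,3,4>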

//------------------------------------------------------------------------------
// Initialize a vector with four integer values
XMFINLINE XMVECTOR XMVectorSetInt
(
    UINT x,
    UINT y,
    UINT z,
    UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set_epi32( w, z, y, x );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
XMFINLINE XMVECTOR XMVectorReplicate
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
    CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    FLOAT Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
XMFINLINE XMVECTOR XMVectorReplicateInt
(
    UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_set1_epi32( Value );
    return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
    CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    UINT Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask)
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the x component of the vector
XMFINLINE XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[0];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the y component of the vector
XMFINLINE XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[1];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the z component of the vector
XMFINLINE XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[2];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Replicate the w component of the vector
XMFINLINE XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[3];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = 1.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x34000000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x80000000U;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32( 0x80000000 );
    return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
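
// Illustrative use of the sign mask (not in the original source): since the
// mask has only bit 31 set in each lane, XORing a vector with it flips every
// sign. A sketch, assuming the xnamath bitwise helper XMVectorXorInt:
//     XMVECTOR vNegated = XMVectorXorInt( V, XMVectorSplatSignMask() );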

//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
{
    XMASSERT( i <= 3 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(V);
#else
    return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[2];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
    XMASSERT( f != 0 );
    XMASSERT( i <  4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store the X component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    return tmp.u[i];
#else
    return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
    XMASSERT( x != 0 );
    XMASSERT( i <  4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    *x = tmp.u[i];
#else
    *x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Store the X component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Set a single indexed floating point component
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[0] = x;
    return vResult;
#else
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
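
// The shuffle/move/shuffle sequences above are the standard SSE idiom for
// writing a single lane: _mm_move_ss can only replace lane 0, so the target
// lane is first rotated into lane 0, overwritten from the scalar, then rotated
// back into place, avoiding a round trip through memory.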

//------------------------------------------------------------------------------

// Sets a component of a vector to a floating point value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_f32[i] = *f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = *f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_f32[0] = *x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = *y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = *z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[0] = x;
    return vResult;
#else
    __m128i vTemp = _mm_cvtsi32_si128(x);
    XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets a component of a vector to an integer value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.vector4_u32[i] = *x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_u32[0] = *x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
    XMVECTOR vResult = _mm_move_ss(V,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = *y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = *z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorPermute
// operations.  Visualize the two vectors V1 and V2 given
// in a permute as arranged back to back in a linear fashion,
// such that they form an array of 8 floating point values.
// The four integers specified in XMVectorPermuteControl
// will serve as indices into the array to select components
// from the two vectors.  ElementIndex0 is used to select
// an element from the vectors to be placed in the first
// component of the resulting vector, ElementIndex1 is used
// to select an element for the second component, etc.

XMFINLINE XMVECTOR XMVectorPermuteControl
(
    UINT     ElementIndex0,
    UINT     ElementIndex1,
    UINT     ElementIndex2,
    UINT     ElementIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vControl;
    static CONST UINT ControlElement[] = {
                    XM_PERMUTE_0X,
                    XM_PERMUTE_0Y,
                    XM_PERMUTE_0Z,
                    XM_PERMUTE_0W,
                    XM_PERMUTE_1X,
                    XM_PERMUTE_1Y,
                    XM_PERMUTE_1Z,
                    XM_PERMUTE_1W
                };
    XMASSERT(ElementIndex0 < 8);
    XMASSERT(ElementIndex1 < 8);
    XMASSERT(ElementIndex2 < 8);
    XMASSERT(ElementIndex3 < 8);

    vControl.u[0] = ControlElement[ElementIndex0];
    vControl.u[1] = ControlElement[ElementIndex1];
    vControl.u[2] = ControlElement[ElementIndex2];
    vControl.u[3] = ControlElement[ElementIndex3];
    return vControl.v;
#else
#endif
}

//------------------------------------------------------------------------------

// Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
// entries into a single 16 byte vector and return it. Index 0-15 = V1,
// 16-31 = V2
XMFINLINE XMVECTOR XMVectorPermute
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
    const BYTE *aByte[2];
    XMVECTOR Result;
    UINT i, uIndex, VectorIndex;
    const BYTE *pControl;
    BYTE *pWork;

    // Indices must be in range from 0 to 31
    XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0);

    // 0-15 = V1, 16-31 = V2
    aByte[0] = (const BYTE*)(&V1);
    aByte[1] = (const BYTE*)(&V2);
    i = 16;
    pControl = (const BYTE *)(&Control);
    pWork = (BYTE *)(&Result);
    do {
        // Get the byte to map from
        uIndex = pControl[0];
        ++pControl;
        VectorIndex = (uIndex>>4)&1;
        uIndex &= 0x0F;
#if defined(_XM_X86_) || defined(_XM_X64_)
        uIndex ^= 3; // Swap byte ordering on little endian machines
#endif
        pWork[0] = aByte[VectorIndex][uIndex];
        ++pWork;
    } while (--i);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_PREFAST_) || defined(XMDEBUG)
    // Indices must be in range from 0 to 31
    static const XMVECTORI32 PermuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
    XMVECTOR vAssert = _mm_and_ps(Control,PermuteTest);
    __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
    XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
#endif
    // Store the vectors onto local memory on the stack
    XMVECTOR Array[2];
    Array[0] = V1;
    Array[1] = V2;
    // Output vector, on the stack
    XMVECTORU8 vResult;
    // Get pointer to the two vectors on the stack
    const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
    // Store the Control vector on the stack to access the bytes
    // don't use Control, it can cause a register variable to spill on the stack.
    XMVECTORU8 vControl;
    vControl.v = Control;   // Write to memory
    UINT i = 0;
    do {
        UINT ComponentIndex = vControl.u[i] & 0x1FU;
        ComponentIndex ^= 3; // Swap byte ordering
        vResult.u[i] = pInput[ComponentIndex];
    } while (++i<16);
    return vResult;
#else // _XM_SSE_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
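
// Illustrative usage (not in the original source): with the XM_PERMUTE_*
// constants from xnamath.h, indices 0-3 select components of V1 and 4-7
// components of V2, so
//     XMVECTOR vCtrl = XMVectorPermuteControl( 0, 4, 1, 5 );
//     XMVECTOR vR    = XMVectorPermute( V1, V2, vCtrl );
// yields vR = < V1.x, V2.x, V1.y, V2.y >, matching XMVectorMergeXY below.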

//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorSelect
// operations.  The four integers specified in XMVectorSelectControl
// serve as indices to select between components in two vectors.
// The first index controls selection for the first component of
// the vectors involved in a select operation, the second index
// controls selection for the second component etc.  A value of
// zero for an index causes the corresponding component from the first
// vector to be selected whereas a one causes the component from the
// second vector to be selected instead.

XMFINLINE XMVECTOR XMVectorSelectControl
(
    UINT VectorIndex0,
    UINT VectorIndex1,
    UINT VectorIndex2,
    UINT VectorIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // x=Index0,y=Index1,z=Index2,w=Index3
    __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
    // Any non-zero entries become 0xFFFFFFFF else 0
    vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
    return reinterpret_cast<__m128 *>(&vTemp)[0];
#else
    XMVECTOR    ControlVector;
    CONST UINT  ControlElement[] =
                {
                    XM_SELECT_0,
                    XM_SELECT_1
                };

    XMASSERT(VectorIndex0 < 2);
    XMASSERT(VectorIndex1 < 2);
    XMASSERT(VectorIndex2 < 2);
    XMASSERT(VectorIndex3 < 2);

    ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
    ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
    ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
    ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];

    return ControlVector;

#endif
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSelect
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
    Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
    Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
    Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
    XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
    return _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
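
// Illustrative usage (not in the original source): per-component selection,
//     XMVECTOR vCtrl = XMVectorSelectControl( 0, 1, 0, 1 );
//     XMVECTOR vR    = XMVectorSelect( V1, V2, vCtrl );
// gives vR = < V1.x, V2.y, V1.z, V2.w >. Because each result bit is
// (V1 & ~Control) | (V2 & Control), the all-or-nothing masks produced by the
// comparison functions below can also be passed directly as Control.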

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeXY
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0];
    Result.vector4_u32[1] = V2.vector4_u32[0];
    Result.vector4_u32[2] = V1.vector4_u32[1];
    Result.vector4_u32[3] = V2.vector4_u32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpacklo_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMergeZW
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[2];
    Result.vector4_u32[1] = V2.vector4_u32[2];
    Result.vector4_u32[2] = V1.vector4_u32[3];
    Result.vector4_u32[3] = V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpackhi_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;

    Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpeq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorEqualR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
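
// Illustrative usage (not in the original source, assuming the XMComparison*
// helpers defined in xnamath.h for testing CR6 masks):
//     UINT cr;
//     XMVectorEqualR( &cr, V1, V2 );
//     if ( XMComparisonAllTrue( cr ) )  { /* every component compared equal */ }
//     if ( XMComparisonAnyFalse( cr ) ) { /* at least one component differed */ }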
1508
 
1509
//------------------------------------------------------------------------------
1510
// Treat the components of the vectors as unsigned integers and
1511
// compare individual bits between the two.  This is useful for
1512
// comparing control vectors and result vectors returned from
1513
// other comparison operations.
1514
 
1515
XMFINLINE XMVECTOR XMVectorEqualInt
1516
(
1517
    FXMVECTOR V1, 
1518
    FXMVECTOR V2
1519
)
1520
{
1521
#if defined(_XM_NO_INTRINSICS_)
1522
 
1523
    XMVECTOR Control;
1524
 
1525
    Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
1526
    Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
1527
    Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
1528
    Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;
1529
 
1530
    return Control;
1531
 
1532
#elif defined(_XM_SSE_INTRINSICS_)
1533
	__m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1534
    return reinterpret_cast<__m128 *>(&V)[0];
1535
#else // _XM_VMX128_INTRINSICS_
1536
#endif // _XM_VMX128_INTRINSICS_
1537
}
1538
 
1539
//------------------------------------------------------------------------------
1540
 
1541
XMFINLINE XMVECTOR XMVectorEqualIntR
1542
(
1543
    UINT*    pCR,
1544
    FXMVECTOR V1, 
1545
    FXMVECTOR V2
1546
)
1547
{
1548
#if defined(_XM_NO_INTRINSICS_)
1549
 
1550
    XMVECTOR Control;
1551
 
1552
    XMASSERT(pCR);
1553
 
1554
    Control = XMVectorEqualInt(V1, V2);
1555
 
1556
    *pCR = 0;
1557
 
1558
    if (XMVector4EqualInt(Control, XMVectorTrueInt()))
1559
    {
1560
        // All elements are equal
1561
        *pCR |= XM_CRMASK_CR6TRUE;
1562
    }
1563
    else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
1564
    {
1565
        // All elements are not equal
1566
        *pCR |= XM_CRMASK_CR6FALSE;
1567
    }
1568
 
1569
    return Control;
1570
 
1571
#elif defined(_XM_SSE_INTRINSICS_)
1572
    XMASSERT(pCR);
1573
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
1574
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
1575
    UINT CR = 0;
1576
    if (iTemp==0x0F)
1577
    {
1578
        CR = XM_CRMASK_CR6TRUE;
1579
    }
1580
    else if (!iTemp)
1581
    {
1582
        CR = XM_CRMASK_CR6FALSE;
1583
    }
1584
    *pCR = CR;
1585
    return reinterpret_cast<__m128 *>(&V)[0];
1586
#else // _XM_VMX128_INTRINSICS_
1587
#endif // _XM_VMX128_INTRINSICS_
1588
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)

    FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
    XMVECTOR Control;

    fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
    fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
    fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
    fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];

    fDeltax = fabsf(fDeltax);
    fDeltay = fabsf(fDeltay);
    fDeltaz = fabsf(fDeltaz);
    fDeltaw = fabsf(fDeltaw);

    Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
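
// Note on the SSE path above: with no SSE absolute-value instruction available,
// |d| is computed from the identity |d| == max(d, 0 - d), which holds for every
// finite float (e.g. d = -3: max(-3, 3) == 3). XMVectorAbs below uses the same
// trick.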

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpneq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpgt_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpge_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
(
    UINT*    pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater or equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No elements are greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No elements are greater or equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLess
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmplt_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmple_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorInBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
    Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    return vTemp1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorInBoundsR
(
    UINT*    pCR,
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR != 0 );

    ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    CR = 0;

    if (ux&uy&uz&uw)
    {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR != 0 );
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);

    UINT CR = 0;
    if (_mm_movemask_ps(vTemp1)==0xf) {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    return vTemp1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorIsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a NOT on the zero-mantissa test so it is true for NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If any are NaN, the signs are true after the merge above
    return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
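
// Note on the test above: an IEEE-754 single is NaN exactly when its exponent
// bits are all ones (0x7F800000 after masking) AND its mantissa bits are not all
// zero (an all-zero mantissa with that exponent is +/-INF instead). For example,
// 0x7FC00000 (the standard quiet NaN) passes, while 0x7F800000 (+INF) does not.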

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorIsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the signs are true.
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Rounding and clamping operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMin
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
    Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
    Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
    Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_min_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMax
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
    Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
    Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
    Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_max_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorRound
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Result;
    XMVECTOR       Bias;
    CONST XMVECTOR Zero = XMVectorZero();
    CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
    CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);

    Bias = XMVectorLess(V, Zero);
    Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
    Result = XMVectorAdd(V, Bias);
    Result = XMVectorTruncate(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23, as well as NaN and INF, have no fractional bits)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding
    __m128i vInt = _mm_cvtps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
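
// Note on the masking above (a worked example): a 32-bit float carries 23
// mantissa bits, so any |x| >= 2^23 (8388608.0f) is already an integer and must
// pass through unchanged; _mm_cvtps_epi32 on such out-of-range values (or on
// NaN/INF) would instead produce the 0x80000000 "integer indefinite". E.g. for
// {2.5f, 1e9f, ...} the 2.5f lane is rounded to nearest (even) while the 1e9f
// lane is returned as-is.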

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTruncate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.vector4_f32[0] = (FLOAT)((INT)V.vector4_f32[0]);
    Result.vector4_f32[1] = (FLOAT)((INT)V.vector4_f32[1]);
    Result.vector4_f32[2] = (FLOAT)((INT)V.vector4_f32[2]);
    Result.vector4_f32[3] = (FLOAT)((INT)V.vector4_f32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23, as well as NaN and INF, have no fractional bits)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding with truncation
    __m128i vInt = _mm_cvttps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorFloor
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR vResult = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
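
// Note on the SSE path above: _mm_cvtps_epi32 rounds to nearest (even), so
// subtracting g_XMOneHalfMinusEpsilon (a value just under 0.5) first turns that
// into an approximation of floor: e.g. 1.0 - 0.4999999 = 0.5000001 rounds to 1,
// and -0.5 - 0.4999999 = -0.9999999 rounds to -1 == floorf(-0.5f). Unlike
// XMVectorRound above, this path does not mask off values >= 2^23, NaN, or INF.
// XMVectorCeiling below is the mirror image, adding the same constant.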

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCeiling
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorClamp
(
    FXMVECTOR V,
    FXMVECTOR Min,
    FXMVECTOR Max
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    XMASSERT(XMVector4LessOrEqual(Min, Max));

    Result = XMVectorMax(Min, V);
    Result = XMVectorMin(Max, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult;
    XMASSERT(XMVector4LessOrEqual(Min, Max));
    vResult = _mm_max_ps(Min,V);
    vResult = _mm_min_ps(vResult,Max);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSaturate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    CONST XMVECTOR Zero = XMVectorZero();

    return XMVectorClamp(V, Zero, g_XMOne.v);

#elif defined(_XM_SSE_INTRINSICS_)
    // Set <0 to 0
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    // Set >1 to 1
    return _mm_min_ps(vResult,g_XMOne);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Bitwise logical operations
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAndInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_and_ps(V1,V2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAndCInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorOrInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNorInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
    Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
    Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
    Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i Result;
    Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    Result = _mm_andnot_si128( Result,g_XMNegOneMask);
    return reinterpret_cast<__m128 *>(&Result)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorXorInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNegate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = -V.vector4_f32[0];
    Result.vector4_f32[1] = -V.vector4_f32[1];
    Result.vector4_f32[2] = -V.vector4_f32[2];
    Result.vector4_f32[3] = -V.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Z;

    Z = _mm_setzero_ps();

    return _mm_sub_ps( Z, V );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
    Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
    Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
    Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_add_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAddAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Mask;
    XMVECTOR       Offset;
    XMVECTOR       Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Add the given angles together.  If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorAdd(V1, V2);

    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_add_ps(V1,V2);
    // Less than -Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Subtract 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
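
// Worked example: XM_PI is approximately 3.14159265f, so adding the angles
// 3.0f and 1.0f gives 4.0f, which is >= Pi; the code above subtracts 2*Pi and
// returns roughly -2.2831853f, back inside the half-open range [-Pi, Pi).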

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
    Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
    Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
    Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sub_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSubtractAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR       Mask;
    XMVECTOR       Offset;
    XMVECTOR       Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Subtract the given angles.  If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorSubtract(V1, V2);

    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_sub_ps(V1,V2);
    // Less than -Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Subtract 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMultiply
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        V1.vector4_f32[0] * V2.vector4_f32[0],
        V1.vector4_f32[1] * V2.vector4_f32[1],
        V1.vector4_f32[2] * V2.vector4_f32[2],
        V1.vector4_f32[3] * V2.vector4_f32[3]
    };
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_mul_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMultiplyAdd
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
        (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
        (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
        (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_mul_ps( V1, V2 );
    return _mm_add_ps(vResult, V3 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR vResult = {
        V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
        V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
        V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
        V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R = _mm_mul_ps( V1, V2 );
    return _mm_sub_ps( V3, R );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorScale
(
    FXMVECTOR V,
    FLOAT    ScaleFactor
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        V.vector4_f32[0] * ScaleFactor,
        V.vector4_f32[1] * ScaleFactor,
        V.vector4_f32[2] * ScaleFactor,
        V.vector4_f32[3] * ScaleFactor
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
    return _mm_mul_ps(vResult,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (XMISINF(V.vector4_f32[i]))
        {
            Result.vector4_f32[i] = (V.vector4_f32[i] < 0.0f) ? -0.0f : 0.0f;
        }
        else if (V.vector4_u32[i] == 0x80000000U)
        {
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_u32[i] == 0)
        {
            Result.vector4_u32[i] = 0x7F800000;
        }
        else
        {
            Result.vector4_f32[i] = 1.0f / V.vector4_f32[i];
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_rcp_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
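
// Note on the scalar path above (and in XMVectorReciprocalSqrtEst below):
// 0x7F800000 and 0xFF800000 are the bit patterns of +INF and -INF, written
// through the vector4_u32 union member because no float literal spells an
// infinity here. The zero tests compare bit patterns (0 for +0.0f, 0x80000000
// for -0.0f) because ordinary float == cannot tell the two zeroes apart.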

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVectorReciprocalEst(V);
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_div_ps(g_XMOne,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Return an estimated square root
XMFINLINE XMVECTOR XMVectorSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Select;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSqrt
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Zero;
    XMVECTOR VEqualsInfinity, VEqualsZero;
    XMVECTOR Select;
    XMVECTOR Result;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    Result = XMVectorReciprocalSqrt(V);
    Zero = XMVectorZero();
    VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (V.vector4_u32[i] == 0)
        {
            Result.vector4_u32[i] = 0x7F800000;
        }
        else if (V.vector4_u32[i] == 0x80000000U)
        {
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_f32[i] < 0.0f)
        {
            Result.vector4_u32[i] = 0x7FFFFFFF;
        }
        else if (XMISINF(V.vector4_f32[i]))
        {
            Result.vector4_f32[i] = 0.0f;
        }
        else
        {
            Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_rsqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorReciprocalSqrt
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return XMVectorReciprocalSqrtEst(V);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_sqrt_ps(V);
    vResult = _mm_div_ps(g_XMOne,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorExpEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
    Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
    Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
    Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setr_ps(
        powf(2.0f,XMVectorGetX(V)),
        powf(2.0f,XMVectorGetY(V)),
        powf(2.0f,XMVectorGetZ(V)),
        powf(2.0f,XMVectorGetW(V)));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorExp
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR               E, S;
    XMVECTOR               R, R2, R3, R4;
    XMVECTOR               V0, V1;
    XMVECTOR               C0X, C0Y, C0Z, C0W;
    XMVECTOR               C1X, C1Y, C1Z, C1W;
    XMVECTOR               Result;
    static CONST XMVECTOR  C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTOR  C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    R = XMVectorFloor(V);
    E = XMVectorExpEst(R);
    R = XMVectorSubtract(V, R);
    R2 = XMVectorMultiply(R, R);
    R3 = XMVectorMultiply(R, R2);
    R4 = XMVectorMultiply(R2, R2);

    C0X = XMVectorSplatX(C0);
    C0Y = XMVectorSplatY(C0);
    C0Z = XMVectorSplatZ(C0);
    C0W = XMVectorSplatW(C0);

    C1X = XMVectorSplatX(C1);
    C1Y = XMVectorSplatY(C1);
    C1Z = XMVectorSplatZ(C1);
    C1W = XMVectorSplatW(C1);

    V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
    V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
    V0 = XMVectorMultiplyAdd(R3, C0W, V0);

    V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
    V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
    V1 = XMVectorMultiplyAdd(R3, C1W, V1);

    S = XMVectorMultiplyAdd(R4, V1, V0);

    S = XMVectorReciprocal(S);
    Result = XMVectorMultiply(E, S);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    // Get the integer of the input
    XMVECTOR R = XMVectorFloor(V);
    // Get the exponent estimate
    XMVECTOR E = XMVectorExpEst(R);
    // Get the fractional only
    R = _mm_sub_ps(V,R);
    // Get R^2
    XMVECTOR R2 = _mm_mul_ps(R,R);
    // And R^3
    XMVECTOR R3 = _mm_mul_ps(R,R2);

    XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
    V0 = _mm_mul_ps(V0,R);
    XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V0 = _mm_add_ps(V0,vConstants);

    XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
    V1 = _mm_mul_ps(V1,R);
    vConstants = _mm_load_ps1(&C1.f[0]);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V1 = _mm_add_ps(V1,vConstants);
    // R2 = R^4
    R2 = _mm_mul_ps(R2,R2);
    R2 = _mm_mul_ps(R2,V1);
    R2 = _mm_add_ps(R2,V0);
    E = _mm_div_ps(E,R2);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
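
// Note on the approximation above: writing V = floor(V) + R with 0 <= R < 1,
// 2^V = 2^floor(V) * 2^R. E holds the exact 2^floor(V), while the degree-7
// polynomial in R (the C0/C1 coefficients are (-ln 2)^n / n!) is the Taylor
// series of 2^-R, so the final divide E / S recovers E * 2^R.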

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLogEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    FLOAT fScale = (1.0f / logf(2.0f));
    XMVECTOR Result;

    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
    XMVECTOR vResult = _mm_setr_ps(
        logf(XMVectorGetX(V)),
        logf(XMVectorGetY(V)),
        logf(XMVectorGetZ(V)),
        logf(XMVectorGetW(V)));
    vResult = _mm_mul_ps(vResult,vScale);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorLog
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fScale = (1.0f / logf(2.0f));
    XMVECTOR Result;

    Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
    Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
    Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
    Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
    XMVECTOR vResult = _mm_setr_ps(
        logf(XMVectorGetX(V)),
        logf(XMVectorGetY(V)),
        logf(XMVectorGetZ(V)),
        logf(XMVectorGetW(V)));
    vResult = _mm_mul_ps(vResult,vScale);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
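
// Note: both XMVectorLogEst and XMVectorLog compute the base-2 logarithm through
// the change-of-base identity log2(x) = ln(x) / ln(2), which is why each lane is
// logf(lane) scaled by 1/logf(2.0f); e.g. log2(8) = 2.0794415f / 0.6931472f = 3.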

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorPowEst
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
    Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
    Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
    Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setr_ps(
        powf(XMVectorGetX(V1),XMVectorGetX(V2)),
        powf(XMVectorGetY(V1),XMVectorGetY(V2)),
        powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
        powf(XMVectorGetW(V1),XMVectorGetW(V2)));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorPow
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    return XMVectorPowEst(V1, V2);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorAbs
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        fabsf(V.vector4_f32[0]),
        fabsf(V.vector4_f32[1]),
        fabsf(V.vector4_f32[2]),
        fabsf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_setzero_ps();
    vResult = _mm_sub_ps(vResult,V);
    vResult = _mm_max_ps(vResult,V);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorMod
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR Quotient;
    XMVECTOR Result;

    // V1 % V2 = V1 - V2 * truncate(V1 / V2)
    Reciprocal = XMVectorReciprocal(V2);
    Quotient = XMVectorMultiply(V1, Reciprocal);
    Quotient = XMVectorTruncate(Quotient);
    Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_div_ps(V1, V2);
    vResult = XMVectorTruncate(vResult);
    vResult = _mm_mul_ps(vResult,V2);
    vResult = _mm_sub_ps(V1,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
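
// Worked example of the identity above: for V1 lane = 7.5f and V2 lane = 2.0f,
// truncate(7.5 / 2.0) = truncate(3.75) = 3, and 7.5 - 2.0 * 3 = 1.5, matching
// fmodf(7.5f, 2.0f); like fmodf, the result keeps the sign of V1.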

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorModAngles
(
    FXMVECTOR Angles
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR Result;

    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
    V = XMVectorRound(V);
    Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
    // Use the inline XMVectorRound due to the complexity of rounding
    vResult = XMVectorRound(vResult);
    vResult = _mm_mul_ps(vResult,g_XMTwoPi);
    vResult = _mm_sub_ps(Angles,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
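
// Worked example of the wrap above: Angles = 7.0f gives 7.0 / (2*Pi) ~= 1.1141,
// which rounds to 1, so the result is 7.0 - 2*Pi * 1 ~= 0.7168147f, i.e. the
// same angle expressed inside [-Pi, Pi).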
3180
 
3181
//------------------------------------------------------------------------------
3182
 
3183
XMINLINE XMVECTOR XMVectorSin
3184
(
3185
    FXMVECTOR V
3186
)
3187
{
3188
 
3189
#if defined(_XM_NO_INTRINSICS_)
3190
 
3191
    XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
3192
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
3193
    XMVECTOR Result;
3194
 
3195
    V1 = XMVectorModAngles(V);
3196
 
3197
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - 
3198
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
3199
    V2  = XMVectorMultiply(V1, V1);
3200
    V3  = XMVectorMultiply(V2, V1);
3201
    V5  = XMVectorMultiply(V3, V2);
    V7  = XMVectorMultiply(V5, V2);
    V9  = XMVectorMultiply(V7, V2);
    V11 = XMVectorMultiply(V9, V2);
    V13 = XMVectorMultiply(V11, V2);
    V15 = XMVectorMultiply(V13, V2);
    V17 = XMVectorMultiply(V15, V2);
    V19 = XMVectorMultiply(V17, V2);
    V21 = XMVectorMultiply(V19, V2);
    V23 = XMVectorMultiply(V21, V2);

    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    Result = XMVectorMultiplyAdd(S1, V3, V1);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);
    Result = XMVectorMultiplyAdd(S4, V9, Result);
    Result = XMVectorMultiplyAdd(S5, V11, Result);
    Result = XMVectorMultiplyAdd(S6, V13, Result);
    Result = XMVectorMultiplyAdd(S7, V15, Result);
    Result = XMVectorMultiplyAdd(S8, V17, Result);
    Result = XMVectorMultiplyAdd(S9, V19, Result);
    Result = XMVectorMultiplyAdd(S10, V21, Result);
    Result = XMVectorMultiplyAdd(S11, V23, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR vResult = XMVectorModAngles(V);
    // Each one is V raised to the "num" power
    // V2 = V1^2
    XMVECTOR V2  = _mm_mul_ps(vResult,vResult);
    // V1^3
    XMVECTOR vPower = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^5
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^7
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^9
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^11
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^13
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^15
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^17
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^19
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^21
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^23
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
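
// Added usage sketch (not part of the original source): a quick spot check of
// the 11-term series above against known values.
//
//     XMVECTOR Angles = XMVectorSet(0.0f, XM_PI / 6.0f, XM_PI / 4.0f, XM_PI / 2.0f);
//     XMVECTOR S = XMVectorSin(Angles);
//     // Componentwise: ~0.0f, ~0.5f, ~0.70710678f, ~1.0f, to within the
//     // accuracy of the Taylor expansion evaluated above.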

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorCos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Result;

    V1 = XMVectorModAngles(V);

    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V1, V1);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);
    V8 = XMVectorMultiply(V4, V4);
    V10 = XMVectorMultiply(V6, V4);
    V12 = XMVectorMultiply(V6, V6);
    V14 = XMVectorMultiply(V8, V6);
    V16 = XMVectorMultiply(V8, V8);
    V18 = XMVectorMultiply(V10, V8);
    V20 = XMVectorMultiply(V10, V10);
    V22 = XMVectorMultiply(V12, V10);

    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);
    Result = XMVectorMultiplyAdd(C4, V8, Result);
    Result = XMVectorMultiplyAdd(C5, V10, Result);
    Result = XMVectorMultiplyAdd(C6, V12, Result);
    Result = XMVectorMultiplyAdd(C7, V14, Result);
    Result = XMVectorMultiplyAdd(C8, V16, Result);
    Result = XMVectorMultiplyAdd(C9, V18, Result);
    Result = XMVectorMultiplyAdd(C10, V20, Result);
    Result = XMVectorMultiplyAdd(C11, V22, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR V2 = XMVectorModAngles(V);
    // Each one is V raised to the "num" power
    // V2 = V1^2
    V2  = _mm_mul_ps(V2,V2);
    // V^2
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,V2);
    XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);

    // V^4
    XMVECTOR vPower = _mm_mul_ps(V2,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^6
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^8
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^10
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^12
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^14
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^16
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^18
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^20
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^22
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
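
// Added usage sketch (not part of the original source):
//
//     XMVECTOR Angles = XMVectorSet(0.0f, XM_PI / 3.0f, XM_PI / 2.0f, XM_PI);
//     XMVECTOR C = XMVectorCos(Angles);
//     // Componentwise: ~1.0f, ~0.5f, ~0.0f, ~-1.0f within series accuracy;
//     // XMVectorModAngles makes inputs outside [-Pi, Pi) valid as well.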

//------------------------------------------------------------------------------

XMINLINE VOID XMVectorSinCos
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR  V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    Sin = XMVectorMultiplyAdd(S1, V3, V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    //           V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    S1  = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    S2  = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    S3  = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    S4  = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    S5  = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    S6  = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    S7  = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    S8  = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    S9  = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);

    C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);

    S1 = _mm_mul_ps(S1,V3);
    Sin = _mm_add_ps(S1,V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    Cos = _mm_mul_ps(C1,V2);
    Cos = _mm_add_ps(Cos,g_XMOne);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
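
// Added usage sketch (not part of the original source): both results come
// from one shared set of powers of V, so this is cheaper than separate calls
// to XMVectorSin and XMVectorCos.
//
//     XMVECTOR S, C;
//     XMVectorSinCos(&S, &C, XMVectorReplicate(XM_PIDIV4));
//     // Every lane of S and C is ~0.70710678f, and S*S + C*C ~= 1 per lane.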

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorTan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    UINT     i;
    static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    for (i = 0; i < 4; i++)
    {
        VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
    }

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    VBIsEven = XMVectorAndInt(VB, Mask.v);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne.v, VCNearZero);

    R0 = XMVectorNegate(N);
    R1 = XMVectorReciprocal(D);
    R0 = XMVectorReciprocal(R0);
    R1 = XMVectorMultiply(N, R1);
    R0 = XMVectorMultiply(D, R0);

    VIsZero = XMVectorEqual(V, Zero);

    Result = XMVectorSelect(R0, R1, VBIsEven);

    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    VBIsEven = XMVectorAndInt(VB,Mask);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne, VCNearZero);
    R0 = XMVectorNegate(N);
    R1 = _mm_div_ps(N,D);
    R0 = _mm_div_ps(D,R0);
    VIsZero = XMVectorEqual(V, Zero);
    Result = XMVectorSelect(R0, R1, VBIsEven);
    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
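
// Added worked note (not part of the original source): the Cody and Waite
// reduction above writes V = VA * (Pi/2) + VC with |VC| <= Pi/4, where C0 and
// C1 together form a split, extra-precision representation of Pi/2. The
// rational polynomial N/D then approximates tan(VC); when the quotient VA is
// odd, the identity tan(x + Pi/2) = -1/tan(x) applies, which is why R0 =
// D / (-N) is selected for odd VB and R1 = N / D for even VB.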

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorSinH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V, Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = _mm_sub_ps(E1, E2);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
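
// Added worked derivation (not part of the original source), assuming
// XMVectorExp is the base-2 exponential, as its 1/ln(2) scale factor here
// suggests:
//
//     E1 = 2^(V/ln2 - 1) = e^V / 2
//     E2 = 2^(-V/ln2 - 1) = e^-V / 2
//     E1 - E2 = (e^V - e^-V) / 2 = sinh(V)
//
// The g_XMNegativeOne offset folds the divide-by-two into the exponent.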

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorCosH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);

    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);

    Result = XMVectorAdd(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExp(V1);
    E2 = XMVectorExp(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorTanH
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    E = XMVectorMultiply(V, Scale.v);
    E = XMVectorExp(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocal(E);

    Result = XMVectorSubtract(g_XMOne.v, E);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExp(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocal(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
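
// Added worked derivation (not part of the original source), again assuming
// XMVectorExp is base-2: with Scale = 2/ln(2),
//
//     E = 2^(2V/ln2) = e^(2V)
//     E = E * 0.5 + 0.5 = (e^(2V) + 1) / 2
//     1 - 1/E = 1 - 2/(e^(2V) + 1) = (e^(2V) - 1)/(e^(2V) + 1) = tanh(V)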

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorASin
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    //           V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1, vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2, vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3, vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_mul_ps(R0,vAbsV);
    R0 = _mm_add_ps(R0,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_mul_ps(R1,vAbsV);
    R1 = _mm_add_ps(R1,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_mul_ps(R2,vAbsV);
    R2 = _mm_add_ps(R2,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_mul_ps(R3,vAbsV);
    R3 = _mm_add_ps(R3,vConstants);

    // V3 = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants, vAbsV);
    // Mul by V^3
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Merge the results
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V^2 retaining sign)
    vConstants = _mm_mul_ps(vAbsV, V);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
    // Do NOT use rsqrt/mul. This needs the precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R0 = _mm_add_ps(R0,R1);
    return R0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
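
// Added usage sketch (not part of the original source):
//
//     XMVECTOR V = XMVectorSet(-1.0f, -0.5f, 0.5f, 1.0f);
//     XMVECTOR Angles = XMVectorASin(V);
//     // Expected componentwise: ~-Pi/2, ~-Pi/6, ~Pi/6, ~Pi/2. The
//     // OnePlusEpsilon bias above keeps the sqrt argument positive so that
//     // inputs of exactly +/-1.0 do not produce a NaN.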

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorACos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, AbsV);

    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    Result = XMVectorSubtract(g_XMHalfPi.v, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
    // Uses only 6 registers for good code on x86 targets
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Perform the series in precision groups to
    // retain precision across 20 bits. (3 bits of imprecision due to operations)
    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);
    R0 = _mm_mul_ps(R0,vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1,vConstants);
    R1 = _mm_mul_ps(R1, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_add_ps(R1,vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2,vConstants);
    R2 = _mm_mul_ps(R2, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_add_ps(R2,vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3,vConstants);
    R3 = _mm_mul_ps(R3, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_add_ps(R3,vConstants);

    // vConstants = V^3
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Add the pair of values together here to retain
    // as much precision as possible
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V*abs(V))
    vConstants = _mm_mul_ps(V,vAbsV);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    // Epsilon exists to allow 1.0 as an answer
    vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
    // Use sqrt instead of rsqrt for precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R1 = _mm_add_ps(R1,R0);
    vConstants = _mm_sub_ps(g_XMHalfPi,R1);
    return vConstants;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
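
// Added usage sketch (not part of the original source), using the identity
// the routine itself relies on:
//
//     XMVECTOR V = XMVectorReplicate(0.5f);
//     XMVECTOR A = XMVectorACos(V);   // each lane ~= Pi/3
//     // Equivalently, XMVectorACos(V) ~= XMVectorSubtract(g_XMHalfPi.v, XMVectorASin(V)).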

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute inverse tangent.

    XMVECTOR N, D;
    XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
    XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
    XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
    XMVECTOR Zero;
    XMVECTOR NegativeHalfPi;
    XMVECTOR Angle1, Angle2;
    XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
    XMVECTOR NegativeResult, Result;
    XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
    static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    Zero = XMVectorZero();

    P0 = XMVectorSplatX(ATanConstants0);
    P1 = XMVectorSplatY(ATanConstants0);
    P2 = XMVectorSplatZ(ATanConstants0);
    P3 = XMVectorSplatW(ATanConstants0);

    Q0 = XMVectorSplatX(ATanConstants1);
    Q1 = XMVectorSplatY(ATanConstants1);
    Q2 = XMVectorSplatZ(ATanConstants1);
    Q3 = XMVectorSplatW(ATanConstants1);

    Sqrt3 = XMVectorSplatX(ATanConstants2);
    Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
    TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
    Epsilon = XMVectorSplatW(ATanConstants2);

    HalfPi = XMVectorSplatX(ATanConstants3);
    OneThirdPi = XMVectorSplatY(ATanConstants3);
    OneSixthPi = XMVectorSplatZ(ATanConstants3);
    MaxV = XMVectorSplatW(ATanConstants3);

    VF = XMVectorAbs(V);
    ReciprocalF = XMVectorReciprocal(VF);

    F_GT_One = XMVectorGreater(VF, g_XMOne.v);

    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);

    F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);

    FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
    FA = XMVectorAdd(FA, g_XMNegativeOne.v);
    FB = XMVectorAdd(VF, Sqrt3);
    FB = XMVectorReciprocal(FB);
    FA = XMVectorMultiply(FA, FB);

    VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
    Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);

    AbsF = XMVectorAbs(VF);
    AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);

    G = XMVectorMultiply(VF, VF);

    D = XMVectorAdd(G, Q3);
    D = XMVectorMultiplyAdd(D, G, Q2);
    D = XMVectorMultiplyAdd(D, G, Q1);
    D = XMVectorMultiplyAdd(D, G, Q0);
    D = XMVectorReciprocal(D);

    N = XMVectorMultiplyAdd(P3, G, P2);
    N = XMVectorMultiplyAdd(N, G, P1);
    N = XMVectorMultiplyAdd(N, G, P0);
    N = XMVectorMultiply(N, G);
    Result = XMVectorMultiply(N, D);

    Result = XMVectorMultiplyAdd(Result, VF, VF);

    Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);

    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, F_GT_One);

    Result = XMVectorAdd(Result, Angle1);

    V_LT_Zero = XMVectorLess(V, Zero);
    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);

    MinV = XMVectorNegate(MaxV);
    NegativeHalfPi = XMVectorNegate(HalfPi);
    V_GT_MaxV = XMVectorGreater(V, MaxV);
    V_LT_MinV = XMVectorLess(V, MinV);
    Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
    Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    XMVECTOR VF = XMVectorAbs(V);
    XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
    XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
    XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    // Pi/3
    XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
    // Pi/6
    XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
    Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);

    // sqrt(3)-1
    XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
    FA = _mm_mul_ps(FA,VF);
    FA = _mm_add_ps(FA,VF);
    FA = _mm_add_ps(FA,g_XMNegativeOne);
    // sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
    vConstants = _mm_add_ps(vConstants,VF);
    FA = _mm_div_ps(FA,vConstants);

    // 2-sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
    // >2-sqrt(3)?
    vConstants = _mm_cmpgt_ps(VF,vConstants);
    VF = XMVectorSelect(VF, FA, vConstants);
    Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);

    XMVECTOR AbsF = XMVectorAbs(VF);

    XMVECTOR G = _mm_mul_ps(VF,VF);
    XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
    D = _mm_add_ps(D,G);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
    D = _mm_add_ps(D,vConstants);

    XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    XMVECTOR Result = _mm_div_ps(N,D);

    Result = _mm_mul_ps(Result,VF);
    Result = _mm_add_ps(Result,VF);
    // Epsilon
    vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
    vConstants = _mm_cmpge_ps(vConstants,AbsF);
    Result = XMVectorSelect(Result,VF,vConstants);

    XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
    Result = _mm_add_ps(Result,Angle1);

    Zero = _mm_cmpge_ps(Zero,V);
    NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,Zero);

    XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
    XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
    // Negate HalfPi
    HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
    MaxV = _mm_cmple_ps(MaxV,V);
    MinV = _mm_cmpge_ps(MinV,V);
    Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
    // HalfPi now holds -Pi/2
    Result = XMVectorSelect(Result,HalfPi,MinV);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
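
// Added worked note (not part of the original source): the Cody and Waite
// argument reduction above maps any input into |x| <= 2 - sqrt(3): values
// with |V| > 1 are replaced by 1/|V| (with Pi/2 added and the result
// negated), and remaining values above 2 - sqrt(3) go through
//
//     FA = (VF*(sqrt(3) - 1) + VF - 1) / (VF + sqrt(3))
//        = (sqrt(3)*VF - 1) / (VF + sqrt(3))   -- i.e. tan(x - Pi/6)
//
// which is why OneSixthPi (or OneThirdPi when the reciprocal path was taken)
// is added back through Angle1.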

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVectorATan2
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:

    //     Y == 0 and X is Negative         -> Pi with the sign of Y
    //     Y == 0 and X is Positive         -> 0 with the sign of Y
    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
    //     X == -Infinity and Finite Y > 0  -> Pi with the sign of Y
    //     X == +Infinity and Finite Y > 0  -> 0 with the sign of Y
    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
    //     TODO: Return Y / X if the result underflows

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    FiniteYGreaterZero = XMVectorGreater(Y, Zero);
    FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
    R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
    Result = XMVectorSelect(R6, R7, XEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocal(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATan(V);

    Result = XMVectorSelect(Result, R0, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask if Y>0 && Y!=INF
    XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
    // Get the sign of (Y&0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    R1 = _mm_or_ps(R1,YSign);
    R1 = XMVectorSelect(R1,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
    // At this point, any entry that's still all ones will get the result
    // from XMVectorATan(); the others keep their precomputed failsafe value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATan(vConstants);
    // Discard entries that have been declared void
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
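
// Added usage sketch (not part of the original source), showing the quadrant
// handling the special-case table above describes:
//
//     XMVECTOR Y = XMVectorSet(1.0f,  1.0f, -1.0f,  0.0f);
//     XMVECTOR X = XMVectorSet(1.0f, -1.0f, -1.0f, -2.0f);
//     XMVECTOR A = XMVectorATan2(Y, X);
//     // Expected componentwise: ~Pi/4, ~3*Pi/4, ~-3*Pi/4, and Pi (Y == 0
//     // with negative X returns Pi with the sign of Y, here +0, so +Pi).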

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V5, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR Result;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V5 = XMVectorMultiply(V3, V2);
    V7 = XMVectorMultiply(V5, V2);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    Result = XMVectorMultiplyAdd(S1, V3, V);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR V3 = _mm_mul_ps(V2,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V3);
    vResult = _mm_add_ps(vResult,V);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    // V^5
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
    // V^7
    V3 = _mm_mul_ps(V3,V2);
    vConstants = _mm_mul_ps(vConstants,V3);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V4, V6;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Result;

    V2 = XMVectorMultiply(V, V);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Result = XMVectorMultiplyAdd(C1, V2, C0);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get V^2
    XMVECTOR V2 = _mm_mul_ps(V,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    // Get V^4
    XMVECTOR V4 = _mm_mul_ps(V2, V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
    // It's really V^6
    V4 = _mm_mul_ps(V4,V2);
    vConstants = _mm_mul_ps(vConstants,V4);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMVectorSinCosEst
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR  V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
    S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);

    C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V2, V3, V4, V5, V6, V7;
    XMVECTOR S1, S2, S3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR Sin, Cos;

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
    V2 = XMVectorMultiply(V, V);
    V3 = XMVectorMultiply(V2, V);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);

    S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
    S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
    S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);

    C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
    C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
    C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
    C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);

    Sin = XMVectorMultiplyAdd(S1, V3, V);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);

    Cos = XMVectorMultiplyAdd(C1, V2, C0);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
4720
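
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Computes estimated sine and cosine of four angles in one call;
// inputs are assumed to already lie in the [-PI, PI) range the polynomial
// expects:
//
//     XMVECTOR angles = XMVectorSet(0.0f, XM_PIDIV4, XM_PIDIV2, -XM_PIDIV4);
//     XMVECTOR s, c;
//     XMVectorSinCosEst(&s, &c, angles); // s ~= sin(angles), c ~= cos(angles)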
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2, V1T0, V1T1, V2T2;
    XMVECTOR T0, T1, T2;
    XMVECTOR N, D;
    XMVECTOR OneOverPi;
    XMVECTOR Result;

    OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);

    V1 = XMVectorMultiply(V, OneOverPi);
    V1 = XMVectorRound(V1);

    V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);

    T0 = XMVectorSplatX(g_XMTanEstCoefficients);
    T1 = XMVectorSplatY(g_XMTanEstCoefficients);
    T2 = XMVectorSplatZ(g_XMTanEstCoefficients);

    V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
    V2 = XMVectorMultiply(V1, V1);
    V1T0 = XMVectorMultiply(V1, T0);
    V1T1 = XMVectorMultiply(V1, T1);

    D = XMVectorReciprocalEst(V2T2);
    N = XMVectorMultiplyAdd(V2, V1T1, V1T0);

    Result = XMVectorMultiply(N, D);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
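
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). XMVectorTanEst first subtracts the nearest multiple of PI from
// each component, so arguments outside (-PI/2, PI/2) are handled, though
// accuracy naturally degrades near odd multiples of PI/2 where tan()
// diverges:
//
//     XMVECTOR v = XMVectorSet(0.0f, XM_PIDIV4, -XM_PIDIV4, 1.0f);
//     XMVECTOR t = XMVectorTanEst(v); // t.y ~= 1.0f, t.z ~= -1.0f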
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorSinHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorSubtract(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V,Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_sub_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCosHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);

    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);

    Result = XMVectorAdd(E1, E2);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)

    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorTanHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    E = XMVectorMultiply(V, Scale);
    E = XMVectorExpEst(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocalEst(E);

    Result = XMVectorSubtract(g_XMOne.v, E);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)

    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExpEst(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocalEst(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
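
// Worked identity behind the code above (editor's note; XMVectorExpEst is
// the library's base-2 exponential, hence Scale = 2/ln(2) so that
// exp2(V * 2/ln 2) = e^(2V)). With E = e^(2V), the sequence computes:
//
//     E*0.5 + 0.5          = (e^(2V) + 1) / 2
//     1 / (E*0.5 + 0.5)    = 2 / (e^(2V) + 1)
//     1 - 2 / (e^(2V) + 1) = (e^(2V) - 1) / (e^(2V) + 1) = tanh(V)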
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorASinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps;
    XMVECTOR Result;

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // Since this is an estimate, rsqrt is okay
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 retaining sign
    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
    D = _mm_mul_ps(D,vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorACosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps, HalfPi;
    XMVECTOR Result;

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    D = XMVectorSubtract(OnePlusEps, AbsV);

    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    Result = XMVectorSubtract(HalfPi, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Calc D
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // SqrtD = sqrt(D) estimated, where D = (1+eps) - abs(V)
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 while retaining sign
    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
    D = _mm_mul_ps(D, vAbsV);

    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
    vResult = _mm_sub_ps(vConstants,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2S2, N, D;
    XMVECTOR S0, S1, S2;
    XMVECTOR HalfPi;
    XMVECTOR Result;

    S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
    S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
    HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);

    AbsV = XMVectorAbs(V);

    V2S2 = XMVectorMultiplyAdd(V, V, S2);
    N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
    D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
    N = XMVectorMultiply(N, V);
    D = XMVectorReciprocalEst(D);

    Result = XMVectorMultiply(N, D);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
    vResult = _mm_mul_ps(vResult,vAbsV);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vResult = _mm_mul_ps(vResult,V);

    XMVECTOR D = _mm_mul_ps(V,V);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
    D = _mm_add_ps(D,vConstants);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    D = _mm_add_ps(D,vConstants);
    vResult = _mm_div_ps(vResult,D);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorATan2Est
(
    FXMVECTOR Y, 
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    FiniteYGreaterZero = XMVectorGreater(Y, Zero);
    FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);

    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
    R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
    Result = XMVectorSelect(R6, R7, XEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocalEst(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATanEst(V);

    Result = XMVectorSelect(Result, R0, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask if Y>0 && Y!=INF
    XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
    // Get the sign of Y (Y&0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    R1 = _mm_or_ps(R1,YSign);
    R1 = XMVectorSelect(R1,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
    // At this point, any entry that's zero will get the result
    // from XMVectorATanEst(), otherwise, return the failsafe value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF, are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATanEst(vConstants);
    // Discard entries that have been declared void
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
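
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). A common use is converting a 2D direction into a heading angle;
// note the (Y, X) argument order matches CRT atan2f:
//
//     XMVECTOR dir     = XMVectorSet(1.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR heading = XMVectorATan2Est(XMVectorSplatY(dir),
//                                         XMVectorSplatX(dir));
//     // heading ~= XM_PIDIV4 in every component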
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerp
(
    FXMVECTOR V0, 
    FXMVECTOR V1, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Scale;
    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + t * (V1 - V0)
    Scale = XMVectorReplicate(t);
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, Scale, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L, S;
    XMVECTOR Result;

    L = _mm_sub_ps( V1, V0 );

    S = _mm_set_ps1( t );

    Result = _mm_mul_ps( L, S );

    return _mm_add_ps( Result, V0 );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
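
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Blends two positions with a scalar parameter; t is not clamped,
// so values outside [0, 1] extrapolate along the same line:
//
//     XMVECTOR a   = XMVectorSet( 0.0f,  0.0f,  0.0f, 0.0f);
//     XMVECTOR b   = XMVectorSet(10.0f, 20.0f, 30.0f, 0.0f);
//     XMVECTOR mid = XMVectorLerp(a, b, 0.5f); // (5, 10, 15, 0)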
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorLerpV
(
    FXMVECTOR V0, 
    FXMVECTOR V1, 
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Length;
    XMVECTOR Result;

    // V0 + T * (V1 - V0)
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, T, V0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Length;
    XMVECTOR Result;

    Length = _mm_sub_ps( V1, V0 );

    Result = _mm_mul_ps( Length, T );

    return _mm_add_ps( Result, V0 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermite
(
    FXMVECTOR Position0, 
    FXMVECTOR Tangent0, 
    FXMVECTOR Position1, 
    CXMVECTOR Tangent1, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    FLOAT    t2;
    FLOAT    t3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
    T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
    P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
    T1 = XMVectorReplicate(t3 - t2);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
    XMVECTOR T1 = _mm_set_ps1(t3 - t2);

    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(P1, Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(T1, Tangent1);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
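
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Evaluates a cubic Hermite segment; at t = 0 the result is
// Position0 and at t = 1 it is Position1, with the tangents controlling
// the departure and arrival directions:
//
//     XMVECTOR p = XMVectorHermite(P0, T0, P1, T1, 0.25f);
//
// where P0, T0, P1, T1 are caller-provided endpoint and tangent vectors.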
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorHermiteV
(
    FXMVECTOR Position0, 
    FXMVECTOR Tangent0, 
    FXMVECTOR Position1, 
    CXMVECTOR Tangent1, 
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    XMVECTOR T2;
    XMVECTOR T3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    T2 = XMVectorMultiply(T, T);
    T3 = XMVectorMultiply(T , T2);

    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Mul by the constants against t^2
    T2 = _mm_mul_ps(T2,CatMulT2);
    // Mul by the constants against t^3
    T3 = _mm_mul_ps(T3,CatMulT3);
    // T3 now has the pre-result.
    T3 = _mm_add_ps(T3,T2);
    // I need to add t.y only
    T2 = _mm_and_ps(T,g_XMMaskY);
    T3 = _mm_add_ps(T3,T2);
    // Add 1.0f to x
    T3 = _mm_add_ps(T3,g_XMIdentityR0);
    // Now, I have the constants created
    // Mul the x constant to Position0
    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,Position0);
    // Mul the y constant to Tangent0
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
    T2 = _mm_mul_ps(T2,Tangent0);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the z constant to Position1
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
    T2 = _mm_mul_ps(T2,Position1);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the w constant to Tangent1
    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
    T3 = _mm_mul_ps(T3,Tangent1);
    vResult = _mm_add_ps(vResult,T3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRom
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR Position3, 
    FLOAT    t
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR P1;
    XMVECTOR P2;
    XMVECTOR P3;
    XMVECTOR Result;
    FLOAT    t2;
    FLOAT    t3;

    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
    //           (t^3 - t^2) * Position3) * 0.5
    t2 = t * t;
    t3 = t * t2;

    P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
    P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    P3 = XMVectorReplicate((t3 - t2) * 0.5f);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(P2, Position2, Result);
    Result = XMVectorMultiplyAdd(P3, Position3, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;

    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);

    P0 = _mm_mul_ps(P0, Position0);
    P1 = _mm_mul_ps(P1, Position1);
    P2 = _mm_mul_ps(P2, Position2);
    P3 = _mm_mul_ps(P3, Position3);
    P0 = _mm_add_ps(P0,P1);
    P2 = _mm_add_ps(P2,P3);
    P0 = _mm_add_ps(P0,P2);
    return P0;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
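
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Interpolates between Position1 (t = 0) and Position2 (t = 1);
// Position0 and Position3 only shape the tangents, which makes this
// convenient for sliding a window over a list of waypoints:
//
//     for (FLOAT t = 0.0f; t <= 1.0f; t += 0.1f)
//         p = XMVectorCatmullRom(W[i-1], W[i], W[i+1], W[i+2], t);
//
// with W a caller-provided array of waypoint vectors.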
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorCatmullRomV
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR Position3, 
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fx = T.vector4_f32[0];
    float fy = T.vector4_f32[1];
    float fz = T.vector4_f32[2];
    float fw = T.vector4_f32[3];
    XMVECTOR vResult = {
        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
    // Cache T^2 and T^3
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Perform the Position0 term
    XMVECTOR vResult = _mm_add_ps(T2,T2);
    vResult = _mm_sub_ps(vResult,T);
    vResult = _mm_sub_ps(vResult,T3);
    vResult = _mm_mul_ps(vResult,Position0);
    // Perform the Position1 term and add
    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,Catmul2);
    vTemp = _mm_mul_ps(vTemp,Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    // Perform the Position2 term and add
    vTemp = _mm_mul_ps(T2,Catmul4);
    vTemp2 = _mm_mul_ps(T3,Catmul3);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,T);
    vTemp = _mm_mul_ps(vTemp,Position2);
    vResult = _mm_add_ps(vResult,vTemp);
    // Position3 is the last term
    T3 = _mm_sub_ps(T3,T2);
    T3 = _mm_mul_ps(T3,Position3);
    vResult = _mm_add_ps(vResult,T3);
    // Multiply by 0.5f and exit
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentric
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    FLOAT    f, 
    FLOAT    g
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR ScaleF;
    XMVECTOR ScaleG;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    ScaleF = XMVectorReplicate(f);

    P20 = XMVectorSubtract(Position2, Position0);
    ScaleG = XMVectorReplicate(g);

    Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
    Result = XMVectorMultiplyAdd(P20, ScaleG, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR SF = _mm_set_ps1(f);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    XMVECTOR SG = _mm_set_ps1(g);
    R1 = _mm_mul_ps(R1,SF);
    R2 = _mm_mul_ps(R2,SG);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
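
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). With f and g as barycentric weights, the result lies inside the
// triangle exactly when f >= 0, g >= 0 and f + g <= 1; (f, g) = (0, 0)
// returns Position0:
//
//     XMVECTOR centroid =
//         XMVectorBaryCentric(A, B, C, 1.0f / 3.0f, 1.0f / 3.0f);
//
// where A, B, C are caller-provided triangle corners.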
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVectorBaryCentricV
(
    FXMVECTOR Position0, 
    FXMVECTOR Position1, 
    FXMVECTOR Position2, 
    CXMVECTOR F, 
    CXMVECTOR G
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR Result;

    P10 = XMVectorSubtract(Position1, Position0);
    P20 = XMVectorSubtract(Position2, Position0);

    Result = XMVectorMultiplyAdd(P10, F, Position0);
    Result = XMVectorMultiplyAdd(P20, G, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    R1 = _mm_mul_ps(R1,F);
    R2 = _mm_mul_ps(R2,G);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
/****************************************************************************
 *
 * 2D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
#endif
}
 

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] == V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] != V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
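
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). The *R variants return a CR6-style mask instead of a BOOL, so a
// single comparison can answer "all equal", "none equal", or "mixed":
//
//     UINT cr = XMVector2EqualR(v1, v2);
//     if (XMComparisonAllTrue(cr))  { /* both x and y matched */ }
//     if (XMComparisonAllFalse(cr)) { /* neither matched */ }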
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && 
        (V1.vector4_u32[1] == V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && 
        (V1.vector4_u32[1] != V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    return ((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] > V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] <= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] >= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] < V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest == 3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE UINT XMVector2InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x or y are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector2IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x or y are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
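
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). Because the dot product is splatted across all four lanes, it
// can feed straight into further vector math, e.g. the unsigned angle
// between two pre-normalized 2D vectors n1 and n2:
//
//     XMVECTOR cosAngle = XMVector2Dot(n1, n2);
//     XMVECTOR angle    = XMVectorACosEst(cosAngle);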
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
    XMVECTOR vResult = { 
        fCross,
        fCross,
        fCross,
        fCross
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap x and y
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
    // Perform the muls
    vResult = _mm_mul_ps(vResult,V1);
    // Splat y
    XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
    // Sub the values
    vResult = _mm_sub_ss(vResult,vTemp);
    // Splat the cross product
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
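
// Usage sketch (illustrative editor's addition, not from the original SDK
// header). The 2D "cross product" is the z component of the 3D cross of
// (x, y, 0) vectors; its sign gives the winding of the turn from V1 to V2:
//
//     FLOAT z = XMVectorGetX(XMVector2Cross(V1, V2));
//     // z > 0: V2 is counter-clockwise from V1; z < 0: clockwise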
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthSq
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector2Dot(V, V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else
    return XMVector2Dot(V, V);
#endif
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrt(Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
 
6335
//------------------------------------------------------------------------------
6336
// XMVector2NormalizeEst uses a reciprocal estimate and
6337
// returns QNaN on zero and infinite vectors.
6338
 
6339
XMFINLINE XMVECTOR XMVector2NormalizeEst
6340
(
6341
    FXMVECTOR V
6342
)
6343
{
6344
#if defined(_XM_NO_INTRINSICS_)
6345
 
6346
    XMVECTOR Result;
6347
    Result = XMVector2ReciprocalLength(V);
6348
    Result = XMVectorMultiply(V, Result);
6349
    return Result;
6350
 
6351
#elif defined(_XM_SSE_INTRINSICS_)
6352
    // Perform the dot product on x and y
6353
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
6354
    // vTemp has y splatted
6355
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
6356
    // x+y
6357
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
6358
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
6359
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
6360
    vLengthSq = _mm_mul_ps(vLengthSq,V);
6361
	return vLengthSq;
6362
#else // _XM_VMX128_INTRINSICS_
6363
#endif // _XM_VMX128_INTRINSICS_
6364
}
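
// Example (illustrative): XMVector2NormalizeEst(XMVectorSet(3.0f, 4.0f, 0.0f,
// 0.0f)) yields approximately (0.6, 0.8, 0, 0); because _mm_rsqrt_ss is an
// estimate, expect only about 12 bits of precision. As noted above, zero and
// infinite inputs produce QNaN -- use XMVector2Normalize when those cases
// must be handled.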

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR LengthSq;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR Result;

    LengthSq = XMVector2LengthSq(V);
    Zero = XMVectorZero();
    Result = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Result = XMVectorSelect(LengthSq, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
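
// How the failsafe above works: vLengthSq becomes a per-lane mask that is all
// ones wherever the squared length is not infinity, so the final _mm_and_ps
// forces the result to zero for infinite-length inputs. Note that a
// zero-length input reaches the divide as 0/0 and therefore yields QNaN on
// this path, whereas the _XM_NO_INTRINSICS_ path selects LengthSq (zero)
// instead.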

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector2ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector2ClampLengthV(V, ClampMin, ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
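
// Usage sketch (illustrative; Velocity and MaxSpeed are hypothetical caller
// variables): clamping a 2D velocity to a speed range, e.g.
//     Velocity = XMVector2ClampLength(Velocity, 0.0f, MaxSpeed);
// Inputs whose length already lies within [LengthMin, LengthMax] are returned
// unchanged -- see the select on Control in XMVector2ClampLengthV below.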

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector2LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Normal = XMVectorMultiply(V, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Normal = _mm_mul_ps(V, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
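
// Note: Length is computed above as LengthSq * rsqrt(LengthSq), using the
// identity x / sqrt(x) == sqrt(x); this reuses the reciprocal square root
// already needed for Normal instead of issuing a separate sqrt.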

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector2Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector2Dot(Incident,Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
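
// Example (illustrative): with Incident = (1, -1) and Normal = (0, 1),
// dot(I, N) = -1, so Result = I - 2*(-1)*N = (1, 1): the vector bounces off
// the surface whose normal is N. Normal is expected to be unit length.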

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector2RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector2RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

// Return the refraction of a 2D vector
XMFINLINE XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    float IDotN;
    float RX,RY;
    XMVECTOR vResult;
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    RY = 1.0f-(IDotN*IDotN);
    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
    IDotN = _mm_add_ss(IDotN,vTemp);
    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any terms are <=0, sqrt() will fail, punt to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
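
// Note: the term under the square root is the refraction discriminant; when
// it is <= 0 the incident ray undergoes total internal reflection and there
// is no refracted ray, which the code above expresses by masking the result
// to zero with vMask. RefractionIndex is the ratio of the refractive indices
// of the two media (roughly 0.75 for a ray passing from air into water,
// taking the ratio as n1/n2).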

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] = -V.vector4_f32[1];
    Result.vector4_f32[1] = V.vector4_f32[0];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    vResult = _mm_mul_ps(vResult,g_XMNegateX);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
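
// Note: (x, y) -> (-y, x) is a 90-degree counter-clockwise rotation, so the
// result is perpendicular to V with the same length. On the SSE path the
// shuffle/negate also carries z and w through unchanged; the
// _XM_NO_INTRINSICS_ path above leaves z and w of the local Result unwritten.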

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
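
// Note: for unit-length inputs dot(N1, N2) == cos(angle), so the angle is
// recovered with acos. The clamp to [-1, 1] above guards against floating-
// point drift pushing the dot product slightly outside acos's domain.
// Example (illustrative): two unit vectors 60 degrees apart have a dot
// product of 0.5, and acos(0.5) is roughly 1.0472 radians.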

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);

    Dot = XMVector2Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;
    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);
    Dot = XMVector2Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2LinePointDistance
(
    FXMVECTOR LinePoint1, 
    FXMVECTOR LinePoint2, 
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance 
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector2Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector2Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
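
// Derivation: PointProjectionScale is the parameter t of the foot of the
// perpendicular, so LineVector * t is the projection of PointVector onto the
// line; subtracting it leaves the component of PointVector perpendicular to
// the line, whose length is the distance. The SSE path folds the reciprocal
// into a single _mm_div_ps, even though its local is still named
// ReciprocalLengthSq.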

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1, 
    FXMVECTOR Line1Point2, 
    FXMVECTOR Line2Point1, 
    CXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        V1;
    XMVECTOR        V2;
    XMVECTOR        V3;
    XMVECTOR        C1;
    XMVECTOR        C2;
    XMVECTOR        Result;
    CONST XMVECTOR  Zero = XMVectorZero();

    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    V3 = XMVectorSubtract(Line1Point1, Line2Point1);

    C1 = XMVector2Cross(V1, V2);
    C2 = XMVector2Cross(V2, V3);

    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale;
        Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
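
// Note: XMVector2Cross returns the scalar perp-dot product replicated to all
// lanes, so C1 is the determinant of the two line directions (near zero when
// the lines are parallel) and C2 tests whether the first line's origin lies
// on the second line. The SSE path builds abs(C1) and abs(C2) with the
// max(-v, v) trick and then blends between the computed intersection and the
// INF (coincident) / QNaN (parallel) failure codes.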

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
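
// Note: XMVector2Transform treats V as (x, y, 0, 1): the result is
// x * M.r[0] + y * M.r[1] + M.r[3], i.e. rows 0 and 1 scaled by the input
// components plus the translation row, with row 2 (the z axis) ignored.
// The full 4D result is returned without a homogeneous divide; use
// XMVector2TransformCoord when the projective divide by w is wanted.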

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX        M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector2TransformStreamNC
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2TransformCoord
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);

    Result = XMVectorMultiply(Result, InverseW);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
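
// Note: unlike XMVector2Transform, this divides the transformed vector by its
// w component, which is what projective (e.g. perspective) matrices require.
// If the transform produces w == 0 the divide yields infinities or NaNs, so
// callers are expected to keep inputs out of that degenerate case.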

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector2TransformNormal
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;

    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Y, M.r[1]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
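
// Note: normals are direction vectors, so V is treated as (x, y, 0, 0): the
// translation row M.r[3] is not added, in contrast to XMVector2Transform.
// For matrices containing non-uniform scale, transforming a normal correctly
// generally requires the inverse-transpose matrix, which the caller must
// supply.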

//------------------------------------------------------------------------------

XMINLINE XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT2* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX        M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        Result = XMVectorMultiply(Y, M.r[1]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 3D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
#endif
}
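
// Note on the mask idiom used throughout the 3D comparisons: _mm_movemask_ps
// packs the sign bit of each lane into bits 0..3 (x, y, z, w). Masking with
// 7 keeps only x, y and z, so (mask & 7) == 7 means the comparison was true
// in all three components of interest while w is ignored.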

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&7;
    UINT CR = 0;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
    UINT CR = 0;
    if (iTemp==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
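
// Note: the R-suffixed comparisons return a control record rather than a
// BOOL: XM_CRMASK_CR6TRUE when the predicate holds in all of x, y, z and
// XM_CRMASK_CR6FALSE when it fails in all three; a mixed result returns 0.
// The record is decoded with helpers such as XMComparisonAllTrue, mirroring
// the PowerPC CR6 condition-register convention the names come from.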

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
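
// Note: the absolute difference above is formed without a bit mask by
// computing max(0 - delta, delta), which is |delta| for finite inputs; the
// per-component test is then |V1 - V2| <= Epsilon on x, y and z.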

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector3InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
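
// Note: "in bounds" is the symmetric test -Bounds <= V <= Bounds evaluated
// per component; the SSE path negates Bounds by multiplying with
// g_XMNegativeOne and reuses _mm_cmple_ps with the operands swapped for the
// lower test.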

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x, y or z are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
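
// Note: a float is NaN when its exponent bits are all ones and its mantissa
// is non-zero. Since SSE2 has no integer not-equal compare, the code above
// tests the mantissa for equality with zero and then uses _mm_andnot_si128
// to combine "exponent == 0x7F800000" with "mantissa != 0".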

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.vector4_f32[0] = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.vector4_f32[2]
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.vector4_f32[0] = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
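
// Example (illustrative): for V1 = (1, 2, 3) and V2 = (4, 5, 6) the product
// vector is (4, 10, 18); the shuffle(2,1,2,1) places (y, z) where lane 0 can
// reach them, the two _mm_add_ss steps accumulate 4 + 10 + 18 = 32 in lane 0,
// and the final shuffle splats (32, 32, 32, 32).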

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from left, and return answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
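
// Example (illustrative): XMVector3Cross of the unit axes (1,0,0) and (0,1,0)
// yields (0,0,1). The two shuffle pairs build (y1,z1,x1)*(z2,x2,y2) and
// (z1,x1,y1)*(y2,z2,x2); their difference is the standard cross product, and
// the final _mm_and_ps with g_XMMask3 forces w to zero.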

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthSq
(
    FXMVECTOR V
)
{
    return XMVector3Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
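
// Quick reference (illustrative) for the 3D length family, with v = (3, 0, 4):
//   XMVector3LengthSq(v)         -> 25   ( dot(v, v) )
//   XMVector3Length(v)           -> 5    ( sqrt of the above )
//   XMVector3ReciprocalLength(v) -> 0.2  ( 1 / length )
// Each result is splatted across all four lanes of the returned XMVECTOR;
// the *Est variants trade accuracy for speed via estimate instructions.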

//------------------------------------------------------------------------------
// XMVector3NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal square root estimate
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;

    fLength = sqrtf((V.vector4_f32[0]*V.vector4_f32[0])+(V.vector4_f32[1]*V.vector4_f32[1])+(V.vector4_f32[2]*V.vector4_f32[2]));
    // Prevent divide by zero
    if (fLength) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
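
// Example (illustrative): normalizing a 3-element vector.
//   XMVECTOR v = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//   XMVECTOR n = XMVector3Normalize(v);    // n = (0.6, 0.0, 0.8, 0.0)
// XMVector3NormalizeEst above trades accuracy for speed: _mm_rsqrt_ps has
// roughly 12 bits of precision versus the full sqrt-and-divide used here.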

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector3ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector3ClampLengthV(V,ClampMin,ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
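
// How the clamp works (illustrative): V is decomposed into a unit direction
// (Normal) and a scalar length (Length = LengthSq * RcpLength), the length
// is clamped into [LengthMin, LengthMax] with compare masks and
// XMVectorSelect, and the two parts are remultiplied. When the length is
// already in range, ControlMax and ControlMin are identical (all-false)
// masks, so the final select returns the original V bit-for-bit instead of
// Normal * ClampLength, avoiding any rounding drift.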

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector3Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector3Dot(Incident, Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
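
// Example (illustrative): reflecting an incoming ray off a ground plane.
//   XMVECTOR i = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);  // incident direction
//   XMVECTOR n = XMVectorSet(0.0f,  1.0f, 0.0f, 0.0f);  // unit surface normal
//   XMVECTOR r = XMVector3Reflect(i, n);                // r = (1, 1, 0, 0)
// The formula assumes Normal is unit length; no renormalization is done here.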

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector3RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector3RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        IDotN;
    XMVECTOR        R;
    CONST XMVECTOR  Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
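
// Background (illustrative): this is Snell's law in vector form. With
// n = RefractionIndex (ratio of indices across the interface) and
// k = 1 - n*n * (1 - dot(I,N)^2), a negative k means the incident angle
// exceeds the critical angle, so all light is reflected (total internal
// reflection) and zero is returned; otherwise the refracted direction is
//   n * I - (n * dot(I,N) + sqrt(k)) * N
// which is exactly what both branches above compute.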

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);

    NegativeV = XMVectorSubtract(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = XMVectorAdd(YZYY, Z);
    D = XMVectorSubtract(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);

    Result = XMVectorSelect(R1, R0, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORI32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);

    NegativeV = _mm_sub_ps(Zero, V);

    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = _mm_add_ps(YZYY, Z);
    D = _mm_sub_ps(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X);
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;

    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);

    Dot = XMVector3Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();

    CosAngle = XMVectorMultiply(Dot, L1);

    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);
    Dot = XMVector3Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
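
// Derivation (illustrative): cos(theta) = dot(V1, V2) / (|V1| * |V2|).
// The code multiplies the two reciprocal lengths, scales the dot product,
// and clamps the cosine into [-1, 1] before XMVectorACos so that rounding
// error cannot push it outside the domain of acos (which would yield NaN).
// For already-normalized inputs, XMVector3AngleBetweenNormals above skips
// the two reciprocal-length computations.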

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LinePointDistance
(
    FXMVECTOR LinePoint1, 
    FXMVECTOR LinePoint2, 
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance 
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector3Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector3Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    // Despite the name, this holds the plain squared length here; the
    // reciprocal is folded into the division below.
    XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector3Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
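
// Geometry recap (illustrative), for line points A = LinePoint1,
// B = LinePoint2 and query point P = Point:
//   t = dot(P - A, B - A) / |B - A|^2       (projection parameter)
//   distance = |(P - A) - t * (B - A)|      (perpendicular remainder)
// Note the line is treated as infinite; t is not clamped to [0, 1], so
// this is not a segment-distance routine.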

//------------------------------------------------------------------------------

XMFINLINE VOID XMVector3ComponentsFromNormal
(
    XMVECTOR* pParallel, 
    XMVECTOR* pPerpendicular, 
    FXMVECTOR  V, 
    FXMVECTOR  Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Parallel;
    XMVECTOR Scale;

    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);

    Scale = XMVector3Dot(V, Normal);

    Parallel = XMVectorMultiply(Normal, Scale);

    *pParallel = Parallel;
    *pPerpendicular = XMVectorSubtract(V, Parallel);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);
    XMVECTOR Scale = XMVector3Dot(V, Normal);
    XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
    *pParallel = Parallel;
    *pPerpendicular = _mm_sub_ps(V,Parallel);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Transform a vector using a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3Rotate
(
    FXMVECTOR V, 
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = _mm_and_ps(V,g_XMMask3);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
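
// Example (illustrative, assumes a unit quaternion): rotating a point
// 90 degrees about the Z axis.
//   XMVECTOR q = XMQuaternionRotationAxis(XMVectorSet(0,0,1,0), XM_PIDIV2);
//   XMVECTOR p = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
//   XMVECTOR r = XMVector3Rotate(p, q);   // r ~= (0, 1, 0, 0)
// The w lane of V is masked to zero first, since the conjugate-multiply
// sandwich above treats V as a pure quaternion.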

//------------------------------------------------------------------------------
// Transform a vector using the inverse of a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3InverseRotate
(
    FXMVECTOR V, 
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;

    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    A = _mm_and_ps(V,g_XMMask3);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
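
// Row-vector convention (as used throughout this library): the result is
//   x * M.r[0] + y * M.r[1] + z * M.r[2] + M.r[3]
// i.e. V is treated as the point (x, y, z, 1), so the translation row
// M.r[3] is applied, and the full homogeneous result (including w) is
// returned with no divide. See XMVector3TransformCoord below for the
// variant that divides by w.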

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT     i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStreamNC
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformCoord
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);

    Result = XMVectorMultiply(Result, InverseW);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
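
// TransformCoord differs from XMVector3Transform only in the last step:
// the homogeneous result is divided by its own w, projecting back onto the
// w = 1 hyperplane. Illustrative: if V * M = (2, 4, 6, 2), TransformCoord
// returns (1, 2, 3, 1). This is the variant to use for positions pushed
// through a perspective projection matrix.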

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformCoordStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);

        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformNormal
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;

    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);

    Result = XMVectorMultiply(Z, M.r[2]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
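
// TransformNormal treats V as a direction (w = 0): the translation row
// M.r[3] is never added, so only the upper 3x3 of M affects the result.
// Reminder (general practice, not specific to this header): under
// non-uniform scale, transform normals by the inverse transpose of the
// world matrix to keep them perpendicular to surfaces.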

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformNormalStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX     M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        Result = XMVectorMultiply(Z, M.r[2]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVector3Project
(
    FXMVECTOR V, 
    FLOAT    ViewportX, 
    FLOAT    ViewportY, 
    FLOAT    ViewportWidth, 
    FLOAT    ViewportHeight, 
    FLOAT    ViewportMinZ, 
    FLOAT    ViewportMaxZ, 
    CXMMATRIX Projection, 
    CXMMATRIX View, 
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    Result = XMVector3TransformCoord(V, Transform);

    Result = XMVectorMultiplyAdd(Result, Scale, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Result = XMVector3TransformCoord(V, Transform);
    Result = _mm_mul_ps(Result,Scale);
    Result = _mm_add_ps(Result,Offset);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
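
// Viewport mapping (illustrative): after the world-view-projection
// transform and w divide, clip-space x and y lie in [-1, 1] and z in
// [0, 1]. The Scale/Offset pair above remaps them to pixels:
//   screen.x = ViewportX + (x + 1) * ViewportWidth  / 2
//   screen.y = ViewportY + (1 - y) * ViewportHeight / 2   (y flips: screen y grows downward)
//   screen.z = ViewportMinZ + z * (ViewportMaxZ - ViewportMinZ)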

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3ProjectStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    FLOAT           ViewportX, 
    FLOAT           ViewportY, 
    FLOAT           ViewportWidth, 
    FLOAT           ViewportHeight, 
    FLOAT           ViewportMinZ, 
    FLOAT           ViewportMaxZ, 
    CXMMATRIX     Projection, 
    CXMMATRIX     View, 
    CXMMATRIX     World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = XMVectorMultiplyAdd(Result, Scale, Offset);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    Scale = XMVectorSet(HalfViewportWidth, 
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                        ViewportY + HalfViewportHeight,
                        ViewportMinZ,
                        0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = _mm_mul_ps(Result,Scale);
        Result = _mm_add_ps(Result,Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }
    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Unproject
(
    FXMVECTOR V, 
    FLOAT    ViewportX, 
    FLOAT    ViewportY, 
    FLOAT    ViewportWidth, 
    FLOAT    ViewportHeight, 
    FLOAT    ViewportMinZ, 
    FLOAT    ViewportMaxZ, 
    CXMMATRIX Projection, 
    CXMMATRIX View, 
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = XMVectorMultiplyAdd(V, Scale, Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = _mm_mul_ps(V,Scale);
    Result = _mm_add_ps(Result,Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
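
// Unproject inverts the viewport mapping used by XMVector3Project (Scale
// here is the reciprocal of the projection scale, and Offset recenters the
// screen coordinates into clip space), then pushes the point through the
// inverse of World * View * Projection. Round-tripping a point through
// Project and then Unproject with the same matrices and viewport recovers
// the original position up to floating-point error.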

//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3UnprojectStream
(
    XMFLOAT3*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT3* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    FLOAT           ViewportX, 
    FLOAT           ViewportY, 
    FLOAT           ViewportWidth, 
    FLOAT           ViewportHeight, 
    FLOAT           ViewportMinZ, 
    FLOAT           ViewportMaxZ, 
    CXMMATRIX     Projection, 
    CXMMATRIX     View, 
    CXMMATRIX     World)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        V;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    UINT            i;
    BYTE*           pInputVector = (BYTE*)pInputStream;
    BYTE*           pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTOR  D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX        Transform;
    XMVECTOR        Scale;
    XMVECTOR        Offset;
    XMVECTOR        V;
    XMVECTOR        Determinant;
    XMVECTOR        Result;
    UINT            i;
    BYTE*           pInputVector = (BYTE*)pInputStream;
    BYTE*           pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTORF32  D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                        -ViewportY,
                        -ViewportMinZ,
                        0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
 *
 * 4D Vector
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Equal
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
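
// About the "R" variants (illustrative): they return a comparison record
// rather than a BOOL. XM_CRMASK_CR6TRUE is set when all four lanes compare
// true and XM_CRMASK_CR6FALSE when all four compare false (mirroring the
// PowerPC CR6 flags); a mixed result sets neither. Callers inspect the
// record with helpers such as XMComparisonAllTrue and XMComparisonAnyFalse,
// as the fallback paths in this section do.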

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4EqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualIntR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] && 
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] && 
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NearEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2]) &&
            (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
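
// Usage sketch (illustrative): tolerance comparison with a per-component
// epsilon vector; XMVectorReplicate builds a uniform epsilon.
//
//     XMVECTOR a   = XMVectorSet(1.0000f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b   = XMVectorSet(1.0001f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR eps = XMVectorReplicate(1.0e-3f);
//     BOOL     ok  = XMVector4NearEqual(a, b, eps);   // TRUE here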

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqualInt
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Greater
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] && 
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] && 
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4GreaterOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterOrEqualR
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && 
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Less
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4LessOrEqual
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4InBounds
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4InBoundsR
(
    FXMVECTOR V, 
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && 
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]) ||
            XMISNAN(V.vector4_f32[3]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
    // If any are NaN, the mask is non-zero
    return (_mm_movemask_ps(vTempNan)!=0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]) ||
            XMISINF(V.vector4_f32[3]));

#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the corresponding mask bits are set
    return (_mm_movemask_ps(vTemp) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
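
// The SSE path above performs the horizontal add with two shuffle/add pairs:
// the lane products are folded to (x+z, y+w), those partial sums are added
// into a single x+y+z+w, and the scalar is splatted into all four lanes.
// Usage sketch (illustrative):
//
//     XMVECTOR a = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b = XMVectorSet(4.0f, 3.0f, 2.0f, 1.0f);
//     FLOAT    d = XMVectorGetX(XMVector4Dot(a, b));  // 20.0f in every lane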

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;

    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // V2zwyz * V3wzwy
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
    XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
    vResult = _mm_mul_ps(vResult,vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp2);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
    vResult = _mm_mul_ps(vResult,vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
    // vResult - temp * V1zzyy
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp1);

    // V2yzxy * V3zxyx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
    // vResult + term * V1wwwz
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
    vResult = _mm_add_ps(vResult,vTemp3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
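
// The 4D cross product above yields the vector orthogonal to all three
// inputs: each component is (up to sign) the 3x3 minor of the matrix whose
// rows are V1, V2 and V3, obtained by deleting that component's column --
// the same formal determinant expansion as the 3D cross product, lifted to
// four dimensions. As a consequence, XMVector4Dot(XMVector4Cross(A,B,C), A)
// is zero up to rounding, and likewise for B and C.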

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthSq
(
    FXMVECTOR V
)
{
    return XMVector4Dot(V, V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal square root estimate
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the square root of the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    // Full-precision reciprocal via divide
    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
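
// Precision note: the Est variant maps to _mm_rsqrt_ps, whose relative error
// is bounded by roughly 1.5 * 2^-12, while XMVector4ReciprocalLength takes a
// full-precision _mm_sqrt_ps followed by a divide. Prefer the Est form only
// where throughput matters more than accuracy.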

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
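
// Usage sketch (illustrative): the length is replicated into every lane, so
// any component accessor retrieves it.
//
//     XMVECTOR v   = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//     FLOAT    len = XMVectorGetX(XMVector4Length(v));   // 5.0f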

//------------------------------------------------------------------------------
// XMVector4NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.

XMFINLINE XMVECTOR XMVector4NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector4ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Get the reciprocal square root estimate
    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Reciprocal mul to perform the normalization
    vResult = _mm_mul_ps(vResult,V);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR LengthSq;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR Result;

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    Result = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Result = XMVectorMultiply(V, Result);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Result = XMVectorSelect(LengthSq, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
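
// Note: XMVector4Normalize masks an infinite-length input to zero, while a
// zero-length input yields QNaN in the SSE path (0/0). Usage sketch
// (illustrative):
//
//     XMVECTOR v = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
//     XMVECTOR n = XMVector4Normalize(v);   // (0.6f, 0.0f, 0.8f, 0.0f)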

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLength
(
    FXMVECTOR V, 
    FLOAT    LengthMin, 
    FLOAT    LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampMax;
    XMVECTOR ClampMin;

    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);

    return XMVector4ClampLengthV(V, ClampMin, ClampMax);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector4ClampLengthV(V, ClampMin, ClampMax);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    Length = XMVectorMultiply(LengthSq, RcpLength);

    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Normal = _mm_mul_ps(V, RcpLength);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax,ControlMin);
    Result = XMVectorSelect(Result,V,Control);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
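
// XMVector4ClampLengthV expects LengthMin and LengthMax to be replicated
// (splatted) vectors, as the asserts above enforce; the scalar wrapper
// builds them for you. Usage sketch (illustrative):
//
//     XMVECTOR v = XMVectorSet(10.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR c = XMVector4ClampLength(v, 1.0f, 5.0f);  // length becomes 5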

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Reflect
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector4Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector4Dot(Incident,Normal);
    Result = _mm_add_ps(Result,Result);
    Result = _mm_mul_ps(Result,Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
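
// Worked example of Result = Incident - (2 * dot(Incident, Normal)) * Normal
// (illustrative): with Incident = (1,-1,0,0) and unit Normal = (0,1,0,0),
// dot = -1, so Result = (1,-1,0,0) - 2*(-1)*(0,1,0,0) = (1,1,0,0).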

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Refract
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FLOAT    RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector4RefractV(Incident, Normal, Index);

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector4RefractV(Incident,Normal,Index);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR        IDotN;
    XMVECTOR        R;
    CONST XMVECTOR  Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector4Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex, IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
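
// The quantity under the square root, 1 - RI^2 * (1 - dot(I,N)^2), goes
// non-positive exactly when Snell's law has no real solution; both paths
// above return the zero vector in that total-internal-reflection case.
// Incident and Normal are assumed normalized, with RefractionIndex holding
// the replicated ratio of refraction indices.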

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = V.vector4_f32[2];
    Result.vector4_f32[1] = V.vector4_f32[3];
    Result.vector4_f32[2] = -V.vector4_f32[0];
    Result.vector4_f32[3] = -V.vector4_f32[1];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
    vResult = _mm_mul_ps(vResult,FlipZW);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
(
    FXMVECTOR N1, 
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    Result = XMVector4Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector4Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);

    Dot = XMVector4Dot(V1, V2);

    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);
    Dot = XMVector4Dot(V1, V2);
    L1 = _mm_mul_ps(L1,L2);
    CosAngle = _mm_mul_ps(Dot,L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector4Transform
(
    FXMVECTOR V, 
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
    FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
    FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
    FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
    XMVECTOR vResult = {
        fX,
        fY,
        fZ,
        fW
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Splat x,y,z and w
    XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    // Mul by the matrix
    vTempX = _mm_mul_ps(vTempX,M.r[0]);
    vTempY = _mm_mul_ps(vTempY,M.r[1]);
    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
    vTempW = _mm_mul_ps(vTempW,M.r[3]);
    // Add them all together
    vTempX = _mm_add_ps(vTempX,vTempY);
    vTempZ = _mm_add_ps(vTempZ,vTempW);
    vTempX = _mm_add_ps(vTempX,vTempZ);
    return vTempX;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
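
// Usage sketch (illustrative): transforming a point (w = 1) by a translation
// matrix; XNA math uses row vectors, so V is multiplied on the left of M.
//
//     XMMATRIX m = XMMatrixTranslation(1.0f, 2.0f, 3.0f);
//     XMVECTOR p = XMVectorSet(0.0f, 0.0f, 0.0f, 1.0f);
//     XMVECTOR r = XMVector4Transform(p, m);   // (1.0f, 2.0f, 3.0f, 1.0f)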

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector4TransformStream
(
    XMFLOAT4*       pOutputStream, 
    UINT            OutputStride, 
    CONST XMFLOAT4* pInputStream, 
    UINT            InputStride, 
    UINT            VectorCount, 
    CXMMATRIX       M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR W;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat4((XMFLOAT4*)pInputVector);
        W = XMVectorSplatW(V);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
//        Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);

        Result = XMVectorMultiply(W, M.r[3]);
        Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT i;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    const BYTE* pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
    BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
    for (i = 0; i < VectorCount; i++)
    {
        // Fetch the input vector and splat its components
        XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
        XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
        XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
        XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
        vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
        vTempx = _mm_mul_ps(vTempx,M.r[0]);
        vTempy = _mm_mul_ps(vTempy,M.r[1]);
        vTempz = _mm_mul_ps(vTempz,M.r[2]);
        vTempw = _mm_mul_ps(vTempw,M.r[3]);
        vTempx = _mm_add_ps(vTempx,vTempy);
        vTempw = _mm_add_ps(vTempw,vTempz);
        vTempw = _mm_add_ps(vTempw,vTempx);
        // Store the transformed vector
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);

        pInputVector += InputStride; 
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
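
// Usage sketch (illustrative; the buffer is hypothetical): transforming a
// tightly packed XMFLOAT4 array in place. In-place use is safe here because
// each vector is fully loaded before its result is stored.
//
//     XMFLOAT4 verts[64];   // filled elsewhere
//     XMMATRIX m = XMMatrixRotationZ(XM_PIDIV2);
//     XMVector4TransformStream(verts, sizeof(XMFLOAT4),
//                              verts, sizeof(XMFLOAT4), 64, m);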

#ifdef __cplusplus

/****************************************************************************
 *
 * XMVECTOR operators
 *
 ****************************************************************************/

#ifndef XM_NO_OPERATOR_OVERLOADS

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
{
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator- (FXMVECTOR V)
{
    return XMVectorNegate(V);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator+=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorAdd(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator-=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorSubtract(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    V1 = XMVectorMultiply(V1, V2);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR&       V1,
    FXMVECTOR       V2
)
{
    XMVECTOR InvV = XMVectorReciprocal(V2);
    V1 = XMVectorMultiply(V1, InvV);
    return V1;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR&   V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR&   V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, 1.0f / S);
    return V;
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator+
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorAdd(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator-
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorSubtract(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorMultiply(V1, V2);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    XMVECTOR InvV = XMVectorReciprocal(V2);
    return XMVectorMultiply(V1, InvV);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FXMVECTOR      V,
    CONST FLOAT    S
)
{
    return XMVectorScale(V, S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator/
(
    FXMVECTOR      V,
    CONST FLOAT    S
)
{
    return XMVectorScale(V, 1.0f / S);
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR operator*
(
    FLOAT          S,
    FXMVECTOR      V
)
{
    return XMVectorScale(V, S);
}

#endif // !XM_NO_OPERATOR_OVERLOADS
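
// With operator overloads enabled, the wrappers above allow natural vector
// expressions; a sketch (illustrative):
//
//     XMVECTOR a = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR b = XMVectorSet(4.0f, 3.0f, 2.0f, 1.0f);
//     XMVECTOR c = 2.0f * a + b / 2.0f;   // XMVectorScale / XMVectorAdd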
10993
 
10994
/****************************************************************************
10995
 *
10996
 * XMFLOAT2 operators
10997
 *
10998
 ****************************************************************************/
10999
 
11000
//------------------------------------------------------------------------------
11001
 
11002
XMFINLINE _XMFLOAT2::_XMFLOAT2
11003
(
11004
    CONST FLOAT* pArray
11005
)
11006
{
11007
    x = pArray[0];
11008
    y = pArray[1];
11009
}
11010
 
11011
//------------------------------------------------------------------------------
11012
 
11013
XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
11014
(
11015
    CONST _XMFLOAT2& Float2
11016
)
11017
{
11018
    x = Float2.x;
11019
    y = Float2.y;
11020
    return *this;
11021
}
11022
 
11023
/****************************************************************************
11024
 *
11025
 * XMHALF2 operators
11026
 *
11027
 ****************************************************************************/
11028
 
11029
//------------------------------------------------------------------------------
11030
 
11031
XMFINLINE _XMHALF2::_XMHALF2
11032
(
11033
    CONST HALF* pArray
11034
)
11035
{
11036
    x = pArray[0];
11037
    y = pArray[1];
11038
}
11039
 
11040
//------------------------------------------------------------------------------
11041
 
11042
XMFINLINE _XMHALF2::_XMHALF2
11043
(
11044
    FLOAT _x,
11045
    FLOAT _y
11046
)
11047
{
11048
    x = XMConvertFloatToHalf(_x);
11049
    y = XMConvertFloatToHalf(_y);
11050
}
11051
 
11052
//------------------------------------------------------------------------------
11053
 
11054
XMFINLINE _XMHALF2::_XMHALF2
11055
(
11056
    CONST FLOAT* pArray
11057
)
11058
{
11059
    x = XMConvertFloatToHalf(pArray[0]);
11060
    y = XMConvertFloatToHalf(pArray[1]);
11061
}
11062
 
11063
//------------------------------------------------------------------------------
11064
 
11065
XMFINLINE _XMHALF2& _XMHALF2::operator=
11066
(
11067
    CONST _XMHALF2& Half2
11068
)
11069
{
11070
    x = Half2.x;
11071
    y = Half2.y;
11072
    return *this;
11073
}
11074
 
11075
/****************************************************************************
 *
 * XMSHORTN2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
(
    CONST _XMSHORTN2& ShortN2
)
{
    x = ShortN2.x;
    y = ShortN2.y;
    return *this;
}

/****************************************************************************
 *
 * XMSHORT2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT2& _XMSHORT2::operator=
(
    CONST _XMSHORT2& Short2
)
{
    x = Short2.x;
    y = Short2.y;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORTN2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
(
    CONST _XMUSHORTN2& UShortN2
)
{
    x = UShortN2.x;
    y = UShortN2.y;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORT2 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
(
    CONST _XMUSHORT2& UShort2
)
{
    x = UShort2.x;
    y = UShort2.y;
    return *this;
}

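// Illustrative usage sketch (editor's addition, not part of the original
// header): the N-suffixed types store normalized values -- [-1, 1] for the
// signed forms, [0, 1] for the unsigned forms -- scaled onto the full
// integer range, while the plain types store the floats as raw integers.
// Assuming the matching xnamath.h load functions:
//
//     XMSHORTN2 n(1.0f, -1.0f);          // packs as x = 32767, y = -32767
//     XMVECTOR vn = XMLoadShortN2(&n);   // x and y come back as 1.0f, -1.0f
//
//     XMSHORT2 s(100.0f, -200.0f);       // packs as x = 100, y = -200
//     XMVECTOR vs = XMLoadShort2(&s);    // x and y come back as 100.0f, -200.0f
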
/****************************************************************************
 *
 * XMFLOAT3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT3::_XMFLOAT3
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
(
    CONST _XMFLOAT3& Float3
)
{
    x = Float3.x;
    y = Float3.y;
    z = Float3.z;
    return *this;
}

/****************************************************************************
 *
 * XMHENDN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3::_XMHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3::_XMHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST _XMHENDN3& HenDN3
)
{
    v = HenDN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMHEND3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3::_XMHEND3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3::_XMHEND3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST _XMHEND3& HenD3
)
{
    v = HenD3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUHENDN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
(
    CONST _XMUHENDN3& UHenDN3
)
{
    v = UHenDN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUHEND3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3::_XMUHEND3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3::_XMUHEND3
(
    CONST FLOAT* pArray
)
{
    XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3& _XMUHEND3::operator=
(
    CONST _XMUHEND3& UHenD3
)
{
    v = UHenD3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUHEND3& _XMUHEND3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDHENN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3::_XMDHENN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3::_XMDHENN3
(
    CONST FLOAT* pArray
)
{
    XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3& _XMDHENN3::operator=
(
    CONST _XMDHENN3& DHenN3
)
{
    v = DHenN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHENN3& _XMDHENN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDHEN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3::_XMDHEN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3::_XMDHEN3
(
    CONST FLOAT* pArray
)
{
    XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3& _XMDHEN3::operator=
(
    CONST _XMDHEN3& DHen3
)
{
    v = DHen3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDHEN3& _XMDHEN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDHENN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3::_XMUDHENN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3::_XMUDHENN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
(
    CONST _XMUDHENN3& UDHenN3
)
{
    v = UDHenN3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDHEN3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3::_XMUDHEN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3::_XMUDHEN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
(
    CONST _XMUDHEN3& UDHen3
)
{
    v = UDHen3.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMU565 operators
 *
 ****************************************************************************/

XMFINLINE _XMU565::_XMU565
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}

XMFINLINE _XMU565::_XMU565
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMU565::_XMU565
(
    CONST FLOAT *pArray
)
{
    XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMU565& _XMU565::operator=
(
    CONST _XMU565& U565
)
{
    v = U565.v;
    return *this;
}

XMFINLINE _XMU565& _XMU565::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3PK operators
 *
 ****************************************************************************/

XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST _XMFLOAT3PK& float3pk
)
{
    v = float3pk.v;
    return *this;
}

XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3SE operators
 *
 ****************************************************************************/

XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST _XMFLOAT3SE& float3se
)
{
    v = float3se.v;
    return *this;
}

XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

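// Illustrative note (editor's addition, not part of the original header):
// XMFLOAT3PK holds three positive small floats in an 11:11:10 layout
// (6-bit mantissa / 5-bit exponent for x and y, 5-bit mantissa / 5-bit
// exponent for z, as in DXGI_FORMAT_R11G11B10_FLOAT), and XMFLOAT3SE
// stores three 9-bit mantissas under one shared 5-bit exponent
// (R9G9B9E5).  Powers of two survive a round trip exactly:
//
//     XMFLOAT3PK pk(0.5f, 1.0f, 2.0f);    // packed into pk.v
//     XMVECTOR v = XMLoadFloat3PK(&pk);   // recovers (0.5f, 1.0f, 2.0f)
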
/****************************************************************************
 *
 * XMFLOAT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT4::_XMFLOAT4
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
(
    CONST _XMFLOAT4& Float4
)
{
    x = Float4.x;
    y = Float4.y;
    z = Float4.z;
    w = Float4.w;
    return *this;
}

/****************************************************************************
 *
 * XMHALF4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    CONST HALF* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    x = XMConvertFloatToHalf(_x);
    y = XMConvertFloatToHalf(_y);
    z = XMConvertFloatToHalf(_z);
    w = XMConvertFloatToHalf(_w);
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4::_XMHALF4
(
    CONST FLOAT* pArray
)
{
    XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
}

//------------------------------------------------------------------------------

XMFINLINE _XMHALF4& _XMHALF4::operator=
(
    CONST _XMHALF4& Half4
)
{
    x = Half4.x;
    y = Half4.y;
    z = Half4.z;
    w = Half4.w;
    return *this;
}

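// Illustrative usage sketch (editor's addition, not part of the original
// header): HALF is a 16-bit float with a 10-bit mantissa, so the float
// constructors above keep only about three decimal digits of precision.
// Small integers convert exactly:
//
//     XMHALF4 h(1.0f, 2.0f, 3.0f, 4.0f);       // each component converted
//     FLOAT f = XMConvertHalfToFloat(h.x);     // 1.0f again, exact here
//     XMVECTOR v = XMLoadHalf4(&h);            // (1.0f, 2.0f, 3.0f, 4.0f)
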
/****************************************************************************
 *
 * XMSHORTN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
(
    CONST _XMSHORTN4& ShortN4
)
{
    x = ShortN4.x;
    y = ShortN4.y;
    z = ShortN4.z;
    w = ShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMSHORT4& _XMSHORT4::operator=
(
    CONST _XMSHORT4& Short4
)
{
    x = Short4.x;
    y = Short4.y;
    z = Short4.z;
    w = Short4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORTN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
(
    CONST _XMUSHORTN4& UShortN4
)
{
    x = UShortN4.x;
    y = UShortN4.y;
    z = UShortN4.z;
    w = UShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
(
    CONST _XMUSHORT4& UShort4
)
{
    x = UShort4.x;
    y = UShort4.y;
    z = UShort4.z;
    w = UShort4.w;
    return *this;
}

/****************************************************************************
 *
 * XMXDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4::_XMXDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4::_XMXDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST _XMXDECN4& XDecN4
)
{
    v = XDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST _XMXDEC4& XDec4
)
{
    v = XDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST _XMDECN4& DecN4
)
{
    v = DecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST _XMDEC4& Dec4
)
{
    v = Dec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST _XMUDECN4& UDecN4
)
{
    v = UDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST _XMUDEC4& UDec4
)
{
    v = UDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

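// Illustrative note (editor's addition, not part of the original header):
// the Dec family packs x, y, z into 10 bits each plus a 2-bit w in the
// UINT v member; U marks the unsigned forms, N the normalized ones, and
// the X variants pair signed x/y/z with an unsigned 2-bit w.  With the
// unsigned normalized form (the R10G10B10A2_UNORM layout):
//
//     XMUDECN4 d(0.25f, 0.5f, 0.75f, 1.0f);  // quantized into d.v
//     XMVECTOR v = XMLoadUDecN4(&d);         // approximately the inputs
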
/****************************************************************************
 *
 * XMXICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST _XMXICON4& XIcoN4
)
{
    v = XIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST _XMXICO4& XIco4
)
{
    v = XIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST _XMICON4& IcoN4
)
{
    v = IcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST _XMICO4& Ico4
)
{
    v = Ico4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST _XMUICON4& UIcoN4
)
{
    v = UIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST _XMUICO4& UIco4
)
{
    v = UIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMCOLOR4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreColor(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    CONST FLOAT* pArray
)
{
    XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST _XMCOLOR& Color
)
{
    c = Color.c;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST UINT Color
)
{
    c = Color;
    return *this;
}

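// Illustrative usage sketch (editor's addition, not part of the original
// header): XMCOLOR stores a normalized 8:8:8:8 ARGB color in its c
// member, mapping each [0, 1] float onto 0-255:
//
//     XMCOLOR red(1.0f, 0.0f, 0.0f, 1.0f);  // c == 0xFFFF0000 (A, R, G, B)
//     XMVECTOR v = XMLoadColor(&red);       // (1.0f, 0.0f, 0.0f, 1.0f)
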
/****************************************************************************
 *
 * XMBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
(
    CONST _XMBYTEN4& ByteN4
)
{
    x = ByteN4.x;
    y = ByteN4.y;
    z = ByteN4.z;
    w = ByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4& _XMBYTE4::operator=
(
    CONST _XMBYTE4& Byte4
)
{
    x = Byte4.x;
    y = Byte4.y;
    z = Byte4.z;
    w = Byte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
(
    CONST _XMUBYTEN4& UByteN4
)
{
    x = UByteN4.x;
    y = UByteN4.y;
    z = UByteN4.z;
    w = UByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
(
    CONST _XMUBYTE4& UByte4
)
{
    x = UByte4.x;
    y = UByte4.y;
    z = UByte4.z;
    w = UByte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUNIBBLE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST FLOAT *pArray
)
{
    XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST _XMUNIBBLE4& UNibble4
)
{
    v = UNibble4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMU555 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    CONST CHAR *pArray,
    BOOL _w
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    BOOL _w
)
{
    XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555::_XMU555
(
    CONST FLOAT *pArray,
    BOOL _w
)
{
    XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray);
    XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555& _XMU555::operator=
(
    CONST _XMU555& U555
)
{
    v = U555.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMU555& _XMU555::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

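// Illustrative note (editor's addition, not part of the original header):
// XMUNIBBLE4 packs four 4-bit unsigned fields and XMU555 three 5-bit
// fields plus a 1-bit w into the USHORT v member; neither is normalized,
// so the floats are stored as small integers:
//
//     XMU555 p(31.0f, 0.0f, 31.0f, TRUE);  // 5:5:5:1 texel, e.g. magenta
//     USHORT bits = p.v;                   // the raw packed value
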
#endif // __cplusplus

#if defined(_XM_NO_INTRINSICS_)
#undef XMISNAN
#undef XMISINF
#endif

#endif // __XNAMATHVECTOR_INL__