/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathmisc.inl

Abstract:

    XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions.

--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHMISC_INL__
#define __XNAMATHMISC_INL__

/****************************************************************************
 *
 * Quaternion
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
 
31
//------------------------------------------------------------------------------
32
 
33
XMFINLINE BOOL XMQuaternionEqual
34
(
35
    FXMVECTOR Q1,
36
    FXMVECTOR Q2
37
)
38
{
39
    return XMVector4Equal(Q1, Q2);
40
}
41
 
42
//------------------------------------------------------------------------------
43
 
44
XMFINLINE BOOL XMQuaternionNotEqual
45
(
46
    FXMVECTOR Q1,
47
    FXMVECTOR Q2
48
)
49
{
50
    return XMVector4NotEqual(Q1, Q2);
51
}
52
 
53
//------------------------------------------------------------------------------
54
 
55
XMFINLINE BOOL XMQuaternionIsNaN
56
(
57
    FXMVECTOR Q
58
)
59
{
60
    return XMVector4IsNaN(Q);
61
}
62
 
63
//------------------------------------------------------------------------------
64
 
65
XMFINLINE BOOL XMQuaternionIsInfinite
66
(
67
    FXMVECTOR Q
68
)
69
{
70
    return XMVector4IsInfinite(Q);
71
}
72
 
73
//------------------------------------------------------------------------------
74
 
75
XMFINLINE BOOL XMQuaternionIsIdentity
76
(
77
    FXMVECTOR Q
78
)
79
{
80
#if defined(_XM_NO_INTRINSICS_)
81
 
82
    return XMVector4Equal(Q, g_XMIdentityR3.v);
83
 
84
#elif defined(_XM_SSE_INTRINSICS_)
85
    XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
86
    return (_mm_movemask_ps(vTemp)==0x0f) ? true : false;
87
#else // _XM_VMX128_INTRINSICS_
88
#endif // _XM_VMX128_INTRINSICS_
89
}
90
 
91
//------------------------------------------------------------------------------
92
// Computation operations
93
//------------------------------------------------------------------------------
94
 
95
//------------------------------------------------------------------------------
96
 
97
XMFINLINE XMVECTOR XMQuaternionDot
98
(
99
    FXMVECTOR Q1,
100
    FXMVECTOR Q2
101
)
102
{
103
    return XMVector4Dot(Q1, Q2);
104
}
105
 
106
//------------------------------------------------------------------------------
107
 
108
XMFINLINE XMVECTOR XMQuaternionMultiply
109
(
110
    FXMVECTOR Q1,
111
    FXMVECTOR Q2
112
)
113
{
114
#if defined(_XM_NO_INTRINSICS_)
115
 
116
    XMVECTOR         NegativeQ1;
117
    XMVECTOR         Q2X;
118
    XMVECTOR         Q2Y;
119
    XMVECTOR         Q2Z;
120
    XMVECTOR         Q2W;
121
    XMVECTOR         Q1WZYX;
122
    XMVECTOR         Q1ZWXY;
123
    XMVECTOR         Q1YXWZ;
124
    XMVECTOR         Result;
125
    CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
126
    CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
127
    CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
128
 
129
    NegativeQ1 = XMVectorNegate(Q1);
130
 
131
    Q2W = XMVectorSplatW(Q2);
132
    Q2X = XMVectorSplatX(Q2);
133
    Q2Y = XMVectorSplatY(Q2);
134
    Q2Z = XMVectorSplatZ(Q2);
135
 
136
    Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
137
    Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
138
    Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
139
 
140
    Result = XMVectorMultiply(Q1, Q2W);
141
    Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
142
    Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
143
    Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
144
 
145
    return Result;
146
 
147
#elif defined(_XM_SSE_INTRINSICS_)
148
    static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
149
    static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
150
    static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
151
    // Copy to SSE registers and use as few as possible for x86
152
    XMVECTOR Q2X = Q2;
153
    XMVECTOR Q2Y = Q2;
154
    XMVECTOR Q2Z = Q2;
155
    XMVECTOR vResult = Q2;
156
    // Splat with one instruction
157
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
158
    Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
159
    Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
160
    Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
161
    // Retire Q1 and perform Q1*Q2W
162
    vResult = _mm_mul_ps(vResult,Q1);
163
    XMVECTOR Q1Shuffle = Q1;
164
    // Shuffle the copies of Q1
165
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
166
    // Mul by Q1WZYX
167
    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
168
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
169
    // Flip the signs on y and z
170
    Q2X = _mm_mul_ps(Q2X,ControlWZYX);
171
    // Mul by Q1ZWXY
172
    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
173
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
174
    // Flip the signs on z and w
175
    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
176
    // Mul by Q1YXWZ
177
    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
178
    vResult = _mm_add_ps(vResult,Q2X);
179
    // Flip the signs on x and w
180
    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
181
    Q2Y = _mm_add_ps(Q2Y,Q2Z);
182
    vResult = _mm_add_ps(vResult,Q2Y);
183
    return vResult;
184
#else // _XM_VMX128_INTRINSICS_
185
#endif // _XM_VMX128_INTRINSICS_
186
}
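//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: composes two
// rotations with XMQuaternionMultiply and renormalizes the result to guard
// against drift. Assumes this library's documented product convention, where
// the result represents the rotation Q1 followed by the rotation Q2. The
// function name is hypothetical.

XMFINLINE XMVECTOR XMExampleComposeRotations
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    XMVECTOR Q = XMQuaternionMultiply(Q1, Q2);
    return XMQuaternionNormalize(Q);
}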
187
 
188
//------------------------------------------------------------------------------
189
 
190
XMFINLINE XMVECTOR XMQuaternionLengthSq
191
(
192
    FXMVECTOR Q
193
)
194
{
195
    return XMVector4LengthSq(Q);
196
}
197
 
198
//------------------------------------------------------------------------------
199
 
200
XMFINLINE XMVECTOR XMQuaternionReciprocalLength
201
(
202
    FXMVECTOR Q
203
)
204
{
205
    return XMVector4ReciprocalLength(Q);
206
}
207
 
208
//------------------------------------------------------------------------------
209
 
210
XMFINLINE XMVECTOR XMQuaternionLength
211
(
212
    FXMVECTOR Q
213
)
214
{
215
    return XMVector4Length(Q);
216
}
217
 
218
//------------------------------------------------------------------------------
219
 
220
XMFINLINE XMVECTOR XMQuaternionNormalizeEst
221
(
222
    FXMVECTOR Q
223
)
224
{
225
    return XMVector4NormalizeEst(Q);
226
}
227
 
228
//------------------------------------------------------------------------------
229
 
230
XMFINLINE XMVECTOR XMQuaternionNormalize
231
(
232
    FXMVECTOR Q
233
)
234
{
235
    return XMVector4Normalize(Q);
236
}
237
 
238
//------------------------------------------------------------------------------
239
 
240
XMFINLINE XMVECTOR XMQuaternionConjugate
241
(
242
    FXMVECTOR Q
243
)
244
{
245
#if defined(_XM_NO_INTRINSICS_)
246
 
247
    XMVECTOR Result = {
248
        -Q.vector4_f32[0],
249
        -Q.vector4_f32[1],
250
        -Q.vector4_f32[2],
251
        Q.vector4_f32[3]
252
    };
253
    return Result;
254
#elif defined(_XM_SSE_INTRINSICS_)
255
    static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
256
    XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
257
    return Result;
258
#else // _XM_VMX128_INTRINSICS_
259
#endif // _XM_VMX128_INTRINSICS_
260
}
261
 
262
//------------------------------------------------------------------------------
263
 
264
XMFINLINE XMVECTOR XMQuaternionInverse
265
(
266
    FXMVECTOR Q
267
)
268
{
269
#if defined(_XM_NO_INTRINSICS_)
270
 
271
    XMVECTOR        Conjugate;
272
    XMVECTOR        L;
273
    XMVECTOR        Control;
274
    XMVECTOR        Result;
275
    CONST XMVECTOR  Zero = XMVectorZero();
276
 
277
    L = XMVector4LengthSq(Q);
278
    Conjugate = XMQuaternionConjugate(Q);
279
 
280
    Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
281
 
282
    L = XMVectorReciprocal(L);
283
    Result = XMVectorMultiply(Conjugate, L);
284
 
285
    Result = XMVectorSelect(Result, Zero, Control);
286
 
287
    return Result;
288
 
289
#elif defined(_XM_SSE_INTRINSICS_)
290
    XMVECTOR        Conjugate;
291
    XMVECTOR        L;
292
    XMVECTOR        Control;
293
    XMVECTOR        Result;
294
    XMVECTOR  Zero = XMVectorZero();
295
 
296
    L = XMVector4LengthSq(Q);
297
    Conjugate = XMQuaternionConjugate(Q);
298
    Control = XMVectorLessOrEqual(L, g_XMEpsilon);
299
    Result = _mm_div_ps(Conjugate,L);
300
    Result = XMVectorSelect(Result, Zero, Control);
301
    return Result;
302
#else // _XM_VMX128_INTRINSICS_
303
#endif // _XM_VMX128_INTRINSICS_
304
}
305
 
306
//------------------------------------------------------------------------------
307
 
308
XMFINLINE XMVECTOR XMQuaternionLn
309
(
310
    FXMVECTOR Q
311
)
312
{
313
#if defined(_XM_NO_INTRINSICS_)
314
 
315
    XMVECTOR Q0;
316
    XMVECTOR QW;
317
    XMVECTOR Theta;
318
    XMVECTOR SinTheta;
319
    XMVECTOR S;
320
    XMVECTOR ControlW;
321
    XMVECTOR Result;
322
    static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
323
 
324
    QW = XMVectorSplatW(Q);
325
    Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
326
 
327
    ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
328
 
329
    Theta = XMVectorACos(QW);
330
    SinTheta = XMVectorSin(Theta);
331
 
332
    S = XMVectorReciprocal(SinTheta);
333
    S = XMVectorMultiply(Theta, S);
334
 
335
    Result = XMVectorMultiply(Q0, S);
336
 
337
    Result = XMVectorSelect(Q0, Result, ControlW);
338
 
339
    return Result;
340
 
341
#elif defined(_XM_SSE_INTRINSICS_)
342
    static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
343
    static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
344
    // Get W only
345
    XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
346
    // W = 0
347
    XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
348
    // Use W if within bounds
349
    XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
350
    XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
351
    ControlW = _mm_and_ps(ControlW,vTemp2);
352
    // Get theta
353
    XMVECTOR vTheta = XMVectorACos(QW);
354
    // Get Sine of theta
355
    vTemp2 = XMVectorSin(vTheta);
356
    // theta/sine of theta
357
    vTheta = _mm_div_ps(vTheta,vTemp2);
358
    // Here's the answer
359
    vTheta = _mm_mul_ps(vTheta,Q0);
360
    // Was W in bounds? If not, return input as is
361
    vTheta = XMVectorSelect(Q0,vTheta,ControlW);
362
    return vTheta;
363
#else // _XM_VMX128_INTRINSICS_
364
#endif // _XM_VMX128_INTRINSICS_
365
}
366
 
367
//------------------------------------------------------------------------------
368
 
369
XMFINLINE XMVECTOR XMQuaternionExp
370
(
371
    FXMVECTOR Q
372
)
373
{
374
#if defined(_XM_NO_INTRINSICS_) 
375
 
376
    XMVECTOR Theta;
377
    XMVECTOR SinTheta;
378
    XMVECTOR CosTheta;
379
    XMVECTOR S;
380
    XMVECTOR Control;
381
    XMVECTOR Zero;
382
    XMVECTOR Result;
383
 
384
    Theta = XMVector3Length(Q);
385
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);
386
 
387
    S = XMVectorReciprocal(Theta);
388
    S = XMVectorMultiply(SinTheta, S);
389
 
390
    Result = XMVectorMultiply(Q, S);
391
 
392
    Zero = XMVectorZero();
393
    Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
394
    Result = XMVectorSelect(Result, Q, Control);
395
 
396
    Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
397
 
398
    return Result;
399
 
400
#elif defined(_XM_SSE_INTRINSICS_)
401
    XMVECTOR Theta;
402
    XMVECTOR SinTheta;
403
    XMVECTOR CosTheta;
404
    XMVECTOR S;
405
    XMVECTOR Control;
406
    XMVECTOR Zero;
407
    XMVECTOR Result;
408
    Theta = XMVector3Length(Q);
409
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);
410
    S = _mm_div_ps(SinTheta,Theta);
411
    Result = _mm_mul_ps(Q, S);
412
    Zero = XMVectorZero();
413
    Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
414
    Result = XMVectorSelect(Result,Q,Control);
415
    Result = _mm_and_ps(Result,g_XMMask3);
416
    CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
417
    Result = _mm_or_ps(Result,CosTheta);
418
    return Result;
419
#else // _XM_VMX128_INTRINSICS_
420
#endif // _XM_VMX128_INTRINSICS_
421
}
422
 
423
//------------------------------------------------------------------------------
424
 
425
XMINLINE XMVECTOR XMQuaternionSlerp
426
(
427
    FXMVECTOR Q0,
428
    FXMVECTOR Q1,
429
    FLOAT    t
430
)
431
{
432
    XMVECTOR T = XMVectorReplicate(t);
433
    return XMQuaternionSlerpV(Q0, Q1, T);
434
}
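//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: interpolates
// halfway between the identity orientation and a 90-degree yaw using slerp.
// Assumes the XM_PIDIV2 constant and the functions declared in xnamath.h;
// the function name is hypothetical.

XMFINLINE XMVECTOR XMExampleSlerpHalfwayYaw()
{
    XMVECTOR Q0 = XMQuaternionIdentity();
    XMVECTOR Q1 = XMQuaternionRotationRollPitchYaw(0.0f, XM_PIDIV2, 0.0f);
    // t = 0.5f gives the orientation midway along the shortest arc.
    return XMQuaternionSlerp(Q0, Q1, 0.5f);
}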
435
 
436
//------------------------------------------------------------------------------
437
 
438
XMINLINE XMVECTOR XMQuaternionSlerpV
439
(
440
    FXMVECTOR Q0,
441
    FXMVECTOR Q1,
442
    FXMVECTOR T
443
)
444
{
445
#if defined(_XM_NO_INTRINSICS_)
446
 
447
    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
448
    XMVECTOR Omega;
449
    XMVECTOR CosOmega;
450
    XMVECTOR SinOmega;
451
    XMVECTOR InvSinOmega;
452
    XMVECTOR V01;
453
    XMVECTOR C1000;
454
    XMVECTOR SignMask;
455
    XMVECTOR S0;
456
    XMVECTOR S1;
457
    XMVECTOR Sign;
458
    XMVECTOR Control;
459
    XMVECTOR Result;
460
    XMVECTOR Zero;
461
    CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
462
 
463
    XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));
464
 
465
    CosOmega = XMQuaternionDot(Q0, Q1);
466
 
467
    Zero = XMVectorZero();
468
    Control = XMVectorLess(CosOmega, Zero);
469
    Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
470
 
471
    CosOmega = XMVectorMultiply(CosOmega, Sign);
472
 
473
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);
474
 
475
    SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
476
    SinOmega = XMVectorSqrt(SinOmega);
477
 
478
    Omega = XMVectorATan2(SinOmega, CosOmega);
479
 
480
    SignMask = XMVectorSplatSignMask();
481
    C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
482
    V01 = XMVectorShiftLeft(T, Zero, 2);
483
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
484
    V01 = XMVectorXorInt(V01, SignMask);
485
    V01 = XMVectorAdd(C1000, V01);
486
 
487
    InvSinOmega = XMVectorReciprocal(SinOmega);
488
 
489
    S0 = XMVectorMultiply(V01, Omega);
490
    S0 = XMVectorSin(S0);
491
    S0 = XMVectorMultiply(S0, InvSinOmega);
492
 
493
    S0 = XMVectorSelect(V01, S0, Control);
494
 
495
    S1 = XMVectorSplatY(S0);
496
    S0 = XMVectorSplatX(S0);
497
 
498
    S1 = XMVectorMultiply(S1, Sign);
499
 
500
    Result = XMVectorMultiply(Q0, S0);
501
    Result = XMVectorMultiplyAdd(Q1, S1, Result);
502
 
503
    return Result;
504
 
505
#elif defined(_XM_SSE_INTRINSICS_)
506
    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
507
    XMVECTOR Omega;
508
    XMVECTOR CosOmega;
509
    XMVECTOR SinOmega;
510
    XMVECTOR V01;
511
    XMVECTOR S0;
512
    XMVECTOR S1;
513
    XMVECTOR Sign;
514
    XMVECTOR Control;
515
    XMVECTOR Result;
516
    XMVECTOR Zero;
517
    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
518
    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
519
    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
520
 
521
    XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
522
 
523
    CosOmega = XMQuaternionDot(Q0, Q1);
524
 
525
    Zero = XMVectorZero();
526
    Control = XMVectorLess(CosOmega, Zero);
527
    Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
528
 
529
    CosOmega = _mm_mul_ps(CosOmega, Sign);
530
 
531
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);
532
 
533
    SinOmega = _mm_mul_ps(CosOmega,CosOmega);
534
    SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
535
    SinOmega = _mm_sqrt_ps(SinOmega);
536
 
537
    Omega = XMVectorATan2(SinOmega, CosOmega);
538
 
539
    V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
540
    V01 = _mm_and_ps(V01,MaskXY);
541
    V01 = _mm_xor_ps(V01,SignMask2);
542
    V01 = _mm_add_ps(g_XMIdentityR0, V01);
543
 
544
    S0 = _mm_mul_ps(V01, Omega);
545
    S0 = XMVectorSin(S0);
546
    S0 = _mm_div_ps(S0, SinOmega);
547
 
548
    S0 = XMVectorSelect(V01, S0, Control);
549
 
550
    S1 = XMVectorSplatY(S0);
551
    S0 = XMVectorSplatX(S0);
552
 
553
    S1 = _mm_mul_ps(S1, Sign);
554
    Result = _mm_mul_ps(Q0, S0);
555
    S1 = _mm_mul_ps(S1, Q1);
556
    Result = _mm_add_ps(Result,S1);
557
    return Result;
558
#else // _XM_VMX128_INTRINSICS_
559
#endif // _XM_VMX128_INTRINSICS_
560
}
561
 
562
//------------------------------------------------------------------------------
563
 
564
XMFINLINE XMVECTOR XMQuaternionSquad
565
(
566
    FXMVECTOR Q0,
567
    FXMVECTOR Q1,
568
    FXMVECTOR Q2,
569
    CXMVECTOR Q3,
570
    FLOAT    t
571
)
572
{
573
    XMVECTOR T = XMVectorReplicate(t);
574
    return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
575
}
576
 
577
//------------------------------------------------------------------------------
578
 
579
XMFINLINE XMVECTOR XMQuaternionSquadV
580
(
581
    FXMVECTOR Q0,
582
    FXMVECTOR Q1,
583
    FXMVECTOR Q2,
584
    CXMVECTOR Q3,
585
    CXMVECTOR T
586
)
587
{
588
    XMVECTOR Q03;
589
    XMVECTOR Q12;
590
    XMVECTOR TP;
591
    XMVECTOR Two;
592
    XMVECTOR Result;
593
 
594
    XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
595
 
596
    TP = T;
597
    Two = XMVectorSplatConstant(2, 0);
598
 
599
    Q03 = XMQuaternionSlerpV(Q0, Q3, T);
600
    Q12 = XMQuaternionSlerpV(Q1, Q2, T);
601
 
602
    TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
603
    TP = XMVectorMultiply(TP, Two);
604
 
605
    Result = XMQuaternionSlerpV(Q03, Q12, TP);
606
 
607
    return Result;
608
 
609
}
610
 
611
//------------------------------------------------------------------------------
612
 
613
XMINLINE VOID XMQuaternionSquadSetup
614
(
615
    XMVECTOR* pA,
616
    XMVECTOR* pB,
617
    XMVECTOR* pC,
618
    FXMVECTOR  Q0,
619
    FXMVECTOR  Q1,
620
    FXMVECTOR  Q2,
621
    CXMVECTOR  Q3
622
)
623
{
624
    XMVECTOR SQ0, SQ2, SQ3;
625
    XMVECTOR InvQ1, InvQ2;
626
    XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
627
    XMVECTOR ExpQ02, ExpQ13;
628
    XMVECTOR LS01, LS12, LS23;
629
    XMVECTOR LD01, LD12, LD23;
630
    XMVECTOR Control0, Control1, Control2;
631
    XMVECTOR NegativeOneQuarter;
632
 
633
    XMASSERT(pA);
634
    XMASSERT(pB);
635
    XMASSERT(pC);
636
 
637
    LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
638
    LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
639
    SQ2 = XMVectorNegate(Q2);
640
 
641
    Control1 = XMVectorLess(LS12, LD12);
642
    SQ2 = XMVectorSelect(Q2, SQ2, Control1);
643
 
644
    LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
645
    LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
646
    SQ0 = XMVectorNegate(Q0);
647
 
648
    LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
649
    LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
650
    SQ3 = XMVectorNegate(Q3);
651
 
652
    Control0 = XMVectorLess(LS01, LD01);
653
    Control2 = XMVectorLess(LS23, LD23);
654
 
655
    SQ0 = XMVectorSelect(Q0, SQ0, Control0);
656
    SQ3 = XMVectorSelect(Q3, SQ3, Control2);
657
 
658
    InvQ1 = XMQuaternionInverse(Q1);
659
    InvQ2 = XMQuaternionInverse(SQ2);
660
 
661
    LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
662
    LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
663
    LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
664
    LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
665
 
666
    NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
667
 
668
    ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
669
    ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
670
    ExpQ02 = XMQuaternionExp(ExpQ02);
671
    ExpQ13 = XMQuaternionExp(ExpQ13);
672
 
673
    *pA = XMQuaternionMultiply(Q1, ExpQ02);
674
    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
675
    *pC = SQ2;
676
}
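//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: pairs
// XMQuaternionSquadSetup with XMQuaternionSquad to interpolate between the
// middle keys Q1 and Q2 of a four-key sequence. Normalizing the inputs is a
// precaution, not a requirement stated here; the function name is hypothetical.

XMFINLINE XMVECTOR XMExampleSquadInterpolate
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    CXMVECTOR Q3,
    FLOAT    t
)
{
    XMVECTOR A, B, C;
    XMQuaternionSquadSetup(&A, &B, &C, Q0, Q1, Q2, Q3);
    return XMQuaternionSquad(XMQuaternionNormalize(Q1), XMQuaternionNormalize(A),
                             XMQuaternionNormalize(B), XMQuaternionNormalize(C), t);
}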
677
 
678
//------------------------------------------------------------------------------
679
 
680
XMFINLINE XMVECTOR XMQuaternionBaryCentric
681
(
682
    FXMVECTOR Q0,
683
    FXMVECTOR Q1,
684
    FXMVECTOR Q2,
685
    FLOAT    f,
686
    FLOAT    g
687
)
688
{
689
    XMVECTOR Q01;
690
    XMVECTOR Q02;
691
    FLOAT    s;
692
    XMVECTOR Result;
693
 
694
    s = f + g;
695
 
696
    if (s < 0.00001f && s > -0.00001f)
697
    {
698
        Result = Q0;
699
    }
700
    else
701
    {
702
        Q01 = XMQuaternionSlerp(Q0, Q1, s);
703
        Q02 = XMQuaternionSlerp(Q0, Q2, s);
704
 
705
        Result = XMQuaternionSlerp(Q01, Q02, g / s);
706
    }
707
 
708
    return Result;
709
}
710
 
711
//------------------------------------------------------------------------------
712
 
713
XMFINLINE XMVECTOR XMQuaternionBaryCentricV
714
(
715
    FXMVECTOR Q0,
716
    FXMVECTOR Q1,
717
    FXMVECTOR Q2,
718
    CXMVECTOR F,
719
    CXMVECTOR G
720
)
721
{
722
    XMVECTOR Q01;
723
    XMVECTOR Q02;
724
    XMVECTOR S, GS;
725
    XMVECTOR Epsilon;
726
    XMVECTOR Result;
727
 
728
    XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
729
    XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
730
 
731
    Epsilon = XMVectorSplatConstant(1, 16);
732
 
733
    S = XMVectorAdd(F, G);
734
 
735
    if (XMVector4InBounds(S, Epsilon))
736
    {
737
        Result = Q0;
738
    }
739
    else
740
    {
741
        Q01 = XMQuaternionSlerpV(Q0, Q1, S);
742
        Q02 = XMQuaternionSlerpV(Q0, Q2, S);
743
        GS = XMVectorReciprocal(S);
744
        GS = XMVectorMultiply(G, GS);
745
 
746
        Result = XMQuaternionSlerpV(Q01, Q02, GS);
747
    }
748
 
749
    return Result;
750
}
751
 
752
//------------------------------------------------------------------------------
753
// Transformation operations
754
//------------------------------------------------------------------------------
755
 
756
//------------------------------------------------------------------------------
757
 
758
XMFINLINE XMVECTOR XMQuaternionIdentity()
759
{
760
#if defined(_XM_NO_INTRINSICS_)
761
    return g_XMIdentityR3.v;
762
#elif defined(_XM_SSE_INTRINSICS_)
763
    return g_XMIdentityR3;
764
#else // _XM_VMX128_INTRINSICS_
765
#endif // _XM_VMX128_INTRINSICS_
766
}
767
 
768
//------------------------------------------------------------------------------
769
 
770
XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
771
(
772
    FLOAT Pitch,
773
    FLOAT Yaw,
774
    FLOAT Roll
775
)
776
{
777
    XMVECTOR Angles;
778
    XMVECTOR Q;
779
 
780
    Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
781
    Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
782
 
783
    return Q;
784
}
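//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: builds an
// orientation from Euler angles (in radians) and applies it to a unit forward
// vector with XMVector3Rotate. The function name is hypothetical.

XMFINLINE XMVECTOR XMExampleRotateForward
(
    FLOAT Pitch,
    FLOAT Yaw,
    FLOAT Roll
)
{
    XMVECTOR Q = XMQuaternionRotationRollPitchYaw(Pitch, Yaw, Roll);
    XMVECTOR Forward = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    return XMVector3Rotate(Forward, Q);
}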
785
 
786
//------------------------------------------------------------------------------
787
 
788
XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
789
(
790
    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
791
)
792
{
793
#if defined(_XM_NO_INTRINSICS_)
794
 
795
    XMVECTOR                Q, Q0, Q1;
796
    XMVECTOR                P0, P1, Y0, Y1, R0, R1;
797
    XMVECTOR                HalfAngles;
798
    XMVECTOR                SinAngles, CosAngles;
799
    static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
800
    static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
801
    static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
802
    static CONST XMVECTOR   Sign = {1.0f, -1.0f, -1.0f, 1.0f};
803
 
804
    HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
805
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
806
 
807
    P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
808
    Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
809
    R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
810
    P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
811
    Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
812
    R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
813
 
814
    Q1 = XMVectorMultiply(P1, Sign);
815
    Q0 = XMVectorMultiply(P0, Y0);
816
    Q1 = XMVectorMultiply(Q1, Y1);
817
    Q0 = XMVectorMultiply(Q0, R0);
818
    Q = XMVectorMultiplyAdd(Q1, R1, Q0);
819
 
820
    return Q;
821
 
822
#elif defined(_XM_SSE_INTRINSICS_)
823
    XMVECTOR                Q, Q0, Q1;
824
    XMVECTOR                P0, P1, Y0, Y1, R0, R1;
825
    XMVECTOR                HalfAngles;
826
    XMVECTOR                SinAngles, CosAngles;
827
    static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
828
    static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
829
    static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
830
    static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
831
 
832
    HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
833
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
834
 
835
    P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
836
    Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
837
    R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
838
    P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
839
    Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
840
    R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
841
 
842
    Q1 = _mm_mul_ps(P1, Sign);
843
    Q0 = _mm_mul_ps(P0, Y0);
844
    Q1 = _mm_mul_ps(Q1, Y1);
845
    Q0 = _mm_mul_ps(Q0, R0);
846
    Q = _mm_mul_ps(Q1, R1);
847
    Q = _mm_add_ps(Q,Q0);
848
    return Q;
849
#else // _XM_VMX128_INTRINSICS_
850
#endif // _XM_VMX128_INTRINSICS_
851
}
852
 
853
//------------------------------------------------------------------------------
854
 
855
XMFINLINE XMVECTOR XMQuaternionRotationNormal
856
(
857
    FXMVECTOR NormalAxis,
858
    FLOAT    Angle
859
)
860
{
861
#if defined(_XM_NO_INTRINSICS_)
862
 
863
    XMVECTOR Q;
864
    XMVECTOR N;
865
    XMVECTOR Scale;
866
 
867
    N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
868
 
869
    XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
870
 
871
    Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
872
 
873
    Q = XMVectorMultiply(N, Scale);
874
 
875
    return Q;
876
 
877
#elif defined(_XM_SSE_INTRINSICS_)
878
    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
879
    N = _mm_or_ps(N,g_XMIdentityR3);
880
    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
881
    XMVECTOR vSine;
882
    XMVECTOR vCosine;
883
    XMVectorSinCos(&vSine,&vCosine,Scale);
884
    Scale = _mm_and_ps(vSine,g_XMMask3);
885
    vCosine = _mm_and_ps(vCosine,g_XMMaskW);
886
    Scale = _mm_or_ps(Scale,vCosine);
887
    N = _mm_mul_ps(N,Scale);
888
    return N;
889
#else // _XM_VMX128_INTRINSICS_
890
#endif // _XM_VMX128_INTRINSICS_
891
}
892
 
893
//------------------------------------------------------------------------------
894
 
895
XMFINLINE XMVECTOR XMQuaternionRotationAxis
896
(
897
    FXMVECTOR Axis,
898
    FLOAT    Angle
899
)
900
{
901
#if defined(_XM_NO_INTRINSICS_)
902
 
903
    XMVECTOR Normal;
904
    XMVECTOR Q;
905
 
906
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
907
    XMASSERT(!XMVector3IsInfinite(Axis));
908
 
909
    Normal = XMVector3Normalize(Axis);
910
    Q = XMQuaternionRotationNormal(Normal, Angle);
911
 
912
    return Q;
913
 
914
#elif defined(_XM_SSE_INTRINSICS_)
915
    XMVECTOR Normal;
916
    XMVECTOR Q;
917
 
918
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
919
    XMASSERT(!XMVector3IsInfinite(Axis));
920
 
921
    Normal = XMVector3Normalize(Axis);
922
    Q = XMQuaternionRotationNormal(Normal, Angle);
923
    return Q;
924
#else // _XM_VMX128_INTRINSICS_
925
#endif // _XM_VMX128_INTRINSICS_
926
}
927
 
928
//------------------------------------------------------------------------------
929
 
930
XMINLINE XMVECTOR XMQuaternionRotationMatrix
931
(
932
    CXMMATRIX M
933
)
934
{
935
#if defined(_XM_NO_INTRINSICS_)
936
 
937
    XMVECTOR Q0, Q1, Q2;
938
    XMVECTOR M00, M11, M22;
939
    XMVECTOR CQ0, CQ1, C;
940
    XMVECTOR CX, CY, CZ, CW;
941
    XMVECTOR SQ1, Scale;
942
    XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select;
943
    XMVECTOR A, B, P;
944
    XMVECTOR PermuteSplat, PermuteSplatT;
945
    XMVECTOR SignB, SignBT;
946
    XMVECTOR PermuteControl, PermuteControlT;
947
    XMVECTOR Zero;
948
    XMVECTOR Result;
949
    static CONST XMVECTOR  OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
950
    static CONST XMVECTOR  SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
951
    static CONST XMVECTOR  SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
952
    static CONST XMVECTOR  SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
953
    static CONST XMVECTOR  SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
954
    static CONST XMVECTOR  SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
955
    static CONST XMVECTOR  SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
956
    static CONST XMVECTOR  SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f};
957
    static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
958
    static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
959
    static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
960
    static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
961
    static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
962
    static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
963
    static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
964
    static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
965
    static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
966
    static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
967
    static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
968
    static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
969
    static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
970
 
971
    M00 = XMVectorSplatX(M.r[0]);
972
    M11 = XMVectorSplatY(M.r[1]);
973
    M22 = XMVectorSplatZ(M.r[2]);
974
 
975
    Q0 = XMVectorMultiply(SignPNNP, M00);
976
    Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0);
977
    Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0);
978
 
979
    Q1 = XMVectorAdd(Q0, g_XMOne.v);
980
 
981
    Rsq = XMVectorReciprocalSqrt(Q1);
982
    Zero = XMVectorZero();
983
    VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity.v);
984
    VEqualsZero = XMVectorEqual(Q1, Zero);
985
    Sqrt = XMVectorMultiply(Q1, Rsq);
986
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
987
    Q1 = XMVectorSelect(Q1, Sqrt, Select);
988
 
989
    Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
990
 
991
    SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
992
 
993
    CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
994
    CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W.v);
995
    C = XMVectorGreaterOrEqual(CQ0, CQ1);
996
 
997
    CX = XMVectorSplatX(C);
998
    CY = XMVectorSplatY(C);
999
    CZ = XMVectorSplatZ(C);
1000
    CW = XMVectorSplatW(C);
1001
 
1002
    PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
1003
    SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ);
1004
    PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
1005
 
1006
    PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
1007
    SignB = XMVectorSelect(SignB, SignNPPP, CX);
1008
    PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
1009
 
1010
    PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
1011
    SignBT = XMVectorSelect(SignB, SignPNPP, CY);
1012
    PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
1013
 
1014
    PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
1015
    SignB = XMVectorSelect(SignB, SignBT, CX);
1016
    PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
1017
 
1018
    PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
1019
    SignB = XMVectorSelect(SignB, SignNNNX, CW);
1020
    PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
1021
 
1022
    Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
1023
 
1024
    P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v);  // {M10, M12, M20, M21}
1025
    A = XMVectorPermute(M.r[0], P, PermuteA.v);       // {M01, M12, M20, M03}
1026
    B = XMVectorPermute(M.r[0], P, PermuteB.v);       // {M10, M21, M02, M03}
1027
 
1028
    Q2 = XMVectorMultiplyAdd(SignB, B, A);
1029
    Q2 = XMVectorMultiply(Q2, Scale);
1030
 
1031
    Result = XMVectorPermute(Q1, Q2, PermuteControl);
1032
 
1033
    return Result;
1034
 
1035
#elif defined(_XM_SSE_INTRINSICS_)
1036
    XMVECTOR Q0, Q1, Q2;
1037
    XMVECTOR M00, M11, M22;
1038
    XMVECTOR CQ0, CQ1, C;
1039
    XMVECTOR CX, CY, CZ, CW;
1040
    XMVECTOR SQ1, Scale;
1041
    XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select;
1042
    XMVECTOR A, B, P;
1043
    XMVECTOR PermuteSplat, PermuteSplatT;
1044
    XMVECTOR SignB, SignBT;
1045
    XMVECTOR PermuteControl, PermuteControlT;
1046
    XMVECTOR Zero;
1047
    XMVECTOR Result;
1048
    static CONST XMVECTORF32  OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
1049
    static CONST XMVECTORF32  SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
1050
    static CONST XMVECTORF32  SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
1051
    static CONST XMVECTORF32  SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
1052
    static CONST XMVECTORF32  SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
1053
    static CONST XMVECTORF32  SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
1054
    static CONST XMVECTORF32  SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
1055
    static CONST XMVECTORF32  SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f};
1056
    static CONST XMVECTORI32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
1057
    static CONST XMVECTORI32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
1058
    static CONST XMVECTORI32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
1059
    static CONST XMVECTORI32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
1060
    static CONST XMVECTORI32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
1061
    static CONST XMVECTORI32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
1062
    static CONST XMVECTORI32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
1063
    static CONST XMVECTORI32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
1064
    static CONST XMVECTORI32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
1065
    static CONST XMVECTORI32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
1066
    static CONST XMVECTORI32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
1067
    static CONST XMVECTORI32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
1068
    static CONST XMVECTORI32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
1069
 
1070
    M00 = XMVectorSplatX(M.r[0]);
1071
    M11 = XMVectorSplatY(M.r[1]);
1072
    M22 = XMVectorSplatZ(M.r[2]);
1073
 
1074
    Q0 = XMVectorMultiply(SignPNNP, M00);
1075
    Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0);
1076
    Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0);
1077
 
1078
    Q1 = XMVectorAdd(Q0, g_XMOne);
1079
 
1080
    Rsq = XMVectorReciprocalSqrt(Q1);
1081
    Zero = XMVectorZero();
1082
    VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity);
1083
    VEqualsZero = XMVectorEqual(Q1, Zero);
1084
    Sqrt = XMVectorMultiply(Q1, Rsq);
1085
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
1086
    Q1 = XMVectorSelect(Q1, Sqrt, Select);
1087
 
1088
    Q1 = XMVectorMultiply(Q1, g_XMOneHalf);
1089
 
1090
    SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf);
1091
 
1092
    CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W);
1093
    CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W);
1094
    C = XMVectorGreaterOrEqual(CQ0, CQ1);
1095
 
1096
    CX = XMVectorSplatX(C);
1097
    CY = XMVectorSplatY(C);
1098
    CZ = XMVectorSplatZ(C);
1099
    CW = XMVectorSplatW(C);
1100
 
1101
    PermuteSplat = XMVectorSelect(SplatZ, SplatY, CZ);
1102
    SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ);
1103
    PermuteControl = XMVectorSelect(Permute2, Permute1, CZ);
1104
 
1105
    PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ, CX);
1106
    SignB = XMVectorSelect(SignB, SignNPPP, CX);
1107
    PermuteControl = XMVectorSelect(PermuteControl, Permute2, CX);
1108
 
1109
    PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX, CY);
1110
    SignBT = XMVectorSelect(SignB, SignPNPP, CY);
1111
    PermuteControlT = XMVectorSelect(PermuteControl,Permute0, CY);
1112
 
1113
    PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
1114
    SignB = XMVectorSelect(SignB, SignBT, CX);
1115
    PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
1116
 
1117
    PermuteSplat = XMVectorSelect(PermuteSplat,SplatW, CW);
1118
    SignB = XMVectorSelect(SignB, SignNNNX, CW);
1119
    PermuteControl = XMVectorSelect(PermuteControl,Permute3, CW);
1120
 
1121
    Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
1122
 
1123
    P = XMVectorPermute(M.r[1], M.r[2],PermuteC);  // {M10, M12, M20, M21}
1124
    A = XMVectorPermute(M.r[0], P, PermuteA);       // {M01, M12, M20, M03}
1125
    B = XMVectorPermute(M.r[0], P, PermuteB);       // {M10, M21, M02, M03}
1126
 
1127
    Q2 = XMVectorMultiplyAdd(SignB, B, A);
1128
    Q2 = XMVectorMultiply(Q2, Scale);
1129
 
1130
    Result = XMVectorPermute(Q1, Q2, PermuteControl);
1131
 
1132
    return Result;
1133
#else // _XM_VMX128_INTRINSICS_
1134
#endif // _XM_VMX128_INTRINSICS_
1135
}
1136
 
1137
//------------------------------------------------------------------------------
1138
// Conversion operations
1139
//------------------------------------------------------------------------------
1140
 
1141
//------------------------------------------------------------------------------
1142
 
1143
XMFINLINE VOID XMQuaternionToAxisAngle
1144
(
1145
    XMVECTOR* pAxis,
1146
    FLOAT*    pAngle,
1147
    FXMVECTOR  Q
1148
)
1149
{
1150
    XMASSERT(pAxis);
1151
    XMASSERT(pAngle);
1152
 
1153
    *pAxis = Q;
1154
 
1155
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1156
    *pAngle = 2.0f * acosf(XMVectorGetW(Q));
1157
#else
1158
    *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
1159
#endif
1160
}
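//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: round-trips an
// axis/angle pair through a quaternion. Note that the axis returned by
// XMQuaternionToAxisAngle is simply the xyz of the quaternion and is not unit
// length, so it is renormalized here. The function name is hypothetical.

XMFINLINE XMVECTOR XMExampleAxisAngleRoundTrip
(
    FXMVECTOR Axis,
    FLOAT    Angle
)
{
    XMVECTOR Q = XMQuaternionRotationAxis(Axis, Angle);
    XMVECTOR RecoveredAxis;
    FLOAT    RecoveredAngle;
    XMQuaternionToAxisAngle(&RecoveredAxis, &RecoveredAngle, Q);
    return XMVector3Normalize(RecoveredAxis);
}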
1161
 
1162
/****************************************************************************
1163
 *
1164
 * Plane
1165
 *
1166
 ****************************************************************************/
1167
 
1168
//------------------------------------------------------------------------------
1169
// Comparison operations
1170
//------------------------------------------------------------------------------
1171
 
1172
//------------------------------------------------------------------------------
1173
 
1174
XMFINLINE BOOL XMPlaneEqual
1175
(
1176
    FXMVECTOR P1,
1177
    FXMVECTOR P2
1178
)
1179
{
1180
    return XMVector4Equal(P1, P2);
1181
}
1182
 
1183
//------------------------------------------------------------------------------
1184
 
1185
XMFINLINE BOOL XMPlaneNearEqual
1186
(
1187
    FXMVECTOR P1,
1188
    FXMVECTOR P2,
1189
    FXMVECTOR Epsilon
1190
)
1191
{
1192
    XMVECTOR NP1 = XMPlaneNormalize(P1);
1193
    XMVECTOR NP2 = XMPlaneNormalize(P2);
1194
    return XMVector4NearEqual(NP1, NP2, Epsilon);
1195
}
1196
 
1197
//------------------------------------------------------------------------------
1198
 
1199
XMFINLINE BOOL XMPlaneNotEqual
1200
(
1201
    FXMVECTOR P1,
1202
    FXMVECTOR P2
1203
)
1204
{
1205
    return XMVector4NotEqual(P1, P2);
1206
}
1207
 
1208
//------------------------------------------------------------------------------
1209
 
1210
XMFINLINE BOOL XMPlaneIsNaN
1211
(
1212
    FXMVECTOR P
1213
)
1214
{
1215
    return XMVector4IsNaN(P);
1216
}
1217
 
1218
//------------------------------------------------------------------------------
1219
 
1220
XMFINLINE BOOL XMPlaneIsInfinite
1221
(
1222
    FXMVECTOR P
1223
)
1224
{
1225
    return XMVector4IsInfinite(P);
1226
}
1227
 
1228
//------------------------------------------------------------------------------
1229
// Computation operations
1230
//------------------------------------------------------------------------------
1231
 
1232
//------------------------------------------------------------------------------
1233
 
1234
XMFINLINE XMVECTOR XMPlaneDot
1235
(
1236
    FXMVECTOR P,
1237
    FXMVECTOR V
1238
)
1239
{
1240
#if defined(_XM_NO_INTRINSICS_)
1241
 
1242
    return XMVector4Dot(P, V);
1243
 
1244
#elif defined(_XM_SSE_INTRINSICS_)
1245
    __m128 vTemp2 = V;
1246
    __m128 vTemp = _mm_mul_ps(P,vTemp2);
1247
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1248
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
1249
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
1250
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
1251
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
1252
#else // _XM_VMX128_INTRINSICS_
1253
#endif // _XM_VMX128_INTRINSICS_
1254
}
1255
 
1256
//------------------------------------------------------------------------------
1257
 
1258
XMFINLINE XMVECTOR XMPlaneDotCoord
1259
(
1260
    FXMVECTOR P,
1261
    FXMVECTOR V
1262
)
1263
{
1264
#if defined(_XM_NO_INTRINSICS_)
1265
 
1266
    XMVECTOR V3;
1267
    XMVECTOR Result;
1268
 
1269
    // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
1270
    V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
1271
    Result = XMVector4Dot(P, V3);
1272
 
1273
    return Result;
1274
 
1275
#elif defined(_XM_SSE_INTRINSICS_)
1276
    XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
1277
    vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
1278
    XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
1279
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
1280
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
1281
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
1282
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
1283
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
1284
#else // _XM_VMX128_INTRINSICS_
1285
#endif // _XM_VMX128_INTRINSICS_
1286
}
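//------------------------------------------------------------------------------
// Illustrative usage sketch, not part of the original library: builds a plane
// from a point and a normal, then evaluates the signed distance of a test
// point with XMPlaneDotCoord (the distance is replicated into every component
// of the result). The normal is normalized so the value is a true distance;
// the function name is hypothetical.

XMFINLINE XMVECTOR XMExamplePointPlaneDistance
(
    FXMVECTOR PlanePoint,
    FXMVECTOR PlaneNormal,
    FXMVECTOR TestPoint
)
{
    XMVECTOR P = XMPlaneFromPointNormal(PlanePoint, XMVector3Normalize(PlaneNormal));
    return XMPlaneDotCoord(P, TestPoint);
}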
1287
 
1288
//------------------------------------------------------------------------------
1289
 
1290
XMFINLINE XMVECTOR XMPlaneDotNormal
1291
(
1292
    FXMVECTOR P,
1293
    FXMVECTOR V
1294
)
1295
{
1296
    return XMVector3Dot(P, V);
1297
}
1298
 
1299
//------------------------------------------------------------------------------
1300
// XMPlaneNormalizeEst uses a reciprocal estimate and
1301
// returns QNaN on zero and infinite vectors.
1302
 
1303
XMFINLINE XMVECTOR XMPlaneNormalizeEst
1304
(
1305
    FXMVECTOR P
1306
)
1307
{
1308
#if defined(_XM_NO_INTRINSICS_)
1309
 
1310
    XMVECTOR Result;
1311
    Result = XMVector3ReciprocalLength(P);
1312
    Result = XMVectorMultiply(P, Result);
1313
    return Result;
1314
 
1315
#elif defined(_XM_SSE_INTRINSICS_)
1316
    // Perform the dot product
1317
    XMVECTOR vDot = _mm_mul_ps(P,P);
1318
    // x=Dot.y, y=Dot.z
1319
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
1320
    // Result.x = x+y
1321
    vDot = _mm_add_ss(vDot,vTemp);
1322
    // x=Dot.z
1323
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1324
    // Result.x = (x+y)+z
1325
    vDot = _mm_add_ss(vDot,vTemp);
1326
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal of the length (estimate)
    vDot = _mm_rsqrt_ps(vDot);
    // Multiply by the reciprocal length to normalize the plane
    vDot = _mm_mul_ps(vDot,P);
1332
    return vDot;
1333
#else // _XM_VMX128_INTRINSICS_
1334
#endif // _XM_VMX128_INTRINSICS_
1335
}
1336
 
1337
//------------------------------------------------------------------------------
1338
 
1339
XMFINLINE XMVECTOR XMPlaneNormalize
1340
(
1341
    FXMVECTOR P
1342
)
1343
{
1344
#if defined(_XM_NO_INTRINSICS_)
1345
    FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
1346
    // Prevent divide by zero
1347
    if (fLengthSq) {
1348
        fLengthSq = 1.0f/fLengthSq;
1349
    }
1350
    {
1351
    XMVECTOR vResult = {
1352
        P.vector4_f32[0]*fLengthSq,
1353
        P.vector4_f32[1]*fLengthSq,
1354
        P.vector4_f32[2]*fLengthSq,
1355
        P.vector4_f32[3]*fLengthSq
1356
    };
1357
    return vResult;
1358
    }
1359
#elif defined(_XM_SSE_INTRINSICS_)
1360
    // Perform the dot product on x,y and z only
1361
    XMVECTOR vLengthSq = _mm_mul_ps(P,P);
1362
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
1363
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1364
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
1365
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1366
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
1367
    // Prepare for the division
1368
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1369
    // Failsafe on zero (or epsilon) length planes
1370
    // If the length is infinity, set the elements to zero
1371
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
1372
    // Divide by the length to perform the normalization
1373
    vResult = _mm_div_ps(P,vResult);
1374
    // Any that are infinity, set to zero
1375
    vResult = _mm_and_ps(vResult,vLengthSq);
1376
    return vResult;
1377
#else // _XM_VMX128_INTRINSICS_
1378
#endif // _XM_VMX128_INTRINSICS_
1379
}
1380
 
1381
//------------------------------------------------------------------------------
1382
 
1383
XMFINLINE XMVECTOR XMPlaneIntersectLine
1384
(
1385
    FXMVECTOR P,
1386
    FXMVECTOR LinePoint1,
1387
    FXMVECTOR LinePoint2
1388
)
1389
{
1390
#if defined(_XM_NO_INTRINSICS_)
1391
 
1392
    XMVECTOR V1;
1393
    XMVECTOR V2;
1394
    XMVECTOR D;
1395
    XMVECTOR ReciprocalD;
1396
    XMVECTOR VT;
1397
    XMVECTOR Point;
1398
    XMVECTOR Zero;
1399
    XMVECTOR Control;
1400
    XMVECTOR Result;
1401
 
1402
    V1 = XMVector3Dot(P, LinePoint1);
1403
    V2 = XMVector3Dot(P, LinePoint2);
1404
    D = XMVectorSubtract(V1, V2);
1405
 
1406
    ReciprocalD = XMVectorReciprocal(D);
1407
    VT = XMPlaneDotCoord(P, LinePoint1);
1408
    VT = XMVectorMultiply(VT, ReciprocalD);
1409
 
1410
    Point = XMVectorSubtract(LinePoint2, LinePoint1);
1411
    Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
1412
 
1413
    Zero = XMVectorZero();
1414
    Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
1415
 
1416
    Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
1417
 
1418
    return Result;
1419
 
1420
#elif defined(_XM_SSE_INTRINSICS_)
1421
    XMVECTOR V1;
1422
    XMVECTOR V2;
1423
    XMVECTOR D;
1424
    XMVECTOR VT;
1425
    XMVECTOR Point;
1426
    XMVECTOR Zero;
1427
    XMVECTOR Control;
1428
    XMVECTOR Result;
1429
 
1430
    V1 = XMVector3Dot(P, LinePoint1);
1431
    V2 = XMVector3Dot(P, LinePoint2);
1432
    D = _mm_sub_ps(V1, V2);
1433
 
1434
    VT = XMPlaneDotCoord(P, LinePoint1);
1435
    VT = _mm_div_ps(VT, D);
1436
 
1437
    Point = _mm_sub_ps(LinePoint2, LinePoint1);
1438
    Point = _mm_mul_ps(Point,VT);
1439
    Point = _mm_add_ps(Point,LinePoint1);
1440
    Zero = XMVectorZero();
1441
    Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
1442
    Result = XMVectorSelect(Point, g_XMQNaN, Control);
1443
    return Result;
1444
#else // _XM_VMX128_INTRINSICS_
1445
#endif // _XM_VMX128_INTRINSICS_
1446
}
1447
 
1448
//------------------------------------------------------------------------------
1449
 
1450
XMINLINE VOID XMPlaneIntersectPlane
1451
(
1452
    XMVECTOR* pLinePoint1,
1453
    XMVECTOR* pLinePoint2,
1454
    FXMVECTOR  P1,
1455
    FXMVECTOR  P2
1456
)
1457
{
1458
#if defined(_XM_NO_INTRINSICS_)
1459
 
1460
    XMVECTOR V1;
1461
    XMVECTOR V2;
1462
    XMVECTOR V3;
1463
    XMVECTOR LengthSq;
1464
    XMVECTOR RcpLengthSq;
1465
    XMVECTOR Point;
1466
    XMVECTOR P1W;
1467
    XMVECTOR P2W;
1468
    XMVECTOR Control;
1469
    XMVECTOR LinePoint1;
1470
    XMVECTOR LinePoint2;
1471
 
1472
    XMASSERT(pLinePoint1);
1473
    XMASSERT(pLinePoint2);
1474
 
1475
    V1 = XMVector3Cross(P2, P1);
1476
 
1477
    LengthSq = XMVector3LengthSq(V1);
1478
 
1479
    V2 = XMVector3Cross(P2, V1);
1480
 
1481
    P1W = XMVectorSplatW(P1);
1482
    Point = XMVectorMultiply(V2, P1W);
1483
 
1484
    V3 = XMVector3Cross(V1, P1);
1485
 
1486
    P2W = XMVectorSplatW(P2);
1487
    Point = XMVectorMultiplyAdd(V3, P2W, Point);
1488
 
1489
    RcpLengthSq = XMVectorReciprocal(LengthSq);
1490
    LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
1491
 
1492
    LinePoint2 = XMVectorAdd(LinePoint1, V1);
1493
 
1494
    Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
1495
    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
1496
    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
1497
 
1498
#elif defined(_XM_SSE_INTRINSICS_)
1499
    XMASSERT(pLinePoint1);
1500
    XMASSERT(pLinePoint2);
1501
    XMVECTOR V1;
1502
    XMVECTOR V2;
1503
    XMVECTOR V3;
1504
    XMVECTOR LengthSq;
1505
    XMVECTOR Point;
1506
    XMVECTOR P1W;
1507
    XMVECTOR P2W;
1508
    XMVECTOR Control;
1509
    XMVECTOR LinePoint1;
1510
    XMVECTOR LinePoint2;
1511
 
1512
    V1 = XMVector3Cross(P2, P1);
1513
 
1514
    LengthSq = XMVector3LengthSq(V1);
1515
 
1516
    V2 = XMVector3Cross(P2, V1);
1517
 
1518
    P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
1519
    Point = _mm_mul_ps(V2, P1W);
1520
 
1521
    V3 = XMVector3Cross(V1, P1);
1522
 
1523
    P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
1524
    V3 = _mm_mul_ps(V3,P2W);
1525
    Point = _mm_add_ps(Point,V3);
1526
    LinePoint1 = _mm_div_ps(Point,LengthSq);
1527
 
1528
    LinePoint2 = _mm_add_ps(LinePoint1, V1);
1529
 
1530
    Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
1531
    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
1532
    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
1533
#else // _XM_VMX128_INTRINSICS_
1534
#endif // _XM_VMX128_INTRINSICS_
1535
}
1536
 
1537
//------------------------------------------------------------------------------
1538
 
1539
XMFINLINE XMVECTOR XMPlaneTransform
1540
(
1541
    FXMVECTOR P,
1542
    CXMMATRIX M
1543
)
1544
{
1545
#if defined(_XM_NO_INTRINSICS_)
1546
 
1547
    XMVECTOR X;
1548
    XMVECTOR Y;
1549
    XMVECTOR Z;
1550
    XMVECTOR W;
1551
    XMVECTOR Result;
1552
 
1553
    W = XMVectorSplatW(P);
1554
    Z = XMVectorSplatZ(P);
1555
    Y = XMVectorSplatY(P);
1556
    X = XMVectorSplatX(P);
1557
 
1558
    Result = XMVectorMultiply(W, M.r[3]);
1559
    Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
1560
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
1561
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
1562
 
1563
    return Result;
1564
 
1565
#elif defined(_XM_SSE_INTRINSICS_)
1566
    XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
1567
    XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
1568
    XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
1569
    XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
1570
    X = _mm_mul_ps(X, M.r[0]);
1571
    Y = _mm_mul_ps(Y, M.r[1]);
1572
    Z = _mm_mul_ps(Z, M.r[2]);
1573
    W = _mm_mul_ps(W, M.r[3]);
1574
    X = _mm_add_ps(X,Z);
1575
    Y = _mm_add_ps(Y,W);
1576
    X = _mm_add_ps(X,Y);
1577
    return X;
1578
#else // _XM_VMX128_INTRINSICS_
1579
#endif // _XM_VMX128_INTRINSICS_
1580
}
1581
 
1582
//------------------------------------------------------------------------------
1583
 
1584
XMFINLINE XMFLOAT4* XMPlaneTransformStream
1585
(
1586
    XMFLOAT4*       pOutputStream,
1587
    UINT            OutputStride,
1588
    CONST XMFLOAT4* pInputStream,
1589
    UINT            InputStride,
1590
    UINT            PlaneCount,
1591
    CXMMATRIX     M
1592
)
1593
{
1594
    return XMVector4TransformStream(pOutputStream,
1595
                                    OutputStride,
1596
                                    pInputStream,
1597
                                    InputStride,
1598
                                    PlaneCount,
1599
                                    M);
1600
}
1601
 
1602
//------------------------------------------------------------------------------
1603
// Conversion operations
1604
//------------------------------------------------------------------------------
1605
 
1606
//------------------------------------------------------------------------------
1607
 
1608
XMFINLINE XMVECTOR XMPlaneFromPointNormal
1609
(
1610
    FXMVECTOR Point,
1611
    FXMVECTOR Normal
1612
)
1613
{
1614
#if defined(_XM_NO_INTRINSICS_)
1615
 
1616
    XMVECTOR W;
1617
    XMVECTOR Result;
1618
 
1619
    W = XMVector3Dot(Point, Normal);
1620
    W = XMVectorNegate(W);
1621
    Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
1622
 
1623
    return Result;
1624
 
1625
#elif defined(_XM_SSE_INTRINSICS_)
1626
    XMVECTOR W;
1627
    XMVECTOR Result;
1628
    W = XMVector3Dot(Point,Normal);
1629
    W = _mm_mul_ps(W,g_XMNegativeOne);
1630
    Result = _mm_and_ps(Normal,g_XMMask3);
1631
    W = _mm_and_ps(W,g_XMMaskW);
1632
    Result = _mm_or_ps(Result,W);
1633
    return Result;
1634
#else // _XM_VMX128_INTRINSICS_
1635
#endif // _XM_VMX128_INTRINSICS_
1636
}
1637
 
1638
//------------------------------------------------------------------------------
1639
 
1640
XMFINLINE XMVECTOR XMPlaneFromPoints
1641
(
1642
    FXMVECTOR Point1,
1643
    FXMVECTOR Point2,
1644
    FXMVECTOR Point3
1645
)
1646
{
1647
#if defined(_XM_NO_INTRINSICS_)
1648
 
1649
    XMVECTOR N;
1650
    XMVECTOR D;
1651
    XMVECTOR V21;
1652
    XMVECTOR V31;
1653
    XMVECTOR Result;
1654
 
1655
    V21 = XMVectorSubtract(Point1, Point2);
1656
    V31 = XMVectorSubtract(Point1, Point3);
1657
 
1658
    N = XMVector3Cross(V21, V31);
1659
    N = XMVector3Normalize(N);
1660
 
1661
    D = XMPlaneDotNormal(N, Point1);
1662
    D = XMVectorNegate(D);
1663
 
1664
    Result = XMVectorSelect(D, N, g_XMSelect1110.v);
1665
 
1666
    return Result;
1667
 
1668
#elif defined(_XM_SSE_INTRINSICS_)
1669
    XMVECTOR N;
1670
    XMVECTOR D;
1671
    XMVECTOR V21;
1672
    XMVECTOR V31;
1673
    XMVECTOR Result;
1674
 
1675
    V21 = _mm_sub_ps(Point1, Point2);
1676
    V31 = _mm_sub_ps(Point1, Point3);
1677
 
1678
    N = XMVector3Cross(V21, V31);
1679
    N = XMVector3Normalize(N);
1680
 
1681
    D = XMPlaneDotNormal(N, Point1);
1682
    D = _mm_mul_ps(D,g_XMNegativeOne);
1683
    N = _mm_and_ps(N,g_XMMask3);
1684
    D = _mm_and_ps(D,g_XMMaskW);
1685
    Result = _mm_or_ps(D,N);
1686
    return Result;
1687
#else // _XM_VMX128_INTRINSICS_
1688
#endif // _XM_VMX128_INTRINSICS_
1689
}
1690
 
1691
/****************************************************************************
1692
 *
1693
 * Color
1694
 *
1695
 ****************************************************************************/
1696
 
1697
//------------------------------------------------------------------------------
1698
// Comparison operations
1699
//------------------------------------------------------------------------------
1700
 
1701
//------------------------------------------------------------------------------
1702
 
1703
XMFINLINE BOOL XMColorEqual
1704
(
1705
    FXMVECTOR C1,
1706
    FXMVECTOR C2
1707
)
1708
{
1709
    return XMVector4Equal(C1, C2);
1710
}
1711
 
1712
//------------------------------------------------------------------------------
1713
 
1714
XMFINLINE BOOL XMColorNotEqual
1715
(
1716
    FXMVECTOR C1,
1717
    FXMVECTOR C2
1718
)
1719
{
1720
    return XMVector4NotEqual(C1, C2);
1721
}
1722
 
1723
//------------------------------------------------------------------------------
1724
 
1725
XMFINLINE BOOL XMColorGreater
1726
(
1727
    FXMVECTOR C1,
1728
    FXMVECTOR C2
1729
)
1730
{
1731
    return XMVector4Greater(C1, C2);
1732
}
1733
 
1734
//------------------------------------------------------------------------------
1735
 
1736
XMFINLINE BOOL XMColorGreaterOrEqual
1737
(
1738
    FXMVECTOR C1,
1739
    FXMVECTOR C2
1740
)
1741
{
1742
    return XMVector4GreaterOrEqual(C1, C2);
1743
}
1744
 
1745
//------------------------------------------------------------------------------
1746
 
1747
XMFINLINE BOOL XMColorLess
1748
(
1749
    FXMVECTOR C1,
1750
    FXMVECTOR C2
1751
)
1752
{
1753
    return XMVector4Less(C1, C2);
1754
}
1755
 
1756
//------------------------------------------------------------------------------
1757
 
1758
XMFINLINE BOOL XMColorLessOrEqual
1759
(
1760
    FXMVECTOR C1,
1761
    FXMVECTOR C2
1762
)
1763
{
1764
    return XMVector4LessOrEqual(C1, C2);
1765
}
1766
 
1767
//------------------------------------------------------------------------------
1768
 
1769
XMFINLINE BOOL XMColorIsNaN
1770
(
1771
    FXMVECTOR C
1772
)
1773
{
1774
    return XMVector4IsNaN(C);
1775
}
1776
 
1777
//------------------------------------------------------------------------------
1778
 
1779
XMFINLINE BOOL XMColorIsInfinite
1780
(
1781
    FXMVECTOR C
1782
)
1783
{
1784
    return XMVector4IsInfinite(C);
1785
}
1786
 
1787
//------------------------------------------------------------------------------
1788
// Computation operations
1789
//------------------------------------------------------------------------------
1790
 
1791
//------------------------------------------------------------------------------
1792
 
1793
XMFINLINE XMVECTOR XMColorNegative
1794
(
1795
    FXMVECTOR vColor
1796
)
1797
{
1798
#if defined(_XM_NO_INTRINSICS_)
1799
//    XMASSERT(XMVector4GreaterOrEqual(vColor, XMVectorReplicate(0.0f)));
1800
//    XMASSERT(XMVector4LessOrEqual(vColor, XMVectorReplicate(1.0f)));
1801
    XMVECTOR vResult = {
1802
        1.0f - vColor.vector4_f32[0],
1803
        1.0f - vColor.vector4_f32[1],
1804
        1.0f - vColor.vector4_f32[2],
1805
        vColor.vector4_f32[3]
1806
    };
1807
    return vResult;
1808
 
1809
#elif defined(_XM_SSE_INTRINSICS_)
1810
    // Negate only x,y and z.
1811
    XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
1812
    // Add 1,1,1,0 to -x,-y,-z,w
1813
    return _mm_add_ps(vTemp,g_XMOne3);
1814
#else // _XM_VMX128_INTRINSICS_
1815
#endif // _XM_VMX128_INTRINSICS_
1816
}
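// Usage sketch (illustrative comment, not in the original header): returns the complement
// of the RGB channels while leaving alpha untouched.
//
//     XMVECTOR Orange     = XMVectorSet(1.0f, 0.5f, 0.0f, 0.75f);
//     XMVECTOR Complement = XMColorNegative(Orange);   // = (0.0f, 0.5f, 1.0f, 0.75f)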
1817
 
1818
//------------------------------------------------------------------------------
1819
 
1820
XMFINLINE XMVECTOR XMColorModulate
1821
(
1822
    FXMVECTOR C1,
1823
    FXMVECTOR C2
1824
)
1825
{
1826
    return XMVectorMultiply(C1, C2);
1827
}
1828
 
1829
//------------------------------------------------------------------------------
1830
 
1831
XMFINLINE XMVECTOR XMColorAdjustSaturation
1832
(
1833
    FXMVECTOR vColor,
1834
    FLOAT    fSaturation
1835
)
1836
{
1837
#if defined(_XM_NO_INTRINSICS_)
1838
    CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1839
 
1840
    // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
1841
    // Result = (C - Luminance) * Saturation + Luminance;
1842
 
1843
    FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
1844
    XMVECTOR vResult = {
1845
        ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
1846
        ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
1847
        ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
1848
        vColor.vector4_f32[3]};
1849
    return vResult;
1850
 
1851
#elif defined(_XM_SSE_INTRINSICS_)
1852
    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1853
// Mul RGB by intensity constants
1854
    XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);      
1855
// vResult.x = vLuminance.y, vResult.y = vLuminance.y,
1856
// vResult.z = vLuminance.z, vResult.w = vLuminance.z 
1857
    XMVECTOR vResult = vLuminance;
1858
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1)); 
1859
// vLuminance.x += vLuminance.y
1860
    vLuminance = _mm_add_ss(vLuminance,vResult);
1861
// Splat vLuminance.z
1862
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
1863
// vLuminance.x += vLuminance.z (Dot product)
1864
    vLuminance = _mm_add_ss(vLuminance,vResult);
1865
// Splat vLuminance
1866
    vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
1867
// Splat fSaturation
1868
    XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
1869
// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
1870
    vResult = _mm_sub_ps(vColor,vLuminance);
1871
    vResult = _mm_mul_ps(vResult,vSaturation);
1872
    vResult = _mm_add_ps(vResult,vLuminance);
1873
// Retain w from the source color
1874
    vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1875
    vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1876
    return vResult;
1877
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1878
#endif // _XM_VMX128_INTRINSICS_
1879
}
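// Usage sketch (illustrative comment, not in the original header): fSaturation = 0.0f
// collapses the RGB channels onto the weighted luminance computed above, 1.0f leaves the
// color unchanged, and values above 1.0f oversaturate; alpha is always preserved.
//
//     XMVECTOR Red  = XMVectorSet(1.0f, 0.0f, 0.0f, 1.0f);
//     XMVECTOR Gray = XMColorAdjustSaturation(Red, 0.0f);  // = (0.2125f, 0.2125f, 0.2125f, 1.0f)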
1880
 
1881
//------------------------------------------------------------------------------
1882
 
1883
XMFINLINE XMVECTOR XMColorAdjustContrast
1884
(
1885
    FXMVECTOR vColor,
1886
    FLOAT    fContrast
1887
)
1888
{
1889
#if defined(_XM_NO_INTRINSICS_)
1890
    // Result = (vColor - 0.5f) * fContrast + 0.5f;
1891
    XMVECTOR vResult = {
1892
        ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
1893
        ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
1894
        ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
1895
        vColor.vector4_f32[3]        // Leave W untouched
1896
    };
1897
    return vResult;
1898
 
1899
#elif defined(_XM_SSE_INTRINSICS_)
1900
    XMVECTOR vScale = _mm_set_ps1(fContrast);           // Splat the scale
1901
    XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf);  // Subtract 0.5f from the source (Saving source)
1902
    vResult = _mm_mul_ps(vResult,vScale);               // Mul by scale
1903
    vResult = _mm_add_ps(vResult,g_XMOneHalf);          // Add 0.5f
1904
// Retain w from the source color
1905
    vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1906
    vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1907
    return vResult;
1908
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1909
#endif // _XM_VMX128_INTRINSICS_
1910
}
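// Usage sketch (illustrative comment, not in the original header): fContrast = 0.0f pulls
// the RGB channels to middle gray, 1.0f leaves the color unchanged, and larger values push
// them away from 0.5f. The result is not clamped, so callers may want XMVectorSaturate.
//
//     XMVECTOR Color    = XMVectorSet(0.9f, 0.1f, 0.5f, 1.0f);
//     XMVECTOR Punchier = XMColorAdjustContrast(Color, 2.0f);  // = (1.3f, -0.3f, 0.5f, 1.0f)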
1911
 
1912
/****************************************************************************
1913
 *
1914
 * Miscellaneous
1915
 *
1916
 ****************************************************************************/
1917
 
1918
//------------------------------------------------------------------------------
1919
 
1920
XMINLINE BOOL XMVerifyCPUSupport()
1921
{
1922
#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
1923
    return TRUE;
1924
#else // _XM_SSE_INTRINSICS_
1925
    // Note: on Windows 2000 and older, IsProcessorFeaturePresent cannot report SSE2, so this check always fails there.
1926
    // Detecting SSE2 on those versions of Windows would require calling cpuid directly.
1927
    return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
1928
#endif
1929
}
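// Usage sketch (illustrative comment, not in the original header): SSE builds of the
// library assume SSE/SSE2 support at runtime, so a typical call site checks once at startup.
//
//     if (!XMVerifyCPUSupport())
//     {
//         // hypothetical handling: warn the user and exit, or fall back to a non-SSE build
//     }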
1930
 
1931
 
1932
//------------------------------------------------------------------------------
1933
 
1934
#define XMASSERT_LINE_STRING_SIZE 16
1935
 
1936
XMINLINE VOID XMAssert
1937
(
1938
    CONST CHAR* pExpression,
1939
    CONST CHAR* pFileName,
1940
    UINT        LineNumber
1941
)
1942
{
1943
    CHAR        aLineString[XMASSERT_LINE_STRING_SIZE];
1944
    CHAR*       pLineString;
1945
    UINT        Line;
1946
 
1947
    aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
1948
    aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
1949
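    // Convert LineNumber to decimal text, writing its digits backwards toward the front of aLineString.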
    for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
1950
         Line != 0 && pLineString >= aLineString;
1951
         Line /= 10, pLineString--)
1952
    {
1953
        *pLineString = (CHAR)('0' + (Line % 10));
1954
    }
1955
 
1956
#ifndef NO_OUTPUT_DEBUG_STRING
1957
    OutputDebugStringA("Assertion failed: ");
1958
    OutputDebugStringA(pExpression);
1959
    OutputDebugStringA(", file ");
1960
    OutputDebugStringA(pFileName);
1961
    OutputDebugStringA(", line ");
1962
    OutputDebugStringA(pLineString + 1);
1963
    OutputDebugStringA("\r\n");
1964
#else
1965
    DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
1966
#endif
1967
 
1968
    __debugbreak();
1969
}
1970
 
1971
//------------------------------------------------------------------------------
1972
 
1973
XMFINLINE XMVECTOR XMFresnelTerm
1974
(
1975
    FXMVECTOR CosIncidentAngle,
1976
    FXMVECTOR RefractionIndex
1977
)
1978
{
1979
#if defined(_XM_NO_INTRINSICS_)
1980
 
1981
    XMVECTOR G;
1982
    XMVECTOR D, S;
1983
    XMVECTOR V0, V1, V2, V3;
1984
    XMVECTOR Result;
1985
 
1986
    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
1987
    // c = CosIncidentAngle
1988
    // g = sqrt(c^2 + RefractionIndex^2 - 1)
1989
 
1990
    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
1991
 
1992
    G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
1993
    G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
1994
    G = XMVectorAbs(G);
1995
    G = XMVectorSqrt(G);
1996
 
1997
    S = XMVectorAdd(G, CosIncidentAngle);
1998
    D = XMVectorSubtract(G, CosIncidentAngle);
1999
 
2000
    V0 = XMVectorMultiply(D, D);
2001
    V1 = XMVectorMultiply(S, S);
2002
    V1 = XMVectorReciprocal(V1);
2003
    V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
2004
    V0 = XMVectorMultiply(V0, V1);
2005
 
2006
    V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
2007
    V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
2008
    V2 = XMVectorMultiply(V2, V2);
2009
    V3 = XMVectorMultiply(V3, V3);
2010
    V3 = XMVectorReciprocal(V3);
2011
    V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
2012
 
2013
    Result = XMVectorMultiply(V0, V2);
2014
 
2015
    Result = XMVectorSaturate(Result);
2016
 
2017
    return Result;
2018
 
2019
#elif defined(_XM_SSE_INTRINSICS_)
2020
    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
2021
    // c = CosIncidentAngle
2022
    // g = sqrt(c^2 + RefractionIndex^2 - 1)
2023
 
2024
    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
2025
 
2026
    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
2027
    XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
2028
    XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
2029
    G = _mm_sub_ps(G,g_XMOne);
2030
    vTemp = _mm_add_ps(vTemp,G);
2031
    // max((0-vTemp),vTemp) == abs(vTemp)
2032
    // The abs is needed to deal with refraction and cosine being zero
2033
    G = _mm_setzero_ps();
2034
    G = _mm_sub_ps(G,vTemp);
2035
    G = _mm_max_ps(G,vTemp);
2036
    // Last operation, the sqrt()
2037
    G = _mm_sqrt_ps(G);
2038
 
2039
    // Calc G-C and G+C
2040
    XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
2041
    XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
2042
    // Perform the term (0.5f *(g - c)^2) / (g + c)^2 
2043
    XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
2044
    vTemp = _mm_mul_ps(GAddC,GAddC);
2045
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
2046
    vResult = _mm_div_ps(vResult,vTemp);
2047
    // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
2048
    GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
2049
    GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
2050
    GAddC = _mm_sub_ps(GAddC,g_XMOne);
2051
    GSubC = _mm_add_ps(GSubC,g_XMOne);
2052
    GAddC = _mm_mul_ps(GAddC,GAddC);
2053
    GSubC = _mm_mul_ps(GSubC,GSubC);
2054
    GAddC = _mm_div_ps(GAddC,GSubC);
2055
    GAddC = _mm_add_ps(GAddC,g_XMOne);
2056
    // Multiply the two term parts
2057
    vResult = _mm_mul_ps(vResult,GAddC);
2058
    // Clamp to 0.0 - 1.0f
2059
    vResult = _mm_max_ps(vResult,g_XMZero);
2060
    vResult = _mm_min_ps(vResult,g_XMOne);
2061
    return vResult;
2062
#else // _XM_VMX128_INTRINSICS_
2063
#endif // _XM_VMX128_INTRINSICS_
2064
}
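// Usage sketch (illustrative comment, not in the original header): the computation is per
// component, so scalar inputs are typically splatted with XMVectorReplicate. At normal
// incidence (cosine = 1) with a refraction index of 1.5, the formula above reduces to
// ((n - 1) / (n + 1))^2, about 0.04.
//
//     XMVECTOR CosAngle = XMVectorReplicate(1.0f);
//     XMVECTOR Index    = XMVectorReplicate(1.5f);
//     XMVECTOR F        = XMFresnelTerm(CosAngle, Index);  // each component ~= 0.04f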
2065
 
2066
//------------------------------------------------------------------------------
2067
 
2068
XMFINLINE BOOL XMScalarNearEqual
2069
(
2070
    FLOAT S1,
2071
    FLOAT S2,
2072
    FLOAT Epsilon
2073
)
2074
{
2075
    FLOAT Delta = S1 - S2;
2076
#if defined(_XM_NO_INTRINSICS_)
2077
    UINT  AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF;
2078
    return (*(FLOAT*)&AbsDelta <= Epsilon);
2079
#elif defined(_XM_SSE_INTRINSICS_)
2080
    return (fabsf(Delta) <= Epsilon);
2081
#else
2082
    return (__fabs(Delta) <= Epsilon);
2083
#endif
2084
}
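// Usage sketch (illustrative comment, not in the original header): compares the absolute
// difference of two scalars against a caller-chosen tolerance.
//
//     BOOL Close = XMScalarNearEqual(XM_PI, 3.14159f, 0.001f);   // TRUE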
2085
 
2086
//------------------------------------------------------------------------------
2087
// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
2088
XMFINLINE FLOAT XMScalarModAngle
2089
(
2090
    FLOAT Angle
2091
)
2092
{
2093
    // Note: The modulo is performed with unsigned math only to work
2094
    // around a precision error on numbers that are close to PI
2095
    float fTemp;
2096
#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
2097
    // Normalize the range from 0.0f to XM_2PI
2098
    Angle = Angle + XM_PI;
2099
    // Perform the modulo, unsigned
2100
    fTemp = fabsf(Angle);
2101
    fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
2102
    // Restore the number to the range of -XM_PI to XM_PI-epsilon
2103
    fTemp = fTemp - XM_PI;
2104
    // If the modulo'd value was negative, restore negation
2105
    if (Angle<0.0f) {
2106
        fTemp = -fTemp;
2107
    }
2108
    return fTemp;
2109
#else
2110
#endif
2111
}
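// Usage sketch (illustrative comment, not in the original header): wraps an arbitrary angle
// into [-XM_PI, XM_PI), which is the input range the polynomial sin/cos routines below expect.
//
//     FLOAT Wrapped = XMScalarModAngle(XM_PI + 0.5f);   // ~= -(XM_PI - 0.5f)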
2112
 
2113
//------------------------------------------------------------------------------
2114
 
2115
XMINLINE FLOAT XMScalarSin
2116
(
2117
    FLOAT Value
2118
)
2119
{
2120
#if defined(_XM_NO_INTRINSICS_)
2121
 
2122
    FLOAT                  ValueMod;
2123
    FLOAT                  ValueSq;
2124
    XMVECTOR               V0123, V0246, V1357, V9111315, V17192123;
2125
    XMVECTOR               V1, V7, V8;
2126
    XMVECTOR               R0, R1, R2;
2127
 
2128
    ValueMod = XMScalarModAngle(Value);
2129
 
2130
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2131
    //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2132
 
2133
    ValueSq = ValueMod * ValueMod;
2134
 
2135
    V0123     = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2136
    V1        = XMVectorSplatY(V0123);
2137
    V0246     = XMVectorMultiply(V0123, V0123);
2138
    V1357     = XMVectorMultiply(V0246, V1);
2139
    V7        = XMVectorSplatW(V1357);
2140
    V8        = XMVectorMultiply(V7, V1);
2141
    V9111315  = XMVectorMultiply(V1357, V8);
2142
    V17192123 = XMVectorMultiply(V9111315, V8);
2143
 
2144
    R0        = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2145
    R1        = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2146
    R2        = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2147
 
2148
    return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2149
 
2150
#elif defined(_XM_SSE_INTRINSICS_)
2151
    return sinf( Value );
2152
#else // _XM_VMX128_INTRINSICS_
2153
#endif // _XM_VMX128_INTRINSICS_
2154
}
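// Usage sketch (illustrative comment, not in the original header): the no-intrinsics path
// evaluates the degree-23 series above after wrapping the angle, while the SSE path simply
// defers to the CRT.
//
//     FLOAT s = XMScalarSin(XM_PIDIV2);   // ~= 1.0f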
2155
 
2156
//------------------------------------------------------------------------------
2157
 
2158
XMINLINE FLOAT XMScalarCos
2159
(
2160
    FLOAT Value
2161
)
2162
{
2163
#if defined(_XM_NO_INTRINSICS_)
2164
 
2165
    FLOAT                  ValueMod;
2166
    FLOAT                  ValueSq;
2167
    XMVECTOR               V0123, V0246, V8101214, V16182022;
2168
    XMVECTOR               V2, V6, V8;
2169
    XMVECTOR               R0, R1, R2;
2170
 
2171
    ValueMod = XMScalarModAngle(Value);
2172
 
2173
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2174
    //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2175
 
2176
    ValueSq = ValueMod * ValueMod;
2177
 
2178
    V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2179
    V0246 = XMVectorMultiply(V0123, V0123);
2180
 
2181
    V2 = XMVectorSplatZ(V0123);
2182
    V6 = XMVectorSplatW(V0246);
2183
    V8 = XMVectorMultiply(V6, V2);
2184
 
2185
    V8101214 = XMVectorMultiply(V0246, V8);
2186
    V16182022 = XMVectorMultiply(V8101214, V8);
2187
 
2188
    R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2189
    R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2190
    R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2191
 
2192
    return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
2193
 
2194
#elif defined(_XM_SSE_INTRINSICS_)
2195
    return cosf(Value);
2196
#else // _XM_VMX128_INTRINSICS_
2197
#endif // _XM_VMX128_INTRINSICS_
2198
}
2199
 
2200
//------------------------------------------------------------------------------
2201
 
2202
XMINLINE VOID XMScalarSinCos
2203
(
2204
    FLOAT* pSin,
2205
    FLOAT* pCos,
2206
    FLOAT  Value
2207
)
2208
{
2209
#if defined(_XM_NO_INTRINSICS_)
2210
 
2211
    FLOAT                  ValueMod;
2212
    FLOAT                  ValueSq;
2213
    XMVECTOR               V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
2214
    XMVECTOR               V1, V2, V6, V8;
2215
    XMVECTOR               S0, S1, S2, C0, C1, C2;
2216
 
2217
    XMASSERT(pSin);
2218
    XMASSERT(pCos);
2219
 
2220
    ValueMod = XMScalarModAngle(Value);
2221
 
2222
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
2223
    //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
2224
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
2225
    //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
2226
 
2227
    ValueSq = ValueMod * ValueMod;
2228
 
2229
    V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
2230
 
2231
    V1 = XMVectorSplatY(V0123);
2232
    V2 = XMVectorSplatZ(V0123);
2233
 
2234
    V0246 = XMVectorMultiply(V0123, V0123);
2235
    V1357 = XMVectorMultiply(V0246, V1);
2236
 
2237
    V6 = XMVectorSplatW(V0246);
2238
    V8 = XMVectorMultiply(V6, V2);
2239
 
2240
    V8101214 = XMVectorMultiply(V0246, V8);
2241
    V9111315 = XMVectorMultiply(V1357, V8);
2242
    V16182022 = XMVectorMultiply(V8101214, V8);
2243
    V17192123 = XMVectorMultiply(V9111315, V8);
2244
 
2245
    C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
2246
    S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
2247
    C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
2248
    S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
2249
    C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
2250
    S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
2251
 
2252
    *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
2253
    *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
2254
 
2255
#elif defined(_XM_SSE_INTRINSICS_)
2256
    XMASSERT(pSin);
2257
    XMASSERT(pCos);
2258
 
2259
    *pSin = sinf(Value);
2260
    *pCos = cosf(Value);
2261
#else // _XM_VMX128_INTRINSICS_
2262
#endif // _XM_VMX128_INTRINSICS_
2263
}
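// Usage sketch (illustrative comment, not in the original header): computes both values in
// one call, sharing the angle wrap and the power-series terms on the no-intrinsics path.
//
//     FLOAT s, c;
//     XMScalarSinCos(&s, &c, XM_PIDIV4);  // s ~= 0.7071f, c ~= 0.7071f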
2264
 
2265
//------------------------------------------------------------------------------
2266
 
2267
XMINLINE FLOAT XMScalarASin
2268
(
2269
    FLOAT Value
2270
)
2271
{
2272
#if defined(_XM_NO_INTRINSICS_)
2273
 
2274
    FLOAT AbsValue, Value2, Value3, D;
2275
    XMVECTOR AbsV, R0, R1, Result;
2276
    XMVECTOR V3;
2277
 
2278
    *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF;
2279
 
2280
    Value2 = Value * AbsValue;
2281
    Value3 = Value * Value2;
2282
    D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
2283
 
2284
    AbsV = XMVectorReplicate(AbsValue);
2285
 
2286
    V3.vector4_f32[0] = Value3;
2287
    V3.vector4_f32[1] = 1.0f;
2288
    V3.vector4_f32[2] = Value3;
2289
    V3.vector4_f32[3] = 1.0f;
2290
 
2291
    R1 = XMVectorSet(D, D, Value, Value);
2292
    R1 = XMVectorMultiply(R1, V3);
2293
 
2294
    R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
2295
    R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
2296
 
2297
    Result = XMVector4Dot(R0, R1);
2298
 
2299
    return Result.vector4_f32[0];
2300
 
2301
#elif defined(_XM_SSE_INTRINSICS_)
2302
    return asinf(Value);
2303
#else // _XM_VMX128_INTRINSICS_
2304
#endif // _XM_VMX128_INTRINSICS_
2305
}
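// Usage sketch (illustrative comment, not in the original header): expects Value in [-1, 1]
// and returns the principal arcsine; the SSE path simply calls asinf.
//
//     FLOAT a = XMScalarASin(0.5f);       // ~= XM_PI / 6 (about 0.5236f)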
2306
 
2307
//------------------------------------------------------------------------------
2308
 
2309
XMINLINE FLOAT XMScalarACos
2310
(
2311
    FLOAT Value
2312
)
2313
{
2314
#if defined(_XM_NO_INTRINSICS_)
2315
 
2316
    return XM_PIDIV2 - XMScalarASin(Value);
2317
 
2318
#elif defined(_XM_SSE_INTRINSICS_)
2319
    return acosf(Value);
2320
#else // _XM_VMX128_INTRINSICS_
2321
#endif // _XM_VMX128_INTRINSICS_
2322
}
2323
 
2324
//------------------------------------------------------------------------------
2325
 
2326
XMFINLINE FLOAT XMScalarSinEst
2327
(
2328
    FLOAT Value
2329
)
2330
{
2331
#if defined(_XM_NO_INTRINSICS_)
2332
 
2333
    FLOAT                  ValueSq;
2334
    XMVECTOR               V;
2335
    XMVECTOR               Y;
2336
    XMVECTOR               Result;
2337
 
2338
    XMASSERT(Value >= -XM_PI);
2339
    XMASSERT(Value < XM_PI);
2340
 
2341
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2342
 
2343
    ValueSq = Value * Value;
2344
 
2345
    V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2346
    Y = XMVectorSplatY(V);
2347
    V = XMVectorMultiply(V, V);
2348
    V = XMVectorMultiply(V, Y);
2349
 
2350
    Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
2351
 
2352
    return Result.vector4_f32[0];
2353
 
2354
#elif defined(_XM_SSE_INTRINSICS_)
2355
    XMASSERT(Value >= -XM_PI);
2356
    XMASSERT(Value < XM_PI);
2357
    float ValueSq = Value*Value;
2358
    XMVECTOR vValue = _mm_set_ps1(Value);
2359
    XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); 
2360
    vTemp = _mm_mul_ps(vTemp,vTemp);
2361
    vTemp = _mm_mul_ps(vTemp,vValue);
2362
    // vTemp = Value,Value^3,Value^5,Value^7
2363
    vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
2364
    vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2365
    vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
2366
    vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
2367
    vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
2368
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
2369
#if defined(_MSC_VER) && (_MSC_VER>=1500)
2370
    return _mm_cvtss_f32(vTemp);    
2371
#else
2372
    return vTemp.m128_f32[0];
2373
#endif
2374
#else // _XM_VMX128_INTRINSICS_
2375
#endif // _XM_VMX128_INTRINSICS_
2376
}
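// Usage sketch (illustrative comment, not in the original header): the Est variants trade
// accuracy for speed with a short polynomial and, unlike XMScalarSin, assert that the angle
// is already in [-XM_PI, XM_PI), so wrap it first if necessary. SomeAngle is a hypothetical
// caller value.
//
//     FLOAT a = XMScalarModAngle(SomeAngle);
//     FLOAT s = XMScalarSinEst(a);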
2377
 
2378
//------------------------------------------------------------------------------
2379
 
2380
XMFINLINE FLOAT XMScalarCosEst
2381
(
2382
    FLOAT Value
2383
)
2384
{
2385
#if defined(_XM_NO_INTRINSICS_)
2386
    FLOAT    ValueSq;
2387
    XMVECTOR V;
2388
    XMVECTOR Result;
2389
    XMASSERT(Value >= -XM_PI);
2390
    XMASSERT(Value < XM_PI);
2391
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2392
    ValueSq = Value * Value;
2393
    V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
2394
    V = XMVectorMultiply(V, V);
2395
    Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
2396
    return Result.vector4_f32[0];
2397
#elif defined(_XM_SSE_INTRINSICS_)
2398
    XMASSERT(Value >= -XM_PI);
2399
    XMASSERT(Value < XM_PI);
2400
    float ValueSq = Value*Value;
2401
    XMVECTOR vValue = _mm_setzero_ps();
2402
    XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); 
2403
    vTemp = _mm_mul_ps(vTemp,vTemp);
2404
    // vTemp = 1.0f,Value^2,Value^4,Value^6
2405
    vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
2406
    vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
2407
    vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
2408
    vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
2409
    vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
2410
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
2411
#if defined(_MSC_VER) && (_MSC_VER>=1500)
2412
    return _mm_cvtss_f32(vTemp);    
2413
#else
2414
    return vTemp.m128_f32[0];
2415
#endif
2416
#else // _XM_VMX128_INTRINSICS_
2417
#endif // _XM_VMX128_INTRINSICS_
2418
}
2419
 
2420
//------------------------------------------------------------------------------
2421
 
2422
XMFINLINE VOID XMScalarSinCosEst
2423
(
2424
    FLOAT* pSin,
2425
    FLOAT* pCos,
2426
    FLOAT  Value
2427
)
2428
{
2429
#if defined(_XM_NO_INTRINSICS_)
2430
 
2431
    FLOAT    ValueSq;
2432
    XMVECTOR V, Sin, Cos;
2433
    XMVECTOR Y;
2434
 
2435
    XMASSERT(pSin);
2436
    XMASSERT(pCos);
2437
    XMASSERT(Value >= -XM_PI);
2438
    XMASSERT(Value < XM_PI);
2439
 
2440
    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
2441
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
2442
 
2443
    ValueSq = Value * Value;
2444
    V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
2445
    Y = XMVectorSplatY(V);
2446
    Cos = XMVectorMultiply(V, V);
2447
    Sin = XMVectorMultiply(Cos, Y);
2448
 
2449
    Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
2450
    Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
2451
 
2452
    *pCos = Cos.vector4_f32[0];
2453
    *pSin = Sin.vector4_f32[0];
2454
 
2455
#elif defined(_XM_SSE_INTRINSICS_)
2456
    XMASSERT(pSin);
2457
    XMASSERT(pCos);
2458
    XMASSERT(Value >= -XM_PI);
2459
    XMASSERT(Value < XM_PI);
2460
    float ValueSq = Value * Value;
2461
    XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
2462
    XMVECTOR Sin = _mm_set_ps1(Value);
2463
    Cos = _mm_mul_ps(Cos,Cos);
2464
    Sin = _mm_mul_ps(Sin,Cos);
2465
    // Cos = 1.0f,Value^2,Value^4,Value^6
2466
    Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
2467
    _mm_store_ss(pCos,Cos);
2468
    // Sin = Value,Value^3,Value^5,Value^7
2469
    Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
2470
    _mm_store_ss(pSin,Sin);
2471
#else // _XM_VMX128_INTRINSICS_
2472
#endif // _XM_VMX128_INTRINSICS_
2473
}
2474
 
2475
//------------------------------------------------------------------------------
2476
 
2477
XMFINLINE FLOAT XMScalarASinEst
2478
(
2479
    FLOAT Value
2480
)
2481
{
2482
#if defined(_XM_NO_INTRINSICS_)
2483
 
2484
    XMVECTOR VR, CR, CS;
2485
    XMVECTOR Result;
2486
    FLOAT AbsV, V2, D;
2487
    CONST FLOAT OnePlusEps = 1.00000011921f;
2488
 
2489
    *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
2490
    V2 = Value * AbsV;
2491
    D = OnePlusEps - AbsV;
2492
 
2493
    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2494
    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2495
    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2496
 
2497
    Result = XMVector4Dot(VR, CR);
2498
 
2499
    return Result.vector4_f32[0];
2500
 
2501
#elif defined(_XM_SSE_INTRINSICS_)
2502
    CONST FLOAT OnePlusEps = 1.00000011921f;
2503
    FLOAT AbsV = fabsf(Value);
2504
    FLOAT V2 = Value * AbsV;    // Square with sign retained
2505
    FLOAT D = OnePlusEps - AbsV;
2506
 
2507
    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2508
    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2509
    Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
2510
    Result = XMVector4Dot(VR,Result);
2511
#if defined(_MSC_VER) && (_MSC_VER>=1500)
2512
    return _mm_cvtss_f32(Result);    
2513
#else
2514
    return Result.m128_f32[0];
2515
#endif
2516
#else // _XM_VMX128_INTRINSICS_
2517
#endif // _XM_VMX128_INTRINSICS_
2518
}
2519
 
2520
//------------------------------------------------------------------------------
2521
 
2522
XMFINLINE FLOAT XMScalarACosEst
2523
(
2524
    FLOAT Value
2525
)
2526
{
2527
#if defined(_XM_NO_INTRINSICS_)
2528
 
2529
    XMVECTOR VR, CR, CS;
2530
    XMVECTOR Result;
2531
    FLOAT AbsV, V2, D;
2532
    CONST FLOAT OnePlusEps = 1.00000011921f;
2533
 
2534
    // return XM_PIDIV2 - XMScalarASin(Value);
2535
 
2536
    *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
2537
    V2 = Value * AbsV;
2538
    D = OnePlusEps - AbsV;
2539
 
2540
    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
2541
    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
2542
    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
2543
 
2544
    Result = XMVector4Dot(VR, CR);
2545
 
2546
    return XM_PIDIV2 - Result.vector4_f32[0];
2547
 
2548
#elif defined(_XM_SSE_INTRINSICS_)
2549
    CONST FLOAT OnePlusEps = 1.00000011921f;
2550
    FLOAT AbsV = fabsf(Value);
2551
    FLOAT V2 = Value * AbsV;    // Value^2 retaining sign
2552
    FLOAT D = OnePlusEps - AbsV;
2553
    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
2554
    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
2555
    Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
2556
    Result = XMVector4Dot(VR,Result);
2557
#if defined(_MSC_VER) && (_MSC_VER>=1500)
2558
    return XM_PIDIV2 - _mm_cvtss_f32(Result);    
2559
#else
2560
    return XM_PIDIV2 - Result.m128_f32[0];
2561
#endif
2562
#else // _XM_VMX128_INTRINSICS_
2563
#endif // _XM_VMX128_INTRINSICS_
2564
}
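// Usage sketch (illustrative comment, not in the original header): the arccosine estimate
// reuses the arcsine polynomial through acos(x) = XM_PIDIV2 - asin(x), so the two estimates
// stay consistent with each other.
//
//     FLOAT a = XMScalarACosEst(0.0f);    // = XM_PIDIV2 for this input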
2565
 
2566
#endif // __XNAMATHMISC_INL__
2567