Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1 | pmbaty | 1 | /*++ |
2 | |||
3 | Copyright (c) Microsoft Corporation. All rights reserved. |
||
4 | |||
5 | Module Name: |
||
6 | |||
7 | xnamathmisc.inl |
||
8 | |||
9 | Abstract: |
||
10 | |||
11 | XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions. |
||
12 | --*/ |
||
13 | |||
14 | #if defined(_MSC_VER) && (_MSC_VER > 1000) |
||
15 | #pragma once |
||
16 | #endif |
||
17 | |||
18 | #ifndef __XNAMATHMISC_INL__ |
||
19 | #define __XNAMATHMISC_INL__ |
||
20 | |||
21 | /**************************************************************************** |
||
22 | * |
||
23 | * Quaternion |
||
24 | * |
||
25 | ****************************************************************************/ |
||
26 | |||
27 | //------------------------------------------------------------------------------ |
||
28 | // Comparison operations |
||
29 | //------------------------------------------------------------------------------ |
||
30 | |||
31 | //------------------------------------------------------------------------------ |
||
32 | |||
33 | XMFINLINE BOOL XMQuaternionEqual |
||
34 | ( |
||
35 | FXMVECTOR Q1, |
||
36 | FXMVECTOR Q2 |
||
37 | ) |
||
38 | { |
||
39 | return XMVector4Equal(Q1, Q2); |
||
40 | } |
||
41 | |||
42 | //------------------------------------------------------------------------------ |
||
43 | |||
44 | XMFINLINE BOOL XMQuaternionNotEqual |
||
45 | ( |
||
46 | FXMVECTOR Q1, |
||
47 | FXMVECTOR Q2 |
||
48 | ) |
||
49 | { |
||
50 | return XMVector4NotEqual(Q1, Q2); |
||
51 | } |
||
52 | |||
53 | //------------------------------------------------------------------------------ |
||
54 | |||
55 | XMFINLINE BOOL XMQuaternionIsNaN |
||
56 | ( |
||
57 | FXMVECTOR Q |
||
58 | ) |
||
59 | { |
||
60 | return XMVector4IsNaN(Q); |
||
61 | } |
||
62 | |||
63 | //------------------------------------------------------------------------------ |
||
64 | |||
65 | XMFINLINE BOOL XMQuaternionIsInfinite |
||
66 | ( |
||
67 | FXMVECTOR Q |
||
68 | ) |
||
69 | { |
||
70 | return XMVector4IsInfinite(Q); |
||
71 | } |
||
72 | |||
73 | //------------------------------------------------------------------------------ |
||
74 | |||
75 | XMFINLINE BOOL XMQuaternionIsIdentity |
||
76 | ( |
||
77 | FXMVECTOR Q |
||
78 | ) |
||
79 | { |
||
80 | #if defined(_XM_NO_INTRINSICS_) |
||
81 | |||
82 | return XMVector4Equal(Q, g_XMIdentityR3.v); |
||
83 | |||
84 | #elif defined(_XM_SSE_INTRINSICS_) |
||
85 | XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3); |
||
86 | return (_mm_movemask_ps(vTemp)==0x0f) ? true : false; |
||
87 | #else // _XM_VMX128_INTRINSICS_ |
||
88 | #endif // _XM_VMX128_INTRINSICS_ |
||
89 | } |
||
90 | |||
91 | //------------------------------------------------------------------------------ |
||
92 | // Computation operations |
||
93 | //------------------------------------------------------------------------------ |
||
94 | |||
95 | //------------------------------------------------------------------------------ |
||
96 | |||
97 | XMFINLINE XMVECTOR XMQuaternionDot |
||
98 | ( |
||
99 | FXMVECTOR Q1, |
||
100 | FXMVECTOR Q2 |
||
101 | ) |
||
102 | { |
||
103 | return XMVector4Dot(Q1, Q2); |
||
104 | } |
||
105 | |||
106 | //------------------------------------------------------------------------------ |
||
107 | |||
108 | XMFINLINE XMVECTOR XMQuaternionMultiply |
||
109 | ( |
||
110 | FXMVECTOR Q1, |
||
111 | FXMVECTOR Q2 |
||
112 | ) |
||
113 | { |
||
114 | #if defined(_XM_NO_INTRINSICS_) |
||
115 | |||
116 | XMVECTOR NegativeQ1; |
||
117 | XMVECTOR Q2X; |
||
118 | XMVECTOR Q2Y; |
||
119 | XMVECTOR Q2Z; |
||
120 | XMVECTOR Q2W; |
||
121 | XMVECTOR Q1WZYX; |
||
122 | XMVECTOR Q1ZWXY; |
||
123 | XMVECTOR Q1YXWZ; |
||
124 | XMVECTOR Result; |
||
125 | CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X}; |
||
126 | CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
127 | CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z}; |
||
128 | |||
129 | NegativeQ1 = XMVectorNegate(Q1); |
||
130 | |||
131 | Q2W = XMVectorSplatW(Q2); |
||
132 | Q2X = XMVectorSplatX(Q2); |
||
133 | Q2Y = XMVectorSplatY(Q2); |
||
134 | Q2Z = XMVectorSplatZ(Q2); |
||
135 | |||
136 | Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v); |
||
137 | Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v); |
||
138 | Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v); |
||
139 | |||
140 | Result = XMVectorMultiply(Q1, Q2W); |
||
141 | Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result); |
||
142 | Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result); |
||
143 | Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result); |
||
144 | |||
145 | return Result; |
||
146 | |||
147 | #elif defined(_XM_SSE_INTRINSICS_) |
||
148 | static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; |
||
149 | static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; |
||
150 | static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; |
||
151 | // Copy to SSE registers and use as few as possible for x86 |
||
152 | XMVECTOR Q2X = Q2; |
||
153 | XMVECTOR Q2Y = Q2; |
||
154 | XMVECTOR Q2Z = Q2; |
||
155 | XMVECTOR vResult = Q2; |
||
156 | // Splat with one instruction |
||
157 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); |
||
158 | Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0)); |
||
159 | Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1)); |
||
160 | Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2)); |
||
161 | // Retire Q1 and perform Q1*Q2W |
||
162 | vResult = _mm_mul_ps(vResult,Q1); |
||
163 | XMVECTOR Q1Shuffle = Q1; |
||
164 | // Shuffle the copies of Q1 |
||
165 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); |
||
166 | // Mul by Q1WZYX |
||
167 | Q2X = _mm_mul_ps(Q2X,Q1Shuffle); |
||
168 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1)); |
||
169 | // Flip the signs on y and z |
||
170 | Q2X = _mm_mul_ps(Q2X,ControlWZYX); |
||
171 | // Mul by Q1ZWXY |
||
172 | Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle); |
||
173 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); |
||
174 | // Flip the signs on z and w |
||
175 | Q2Y = _mm_mul_ps(Q2Y,ControlZWXY); |
||
176 | // Mul by Q1YXWZ |
||
177 | Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle); |
||
178 | vResult = _mm_add_ps(vResult,Q2X); |
||
179 | // Flip the signs on x and w |
||
180 | Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ); |
||
181 | Q2Y = _mm_add_ps(Q2Y,Q2Z); |
||
182 | vResult = _mm_add_ps(vResult,Q2Y); |
||
183 | return vResult; |
||
184 | #else // _XM_VMX128_INTRINSICS_ |
||
185 | #endif // _XM_VMX128_INTRINSICS_ |
||
186 | } |
||
187 | |||
188 | //------------------------------------------------------------------------------ |
||
189 | |||
190 | XMFINLINE XMVECTOR XMQuaternionLengthSq |
||
191 | ( |
||
192 | FXMVECTOR Q |
||
193 | ) |
||
194 | { |
||
195 | return XMVector4LengthSq(Q); |
||
196 | } |
||
197 | |||
198 | //------------------------------------------------------------------------------ |
||
199 | |||
200 | XMFINLINE XMVECTOR XMQuaternionReciprocalLength |
||
201 | ( |
||
202 | FXMVECTOR Q |
||
203 | ) |
||
204 | { |
||
205 | return XMVector4ReciprocalLength(Q); |
||
206 | } |
||
207 | |||
208 | //------------------------------------------------------------------------------ |
||
209 | |||
210 | XMFINLINE XMVECTOR XMQuaternionLength |
||
211 | ( |
||
212 | FXMVECTOR Q |
||
213 | ) |
||
214 | { |
||
215 | return XMVector4Length(Q); |
||
216 | } |
||
217 | |||
218 | //------------------------------------------------------------------------------ |
||
219 | |||
220 | XMFINLINE XMVECTOR XMQuaternionNormalizeEst |
||
221 | ( |
||
222 | FXMVECTOR Q |
||
223 | ) |
||
224 | { |
||
225 | return XMVector4NormalizeEst(Q); |
||
226 | } |
||
227 | |||
228 | //------------------------------------------------------------------------------ |
||
229 | |||
230 | XMFINLINE XMVECTOR XMQuaternionNormalize |
||
231 | ( |
||
232 | FXMVECTOR Q |
||
233 | ) |
||
234 | { |
||
235 | return XMVector4Normalize(Q); |
||
236 | } |
||
237 | |||
238 | //------------------------------------------------------------------------------ |
||
239 | |||
240 | XMFINLINE XMVECTOR XMQuaternionConjugate |
||
241 | ( |
||
242 | FXMVECTOR Q |
||
243 | ) |
||
244 | { |
||
245 | #if defined(_XM_NO_INTRINSICS_) |
||
246 | |||
247 | XMVECTOR Result = { |
||
248 | -Q.vector4_f32[0], |
||
249 | -Q.vector4_f32[1], |
||
250 | -Q.vector4_f32[2], |
||
251 | Q.vector4_f32[3] |
||
252 | }; |
||
253 | return Result; |
||
254 | #elif defined(_XM_SSE_INTRINSICS_) |
||
255 | static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f}; |
||
256 | XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3); |
||
257 | return Result; |
||
258 | #else // _XM_VMX128_INTRINSICS_ |
||
259 | #endif // _XM_VMX128_INTRINSICS_ |
||
260 | } |
||
261 | |||
262 | //------------------------------------------------------------------------------ |
||
263 | |||
264 | XMFINLINE XMVECTOR XMQuaternionInverse |
||
265 | ( |
||
266 | FXMVECTOR Q |
||
267 | ) |
||
268 | { |
||
269 | #if defined(_XM_NO_INTRINSICS_) |
||
270 | |||
271 | XMVECTOR Conjugate; |
||
272 | XMVECTOR L; |
||
273 | XMVECTOR Control; |
||
274 | XMVECTOR Result; |
||
275 | CONST XMVECTOR Zero = XMVectorZero(); |
||
276 | |||
277 | L = XMVector4LengthSq(Q); |
||
278 | Conjugate = XMQuaternionConjugate(Q); |
||
279 | |||
280 | Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); |
||
281 | |||
282 | L = XMVectorReciprocal(L); |
||
283 | Result = XMVectorMultiply(Conjugate, L); |
||
284 | |||
285 | Result = XMVectorSelect(Result, Zero, Control); |
||
286 | |||
287 | return Result; |
||
288 | |||
289 | #elif defined(_XM_SSE_INTRINSICS_) |
||
290 | XMVECTOR Conjugate; |
||
291 | XMVECTOR L; |
||
292 | XMVECTOR Control; |
||
293 | XMVECTOR Result; |
||
294 | XMVECTOR Zero = XMVectorZero(); |
||
295 | |||
296 | L = XMVector4LengthSq(Q); |
||
297 | Conjugate = XMQuaternionConjugate(Q); |
||
298 | Control = XMVectorLessOrEqual(L, g_XMEpsilon); |
||
299 | Result = _mm_div_ps(Conjugate,L); |
||
300 | Result = XMVectorSelect(Result, Zero, Control); |
||
301 | return Result; |
||
302 | #else // _XM_VMX128_INTRINSICS_ |
||
303 | #endif // _XM_VMX128_INTRINSICS_ |
||
304 | } |
||
305 | |||
306 | //------------------------------------------------------------------------------ |
||
307 | |||
308 | XMFINLINE XMVECTOR XMQuaternionLn |
||
309 | ( |
||
310 | FXMVECTOR Q |
||
311 | ) |
||
312 | { |
||
313 | #if defined(_XM_NO_INTRINSICS_) |
||
314 | |||
315 | XMVECTOR Q0; |
||
316 | XMVECTOR QW; |
||
317 | XMVECTOR Theta; |
||
318 | XMVECTOR SinTheta; |
||
319 | XMVECTOR S; |
||
320 | XMVECTOR ControlW; |
||
321 | XMVECTOR Result; |
||
322 | static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
323 | |||
324 | QW = XMVectorSplatW(Q); |
||
325 | Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); |
||
326 | |||
327 | ControlW = XMVectorInBounds(QW, OneMinusEpsilon); |
||
328 | |||
329 | Theta = XMVectorACos(QW); |
||
330 | SinTheta = XMVectorSin(Theta); |
||
331 | |||
332 | S = XMVectorReciprocal(SinTheta); |
||
333 | S = XMVectorMultiply(Theta, S); |
||
334 | |||
335 | Result = XMVectorMultiply(Q0, S); |
||
336 | |||
337 | Result = XMVectorSelect(Q0, Result, ControlW); |
||
338 | |||
339 | return Result; |
||
340 | |||
341 | #elif defined(_XM_SSE_INTRINSICS_) |
||
342 | static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
343 | static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)}; |
||
344 | // Get W only |
||
345 | XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3)); |
||
346 | // W = 0 |
||
347 | XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3); |
||
348 | // Use W if within bounds |
||
349 | XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon); |
||
350 | XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon); |
||
351 | ControlW = _mm_and_ps(ControlW,vTemp2); |
||
352 | // Get theta |
||
353 | XMVECTOR vTheta = XMVectorACos(QW); |
||
354 | // Get Sine of theta |
||
355 | vTemp2 = XMVectorSin(vTheta); |
||
356 | // theta/sine of theta |
||
357 | vTheta = _mm_div_ps(vTheta,vTemp2); |
||
358 | // Here's the answer |
||
359 | vTheta = _mm_mul_ps(vTheta,Q0); |
||
360 | // Was W in bounds? If not, return input as is |
||
361 | vTheta = XMVectorSelect(Q0,vTheta,ControlW); |
||
362 | return vTheta; |
||
363 | #else // _XM_VMX128_INTRINSICS_ |
||
364 | #endif // _XM_VMX128_INTRINSICS_ |
||
365 | } |
||
366 | |||
367 | //------------------------------------------------------------------------------ |
||
368 | |||
369 | XMFINLINE XMVECTOR XMQuaternionExp |
||
370 | ( |
||
371 | FXMVECTOR Q |
||
372 | ) |
||
373 | { |
||
374 | #if defined(_XM_NO_INTRINSICS_) |
||
375 | |||
376 | XMVECTOR Theta; |
||
377 | XMVECTOR SinTheta; |
||
378 | XMVECTOR CosTheta; |
||
379 | XMVECTOR S; |
||
380 | XMVECTOR Control; |
||
381 | XMVECTOR Zero; |
||
382 | XMVECTOR Result; |
||
383 | |||
384 | Theta = XMVector3Length(Q); |
||
385 | XMVectorSinCos(&SinTheta, &CosTheta, Theta); |
||
386 | |||
387 | S = XMVectorReciprocal(Theta); |
||
388 | S = XMVectorMultiply(SinTheta, S); |
||
389 | |||
390 | Result = XMVectorMultiply(Q, S); |
||
391 | |||
392 | Zero = XMVectorZero(); |
||
393 | Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); |
||
394 | Result = XMVectorSelect(Result, Q, Control); |
||
395 | |||
396 | Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); |
||
397 | |||
398 | return Result; |
||
399 | |||
400 | #elif defined(_XM_SSE_INTRINSICS_) |
||
401 | XMVECTOR Theta; |
||
402 | XMVECTOR SinTheta; |
||
403 | XMVECTOR CosTheta; |
||
404 | XMVECTOR S; |
||
405 | XMVECTOR Control; |
||
406 | XMVECTOR Zero; |
||
407 | XMVECTOR Result; |
||
408 | Theta = XMVector3Length(Q); |
||
409 | XMVectorSinCos(&SinTheta, &CosTheta, Theta); |
||
410 | S = _mm_div_ps(SinTheta,Theta); |
||
411 | Result = _mm_mul_ps(Q, S); |
||
412 | Zero = XMVectorZero(); |
||
413 | Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon); |
||
414 | Result = XMVectorSelect(Result,Q,Control); |
||
415 | Result = _mm_and_ps(Result,g_XMMask3); |
||
416 | CosTheta = _mm_and_ps(CosTheta,g_XMMaskW); |
||
417 | Result = _mm_or_ps(Result,CosTheta); |
||
418 | return Result; |
||
419 | #else // _XM_VMX128_INTRINSICS_ |
||
420 | #endif // _XM_VMX128_INTRINSICS_ |
||
421 | } |
||
422 | |||
423 | //------------------------------------------------------------------------------ |
||
424 | |||
425 | XMINLINE XMVECTOR XMQuaternionSlerp |
||
426 | ( |
||
427 | FXMVECTOR Q0, |
||
428 | FXMVECTOR Q1, |
||
429 | FLOAT t |
||
430 | ) |
||
431 | { |
||
432 | XMVECTOR T = XMVectorReplicate(t); |
||
433 | return XMQuaternionSlerpV(Q0, Q1, T); |
||
434 | } |
||
435 | |||
436 | //------------------------------------------------------------------------------ |
||
437 | |||
438 | XMINLINE XMVECTOR XMQuaternionSlerpV |
||
439 | ( |
||
440 | FXMVECTOR Q0, |
||
441 | FXMVECTOR Q1, |
||
442 | FXMVECTOR T |
||
443 | ) |
||
444 | { |
||
445 | #if defined(_XM_NO_INTRINSICS_) |
||
446 | |||
447 | // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) |
||
448 | XMVECTOR Omega; |
||
449 | XMVECTOR CosOmega; |
||
450 | XMVECTOR SinOmega; |
||
451 | XMVECTOR InvSinOmega; |
||
452 | XMVECTOR V01; |
||
453 | XMVECTOR C1000; |
||
454 | XMVECTOR SignMask; |
||
455 | XMVECTOR S0; |
||
456 | XMVECTOR S1; |
||
457 | XMVECTOR Sign; |
||
458 | XMVECTOR Control; |
||
459 | XMVECTOR Result; |
||
460 | XMVECTOR Zero; |
||
461 | CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
462 | |||
463 | XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0])); |
||
464 | |||
465 | CosOmega = XMQuaternionDot(Q0, Q1); |
||
466 | |||
467 | Zero = XMVectorZero(); |
||
468 | Control = XMVectorLess(CosOmega, Zero); |
||
469 | Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); |
||
470 | |||
471 | CosOmega = XMVectorMultiply(CosOmega, Sign); |
||
472 | |||
473 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); |
||
474 | |||
475 | SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); |
||
476 | SinOmega = XMVectorSqrt(SinOmega); |
||
477 | |||
478 | Omega = XMVectorATan2(SinOmega, CosOmega); |
||
479 | |||
480 | SignMask = XMVectorSplatSignMask(); |
||
481 | C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0); |
||
482 | V01 = XMVectorShiftLeft(T, Zero, 2); |
||
483 | SignMask = XMVectorShiftLeft(SignMask, Zero, 3); |
||
484 | V01 = XMVectorXorInt(V01, SignMask); |
||
485 | V01 = XMVectorAdd(C1000, V01); |
||
486 | |||
487 | InvSinOmega = XMVectorReciprocal(SinOmega); |
||
488 | |||
489 | S0 = XMVectorMultiply(V01, Omega); |
||
490 | S0 = XMVectorSin(S0); |
||
491 | S0 = XMVectorMultiply(S0, InvSinOmega); |
||
492 | |||
493 | S0 = XMVectorSelect(V01, S0, Control); |
||
494 | |||
495 | S1 = XMVectorSplatY(S0); |
||
496 | S0 = XMVectorSplatX(S0); |
||
497 | |||
498 | S1 = XMVectorMultiply(S1, Sign); |
||
499 | |||
500 | Result = XMVectorMultiply(Q0, S0); |
||
501 | Result = XMVectorMultiplyAdd(Q1, S1, Result); |
||
502 | |||
503 | return Result; |
||
504 | |||
505 | #elif defined(_XM_SSE_INTRINSICS_) |
||
506 | // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) |
||
507 | XMVECTOR Omega; |
||
508 | XMVECTOR CosOmega; |
||
509 | XMVECTOR SinOmega; |
||
510 | XMVECTOR V01; |
||
511 | XMVECTOR S0; |
||
512 | XMVECTOR S1; |
||
513 | XMVECTOR Sign; |
||
514 | XMVECTOR Control; |
||
515 | XMVECTOR Result; |
||
516 | XMVECTOR Zero; |
||
517 | static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
518 | static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000}; |
||
519 | static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000}; |
||
520 | |||
521 | XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); |
||
522 | |||
523 | CosOmega = XMQuaternionDot(Q0, Q1); |
||
524 | |||
525 | Zero = XMVectorZero(); |
||
526 | Control = XMVectorLess(CosOmega, Zero); |
||
527 | Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); |
||
528 | |||
529 | CosOmega = _mm_mul_ps(CosOmega, Sign); |
||
530 | |||
531 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); |
||
532 | |||
533 | SinOmega = _mm_mul_ps(CosOmega,CosOmega); |
||
534 | SinOmega = _mm_sub_ps(g_XMOne,SinOmega); |
||
535 | SinOmega = _mm_sqrt_ps(SinOmega); |
||
536 | |||
537 | Omega = XMVectorATan2(SinOmega, CosOmega); |
||
538 | |||
539 | V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1)); |
||
540 | V01 = _mm_and_ps(V01,MaskXY); |
||
541 | V01 = _mm_xor_ps(V01,SignMask2); |
||
542 | V01 = _mm_add_ps(g_XMIdentityR0, V01); |
||
543 | |||
544 | S0 = _mm_mul_ps(V01, Omega); |
||
545 | S0 = XMVectorSin(S0); |
||
546 | S0 = _mm_div_ps(S0, SinOmega); |
||
547 | |||
548 | S0 = XMVectorSelect(V01, S0, Control); |
||
549 | |||
550 | S1 = XMVectorSplatY(S0); |
||
551 | S0 = XMVectorSplatX(S0); |
||
552 | |||
553 | S1 = _mm_mul_ps(S1, Sign); |
||
554 | Result = _mm_mul_ps(Q0, S0); |
||
555 | S1 = _mm_mul_ps(S1, Q1); |
||
556 | Result = _mm_add_ps(Result,S1); |
||
557 | return Result; |
||
558 | #else // _XM_VMX128_INTRINSICS_ |
||
559 | #endif // _XM_VMX128_INTRINSICS_ |
||
560 | } |
||
561 | |||
562 | //------------------------------------------------------------------------------ |
||
563 | |||
564 | XMFINLINE XMVECTOR XMQuaternionSquad |
||
565 | ( |
||
566 | FXMVECTOR Q0, |
||
567 | FXMVECTOR Q1, |
||
568 | FXMVECTOR Q2, |
||
569 | CXMVECTOR Q3, |
||
570 | FLOAT t |
||
571 | ) |
||
572 | { |
||
573 | XMVECTOR T = XMVectorReplicate(t); |
||
574 | return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); |
||
575 | } |
||
576 | |||
577 | //------------------------------------------------------------------------------ |
||
578 | |||
579 | XMFINLINE XMVECTOR XMQuaternionSquadV |
||
580 | ( |
||
581 | FXMVECTOR Q0, |
||
582 | FXMVECTOR Q1, |
||
583 | FXMVECTOR Q2, |
||
584 | CXMVECTOR Q3, |
||
585 | CXMVECTOR T |
||
586 | ) |
||
587 | { |
||
588 | XMVECTOR Q03; |
||
589 | XMVECTOR Q12; |
||
590 | XMVECTOR TP; |
||
591 | XMVECTOR Two; |
||
592 | XMVECTOR Result; |
||
593 | |||
594 | XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) ); |
||
595 | |||
596 | TP = T; |
||
597 | Two = XMVectorSplatConstant(2, 0); |
||
598 | |||
599 | Q03 = XMQuaternionSlerpV(Q0, Q3, T); |
||
600 | Q12 = XMQuaternionSlerpV(Q1, Q2, T); |
||
601 | |||
602 | TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); |
||
603 | TP = XMVectorMultiply(TP, Two); |
||
604 | |||
605 | Result = XMQuaternionSlerpV(Q03, Q12, TP); |
||
606 | |||
607 | return Result; |
||
608 | |||
609 | } |
||
610 | |||
611 | //------------------------------------------------------------------------------ |
||
612 | |||
613 | XMINLINE VOID XMQuaternionSquadSetup |
||
614 | ( |
||
615 | XMVECTOR* pA, |
||
616 | XMVECTOR* pB, |
||
617 | XMVECTOR* pC, |
||
618 | FXMVECTOR Q0, |
||
619 | FXMVECTOR Q1, |
||
620 | FXMVECTOR Q2, |
||
621 | CXMVECTOR Q3 |
||
622 | ) |
||
623 | { |
||
624 | XMVECTOR SQ0, SQ2, SQ3; |
||
625 | XMVECTOR InvQ1, InvQ2; |
||
626 | XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3; |
||
627 | XMVECTOR ExpQ02, ExpQ13; |
||
628 | XMVECTOR LS01, LS12, LS23; |
||
629 | XMVECTOR LD01, LD12, LD23; |
||
630 | XMVECTOR Control0, Control1, Control2; |
||
631 | XMVECTOR NegativeOneQuarter; |
||
632 | |||
633 | XMASSERT(pA); |
||
634 | XMASSERT(pB); |
||
635 | XMASSERT(pC); |
||
636 | |||
637 | LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); |
||
638 | LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); |
||
639 | SQ2 = XMVectorNegate(Q2); |
||
640 | |||
641 | Control1 = XMVectorLess(LS12, LD12); |
||
642 | SQ2 = XMVectorSelect(Q2, SQ2, Control1); |
||
643 | |||
644 | LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); |
||
645 | LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); |
||
646 | SQ0 = XMVectorNegate(Q0); |
||
647 | |||
648 | LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); |
||
649 | LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); |
||
650 | SQ3 = XMVectorNegate(Q3); |
||
651 | |||
652 | Control0 = XMVectorLess(LS01, LD01); |
||
653 | Control2 = XMVectorLess(LS23, LD23); |
||
654 | |||
655 | SQ0 = XMVectorSelect(Q0, SQ0, Control0); |
||
656 | SQ3 = XMVectorSelect(Q3, SQ3, Control2); |
||
657 | |||
658 | InvQ1 = XMQuaternionInverse(Q1); |
||
659 | InvQ2 = XMQuaternionInverse(SQ2); |
||
660 | |||
661 | LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); |
||
662 | LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); |
||
663 | LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); |
||
664 | LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); |
||
665 | |||
666 | NegativeOneQuarter = XMVectorSplatConstant(-1, 2); |
||
667 | |||
668 | ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); |
||
669 | ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); |
||
670 | ExpQ02 = XMQuaternionExp(ExpQ02); |
||
671 | ExpQ13 = XMQuaternionExp(ExpQ13); |
||
672 | |||
673 | *pA = XMQuaternionMultiply(Q1, ExpQ02); |
||
674 | *pB = XMQuaternionMultiply(SQ2, ExpQ13); |
||
675 | *pC = SQ2; |
||
676 | } |
||
677 | |||
678 | //------------------------------------------------------------------------------ |
||
679 | |||
680 | XMFINLINE XMVECTOR XMQuaternionBaryCentric |
||
681 | ( |
||
682 | FXMVECTOR Q0, |
||
683 | FXMVECTOR Q1, |
||
684 | FXMVECTOR Q2, |
||
685 | FLOAT f, |
||
686 | FLOAT g |
||
687 | ) |
||
688 | { |
||
689 | XMVECTOR Q01; |
||
690 | XMVECTOR Q02; |
||
691 | FLOAT s; |
||
692 | XMVECTOR Result; |
||
693 | |||
694 | s = f + g; |
||
695 | |||
696 | if (s < 0.00001f && s > -0.00001f) |
||
697 | { |
||
698 | Result = Q0; |
||
699 | } |
||
700 | else |
||
701 | { |
||
702 | Q01 = XMQuaternionSlerp(Q0, Q1, s); |
||
703 | Q02 = XMQuaternionSlerp(Q0, Q2, s); |
||
704 | |||
705 | Result = XMQuaternionSlerp(Q01, Q02, g / s); |
||
706 | } |
||
707 | |||
708 | return Result; |
||
709 | } |
||
710 | |||
711 | //------------------------------------------------------------------------------ |
||
712 | |||
713 | XMFINLINE XMVECTOR XMQuaternionBaryCentricV |
||
714 | ( |
||
715 | FXMVECTOR Q0, |
||
716 | FXMVECTOR Q1, |
||
717 | FXMVECTOR Q2, |
||
718 | CXMVECTOR F, |
||
719 | CXMVECTOR G |
||
720 | ) |
||
721 | { |
||
722 | XMVECTOR Q01; |
||
723 | XMVECTOR Q02; |
||
724 | XMVECTOR S, GS; |
||
725 | XMVECTOR Epsilon; |
||
726 | XMVECTOR Result; |
||
727 | |||
728 | XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) ); |
||
729 | XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) ); |
||
730 | |||
731 | Epsilon = XMVectorSplatConstant(1, 16); |
||
732 | |||
733 | S = XMVectorAdd(F, G); |
||
734 | |||
735 | if (XMVector4InBounds(S, Epsilon)) |
||
736 | { |
||
737 | Result = Q0; |
||
738 | } |
||
739 | else |
||
740 | { |
||
741 | Q01 = XMQuaternionSlerpV(Q0, Q1, S); |
||
742 | Q02 = XMQuaternionSlerpV(Q0, Q2, S); |
||
743 | GS = XMVectorReciprocal(S); |
||
744 | GS = XMVectorMultiply(G, GS); |
||
745 | |||
746 | Result = XMQuaternionSlerpV(Q01, Q02, GS); |
||
747 | } |
||
748 | |||
749 | return Result; |
||
750 | } |
||
751 | |||
752 | //------------------------------------------------------------------------------ |
||
753 | // Transformation operations |
||
754 | //------------------------------------------------------------------------------ |
||
755 | |||
756 | //------------------------------------------------------------------------------ |
||
757 | |||
758 | XMFINLINE XMVECTOR XMQuaternionIdentity() |
||
759 | { |
||
760 | #if defined(_XM_NO_INTRINSICS_) |
||
761 | return g_XMIdentityR3.v; |
||
762 | #elif defined(_XM_SSE_INTRINSICS_) |
||
763 | return g_XMIdentityR3; |
||
764 | #else // _XM_VMX128_INTRINSICS_ |
||
765 | #endif // _XM_VMX128_INTRINSICS_ |
||
766 | } |
||
767 | |||
768 | //------------------------------------------------------------------------------ |
||
769 | |||
770 | XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw |
||
771 | ( |
||
772 | FLOAT Pitch, |
||
773 | FLOAT Yaw, |
||
774 | FLOAT Roll |
||
775 | ) |
||
776 | { |
||
777 | XMVECTOR Angles; |
||
778 | XMVECTOR Q; |
||
779 | |||
780 | Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); |
||
781 | Q = XMQuaternionRotationRollPitchYawFromVector(Angles); |
||
782 | |||
783 | return Q; |
||
784 | } |
||
785 | |||
786 | //------------------------------------------------------------------------------ |
||
787 | |||
788 | XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector |
||
789 | ( |
||
790 | FXMVECTOR Angles // <Pitch, Yaw, Roll, 0> |
||
791 | ) |
||
792 | { |
||
793 | #if defined(_XM_NO_INTRINSICS_) |
||
794 | |||
795 | XMVECTOR Q, Q0, Q1; |
||
796 | XMVECTOR P0, P1, Y0, Y1, R0, R1; |
||
797 | XMVECTOR HalfAngles; |
||
798 | XMVECTOR SinAngles, CosAngles; |
||
799 | static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X}; |
||
800 | static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y}; |
||
801 | static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z}; |
||
802 | static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
803 | |||
804 | HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); |
||
805 | XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); |
||
806 | |||
807 | P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v); |
||
808 | Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v); |
||
809 | R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v); |
||
810 | P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v); |
||
811 | Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v); |
||
812 | R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v); |
||
813 | |||
814 | Q1 = XMVectorMultiply(P1, Sign); |
||
815 | Q0 = XMVectorMultiply(P0, Y0); |
||
816 | Q1 = XMVectorMultiply(Q1, Y1); |
||
817 | Q0 = XMVectorMultiply(Q0, R0); |
||
818 | Q = XMVectorMultiplyAdd(Q1, R1, Q0); |
||
819 | |||
820 | return Q; |
||
821 | |||
822 | #elif defined(_XM_SSE_INTRINSICS_) |
||
823 | XMVECTOR Q, Q0, Q1; |
||
824 | XMVECTOR P0, P1, Y0, Y1, R0, R1; |
||
825 | XMVECTOR HalfAngles; |
||
826 | XMVECTOR SinAngles, CosAngles; |
||
827 | static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X}; |
||
828 | static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y}; |
||
829 | static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z}; |
||
830 | static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
831 | |||
832 | HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf); |
||
833 | XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); |
||
834 | |||
835 | P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch); |
||
836 | Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw); |
||
837 | R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll); |
||
838 | P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch); |
||
839 | Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw); |
||
840 | R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll); |
||
841 | |||
842 | Q1 = _mm_mul_ps(P1, Sign); |
||
843 | Q0 = _mm_mul_ps(P0, Y0); |
||
844 | Q1 = _mm_mul_ps(Q1, Y1); |
||
845 | Q0 = _mm_mul_ps(Q0, R0); |
||
846 | Q = _mm_mul_ps(Q1, R1); |
||
847 | Q = _mm_add_ps(Q,Q0); |
||
848 | return Q; |
||
849 | #else // _XM_VMX128_INTRINSICS_ |
||
850 | #endif // _XM_VMX128_INTRINSICS_ |
||
851 | } |
||
852 | |||
853 | //------------------------------------------------------------------------------ |
||
854 | |||
855 | XMFINLINE XMVECTOR XMQuaternionRotationNormal |
||
856 | ( |
||
857 | FXMVECTOR NormalAxis, |
||
858 | FLOAT Angle |
||
859 | ) |
||
860 | { |
||
861 | #if defined(_XM_NO_INTRINSICS_) |
||
862 | |||
863 | XMVECTOR Q; |
||
864 | XMVECTOR N; |
||
865 | XMVECTOR Scale; |
||
866 | |||
867 | N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); |
||
868 | |||
869 | XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle); |
||
870 | |||
871 | Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2]; |
||
872 | |||
873 | Q = XMVectorMultiply(N, Scale); |
||
874 | |||
875 | return Q; |
||
876 | |||
877 | #elif defined(_XM_SSE_INTRINSICS_) |
||
878 | XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3); |
||
879 | N = _mm_or_ps(N,g_XMIdentityR3); |
||
880 | XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); |
||
881 | XMVECTOR vSine; |
||
882 | XMVECTOR vCosine; |
||
883 | XMVectorSinCos(&vSine,&vCosine,Scale); |
||
884 | Scale = _mm_and_ps(vSine,g_XMMask3); |
||
885 | vCosine = _mm_and_ps(vCosine,g_XMMaskW); |
||
886 | Scale = _mm_or_ps(Scale,vCosine); |
||
887 | N = _mm_mul_ps(N,Scale); |
||
888 | return N; |
||
889 | #else // _XM_VMX128_INTRINSICS_ |
||
890 | #endif // _XM_VMX128_INTRINSICS_ |
||
891 | } |
||
892 | |||
893 | //------------------------------------------------------------------------------ |
||
894 | |||
895 | XMFINLINE XMVECTOR XMQuaternionRotationAxis |
||
896 | ( |
||
897 | FXMVECTOR Axis, |
||
898 | FLOAT Angle |
||
899 | ) |
||
900 | { |
||
901 | #if defined(_XM_NO_INTRINSICS_) |
||
902 | |||
903 | XMVECTOR Normal; |
||
904 | XMVECTOR Q; |
||
905 | |||
906 | XMASSERT(!XMVector3Equal(Axis, XMVectorZero())); |
||
907 | XMASSERT(!XMVector3IsInfinite(Axis)); |
||
908 | |||
909 | Normal = XMVector3Normalize(Axis); |
||
910 | Q = XMQuaternionRotationNormal(Normal, Angle); |
||
911 | |||
912 | return Q; |
||
913 | |||
914 | #elif defined(_XM_SSE_INTRINSICS_) |
||
915 | XMVECTOR Normal; |
||
916 | XMVECTOR Q; |
||
917 | |||
918 | XMASSERT(!XMVector3Equal(Axis, XMVectorZero())); |
||
919 | XMASSERT(!XMVector3IsInfinite(Axis)); |
||
920 | |||
921 | Normal = XMVector3Normalize(Axis); |
||
922 | Q = XMQuaternionRotationNormal(Normal, Angle); |
||
923 | return Q; |
||
924 | #else // _XM_VMX128_INTRINSICS_ |
||
925 | #endif // _XM_VMX128_INTRINSICS_ |
||
926 | } |
||
927 | |||
928 | //------------------------------------------------------------------------------ |
||
929 | |||
930 | XMINLINE XMVECTOR XMQuaternionRotationMatrix |
||
931 | ( |
||
932 | CXMMATRIX M |
||
933 | ) |
||
934 | { |
||
935 | #if defined(_XM_NO_INTRINSICS_) |
||
936 | |||
937 | XMVECTOR Q0, Q1, Q2; |
||
938 | XMVECTOR M00, M11, M22; |
||
939 | XMVECTOR CQ0, CQ1, C; |
||
940 | XMVECTOR CX, CY, CZ, CW; |
||
941 | XMVECTOR SQ1, Scale; |
||
942 | XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select; |
||
943 | XMVECTOR A, B, P; |
||
944 | XMVECTOR PermuteSplat, PermuteSplatT; |
||
945 | XMVECTOR SignB, SignBT; |
||
946 | XMVECTOR PermuteControl, PermuteControlT; |
||
947 | XMVECTOR Zero; |
||
948 | XMVECTOR Result; |
||
949 | static CONST XMVECTOR OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f}; |
||
950 | static CONST XMVECTOR SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
951 | static CONST XMVECTOR SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f}; |
||
952 | static CONST XMVECTOR SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f}; |
||
953 | static CONST XMVECTOR SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f}; |
||
954 | static CONST XMVECTOR SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f}; |
||
955 | static CONST XMVECTOR SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f}; |
||
956 | static CONST XMVECTOR SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f}; |
||
957 | static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W}; |
||
958 | static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W}; |
||
959 | static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X}; |
||
960 | static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y}; |
||
961 | static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z}; |
||
962 | static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W}; |
||
963 | static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
964 | static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W}; |
||
965 | static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W}; |
||
966 | static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y}; |
||
967 | static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z}; |
||
968 | static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X}; |
||
969 | static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W}; |
||
970 | |||
971 | M00 = XMVectorSplatX(M.r[0]); |
||
972 | M11 = XMVectorSplatY(M.r[1]); |
||
973 | M22 = XMVectorSplatZ(M.r[2]); |
||
974 | |||
975 | Q0 = XMVectorMultiply(SignPNNP, M00); |
||
976 | Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0); |
||
977 | Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0); |
||
978 | |||
979 | Q1 = XMVectorAdd(Q0, g_XMOne.v); |
||
980 | |||
981 | Rsq = XMVectorReciprocalSqrt(Q1); |
||
982 | Zero = XMVectorZero(); |
||
983 | VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity.v); |
||
984 | VEqualsZero = XMVectorEqual(Q1, Zero); |
||
985 | Sqrt = XMVectorMultiply(Q1, Rsq); |
||
986 | Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); |
||
987 | Q1 = XMVectorSelect(Q1, Sqrt, Select); |
||
988 | |||
989 | Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v); |
||
990 | |||
991 | SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v); |
||
992 | |||
993 | CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v); |
||
994 | CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W.v); |
||
995 | C = XMVectorGreaterOrEqual(CQ0, CQ1); |
||
996 | |||
997 | CX = XMVectorSplatX(C); |
||
998 | CY = XMVectorSplatY(C); |
||
999 | CZ = XMVectorSplatZ(C); |
||
1000 | CW = XMVectorSplatW(C); |
||
1001 | |||
1002 | PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ); |
||
1003 | SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ); |
||
1004 | PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ); |
||
1005 | |||
1006 | PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX); |
||
1007 | SignB = XMVectorSelect(SignB, SignNPPP, CX); |
||
1008 | PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX); |
||
1009 | |||
1010 | PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY); |
||
1011 | SignBT = XMVectorSelect(SignB, SignPNPP, CY); |
||
1012 | PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY); |
||
1013 | |||
1014 | PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX); |
||
1015 | SignB = XMVectorSelect(SignB, SignBT, CX); |
||
1016 | PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX); |
||
1017 | |||
1018 | PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW); |
||
1019 | SignB = XMVectorSelect(SignB, SignNNNX, CW); |
||
1020 | PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW); |
||
1021 | |||
1022 | Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat); |
||
1023 | |||
1024 | P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21} |
||
1025 | A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03} |
||
1026 | B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03} |
||
1027 | |||
1028 | Q2 = XMVectorMultiplyAdd(SignB, B, A); |
||
1029 | Q2 = XMVectorMultiply(Q2, Scale); |
||
1030 | |||
1031 | Result = XMVectorPermute(Q1, Q2, PermuteControl); |
||
1032 | |||
1033 | return Result; |
||
1034 | |||
1035 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1036 | XMVECTOR Q0, Q1, Q2; |
||
1037 | XMVECTOR M00, M11, M22; |
||
1038 | XMVECTOR CQ0, CQ1, C; |
||
1039 | XMVECTOR CX, CY, CZ, CW; |
||
1040 | XMVECTOR SQ1, Scale; |
||
1041 | XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select; |
||
1042 | XMVECTOR A, B, P; |
||
1043 | XMVECTOR PermuteSplat, PermuteSplatT; |
||
1044 | XMVECTOR SignB, SignBT; |
||
1045 | XMVECTOR PermuteControl, PermuteControlT; |
||
1046 | XMVECTOR Zero; |
||
1047 | XMVECTOR Result; |
||
1048 | static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f}; |
||
1049 | static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
1050 | static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f}; |
||
1051 | static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f}; |
||
1052 | static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f}; |
||
1053 | static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f}; |
||
1054 | static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f}; |
||
1055 | static CONST XMVECTORF32 SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f}; |
||
1056 | static CONST XMVECTORI32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W}; |
||
1057 | static CONST XMVECTORI32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W}; |
||
1058 | static CONST XMVECTORI32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X}; |
||
1059 | static CONST XMVECTORI32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y}; |
||
1060 | static CONST XMVECTORI32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z}; |
||
1061 | static CONST XMVECTORI32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W}; |
||
1062 | static CONST XMVECTORI32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
1063 | static CONST XMVECTORI32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W}; |
||
1064 | static CONST XMVECTORI32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W}; |
||
1065 | static CONST XMVECTORI32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y}; |
||
1066 | static CONST XMVECTORI32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z}; |
||
1067 | static CONST XMVECTORI32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X}; |
||
1068 | static CONST XMVECTORI32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W}; |
||
1069 | |||
1070 | M00 = XMVectorSplatX(M.r[0]); |
||
1071 | M11 = XMVectorSplatY(M.r[1]); |
||
1072 | M22 = XMVectorSplatZ(M.r[2]); |
||
1073 | |||
1074 | Q0 = XMVectorMultiply(SignPNNP, M00); |
||
1075 | Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0); |
||
1076 | Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0); |
||
1077 | |||
1078 | Q1 = XMVectorAdd(Q0, g_XMOne); |
||
1079 | |||
1080 | Rsq = XMVectorReciprocalSqrt(Q1); |
||
1081 | Zero = XMVectorZero(); |
||
1082 | VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity); |
||
1083 | VEqualsZero = XMVectorEqual(Q1, Zero); |
||
1084 | Sqrt = XMVectorMultiply(Q1, Rsq); |
||
1085 | Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); |
||
1086 | Q1 = XMVectorSelect(Q1, Sqrt, Select); |
||
1087 | |||
1088 | Q1 = XMVectorMultiply(Q1, g_XMOneHalf); |
||
1089 | |||
1090 | SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf); |
||
1091 | |||
1092 | CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W); |
||
1093 | CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W); |
||
1094 | C = XMVectorGreaterOrEqual(CQ0, CQ1); |
||
1095 | |||
1096 | CX = XMVectorSplatX(C); |
||
1097 | CY = XMVectorSplatY(C); |
||
1098 | CZ = XMVectorSplatZ(C); |
||
1099 | CW = XMVectorSplatW(C); |
||
1100 | |||
1101 | PermuteSplat = XMVectorSelect(SplatZ, SplatY, CZ); |
||
1102 | SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ); |
||
1103 | PermuteControl = XMVectorSelect(Permute2, Permute1, CZ); |
||
1104 | |||
1105 | PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ, CX); |
||
1106 | SignB = XMVectorSelect(SignB, SignNPPP, CX); |
||
1107 | PermuteControl = XMVectorSelect(PermuteControl, Permute2, CX); |
||
1108 | |||
1109 | PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX, CY); |
||
1110 | SignBT = XMVectorSelect(SignB, SignPNPP, CY); |
||
1111 | PermuteControlT = XMVectorSelect(PermuteControl,Permute0, CY); |
||
1112 | |||
1113 | PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX); |
||
1114 | SignB = XMVectorSelect(SignB, SignBT, CX); |
||
1115 | PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX); |
||
1116 | |||
1117 | PermuteSplat = XMVectorSelect(PermuteSplat,SplatW, CW); |
||
1118 | SignB = XMVectorSelect(SignB, SignNNNX, CW); |
||
1119 | PermuteControl = XMVectorSelect(PermuteControl,Permute3, CW); |
||
1120 | |||
1121 | Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat); |
||
1122 | |||
1123 | P = XMVectorPermute(M.r[1], M.r[2],PermuteC); // {M10, M12, M20, M21} |
||
1124 | A = XMVectorPermute(M.r[0], P, PermuteA); // {M01, M12, M20, M03} |
||
1125 | B = XMVectorPermute(M.r[0], P, PermuteB); // {M10, M21, M02, M03} |
||
1126 | |||
1127 | Q2 = XMVectorMultiplyAdd(SignB, B, A); |
||
1128 | Q2 = XMVectorMultiply(Q2, Scale); |
||
1129 | |||
1130 | Result = XMVectorPermute(Q1, Q2, PermuteControl); |
||
1131 | |||
1132 | return Result; |
||
1133 | #else // _XM_VMX128_INTRINSICS_ |
||
1134 | #endif // _XM_VMX128_INTRINSICS_ |
||
1135 | } |
||
1136 | |||
1137 | //------------------------------------------------------------------------------ |
||
1138 | // Conversion operations |
||
1139 | //------------------------------------------------------------------------------ |
||
1140 | |||
1141 | //------------------------------------------------------------------------------ |
||
1142 | |||
1143 | XMFINLINE VOID XMQuaternionToAxisAngle |
||
1144 | ( |
||
1145 | XMVECTOR* pAxis, |
||
1146 | FLOAT* pAngle, |
||
1147 | FXMVECTOR Q |
||
1148 | ) |
||
1149 | { |
||
1150 | XMASSERT(pAxis); |
||
1151 | XMASSERT(pAngle); |
||
1152 | |||
1153 | *pAxis = Q; |
||
1154 | |||
1155 | #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) |
||
1156 | *pAngle = 2.0f * acosf(XMVectorGetW(Q)); |
||
1157 | #else |
||
1158 | *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); |
||
1159 | #endif |
||
1160 | } |
||
1161 | |||
1162 | /**************************************************************************** |
||
1163 | * |
||
1164 | * Plane |
||
1165 | * |
||
1166 | ****************************************************************************/ |
||
1167 | |||
1168 | //------------------------------------------------------------------------------ |
||
1169 | // Comparison operations |
||
1170 | //------------------------------------------------------------------------------ |
||
1171 | |||
1172 | //------------------------------------------------------------------------------ |
||
1173 | |||
1174 | XMFINLINE BOOL XMPlaneEqual |
||
1175 | ( |
||
1176 | FXMVECTOR P1, |
||
1177 | FXMVECTOR P2 |
||
1178 | ) |
||
1179 | { |
||
1180 | return XMVector4Equal(P1, P2); |
||
1181 | } |
||
1182 | |||
1183 | //------------------------------------------------------------------------------ |
||
1184 | |||
1185 | XMFINLINE BOOL XMPlaneNearEqual |
||
1186 | ( |
||
1187 | FXMVECTOR P1, |
||
1188 | FXMVECTOR P2, |
||
1189 | FXMVECTOR Epsilon |
||
1190 | ) |
||
1191 | { |
||
1192 | XMVECTOR NP1 = XMPlaneNormalize(P1); |
||
1193 | XMVECTOR NP2 = XMPlaneNormalize(P2); |
||
1194 | return XMVector4NearEqual(NP1, NP2, Epsilon); |
||
1195 | } |
||
1196 | |||
1197 | //------------------------------------------------------------------------------ |
||
1198 | |||
1199 | XMFINLINE BOOL XMPlaneNotEqual |
||
1200 | ( |
||
1201 | FXMVECTOR P1, |
||
1202 | FXMVECTOR P2 |
||
1203 | ) |
||
1204 | { |
||
1205 | return XMVector4NotEqual(P1, P2); |
||
1206 | } |
||
1207 | |||
1208 | //------------------------------------------------------------------------------ |
||
1209 | |||
1210 | XMFINLINE BOOL XMPlaneIsNaN |
||
1211 | ( |
||
1212 | FXMVECTOR P |
||
1213 | ) |
||
1214 | { |
||
1215 | return XMVector4IsNaN(P); |
||
1216 | } |
||
1217 | |||
1218 | //------------------------------------------------------------------------------ |
||
1219 | |||
1220 | XMFINLINE BOOL XMPlaneIsInfinite |
||
1221 | ( |
||
1222 | FXMVECTOR P |
||
1223 | ) |
||
1224 | { |
||
1225 | return XMVector4IsInfinite(P); |
||
1226 | } |
||
1227 | |||
1228 | //------------------------------------------------------------------------------ |
||
1229 | // Computation operations |
||
1230 | //------------------------------------------------------------------------------ |
||
1231 | |||
1232 | //------------------------------------------------------------------------------ |
||
1233 | |||
1234 | XMFINLINE XMVECTOR XMPlaneDot |
||
1235 | ( |
||
1236 | FXMVECTOR P, |
||
1237 | FXMVECTOR V |
||
1238 | ) |
||
1239 | { |
||
1240 | #if defined(_XM_NO_INTRINSICS_) |
||
1241 | |||
1242 | return XMVector4Dot(P, V); |
||
1243 | |||
1244 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1245 | __m128 vTemp2 = V; |
||
1246 | __m128 vTemp = _mm_mul_ps(P,vTemp2); |
||
1247 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
1248 | vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W; |
||
1249 | vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
1250 | vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together |
||
1251 | return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
1252 | #else // _XM_VMX128_INTRINSICS_ |
||
1253 | #endif // _XM_VMX128_INTRINSICS_ |
||
1254 | } |
||
1255 | |||
1256 | //------------------------------------------------------------------------------ |
||
1257 | |||
1258 | XMFINLINE XMVECTOR XMPlaneDotCoord |
||
1259 | ( |
||
1260 | FXMVECTOR P, |
||
1261 | FXMVECTOR V |
||
1262 | ) |
||
1263 | { |
||
1264 | #if defined(_XM_NO_INTRINSICS_) |
||
1265 | |||
1266 | XMVECTOR V3; |
||
1267 | XMVECTOR Result; |
||
1268 | |||
1269 | // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] |
||
1270 | V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); |
||
1271 | Result = XMVector4Dot(P, V3); |
||
1272 | |||
1273 | return Result; |
||
1274 | |||
1275 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1276 | XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3); |
||
1277 | vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3); |
||
1278 | XMVECTOR vTemp = _mm_mul_ps(P,vTemp2); |
||
1279 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
1280 | vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W; |
||
1281 | vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
1282 | vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together |
||
1283 | return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
1284 | #else // _XM_VMX128_INTRINSICS_ |
||
1285 | #endif // _XM_VMX128_INTRINSICS_ |
||
1286 | } |
||
1287 | |||
1288 | //------------------------------------------------------------------------------ |
||
1289 | |||
1290 | XMFINLINE XMVECTOR XMPlaneDotNormal |
||
1291 | ( |
||
1292 | FXMVECTOR P, |
||
1293 | FXMVECTOR V |
||
1294 | ) |
||
1295 | { |
||
1296 | return XMVector3Dot(P, V); |
||
1297 | } |
||
1298 | |||
1299 | //------------------------------------------------------------------------------ |
||
1300 | // XMPlaneNormalizeEst uses a reciprocal estimate and |
||
1301 | // returns QNaN on zero and infinite vectors. |
||
1302 | |||
1303 | XMFINLINE XMVECTOR XMPlaneNormalizeEst |
||
1304 | ( |
||
1305 | FXMVECTOR P |
||
1306 | ) |
||
1307 | { |
||
1308 | #if defined(_XM_NO_INTRINSICS_) |
||
1309 | |||
1310 | XMVECTOR Result; |
||
1311 | Result = XMVector3ReciprocalLength(P); |
||
1312 | Result = XMVectorMultiply(P, Result); |
||
1313 | return Result; |
||
1314 | |||
1315 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1316 | // Perform the dot product |
||
1317 | XMVECTOR vDot = _mm_mul_ps(P,P); |
||
1318 | // x=Dot.y, y=Dot.z |
||
1319 | XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1)); |
||
1320 | // Result.x = x+y |
||
1321 | vDot = _mm_add_ss(vDot,vTemp); |
||
1322 | // x=Dot.z |
||
1323 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1)); |
||
1324 | // Result.x = (x+y)+z |
||
1325 | vDot = _mm_add_ss(vDot,vTemp); |
||
1326 | // Splat x |
||
1327 | vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0)); |
||
1328 | // Get the reciprocal |
||
1329 | vDot = _mm_rsqrt_ps(vDot); |
||
1330 | // Get the reciprocal |
||
1331 | vDot = _mm_mul_ps(vDot,P); |
||
1332 | return vDot; |
||
1333 | #else // _XM_VMX128_INTRINSICS_ |
||
1334 | #endif // _XM_VMX128_INTRINSICS_ |
||
1335 | } |
||
1336 | |||
1337 | //------------------------------------------------------------------------------ |
||
1338 | |||
1339 | XMFINLINE XMVECTOR XMPlaneNormalize |
||
1340 | ( |
||
1341 | FXMVECTOR P |
||
1342 | ) |
||
1343 | { |
||
1344 | #if defined(_XM_NO_INTRINSICS_) |
||
1345 | FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2])); |
||
1346 | // Prevent divide by zero |
||
1347 | if (fLengthSq) { |
||
1348 | fLengthSq = 1.0f/fLengthSq; |
||
1349 | } |
||
1350 | { |
||
1351 | XMVECTOR vResult = { |
||
1352 | P.vector4_f32[0]*fLengthSq, |
||
1353 | P.vector4_f32[1]*fLengthSq, |
||
1354 | P.vector4_f32[2]*fLengthSq, |
||
1355 | P.vector4_f32[3]*fLengthSq |
||
1356 | }; |
||
1357 | return vResult; |
||
1358 | } |
||
1359 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1360 | // Perform the dot product on x,y and z only |
||
1361 | XMVECTOR vLengthSq = _mm_mul_ps(P,P); |
||
1362 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1)); |
||
1363 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
1364 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1)); |
||
1365 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
1366 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
1367 | // Prepare for the division |
||
1368 | XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); |
||
1369 | // Failsafe on zero (Or epsilon) length planes |
||
1370 | // If the length is infinity, set the elements to zero |
||
1371 | vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); |
||
1372 | // Reciprocal mul to perform the normalization |
||
1373 | vResult = _mm_div_ps(P,vResult); |
||
1374 | // Any that are infinity, set to zero |
||
1375 | vResult = _mm_and_ps(vResult,vLengthSq); |
||
1376 | return vResult; |
||
1377 | #else // _XM_VMX128_INTRINSICS_ |
||
1378 | #endif // _XM_VMX128_INTRINSICS_ |
||
1379 | } |
||
1380 | |||
1381 | //------------------------------------------------------------------------------ |
||
1382 | |||
1383 | XMFINLINE XMVECTOR XMPlaneIntersectLine |
||
1384 | ( |
||
1385 | FXMVECTOR P, |
||
1386 | FXMVECTOR LinePoint1, |
||
1387 | FXMVECTOR LinePoint2 |
||
1388 | ) |
||
1389 | { |
||
1390 | #if defined(_XM_NO_INTRINSICS_) |
||
1391 | |||
1392 | XMVECTOR V1; |
||
1393 | XMVECTOR V2; |
||
1394 | XMVECTOR D; |
||
1395 | XMVECTOR ReciprocalD; |
||
1396 | XMVECTOR VT; |
||
1397 | XMVECTOR Point; |
||
1398 | XMVECTOR Zero; |
||
1399 | XMVECTOR Control; |
||
1400 | XMVECTOR Result; |
||
1401 | |||
1402 | V1 = XMVector3Dot(P, LinePoint1); |
||
1403 | V2 = XMVector3Dot(P, LinePoint2); |
||
1404 | D = XMVectorSubtract(V1, V2); |
||
1405 | |||
1406 | ReciprocalD = XMVectorReciprocal(D); |
||
1407 | VT = XMPlaneDotCoord(P, LinePoint1); |
||
1408 | VT = XMVectorMultiply(VT, ReciprocalD); |
||
1409 | |||
1410 | Point = XMVectorSubtract(LinePoint2, LinePoint1); |
||
1411 | Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); |
||
1412 | |||
1413 | Zero = XMVectorZero(); |
||
1414 | Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); |
||
1415 | |||
1416 | Result = XMVectorSelect(Point, g_XMQNaN.v, Control); |
||
1417 | |||
1418 | return Result; |
||
1419 | |||
1420 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1421 | XMVECTOR V1; |
||
1422 | XMVECTOR V2; |
||
1423 | XMVECTOR D; |
||
1424 | XMVECTOR VT; |
||
1425 | XMVECTOR Point; |
||
1426 | XMVECTOR Zero; |
||
1427 | XMVECTOR Control; |
||
1428 | XMVECTOR Result; |
||
1429 | |||
1430 | V1 = XMVector3Dot(P, LinePoint1); |
||
1431 | V2 = XMVector3Dot(P, LinePoint2); |
||
1432 | D = _mm_sub_ps(V1, V2); |
||
1433 | |||
1434 | VT = XMPlaneDotCoord(P, LinePoint1); |
||
1435 | VT = _mm_div_ps(VT, D); |
||
1436 | |||
1437 | Point = _mm_sub_ps(LinePoint2, LinePoint1); |
||
1438 | Point = _mm_mul_ps(Point,VT); |
||
1439 | Point = _mm_add_ps(Point,LinePoint1); |
||
1440 | Zero = XMVectorZero(); |
||
1441 | Control = XMVectorNearEqual(D, Zero, g_XMEpsilon); |
||
1442 | Result = XMVectorSelect(Point, g_XMQNaN, Control); |
||
1443 | return Result; |
||
1444 | #else // _XM_VMX128_INTRINSICS_ |
||
1445 | #endif // _XM_VMX128_INTRINSICS_ |
||
1446 | } |
||
1447 | |||
1448 | //------------------------------------------------------------------------------ |
||
1449 | |||
1450 | XMINLINE VOID XMPlaneIntersectPlane |
||
1451 | ( |
||
1452 | XMVECTOR* pLinePoint1, |
||
1453 | XMVECTOR* pLinePoint2, |
||
1454 | FXMVECTOR P1, |
||
1455 | FXMVECTOR P2 |
||
1456 | ) |
||
1457 | { |
||
1458 | #if defined(_XM_NO_INTRINSICS_) |
||
1459 | |||
1460 | XMVECTOR V1; |
||
1461 | XMVECTOR V2; |
||
1462 | XMVECTOR V3; |
||
1463 | XMVECTOR LengthSq; |
||
1464 | XMVECTOR RcpLengthSq; |
||
1465 | XMVECTOR Point; |
||
1466 | XMVECTOR P1W; |
||
1467 | XMVECTOR P2W; |
||
1468 | XMVECTOR Control; |
||
1469 | XMVECTOR LinePoint1; |
||
1470 | XMVECTOR LinePoint2; |
||
1471 | |||
1472 | XMASSERT(pLinePoint1); |
||
1473 | XMASSERT(pLinePoint2); |
||
1474 | |||
1475 | V1 = XMVector3Cross(P2, P1); |
||
1476 | |||
1477 | LengthSq = XMVector3LengthSq(V1); |
||
1478 | |||
1479 | V2 = XMVector3Cross(P2, V1); |
||
1480 | |||
1481 | P1W = XMVectorSplatW(P1); |
||
1482 | Point = XMVectorMultiply(V2, P1W); |
||
1483 | |||
1484 | V3 = XMVector3Cross(V1, P1); |
||
1485 | |||
1486 | P2W = XMVectorSplatW(P2); |
||
1487 | Point = XMVectorMultiplyAdd(V3, P2W, Point); |
||
1488 | |||
1489 | RcpLengthSq = XMVectorReciprocal(LengthSq); |
||
1490 | LinePoint1 = XMVectorMultiply(Point, RcpLengthSq); |
||
1491 | |||
1492 | LinePoint2 = XMVectorAdd(LinePoint1, V1); |
||
1493 | |||
1494 | Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); |
||
1495 | *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control); |
||
1496 | *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control); |
||
1497 | |||
1498 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1499 | XMASSERT(pLinePoint1); |
||
1500 | XMASSERT(pLinePoint2); |
||
1501 | XMVECTOR V1; |
||
1502 | XMVECTOR V2; |
||
1503 | XMVECTOR V3; |
||
1504 | XMVECTOR LengthSq; |
||
1505 | XMVECTOR Point; |
||
1506 | XMVECTOR P1W; |
||
1507 | XMVECTOR P2W; |
||
1508 | XMVECTOR Control; |
||
1509 | XMVECTOR LinePoint1; |
||
1510 | XMVECTOR LinePoint2; |
||
1511 | |||
1512 | V1 = XMVector3Cross(P2, P1); |
||
1513 | |||
1514 | LengthSq = XMVector3LengthSq(V1); |
||
1515 | |||
1516 | V2 = XMVector3Cross(P2, V1); |
||
1517 | |||
1518 | P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3)); |
||
1519 | Point = _mm_mul_ps(V2, P1W); |
||
1520 | |||
1521 | V3 = XMVector3Cross(V1, P1); |
||
1522 | |||
1523 | P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3)); |
||
1524 | V3 = _mm_mul_ps(V3,P2W); |
||
1525 | Point = _mm_add_ps(Point,V3); |
||
1526 | LinePoint1 = _mm_div_ps(Point,LengthSq); |
||
1527 | |||
1528 | LinePoint2 = _mm_add_ps(LinePoint1, V1); |
||
1529 | |||
1530 | Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon); |
||
1531 | *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control); |
||
1532 | *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control); |
||
1533 | #else // _XM_VMX128_INTRINSICS_ |
||
1534 | #endif // _XM_VMX128_INTRINSICS_ |
||
1535 | } |
||
1536 | |||
1537 | //------------------------------------------------------------------------------ |
||
1538 | |||
1539 | XMFINLINE XMVECTOR XMPlaneTransform |
||
1540 | ( |
||
1541 | FXMVECTOR P, |
||
1542 | CXMMATRIX M |
||
1543 | ) |
||
1544 | { |
||
1545 | #if defined(_XM_NO_INTRINSICS_) |
||
1546 | |||
1547 | XMVECTOR X; |
||
1548 | XMVECTOR Y; |
||
1549 | XMVECTOR Z; |
||
1550 | XMVECTOR W; |
||
1551 | XMVECTOR Result; |
||
1552 | |||
1553 | W = XMVectorSplatW(P); |
||
1554 | Z = XMVectorSplatZ(P); |
||
1555 | Y = XMVectorSplatY(P); |
||
1556 | X = XMVectorSplatX(P); |
||
1557 | |||
1558 | Result = XMVectorMultiply(W, M.r[3]); |
||
1559 | Result = XMVectorMultiplyAdd(Z, M.r[2], Result); |
||
1560 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
1561 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
1562 | |||
1563 | return Result; |
||
1564 | |||
1565 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1566 | XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0)); |
||
1567 | XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1)); |
||
1568 | XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2)); |
||
1569 | XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3)); |
||
1570 | X = _mm_mul_ps(X, M.r[0]); |
||
1571 | Y = _mm_mul_ps(Y, M.r[1]); |
||
1572 | Z = _mm_mul_ps(Z, M.r[2]); |
||
1573 | W = _mm_mul_ps(W, M.r[3]); |
||
1574 | X = _mm_add_ps(X,Z); |
||
1575 | Y = _mm_add_ps(Y,W); |
||
1576 | X = _mm_add_ps(X,Y); |
||
1577 | return X; |
||
1578 | #else // _XM_VMX128_INTRINSICS_ |
||
1579 | #endif // _XM_VMX128_INTRINSICS_ |
||
1580 | } |
||
1581 | |||
1582 | //------------------------------------------------------------------------------ |
||
1583 | |||
1584 | XMFINLINE XMFLOAT4* XMPlaneTransformStream |
||
1585 | ( |
||
1586 | XMFLOAT4* pOutputStream, |
||
1587 | UINT OutputStride, |
||
1588 | CONST XMFLOAT4* pInputStream, |
||
1589 | UINT InputStride, |
||
1590 | UINT PlaneCount, |
||
1591 | CXMMATRIX M |
||
1592 | ) |
||
1593 | { |
||
1594 | return XMVector4TransformStream(pOutputStream, |
||
1595 | OutputStride, |
||
1596 | pInputStream, |
||
1597 | InputStride, |
||
1598 | PlaneCount, |
||
1599 | M); |
||
1600 | } |
||
1601 | |||
1602 | //------------------------------------------------------------------------------ |
||
1603 | // Conversion operations |
||
1604 | //------------------------------------------------------------------------------ |
||
1605 | |||
1606 | //------------------------------------------------------------------------------ |
||
1607 | |||
1608 | XMFINLINE XMVECTOR XMPlaneFromPointNormal |
||
1609 | ( |
||
1610 | FXMVECTOR Point, |
||
1611 | FXMVECTOR Normal |
||
1612 | ) |
||
1613 | { |
||
1614 | #if defined(_XM_NO_INTRINSICS_) |
||
1615 | |||
1616 | XMVECTOR W; |
||
1617 | XMVECTOR Result; |
||
1618 | |||
1619 | W = XMVector3Dot(Point, Normal); |
||
1620 | W = XMVectorNegate(W); |
||
1621 | Result = XMVectorSelect(W, Normal, g_XMSelect1110.v); |
||
1622 | |||
1623 | return Result; |
||
1624 | |||
1625 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1626 | XMVECTOR W; |
||
1627 | XMVECTOR Result; |
||
1628 | W = XMVector3Dot(Point,Normal); |
||
1629 | W = _mm_mul_ps(W,g_XMNegativeOne); |
||
1630 | Result = _mm_and_ps(Normal,g_XMMask3); |
||
1631 | W = _mm_and_ps(W,g_XMMaskW); |
||
1632 | Result = _mm_or_ps(Result,W); |
||
1633 | return Result; |
||
1634 | #else // _XM_VMX128_INTRINSICS_ |
||
1635 | #endif // _XM_VMX128_INTRINSICS_ |
||
1636 | } |
||
1637 | |||
1638 | //------------------------------------------------------------------------------ |
||
1639 | |||
1640 | XMFINLINE XMVECTOR XMPlaneFromPoints |
||
1641 | ( |
||
1642 | FXMVECTOR Point1, |
||
1643 | FXMVECTOR Point2, |
||
1644 | FXMVECTOR Point3 |
||
1645 | ) |
||
1646 | { |
||
1647 | #if defined(_XM_NO_INTRINSICS_) |
||
1648 | |||
1649 | XMVECTOR N; |
||
1650 | XMVECTOR D; |
||
1651 | XMVECTOR V21; |
||
1652 | XMVECTOR V31; |
||
1653 | XMVECTOR Result; |
||
1654 | |||
1655 | V21 = XMVectorSubtract(Point1, Point2); |
||
1656 | V31 = XMVectorSubtract(Point1, Point3); |
||
1657 | |||
1658 | N = XMVector3Cross(V21, V31); |
||
1659 | N = XMVector3Normalize(N); |
||
1660 | |||
1661 | D = XMPlaneDotNormal(N, Point1); |
||
1662 | D = XMVectorNegate(D); |
||
1663 | |||
1664 | Result = XMVectorSelect(D, N, g_XMSelect1110.v); |
||
1665 | |||
1666 | return Result; |
||
1667 | |||
1668 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1669 | XMVECTOR N; |
||
1670 | XMVECTOR D; |
||
1671 | XMVECTOR V21; |
||
1672 | XMVECTOR V31; |
||
1673 | XMVECTOR Result; |
||
1674 | |||
1675 | V21 = _mm_sub_ps(Point1, Point2); |
||
1676 | V31 = _mm_sub_ps(Point1, Point3); |
||
1677 | |||
1678 | N = XMVector3Cross(V21, V31); |
||
1679 | N = XMVector3Normalize(N); |
||
1680 | |||
1681 | D = XMPlaneDotNormal(N, Point1); |
||
1682 | D = _mm_mul_ps(D,g_XMNegativeOne); |
||
1683 | N = _mm_and_ps(N,g_XMMask3); |
||
1684 | D = _mm_and_ps(D,g_XMMaskW); |
||
1685 | Result = _mm_or_ps(D,N); |
||
1686 | return Result; |
||
1687 | #else // _XM_VMX128_INTRINSICS_ |
||
1688 | #endif // _XM_VMX128_INTRINSICS_ |
||
1689 | } |
||
1690 | |||
1691 | /**************************************************************************** |
||
1692 | * |
||
1693 | * Color |
||
1694 | * |
||
1695 | ****************************************************************************/ |
||
1696 | |||
1697 | //------------------------------------------------------------------------------ |
||
1698 | // Comparison operations |
||
1699 | //------------------------------------------------------------------------------ |
||
1700 | |||
1701 | //------------------------------------------------------------------------------ |
||
1702 | |||
1703 | XMFINLINE BOOL XMColorEqual |
||
1704 | ( |
||
1705 | FXMVECTOR C1, |
||
1706 | FXMVECTOR C2 |
||
1707 | ) |
||
1708 | { |
||
1709 | return XMVector4Equal(C1, C2); |
||
1710 | } |
||
1711 | |||
1712 | //------------------------------------------------------------------------------ |
||
1713 | |||
1714 | XMFINLINE BOOL XMColorNotEqual |
||
1715 | ( |
||
1716 | FXMVECTOR C1, |
||
1717 | FXMVECTOR C2 |
||
1718 | ) |
||
1719 | { |
||
1720 | return XMVector4NotEqual(C1, C2); |
||
1721 | } |
||
1722 | |||
1723 | //------------------------------------------------------------------------------ |
||
1724 | |||
1725 | XMFINLINE BOOL XMColorGreater |
||
1726 | ( |
||
1727 | FXMVECTOR C1, |
||
1728 | FXMVECTOR C2 |
||
1729 | ) |
||
1730 | { |
||
1731 | return XMVector4Greater(C1, C2); |
||
1732 | } |
||
1733 | |||
1734 | //------------------------------------------------------------------------------ |
||
1735 | |||
1736 | XMFINLINE BOOL XMColorGreaterOrEqual |
||
1737 | ( |
||
1738 | FXMVECTOR C1, |
||
1739 | FXMVECTOR C2 |
||
1740 | ) |
||
1741 | { |
||
1742 | return XMVector4GreaterOrEqual(C1, C2); |
||
1743 | } |
||
1744 | |||
1745 | //------------------------------------------------------------------------------ |
||
1746 | |||
1747 | XMFINLINE BOOL XMColorLess |
||
1748 | ( |
||
1749 | FXMVECTOR C1, |
||
1750 | FXMVECTOR C2 |
||
1751 | ) |
||
1752 | { |
||
1753 | return XMVector4Less(C1, C2); |
||
1754 | } |
||
1755 | |||
1756 | //------------------------------------------------------------------------------ |
||
1757 | |||
1758 | XMFINLINE BOOL XMColorLessOrEqual |
||
1759 | ( |
||
1760 | FXMVECTOR C1, |
||
1761 | FXMVECTOR C2 |
||
1762 | ) |
||
1763 | { |
||
1764 | return XMVector4LessOrEqual(C1, C2); |
||
1765 | } |
||
1766 | |||
1767 | //------------------------------------------------------------------------------ |
||
1768 | |||
1769 | XMFINLINE BOOL XMColorIsNaN |
||
1770 | ( |
||
1771 | FXMVECTOR C |
||
1772 | ) |
||
1773 | { |
||
1774 | return XMVector4IsNaN(C); |
||
1775 | } |
||
1776 | |||
1777 | //------------------------------------------------------------------------------ |
||
1778 | |||
1779 | XMFINLINE BOOL XMColorIsInfinite |
||
1780 | ( |
||
1781 | FXMVECTOR C |
||
1782 | ) |
||
1783 | { |
||
1784 | return XMVector4IsInfinite(C); |
||
1785 | } |
||
1786 | |||
1787 | //------------------------------------------------------------------------------ |
||
1788 | // Computation operations |
||
1789 | //------------------------------------------------------------------------------ |
||
1790 | |||
1791 | //------------------------------------------------------------------------------ |
||
1792 | |||
1793 | XMFINLINE XMVECTOR XMColorNegative |
||
1794 | ( |
||
1795 | FXMVECTOR vColor |
||
1796 | ) |
||
1797 | { |
||
1798 | #if defined(_XM_NO_INTRINSICS_) |
||
1799 | // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f))); |
||
1800 | // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f))); |
||
1801 | XMVECTOR vResult = { |
||
1802 | 1.0f - vColor.vector4_f32[0], |
||
1803 | 1.0f - vColor.vector4_f32[1], |
||
1804 | 1.0f - vColor.vector4_f32[2], |
||
1805 | vColor.vector4_f32[3] |
||
1806 | }; |
||
1807 | return vResult; |
||
1808 | |||
1809 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1810 | // Negate only x,y and z. |
||
1811 | XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3); |
||
1812 | // Add 1,1,1,0 to -x,-y,-z,w |
||
1813 | return _mm_add_ps(vTemp,g_XMOne3); |
||
1814 | #else // _XM_VMX128_INTRINSICS_ |
||
1815 | #endif // _XM_VMX128_INTRINSICS_ |
||
1816 | } |
||
1817 | |||
1818 | //------------------------------------------------------------------------------ |
||
1819 | |||
1820 | XMFINLINE XMVECTOR XMColorModulate |
||
1821 | ( |
||
1822 | FXMVECTOR C1, |
||
1823 | FXMVECTOR C2 |
||
1824 | ) |
||
1825 | { |
||
1826 | return XMVectorMultiply(C1, C2); |
||
1827 | } |
||
1828 | |||
1829 | //------------------------------------------------------------------------------ |
||
1830 | |||
1831 | XMFINLINE XMVECTOR XMColorAdjustSaturation |
||
1832 | ( |
||
1833 | FXMVECTOR vColor, |
||
1834 | FLOAT fSaturation |
||
1835 | ) |
||
1836 | { |
||
1837 | #if defined(_XM_NO_INTRINSICS_) |
||
1838 | CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; |
||
1839 | |||
1840 | // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; |
||
1841 | // Result = (C - Luminance) * Saturation + Luminance; |
||
1842 | |||
1843 | FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]); |
||
1844 | XMVECTOR vResult = { |
||
1845 | ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance, |
||
1846 | ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance, |
||
1847 | ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance, |
||
1848 | vColor.vector4_f32[3]}; |
||
1849 | return vResult; |
||
1850 | |||
1851 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1852 | static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; |
||
1853 | // Mul RGB by intensity constants |
||
1854 | XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance); |
||
1855 | // vResult.x = vLuminance.y, vResult.y = vLuminance.y, |
||
1856 | // vResult.z = vLuminance.z, vResult.w = vLuminance.z |
||
1857 | XMVECTOR vResult = vLuminance; |
||
1858 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1)); |
||
1859 | // vLuminance.x += vLuminance.y |
||
1860 | vLuminance = _mm_add_ss(vLuminance,vResult); |
||
1861 | // Splat vLuminance.z |
||
1862 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2)); |
||
1863 | // vLuminance.x += vLuminance.z (Dot product) |
||
1864 | vLuminance = _mm_add_ss(vLuminance,vResult); |
||
1865 | // Splat vLuminance |
||
1866 | vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0)); |
||
1867 | // Splat fSaturation |
||
1868 | XMVECTOR vSaturation = _mm_set_ps1(fSaturation); |
||
1869 | // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; |
||
1870 | vResult = _mm_sub_ps(vColor,vLuminance); |
||
1871 | vResult = _mm_mul_ps(vResult,vSaturation); |
||
1872 | vResult = _mm_add_ps(vResult,vLuminance); |
||
1873 | // Retain w from the source color |
||
1874 | vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w |
||
1875 | vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w |
||
1876 | return vResult; |
||
1877 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
1878 | #endif // _XM_VMX128_INTRINSICS_ |
||
1879 | } |
||
1880 | |||
1881 | //------------------------------------------------------------------------------ |
||
1882 | |||
1883 | XMFINLINE XMVECTOR XMColorAdjustContrast |
||
1884 | ( |
||
1885 | FXMVECTOR vColor, |
||
1886 | FLOAT fContrast |
||
1887 | ) |
||
1888 | { |
||
1889 | #if defined(_XM_NO_INTRINSICS_) |
||
1890 | // Result = (vColor - 0.5f) * fContrast + 0.5f; |
||
1891 | XMVECTOR vResult = { |
||
1892 | ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f, |
||
1893 | ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f, |
||
1894 | ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f, |
||
1895 | vColor.vector4_f32[3] // Leave W untouched |
||
1896 | }; |
||
1897 | return vResult; |
||
1898 | |||
1899 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1900 | XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale |
||
1901 | XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source) |
||
1902 | vResult = _mm_mul_ps(vResult,vScale); // Mul by scale |
||
1903 | vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f |
||
1904 | // Retain w from the source color |
||
1905 | vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w |
||
1906 | vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w |
||
1907 | return vResult; |
||
1908 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
1909 | #endif // _XM_VMX128_INTRINSICS_ |
||
1910 | } |
||
1911 | |||
1912 | /**************************************************************************** |
||
1913 | * |
||
1914 | * Miscellaneous |
||
1915 | * |
||
1916 | ****************************************************************************/ |
||
1917 | |||
1918 | //------------------------------------------------------------------------------ |
||
1919 | |||
1920 | XMINLINE BOOL XMVerifyCPUSupport() |
||
1921 | { |
||
1922 | #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_) |
||
1923 | return TRUE; |
||
1924 | #else // _XM_SSE_INTRINSICS_ |
||
1925 | // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail |
||
1926 | // Detecting SSE2 on older versions of Windows would require using cpuid directly |
||
1927 | return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) ); |
||
1928 | #endif |
||
1929 | } |
||
1930 | |||
1931 | |||
1932 | //------------------------------------------------------------------------------ |
||
1933 | |||
1934 | #define XMASSERT_LINE_STRING_SIZE 16 |
||
1935 | |||
1936 | XMINLINE VOID XMAssert |
||
1937 | ( |
||
1938 | CONST CHAR* pExpression, |
||
1939 | CONST CHAR* pFileName, |
||
1940 | UINT LineNumber |
||
1941 | ) |
||
1942 | { |
||
1943 | CHAR aLineString[XMASSERT_LINE_STRING_SIZE]; |
||
1944 | CHAR* pLineString; |
||
1945 | UINT Line; |
||
1946 | |||
1947 | aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0'; |
||
1948 | aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0'; |
||
1949 | for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2; |
||
1950 | Line != 0 && pLineString >= aLineString; |
||
1951 | Line /= 10, pLineString--) |
||
1952 | { |
||
1953 | *pLineString = (CHAR)('0' + (Line % 10)); |
||
1954 | } |
||
1955 | |||
1956 | #ifndef NO_OUTPUT_DEBUG_STRING |
||
1957 | OutputDebugStringA("Assertion failed: "); |
||
1958 | OutputDebugStringA(pExpression); |
||
1959 | OutputDebugStringA(", file "); |
||
1960 | OutputDebugStringA(pFileName); |
||
1961 | OutputDebugStringA(", line "); |
||
1962 | OutputDebugStringA(pLineString + 1); |
||
1963 | OutputDebugStringA("\r\n"); |
||
1964 | #else |
||
1965 | DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber); |
||
1966 | #endif |
||
1967 | |||
1968 | __debugbreak(); |
||
1969 | } |
||
1970 | |||
1971 | //------------------------------------------------------------------------------ |
||
1972 | |||
1973 | XMFINLINE XMVECTOR XMFresnelTerm |
||
1974 | ( |
||
1975 | FXMVECTOR CosIncidentAngle, |
||
1976 | FXMVECTOR RefractionIndex |
||
1977 | ) |
||
1978 | { |
||
1979 | #if defined(_XM_NO_INTRINSICS_) |
||
1980 | |||
1981 | XMVECTOR G; |
||
1982 | XMVECTOR D, S; |
||
1983 | XMVECTOR V0, V1, V2, V3; |
||
1984 | XMVECTOR Result; |
||
1985 | |||
1986 | // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where |
||
1987 | // c = CosIncidentAngle |
||
1988 | // g = sqrt(c^2 + RefractionIndex^2 - 1) |
||
1989 | |||
1990 | XMASSERT(!XMVector4IsInfinite(CosIncidentAngle)); |
||
1991 | |||
1992 | G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); |
||
1993 | G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); |
||
1994 | G = XMVectorAbs(G); |
||
1995 | G = XMVectorSqrt(G); |
||
1996 | |||
1997 | S = XMVectorAdd(G, CosIncidentAngle); |
||
1998 | D = XMVectorSubtract(G, CosIncidentAngle); |
||
1999 | |||
2000 | V0 = XMVectorMultiply(D, D); |
||
2001 | V1 = XMVectorMultiply(S, S); |
||
2002 | V1 = XMVectorReciprocal(V1); |
||
2003 | V0 = XMVectorMultiply(g_XMOneHalf.v, V0); |
||
2004 | V0 = XMVectorMultiply(V0, V1); |
||
2005 | |||
2006 | V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); |
||
2007 | V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); |
||
2008 | V2 = XMVectorMultiply(V2, V2); |
||
2009 | V3 = XMVectorMultiply(V3, V3); |
||
2010 | V3 = XMVectorReciprocal(V3); |
||
2011 | V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); |
||
2012 | |||
2013 | Result = XMVectorMultiply(V0, V2); |
||
2014 | |||
2015 | Result = XMVectorSaturate(Result); |
||
2016 | |||
2017 | return Result; |
||
2018 | |||
2019 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2020 | // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where |
||
2021 | // c = CosIncidentAngle |
||
2022 | // g = sqrt(c^2 + RefractionIndex^2 - 1) |
||
2023 | |||
2024 | XMASSERT(!XMVector4IsInfinite(CosIncidentAngle)); |
||
2025 | |||
2026 | // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) |
||
2027 | XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex); |
||
2028 | XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle); |
||
2029 | G = _mm_sub_ps(G,g_XMOne); |
||
2030 | vTemp = _mm_add_ps(vTemp,G); |
||
2031 | // max((0-vTemp),vTemp) == abs(vTemp) |
||
2032 | // The abs is needed to deal with refraction and cosine being zero |
||
2033 | G = _mm_setzero_ps(); |
||
2034 | G = _mm_sub_ps(G,vTemp); |
||
2035 | G = _mm_max_ps(G,vTemp); |
||
2036 | // Last operation, the sqrt() |
||
2037 | G = _mm_sqrt_ps(G); |
||
2038 | |||
2039 | // Calc G-C and G+C |
||
2040 | XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle); |
||
2041 | XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle); |
||
2042 | // Perform the term (0.5f *(g - c)^2) / (g + c)^2 |
||
2043 | XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC); |
||
2044 | vTemp = _mm_mul_ps(GAddC,GAddC); |
||
2045 | vResult = _mm_mul_ps(vResult,g_XMOneHalf); |
||
2046 | vResult = _mm_div_ps(vResult,vTemp); |
||
2047 | // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) |
||
2048 | GAddC = _mm_mul_ps(GAddC,CosIncidentAngle); |
||
2049 | GSubC = _mm_mul_ps(GSubC,CosIncidentAngle); |
||
2050 | GAddC = _mm_sub_ps(GAddC,g_XMOne); |
||
2051 | GSubC = _mm_add_ps(GSubC,g_XMOne); |
||
2052 | GAddC = _mm_mul_ps(GAddC,GAddC); |
||
2053 | GSubC = _mm_mul_ps(GSubC,GSubC); |
||
2054 | GAddC = _mm_div_ps(GAddC,GSubC); |
||
2055 | GAddC = _mm_add_ps(GAddC,g_XMOne); |
||
2056 | // Multiply the two term parts |
||
2057 | vResult = _mm_mul_ps(vResult,GAddC); |
||
2058 | // Clamp to 0.0 - 1.0f |
||
2059 | vResult = _mm_max_ps(vResult,g_XMZero); |
||
2060 | vResult = _mm_min_ps(vResult,g_XMOne); |
||
2061 | return vResult; |
||
2062 | #else // _XM_VMX128_INTRINSICS_ |
||
2063 | #endif // _XM_VMX128_INTRINSICS_ |
||
2064 | } |
||
2065 | |||
2066 | //------------------------------------------------------------------------------ |
||
2067 | |||
2068 | XMFINLINE BOOL XMScalarNearEqual |
||
2069 | ( |
||
2070 | FLOAT S1, |
||
2071 | FLOAT S2, |
||
2072 | FLOAT Epsilon |
||
2073 | ) |
||
2074 | { |
||
2075 | FLOAT Delta = S1 - S2; |
||
2076 | #if defined(_XM_NO_INTRINSICS_) |
||
2077 | UINT AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF; |
||
2078 | return (*(FLOAT*)&AbsDelta <= Epsilon); |
||
2079 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2080 | return (fabsf(Delta) <= Epsilon); |
||
2081 | #else |
||
2082 | return (__fabs(Delta) <= Epsilon); |
||
2083 | #endif |
||
2084 | } |
||
2085 | |||
2086 | //------------------------------------------------------------------------------ |
||
2087 | // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI |
||
2088 | XMFINLINE FLOAT XMScalarModAngle |
||
2089 | ( |
||
2090 | FLOAT Angle |
||
2091 | ) |
||
2092 | { |
||
2093 | // Note: The modulo is performed with unsigned math only to work |
||
2094 | // around a precision error on numbers that are close to PI |
||
2095 | float fTemp; |
||
2096 | #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_) |
||
2097 | // Normalize the range from 0.0f to XM_2PI |
||
2098 | Angle = Angle + XM_PI; |
||
2099 | // Perform the modulo, unsigned |
||
2100 | fTemp = fabsf(Angle); |
||
2101 | fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI))); |
||
2102 | // Restore the number to the range of -XM_PI to XM_PI-epsilon |
||
2103 | fTemp = fTemp - XM_PI; |
||
2104 | // If the modulo'd value was negative, restore negation |
||
2105 | if (Angle<0.0f) { |
||
2106 | fTemp = -fTemp; |
||
2107 | } |
||
2108 | return fTemp; |
||
2109 | #else |
||
2110 | #endif |
||
2111 | } |
||
2112 | |||
2113 | //------------------------------------------------------------------------------ |
||
2114 | |||
2115 | XMINLINE FLOAT XMScalarSin |
||
2116 | ( |
||
2117 | FLOAT Value |
||
2118 | ) |
||
2119 | { |
||
2120 | #if defined(_XM_NO_INTRINSICS_) |
||
2121 | |||
2122 | FLOAT ValueMod; |
||
2123 | FLOAT ValueSq; |
||
2124 | XMVECTOR V0123, V0246, V1357, V9111315, V17192123; |
||
2125 | XMVECTOR V1, V7, V8; |
||
2126 | XMVECTOR R0, R1, R2; |
||
2127 | |||
2128 | ValueMod = XMScalarModAngle(Value); |
||
2129 | |||
2130 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! + |
||
2131 | // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) |
||
2132 | |||
2133 | ValueSq = ValueMod * ValueMod; |
||
2134 | |||
2135 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
2136 | V1 = XMVectorSplatY(V0123); |
||
2137 | V0246 = XMVectorMultiply(V0123, V0123); |
||
2138 | V1357 = XMVectorMultiply(V0246, V1); |
||
2139 | V7 = XMVectorSplatW(V1357); |
||
2140 | V8 = XMVectorMultiply(V7, V1); |
||
2141 | V9111315 = XMVectorMultiply(V1357, V8); |
||
2142 | V17192123 = XMVectorMultiply(V9111315, V8); |
||
2143 | |||
2144 | R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v); |
||
2145 | R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v); |
||
2146 | R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v); |
||
2147 | |||
2148 | return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0]; |
||
2149 | |||
2150 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2151 | return sinf( Value ); |
||
2152 | #else // _XM_VMX128_INTRINSICS_ |
||
2153 | #endif // _XM_VMX128_INTRINSICS_ |
||
2154 | } |
||
2155 | |||
2156 | //------------------------------------------------------------------------------ |
||
2157 | |||
2158 | XMINLINE FLOAT XMScalarCos |
||
2159 | ( |
||
2160 | FLOAT Value |
||
2161 | ) |
||
2162 | { |
||
2163 | #if defined(_XM_NO_INTRINSICS_) |
||
2164 | |||
2165 | FLOAT ValueMod; |
||
2166 | FLOAT ValueSq; |
||
2167 | XMVECTOR V0123, V0246, V8101214, V16182022; |
||
2168 | XMVECTOR V2, V6, V8; |
||
2169 | XMVECTOR R0, R1, R2; |
||
2170 | |||
2171 | ValueMod = XMScalarModAngle(Value); |
||
2172 | |||
2173 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + |
||
2174 | // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) |
||
2175 | |||
2176 | ValueSq = ValueMod * ValueMod; |
||
2177 | |||
2178 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
2179 | V0246 = XMVectorMultiply(V0123, V0123); |
||
2180 | |||
2181 | V2 = XMVectorSplatZ(V0123); |
||
2182 | V6 = XMVectorSplatW(V0246); |
||
2183 | V8 = XMVectorMultiply(V6, V2); |
||
2184 | |||
2185 | V8101214 = XMVectorMultiply(V0246, V8); |
||
2186 | V16182022 = XMVectorMultiply(V8101214, V8); |
||
2187 | |||
2188 | R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v); |
||
2189 | R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v); |
||
2190 | R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v); |
||
2191 | |||
2192 | return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0]; |
||
2193 | |||
2194 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2195 | return cosf(Value); |
||
2196 | #else // _XM_VMX128_INTRINSICS_ |
||
2197 | #endif // _XM_VMX128_INTRINSICS_ |
||
2198 | } |
||
2199 | |||
2200 | //------------------------------------------------------------------------------ |
||
2201 | |||
2202 | XMINLINE VOID XMScalarSinCos |
||
2203 | ( |
||
2204 | FLOAT* pSin, |
||
2205 | FLOAT* pCos, |
||
2206 | FLOAT Value |
||
2207 | ) |
||
2208 | { |
||
2209 | #if defined(_XM_NO_INTRINSICS_) |
||
2210 | |||
2211 | FLOAT ValueMod; |
||
2212 | FLOAT ValueSq; |
||
2213 | XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123; |
||
2214 | XMVECTOR V1, V2, V6, V8; |
||
2215 | XMVECTOR S0, S1, S2, C0, C1, C2; |
||
2216 | |||
2217 | XMASSERT(pSin); |
||
2218 | XMASSERT(pCos); |
||
2219 | |||
2220 | ValueMod = XMScalarModAngle(Value); |
||
2221 | |||
2222 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! + |
||
2223 | // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) |
||
2224 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + |
||
2225 | // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) |
||
2226 | |||
2227 | ValueSq = ValueMod * ValueMod; |
||
2228 | |||
2229 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
2230 | |||
2231 | V1 = XMVectorSplatY(V0123); |
||
2232 | V2 = XMVectorSplatZ(V0123); |
||
2233 | |||
2234 | V0246 = XMVectorMultiply(V0123, V0123); |
||
2235 | V1357 = XMVectorMultiply(V0246, V1); |
||
2236 | |||
2237 | V6 = XMVectorSplatW(V0246); |
||
2238 | V8 = XMVectorMultiply(V6, V2); |
||
2239 | |||
2240 | V8101214 = XMVectorMultiply(V0246, V8); |
||
2241 | V9111315 = XMVectorMultiply(V1357, V8); |
||
2242 | V16182022 = XMVectorMultiply(V8101214, V8); |
||
2243 | V17192123 = XMVectorMultiply(V9111315, V8); |
||
2244 | |||
2245 | C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v); |
||
2246 | S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v); |
||
2247 | C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v); |
||
2248 | S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v); |
||
2249 | C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v); |
||
2250 | S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v); |
||
2251 | |||
2252 | *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0]; |
||
2253 | *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0]; |
||
2254 | |||
2255 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2256 | XMASSERT(pSin); |
||
2257 | XMASSERT(pCos); |
||
2258 | |||
2259 | *pSin = sinf(Value); |
||
2260 | *pCos = cosf(Value); |
||
2261 | #else // _XM_VMX128_INTRINSICS_ |
||
2262 | #endif // _XM_VMX128_INTRINSICS_ |
||
2263 | } |
||
2264 | |||
2265 | //------------------------------------------------------------------------------ |
||
2266 | |||
2267 | XMINLINE FLOAT XMScalarASin |
||
2268 | ( |
||
2269 | FLOAT Value |
||
2270 | ) |
||
2271 | { |
||
2272 | #if defined(_XM_NO_INTRINSICS_) |
||
2273 | |||
2274 | FLOAT AbsValue, Value2, Value3, D; |
||
2275 | XMVECTOR AbsV, R0, R1, Result; |
||
2276 | XMVECTOR V3; |
||
2277 | |||
2278 | *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF; |
||
2279 | |||
2280 | Value2 = Value * AbsValue; |
||
2281 | Value3 = Value * Value2; |
||
2282 | D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue); |
||
2283 | |||
2284 | AbsV = XMVectorReplicate(AbsValue); |
||
2285 | |||
2286 | V3.vector4_f32[0] = Value3; |
||
2287 | V3.vector4_f32[1] = 1.0f; |
||
2288 | V3.vector4_f32[2] = Value3; |
||
2289 | V3.vector4_f32[3] = 1.0f; |
||
2290 | |||
2291 | R1 = XMVectorSet(D, D, Value, Value); |
||
2292 | R1 = XMVectorMultiply(R1, V3); |
||
2293 | |||
2294 | R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v); |
||
2295 | R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v); |
||
2296 | |||
2297 | Result = XMVector4Dot(R0, R1); |
||
2298 | |||
2299 | return Result.vector4_f32[0]; |
||
2300 | |||
2301 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2302 | return asinf(Value); |
||
2303 | #else // _XM_VMX128_INTRINSICS_ |
||
2304 | #endif // _XM_VMX128_INTRINSICS_ |
||
2305 | } |
||
2306 | |||
2307 | //------------------------------------------------------------------------------ |
||
2308 | |||
2309 | XMINLINE FLOAT XMScalarACos |
||
2310 | ( |
||
2311 | FLOAT Value |
||
2312 | ) |
||
2313 | { |
||
2314 | #if defined(_XM_NO_INTRINSICS_) |
||
2315 | |||
2316 | return XM_PIDIV2 - XMScalarASin(Value); |
||
2317 | |||
2318 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2319 | return acosf(Value); |
||
2320 | #else // _XM_VMX128_INTRINSICS_ |
||
2321 | #endif // _XM_VMX128_INTRINSICS_ |
||
2322 | } |
||
2323 | |||
2324 | //------------------------------------------------------------------------------ |
||
2325 | |||
2326 | XMFINLINE FLOAT XMScalarSinEst |
||
2327 | ( |
||
2328 | FLOAT Value |
||
2329 | ) |
||
2330 | { |
||
2331 | #if defined(_XM_NO_INTRINSICS_) |
||
2332 | |||
2333 | FLOAT ValueSq; |
||
2334 | XMVECTOR V; |
||
2335 | XMVECTOR Y; |
||
2336 | XMVECTOR Result; |
||
2337 | |||
2338 | XMASSERT(Value >= -XM_PI); |
||
2339 | XMASSERT(Value < XM_PI); |
||
2340 | |||
2341 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
2342 | |||
2343 | ValueSq = Value * Value; |
||
2344 | |||
2345 | V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value); |
||
2346 | Y = XMVectorSplatY(V); |
||
2347 | V = XMVectorMultiply(V, V); |
||
2348 | V = XMVectorMultiply(V, Y); |
||
2349 | |||
2350 | Result = XMVector4Dot(V, g_XMSinEstCoefficients.v); |
||
2351 | |||
2352 | return Result.vector4_f32[0]; |
||
2353 | |||
2354 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2355 | XMASSERT(Value >= -XM_PI); |
||
2356 | XMASSERT(Value < XM_PI); |
||
2357 | float ValueSq = Value*Value; |
||
2358 | XMVECTOR vValue = _mm_set_ps1(Value); |
||
2359 | XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); |
||
2360 | vTemp = _mm_mul_ps(vTemp,vTemp); |
||
2361 | vTemp = _mm_mul_ps(vTemp,vValue); |
||
2362 | // vTemp = Value,Value^3,Value^5,Value^7 |
||
2363 | vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients); |
||
2364 | vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
2365 | vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W; |
||
2366 | vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
2367 | vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together |
||
2368 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
2369 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
2370 | return _mm_cvtss_f32(vTemp); |
||
2371 | #else |
||
2372 | return vTemp.m128_f32[0]; |
||
2373 | #endif |
||
2374 | #else // _XM_VMX128_INTRINSICS_ |
||
2375 | #endif // _XM_VMX128_INTRINSICS_ |
||
2376 | } |
||
2377 | |||
2378 | //------------------------------------------------------------------------------ |
||
2379 | |||
2380 | XMFINLINE FLOAT XMScalarCosEst |
||
2381 | ( |
||
2382 | FLOAT Value |
||
2383 | ) |
||
2384 | { |
||
2385 | #if defined(_XM_NO_INTRINSICS_) |
||
2386 | FLOAT ValueSq; |
||
2387 | XMVECTOR V; |
||
2388 | XMVECTOR Result; |
||
2389 | XMASSERT(Value >= -XM_PI); |
||
2390 | XMASSERT(Value < XM_PI); |
||
2391 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
2392 | ValueSq = Value * Value; |
||
2393 | V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value); |
||
2394 | V = XMVectorMultiply(V, V); |
||
2395 | Result = XMVector4Dot(V, g_XMCosEstCoefficients.v); |
||
2396 | return Result.vector4_f32[0]; |
||
2397 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2398 | XMASSERT(Value >= -XM_PI); |
||
2399 | XMASSERT(Value < XM_PI); |
||
2400 | float ValueSq = Value*Value; |
||
2401 | XMVECTOR vValue = _mm_setzero_ps(); |
||
2402 | XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); |
||
2403 | vTemp = _mm_mul_ps(vTemp,vTemp); |
||
2404 | // vTemp = 1.0f,Value^2,Value^4,Value^6 |
||
2405 | vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients); |
||
2406 | vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
2407 | vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W; |
||
2408 | vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
2409 | vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together |
||
2410 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
2411 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
2412 | return _mm_cvtss_f32(vTemp); |
||
2413 | #else |
||
2414 | return vTemp.m128_f32[0]; |
||
2415 | #endif |
||
2416 | #else // _XM_VMX128_INTRINSICS_ |
||
2417 | #endif // _XM_VMX128_INTRINSICS_ |
||
2418 | } |
||
2419 | |||
2420 | //------------------------------------------------------------------------------ |
||
2421 | |||
2422 | XMFINLINE VOID XMScalarSinCosEst |
||
2423 | ( |
||
2424 | FLOAT* pSin, |
||
2425 | FLOAT* pCos, |
||
2426 | FLOAT Value |
||
2427 | ) |
||
2428 | { |
||
2429 | #if defined(_XM_NO_INTRINSICS_) |
||
2430 | |||
2431 | FLOAT ValueSq; |
||
2432 | XMVECTOR V, Sin, Cos; |
||
2433 | XMVECTOR Y; |
||
2434 | |||
2435 | XMASSERT(pSin); |
||
2436 | XMASSERT(pCos); |
||
2437 | XMASSERT(Value >= -XM_PI); |
||
2438 | XMASSERT(Value < XM_PI); |
||
2439 | |||
2440 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
2441 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
2442 | |||
2443 | ValueSq = Value * Value; |
||
2444 | V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq); |
||
2445 | Y = XMVectorSplatY(V); |
||
2446 | Cos = XMVectorMultiply(V, V); |
||
2447 | Sin = XMVectorMultiply(Cos, Y); |
||
2448 | |||
2449 | Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v); |
||
2450 | Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v); |
||
2451 | |||
2452 | *pCos = Cos.vector4_f32[0]; |
||
2453 | *pSin = Sin.vector4_f32[0]; |
||
2454 | |||
2455 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2456 | XMASSERT(pSin); |
||
2457 | XMASSERT(pCos); |
||
2458 | XMASSERT(Value >= -XM_PI); |
||
2459 | XMASSERT(Value < XM_PI); |
||
2460 | float ValueSq = Value * Value; |
||
2461 | XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f); |
||
2462 | XMVECTOR Sin = _mm_set_ps1(Value); |
||
2463 | Cos = _mm_mul_ps(Cos,Cos); |
||
2464 | Sin = _mm_mul_ps(Sin,Cos); |
||
2465 | // Cos = 1.0f,Value^2,Value^4,Value^6 |
||
2466 | Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients); |
||
2467 | _mm_store_ss(pCos,Cos); |
||
2468 | // Sin = Value,Value^3,Value^5,Value^7 |
||
2469 | Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients); |
||
2470 | _mm_store_ss(pSin,Sin); |
||
2471 | #else // _XM_VMX128_INTRINSICS_ |
||
2472 | #endif // _XM_VMX128_INTRINSICS_ |
||
2473 | } |
||
2474 | |||
2475 | //------------------------------------------------------------------------------ |
||
2476 | |||
2477 | XMFINLINE FLOAT XMScalarASinEst |
||
2478 | ( |
||
2479 | FLOAT Value |
||
2480 | ) |
||
2481 | { |
||
2482 | #if defined(_XM_NO_INTRINSICS_) |
||
2483 | |||
2484 | XMVECTOR VR, CR, CS; |
||
2485 | XMVECTOR Result; |
||
2486 | FLOAT AbsV, V2, D; |
||
2487 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
2488 | |||
2489 | *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF; |
||
2490 | V2 = Value * AbsV; |
||
2491 | D = OnePlusEps - AbsV; |
||
2492 | |||
2493 | CS = XMVectorSet(Value, 1.0f, 1.0f, V2); |
||
2494 | VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV); |
||
2495 | CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v); |
||
2496 | |||
2497 | Result = XMVector4Dot(VR, CR); |
||
2498 | |||
2499 | return Result.vector4_f32[0]; |
||
2500 | |||
2501 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2502 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
2503 | FLOAT AbsV = fabsf(Value); |
||
2504 | FLOAT V2 = Value * AbsV; // Square with sign retained |
||
2505 | FLOAT D = OnePlusEps - AbsV; |
||
2506 | |||
2507 | XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value); |
||
2508 | XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D)); |
||
2509 | Result = _mm_mul_ps(Result, g_XMASinEstCoefficients); |
||
2510 | Result = XMVector4Dot(VR,Result); |
||
2511 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
2512 | return _mm_cvtss_f32(Result); |
||
2513 | #else |
||
2514 | return Result.m128_f32[0]; |
||
2515 | #endif |
||
2516 | #else // _XM_VMX128_INTRINSICS_ |
||
2517 | #endif // _XM_VMX128_INTRINSICS_ |
||
2518 | } |
||
2519 | |||
2520 | //------------------------------------------------------------------------------ |
||
2521 | |||
2522 | XMFINLINE FLOAT XMScalarACosEst |
||
2523 | ( |
||
2524 | FLOAT Value |
||
2525 | ) |
||
2526 | { |
||
2527 | #if defined(_XM_NO_INTRINSICS_) |
||
2528 | |||
2529 | XMVECTOR VR, CR, CS; |
||
2530 | XMVECTOR Result; |
||
2531 | FLOAT AbsV, V2, D; |
||
2532 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
2533 | |||
2534 | // return XM_PIDIV2 - XMScalarASin(Value); |
||
2535 | |||
2536 | *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF; |
||
2537 | V2 = Value * AbsV; |
||
2538 | D = OnePlusEps - AbsV; |
||
2539 | |||
2540 | CS = XMVectorSet(Value, 1.0f, 1.0f, V2); |
||
2541 | VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV); |
||
2542 | CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v); |
||
2543 | |||
2544 | Result = XMVector4Dot(VR, CR); |
||
2545 | |||
2546 | return XM_PIDIV2 - Result.vector4_f32[0]; |
||
2547 | |||
2548 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2549 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
2550 | FLOAT AbsV = fabsf(Value); |
||
2551 | FLOAT V2 = Value * AbsV; // Value^2 retaining sign |
||
2552 | FLOAT D = OnePlusEps - AbsV; |
||
2553 | XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value); |
||
2554 | XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D)); |
||
2555 | Result = _mm_mul_ps(Result,g_XMASinEstCoefficients); |
||
2556 | Result = XMVector4Dot(VR,Result); |
||
2557 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
2558 | return XM_PIDIV2 - _mm_cvtss_f32(Result); |
||
2559 | #else |
||
2560 | return XM_PIDIV2 - Result.m128_f32[0]; |
||
2561 | #endif |
||
2562 | #else // _XM_VMX128_INTRINSICS_ |
||
2563 | #endif // _XM_VMX128_INTRINSICS_ |
||
2564 | } |
||
2565 | |||
2566 | #endif // __XNAMATHMISC_INL__ |
||
2567 |