Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 1 | pmbaty | 1 | /*++ |
| 2 | |||
| 3 | Copyright (c) Microsoft Corporation. All rights reserved. |
||
| 4 | |||
| 5 | Module Name: |
||
| 6 | |||
| 7 | xnamathmisc.inl |
||
| 8 | |||
| 9 | Abstract: |
||
| 10 | |||
| 11 | XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions. |
||
| 12 | --*/ |
||
| 13 | |||
| 14 | #if defined(_MSC_VER) && (_MSC_VER > 1000) |
||
| 15 | #pragma once |
||
| 16 | #endif |
||
| 17 | |||
| 18 | #ifndef __XNAMATHMISC_INL__ |
||
| 19 | #define __XNAMATHMISC_INL__ |
||
| 20 | |||
| 21 | /**************************************************************************** |
||
| 22 | * |
||
| 23 | * Quaternion |
||
| 24 | * |
||
| 25 | ****************************************************************************/ |
||
| 26 | |||
| 27 | //------------------------------------------------------------------------------ |
||
| 28 | // Comparison operations |
||
| 29 | //------------------------------------------------------------------------------ |
||
| 30 | |||
| 31 | //------------------------------------------------------------------------------ |
||
| 32 | |||
| 33 | XMFINLINE BOOL XMQuaternionEqual |
||
| 34 | ( |
||
| 35 | FXMVECTOR Q1, |
||
| 36 | FXMVECTOR Q2 |
||
| 37 | ) |
||
| 38 | { |
||
| 39 | return XMVector4Equal(Q1, Q2); |
||
| 40 | } |
||
| 41 | |||
| 42 | //------------------------------------------------------------------------------ |
||
| 43 | |||
| 44 | XMFINLINE BOOL XMQuaternionNotEqual |
||
| 45 | ( |
||
| 46 | FXMVECTOR Q1, |
||
| 47 | FXMVECTOR Q2 |
||
| 48 | ) |
||
| 49 | { |
||
| 50 | return XMVector4NotEqual(Q1, Q2); |
||
| 51 | } |
||
| 52 | |||
| 53 | //------------------------------------------------------------------------------ |
||
| 54 | |||
| 55 | XMFINLINE BOOL XMQuaternionIsNaN |
||
| 56 | ( |
||
| 57 | FXMVECTOR Q |
||
| 58 | ) |
||
| 59 | { |
||
| 60 | return XMVector4IsNaN(Q); |
||
| 61 | } |
||
| 62 | |||
| 63 | //------------------------------------------------------------------------------ |
||
| 64 | |||
| 65 | XMFINLINE BOOL XMQuaternionIsInfinite |
||
| 66 | ( |
||
| 67 | FXMVECTOR Q |
||
| 68 | ) |
||
| 69 | { |
||
| 70 | return XMVector4IsInfinite(Q); |
||
| 71 | } |
||
| 72 | |||
| 73 | //------------------------------------------------------------------------------ |
||
| 74 | |||
| 75 | XMFINLINE BOOL XMQuaternionIsIdentity |
||
| 76 | ( |
||
| 77 | FXMVECTOR Q |
||
| 78 | ) |
||
| 79 | { |
||
| 80 | #if defined(_XM_NO_INTRINSICS_) |
||
| 81 | |||
| 82 | return XMVector4Equal(Q, g_XMIdentityR3.v); |
||
| 83 | |||
| 84 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 85 | XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3); |
||
| 86 | return (_mm_movemask_ps(vTemp)==0x0f) ? true : false; |
||
| 87 | #else // _XM_VMX128_INTRINSICS_ |
||
| 88 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 89 | } |
||
| 90 | |||
| 91 | //------------------------------------------------------------------------------ |
||
| 92 | // Computation operations |
||
| 93 | //------------------------------------------------------------------------------ |
||
| 94 | |||
| 95 | //------------------------------------------------------------------------------ |
||
| 96 | |||
| 97 | XMFINLINE XMVECTOR XMQuaternionDot |
||
| 98 | ( |
||
| 99 | FXMVECTOR Q1, |
||
| 100 | FXMVECTOR Q2 |
||
| 101 | ) |
||
| 102 | { |
||
| 103 | return XMVector4Dot(Q1, Q2); |
||
| 104 | } |
||
| 105 | |||
| 106 | //------------------------------------------------------------------------------ |
||
| 107 | |||
| 108 | XMFINLINE XMVECTOR XMQuaternionMultiply |
||
| 109 | ( |
||
| 110 | FXMVECTOR Q1, |
||
| 111 | FXMVECTOR Q2 |
||
| 112 | ) |
||
| 113 | { |
||
| 114 | #if defined(_XM_NO_INTRINSICS_) |
||
| 115 | |||
| 116 | XMVECTOR NegativeQ1; |
||
| 117 | XMVECTOR Q2X; |
||
| 118 | XMVECTOR Q2Y; |
||
| 119 | XMVECTOR Q2Z; |
||
| 120 | XMVECTOR Q2W; |
||
| 121 | XMVECTOR Q1WZYX; |
||
| 122 | XMVECTOR Q1ZWXY; |
||
| 123 | XMVECTOR Q1YXWZ; |
||
| 124 | XMVECTOR Result; |
||
| 125 | CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X}; |
||
| 126 | CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
| 127 | CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z}; |
||
| 128 | |||
| 129 | NegativeQ1 = XMVectorNegate(Q1); |
||
| 130 | |||
| 131 | Q2W = XMVectorSplatW(Q2); |
||
| 132 | Q2X = XMVectorSplatX(Q2); |
||
| 133 | Q2Y = XMVectorSplatY(Q2); |
||
| 134 | Q2Z = XMVectorSplatZ(Q2); |
||
| 135 | |||
| 136 | Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v); |
||
| 137 | Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v); |
||
| 138 | Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v); |
||
| 139 | |||
| 140 | Result = XMVectorMultiply(Q1, Q2W); |
||
| 141 | Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result); |
||
| 142 | Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result); |
||
| 143 | Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result); |
||
| 144 | |||
| 145 | return Result; |
||
| 146 | |||
| 147 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 148 | static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; |
||
| 149 | static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; |
||
| 150 | static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; |
||
| 151 | // Copy to SSE registers and use as few as possible for x86 |
||
| 152 | XMVECTOR Q2X = Q2; |
||
| 153 | XMVECTOR Q2Y = Q2; |
||
| 154 | XMVECTOR Q2Z = Q2; |
||
| 155 | XMVECTOR vResult = Q2; |
||
| 156 | // Splat with one instruction |
||
| 157 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); |
||
| 158 | Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0)); |
||
| 159 | Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1)); |
||
| 160 | Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2)); |
||
| 161 | // Retire Q1 and perform Q1*Q2W |
||
| 162 | vResult = _mm_mul_ps(vResult,Q1); |
||
| 163 | XMVECTOR Q1Shuffle = Q1; |
||
| 164 | // Shuffle the copies of Q1 |
||
| 165 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); |
||
| 166 | // Mul by Q1WZYX |
||
| 167 | Q2X = _mm_mul_ps(Q2X,Q1Shuffle); |
||
| 168 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1)); |
||
| 169 | // Flip the signs on y and z |
||
| 170 | Q2X = _mm_mul_ps(Q2X,ControlWZYX); |
||
| 171 | // Mul by Q1ZWXY |
||
| 172 | Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle); |
||
| 173 | Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); |
||
| 174 | // Flip the signs on z and w |
||
| 175 | Q2Y = _mm_mul_ps(Q2Y,ControlZWXY); |
||
| 176 | // Mul by Q1YXWZ |
||
| 177 | Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle); |
||
| 178 | vResult = _mm_add_ps(vResult,Q2X); |
||
| 179 | // Flip the signs on x and w |
||
| 180 | Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ); |
||
| 181 | Q2Y = _mm_add_ps(Q2Y,Q2Z); |
||
| 182 | vResult = _mm_add_ps(vResult,Q2Y); |
||
| 183 | return vResult; |
||
| 184 | #else // _XM_VMX128_INTRINSICS_ |
||
| 185 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 186 | } |
||
| 187 | |||
| 188 | //------------------------------------------------------------------------------ |
||
| 189 | |||
| 190 | XMFINLINE XMVECTOR XMQuaternionLengthSq |
||
| 191 | ( |
||
| 192 | FXMVECTOR Q |
||
| 193 | ) |
||
| 194 | { |
||
| 195 | return XMVector4LengthSq(Q); |
||
| 196 | } |
||
| 197 | |||
| 198 | //------------------------------------------------------------------------------ |
||
| 199 | |||
| 200 | XMFINLINE XMVECTOR XMQuaternionReciprocalLength |
||
| 201 | ( |
||
| 202 | FXMVECTOR Q |
||
| 203 | ) |
||
| 204 | { |
||
| 205 | return XMVector4ReciprocalLength(Q); |
||
| 206 | } |
||
| 207 | |||
| 208 | //------------------------------------------------------------------------------ |
||
| 209 | |||
| 210 | XMFINLINE XMVECTOR XMQuaternionLength |
||
| 211 | ( |
||
| 212 | FXMVECTOR Q |
||
| 213 | ) |
||
| 214 | { |
||
| 215 | return XMVector4Length(Q); |
||
| 216 | } |
||
| 217 | |||
| 218 | //------------------------------------------------------------------------------ |
||
| 219 | |||
| 220 | XMFINLINE XMVECTOR XMQuaternionNormalizeEst |
||
| 221 | ( |
||
| 222 | FXMVECTOR Q |
||
| 223 | ) |
||
| 224 | { |
||
| 225 | return XMVector4NormalizeEst(Q); |
||
| 226 | } |
||
| 227 | |||
| 228 | //------------------------------------------------------------------------------ |
||
| 229 | |||
| 230 | XMFINLINE XMVECTOR XMQuaternionNormalize |
||
| 231 | ( |
||
| 232 | FXMVECTOR Q |
||
| 233 | ) |
||
| 234 | { |
||
| 235 | return XMVector4Normalize(Q); |
||
| 236 | } |
||
| 237 | |||
| 238 | //------------------------------------------------------------------------------ |
||
| 239 | |||
| 240 | XMFINLINE XMVECTOR XMQuaternionConjugate |
||
| 241 | ( |
||
| 242 | FXMVECTOR Q |
||
| 243 | ) |
||
| 244 | { |
||
| 245 | #if defined(_XM_NO_INTRINSICS_) |
||
| 246 | |||
| 247 | XMVECTOR Result = { |
||
| 248 | -Q.vector4_f32[0], |
||
| 249 | -Q.vector4_f32[1], |
||
| 250 | -Q.vector4_f32[2], |
||
| 251 | Q.vector4_f32[3] |
||
| 252 | }; |
||
| 253 | return Result; |
||
| 254 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 255 | static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f}; |
||
| 256 | XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3); |
||
| 257 | return Result; |
||
| 258 | #else // _XM_VMX128_INTRINSICS_ |
||
| 259 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 260 | } |
||
| 261 | |||
| 262 | //------------------------------------------------------------------------------ |
||
| 263 | |||
| 264 | XMFINLINE XMVECTOR XMQuaternionInverse |
||
| 265 | ( |
||
| 266 | FXMVECTOR Q |
||
| 267 | ) |
||
| 268 | { |
||
| 269 | #if defined(_XM_NO_INTRINSICS_) |
||
| 270 | |||
| 271 | XMVECTOR Conjugate; |
||
| 272 | XMVECTOR L; |
||
| 273 | XMVECTOR Control; |
||
| 274 | XMVECTOR Result; |
||
| 275 | CONST XMVECTOR Zero = XMVectorZero(); |
||
| 276 | |||
| 277 | L = XMVector4LengthSq(Q); |
||
| 278 | Conjugate = XMQuaternionConjugate(Q); |
||
| 279 | |||
| 280 | Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); |
||
| 281 | |||
| 282 | L = XMVectorReciprocal(L); |
||
| 283 | Result = XMVectorMultiply(Conjugate, L); |
||
| 284 | |||
| 285 | Result = XMVectorSelect(Result, Zero, Control); |
||
| 286 | |||
| 287 | return Result; |
||
| 288 | |||
| 289 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 290 | XMVECTOR Conjugate; |
||
| 291 | XMVECTOR L; |
||
| 292 | XMVECTOR Control; |
||
| 293 | XMVECTOR Result; |
||
| 294 | XMVECTOR Zero = XMVectorZero(); |
||
| 295 | |||
| 296 | L = XMVector4LengthSq(Q); |
||
| 297 | Conjugate = XMQuaternionConjugate(Q); |
||
| 298 | Control = XMVectorLessOrEqual(L, g_XMEpsilon); |
||
| 299 | Result = _mm_div_ps(Conjugate,L); |
||
| 300 | Result = XMVectorSelect(Result, Zero, Control); |
||
| 301 | return Result; |
||
| 302 | #else // _XM_VMX128_INTRINSICS_ |
||
| 303 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 304 | } |
||
| 305 | |||
| 306 | //------------------------------------------------------------------------------ |
||
| 307 | |||
| 308 | XMFINLINE XMVECTOR XMQuaternionLn |
||
| 309 | ( |
||
| 310 | FXMVECTOR Q |
||
| 311 | ) |
||
| 312 | { |
||
| 313 | #if defined(_XM_NO_INTRINSICS_) |
||
| 314 | |||
| 315 | XMVECTOR Q0; |
||
| 316 | XMVECTOR QW; |
||
| 317 | XMVECTOR Theta; |
||
| 318 | XMVECTOR SinTheta; |
||
| 319 | XMVECTOR S; |
||
| 320 | XMVECTOR ControlW; |
||
| 321 | XMVECTOR Result; |
||
| 322 | static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
| 323 | |||
| 324 | QW = XMVectorSplatW(Q); |
||
| 325 | Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); |
||
| 326 | |||
| 327 | ControlW = XMVectorInBounds(QW, OneMinusEpsilon); |
||
| 328 | |||
| 329 | Theta = XMVectorACos(QW); |
||
| 330 | SinTheta = XMVectorSin(Theta); |
||
| 331 | |||
| 332 | S = XMVectorReciprocal(SinTheta); |
||
| 333 | S = XMVectorMultiply(Theta, S); |
||
| 334 | |||
| 335 | Result = XMVectorMultiply(Q0, S); |
||
| 336 | |||
| 337 | Result = XMVectorSelect(Q0, Result, ControlW); |
||
| 338 | |||
| 339 | return Result; |
||
| 340 | |||
| 341 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 342 | static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
| 343 | static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)}; |
||
| 344 | // Get W only |
||
| 345 | XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3)); |
||
| 346 | // W = 0 |
||
| 347 | XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3); |
||
| 348 | // Use W if within bounds |
||
| 349 | XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon); |
||
| 350 | XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon); |
||
| 351 | ControlW = _mm_and_ps(ControlW,vTemp2); |
||
| 352 | // Get theta |
||
| 353 | XMVECTOR vTheta = XMVectorACos(QW); |
||
| 354 | // Get Sine of theta |
||
| 355 | vTemp2 = XMVectorSin(vTheta); |
||
| 356 | // theta/sine of theta |
||
| 357 | vTheta = _mm_div_ps(vTheta,vTemp2); |
||
| 358 | // Here's the answer |
||
| 359 | vTheta = _mm_mul_ps(vTheta,Q0); |
||
| 360 | // Was W in bounds? If not, return input as is |
||
| 361 | vTheta = XMVectorSelect(Q0,vTheta,ControlW); |
||
| 362 | return vTheta; |
||
| 363 | #else // _XM_VMX128_INTRINSICS_ |
||
| 364 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 365 | } |
||
| 366 | |||
| 367 | //------------------------------------------------------------------------------ |
||
| 368 | |||
| 369 | XMFINLINE XMVECTOR XMQuaternionExp |
||
| 370 | ( |
||
| 371 | FXMVECTOR Q |
||
| 372 | ) |
||
| 373 | { |
||
| 374 | #if defined(_XM_NO_INTRINSICS_) |
||
| 375 | |||
| 376 | XMVECTOR Theta; |
||
| 377 | XMVECTOR SinTheta; |
||
| 378 | XMVECTOR CosTheta; |
||
| 379 | XMVECTOR S; |
||
| 380 | XMVECTOR Control; |
||
| 381 | XMVECTOR Zero; |
||
| 382 | XMVECTOR Result; |
||
| 383 | |||
| 384 | Theta = XMVector3Length(Q); |
||
| 385 | XMVectorSinCos(&SinTheta, &CosTheta, Theta); |
||
| 386 | |||
| 387 | S = XMVectorReciprocal(Theta); |
||
| 388 | S = XMVectorMultiply(SinTheta, S); |
||
| 389 | |||
| 390 | Result = XMVectorMultiply(Q, S); |
||
| 391 | |||
| 392 | Zero = XMVectorZero(); |
||
| 393 | Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); |
||
| 394 | Result = XMVectorSelect(Result, Q, Control); |
||
| 395 | |||
| 396 | Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); |
||
| 397 | |||
| 398 | return Result; |
||
| 399 | |||
| 400 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 401 | XMVECTOR Theta; |
||
| 402 | XMVECTOR SinTheta; |
||
| 403 | XMVECTOR CosTheta; |
||
| 404 | XMVECTOR S; |
||
| 405 | XMVECTOR Control; |
||
| 406 | XMVECTOR Zero; |
||
| 407 | XMVECTOR Result; |
||
| 408 | Theta = XMVector3Length(Q); |
||
| 409 | XMVectorSinCos(&SinTheta, &CosTheta, Theta); |
||
| 410 | S = _mm_div_ps(SinTheta,Theta); |
||
| 411 | Result = _mm_mul_ps(Q, S); |
||
| 412 | Zero = XMVectorZero(); |
||
| 413 | Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon); |
||
| 414 | Result = XMVectorSelect(Result,Q,Control); |
||
| 415 | Result = _mm_and_ps(Result,g_XMMask3); |
||
| 416 | CosTheta = _mm_and_ps(CosTheta,g_XMMaskW); |
||
| 417 | Result = _mm_or_ps(Result,CosTheta); |
||
| 418 | return Result; |
||
| 419 | #else // _XM_VMX128_INTRINSICS_ |
||
| 420 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 421 | } |
||
| 422 | |||
| 423 | //------------------------------------------------------------------------------ |
||
| 424 | |||
| 425 | XMINLINE XMVECTOR XMQuaternionSlerp |
||
| 426 | ( |
||
| 427 | FXMVECTOR Q0, |
||
| 428 | FXMVECTOR Q1, |
||
| 429 | FLOAT t |
||
| 430 | ) |
||
| 431 | { |
||
| 432 | XMVECTOR T = XMVectorReplicate(t); |
||
| 433 | return XMQuaternionSlerpV(Q0, Q1, T); |
||
| 434 | } |
||
| 435 | |||
| 436 | //------------------------------------------------------------------------------ |
||
| 437 | |||
| 438 | XMINLINE XMVECTOR XMQuaternionSlerpV |
||
| 439 | ( |
||
| 440 | FXMVECTOR Q0, |
||
| 441 | FXMVECTOR Q1, |
||
| 442 | FXMVECTOR T |
||
| 443 | ) |
||
| 444 | { |
||
| 445 | #if defined(_XM_NO_INTRINSICS_) |
||
| 446 | |||
| 447 | // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) |
||
| 448 | XMVECTOR Omega; |
||
| 449 | XMVECTOR CosOmega; |
||
| 450 | XMVECTOR SinOmega; |
||
| 451 | XMVECTOR InvSinOmega; |
||
| 452 | XMVECTOR V01; |
||
| 453 | XMVECTOR C1000; |
||
| 454 | XMVECTOR SignMask; |
||
| 455 | XMVECTOR S0; |
||
| 456 | XMVECTOR S1; |
||
| 457 | XMVECTOR Sign; |
||
| 458 | XMVECTOR Control; |
||
| 459 | XMVECTOR Result; |
||
| 460 | XMVECTOR Zero; |
||
| 461 | CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
| 462 | |||
| 463 | XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0])); |
||
| 464 | |||
| 465 | CosOmega = XMQuaternionDot(Q0, Q1); |
||
| 466 | |||
| 467 | Zero = XMVectorZero(); |
||
| 468 | Control = XMVectorLess(CosOmega, Zero); |
||
| 469 | Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); |
||
| 470 | |||
| 471 | CosOmega = XMVectorMultiply(CosOmega, Sign); |
||
| 472 | |||
| 473 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); |
||
| 474 | |||
| 475 | SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); |
||
| 476 | SinOmega = XMVectorSqrt(SinOmega); |
||
| 477 | |||
| 478 | Omega = XMVectorATan2(SinOmega, CosOmega); |
||
| 479 | |||
| 480 | SignMask = XMVectorSplatSignMask(); |
||
| 481 | C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0); |
||
| 482 | V01 = XMVectorShiftLeft(T, Zero, 2); |
||
| 483 | SignMask = XMVectorShiftLeft(SignMask, Zero, 3); |
||
| 484 | V01 = XMVectorXorInt(V01, SignMask); |
||
| 485 | V01 = XMVectorAdd(C1000, V01); |
||
| 486 | |||
| 487 | InvSinOmega = XMVectorReciprocal(SinOmega); |
||
| 488 | |||
| 489 | S0 = XMVectorMultiply(V01, Omega); |
||
| 490 | S0 = XMVectorSin(S0); |
||
| 491 | S0 = XMVectorMultiply(S0, InvSinOmega); |
||
| 492 | |||
| 493 | S0 = XMVectorSelect(V01, S0, Control); |
||
| 494 | |||
| 495 | S1 = XMVectorSplatY(S0); |
||
| 496 | S0 = XMVectorSplatX(S0); |
||
| 497 | |||
| 498 | S1 = XMVectorMultiply(S1, Sign); |
||
| 499 | |||
| 500 | Result = XMVectorMultiply(Q0, S0); |
||
| 501 | Result = XMVectorMultiplyAdd(Q1, S1, Result); |
||
| 502 | |||
| 503 | return Result; |
||
| 504 | |||
| 505 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 506 | // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) |
||
| 507 | XMVECTOR Omega; |
||
| 508 | XMVECTOR CosOmega; |
||
| 509 | XMVECTOR SinOmega; |
||
| 510 | XMVECTOR V01; |
||
| 511 | XMVECTOR S0; |
||
| 512 | XMVECTOR S1; |
||
| 513 | XMVECTOR Sign; |
||
| 514 | XMVECTOR Control; |
||
| 515 | XMVECTOR Result; |
||
| 516 | XMVECTOR Zero; |
||
| 517 | static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; |
||
| 518 | static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000}; |
||
| 519 | static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000}; |
||
| 520 | |||
| 521 | XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); |
||
| 522 | |||
| 523 | CosOmega = XMQuaternionDot(Q0, Q1); |
||
| 524 | |||
| 525 | Zero = XMVectorZero(); |
||
| 526 | Control = XMVectorLess(CosOmega, Zero); |
||
| 527 | Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); |
||
| 528 | |||
| 529 | CosOmega = _mm_mul_ps(CosOmega, Sign); |
||
| 530 | |||
| 531 | Control = XMVectorLess(CosOmega, OneMinusEpsilon); |
||
| 532 | |||
| 533 | SinOmega = _mm_mul_ps(CosOmega,CosOmega); |
||
| 534 | SinOmega = _mm_sub_ps(g_XMOne,SinOmega); |
||
| 535 | SinOmega = _mm_sqrt_ps(SinOmega); |
||
| 536 | |||
| 537 | Omega = XMVectorATan2(SinOmega, CosOmega); |
||
| 538 | |||
| 539 | V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1)); |
||
| 540 | V01 = _mm_and_ps(V01,MaskXY); |
||
| 541 | V01 = _mm_xor_ps(V01,SignMask2); |
||
| 542 | V01 = _mm_add_ps(g_XMIdentityR0, V01); |
||
| 543 | |||
| 544 | S0 = _mm_mul_ps(V01, Omega); |
||
| 545 | S0 = XMVectorSin(S0); |
||
| 546 | S0 = _mm_div_ps(S0, SinOmega); |
||
| 547 | |||
| 548 | S0 = XMVectorSelect(V01, S0, Control); |
||
| 549 | |||
| 550 | S1 = XMVectorSplatY(S0); |
||
| 551 | S0 = XMVectorSplatX(S0); |
||
| 552 | |||
| 553 | S1 = _mm_mul_ps(S1, Sign); |
||
| 554 | Result = _mm_mul_ps(Q0, S0); |
||
| 555 | S1 = _mm_mul_ps(S1, Q1); |
||
| 556 | Result = _mm_add_ps(Result,S1); |
||
| 557 | return Result; |
||
| 558 | #else // _XM_VMX128_INTRINSICS_ |
||
| 559 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 560 | } |
||
| 561 | |||
| 562 | //------------------------------------------------------------------------------ |
||
| 563 | |||
| 564 | XMFINLINE XMVECTOR XMQuaternionSquad |
||
| 565 | ( |
||
| 566 | FXMVECTOR Q0, |
||
| 567 | FXMVECTOR Q1, |
||
| 568 | FXMVECTOR Q2, |
||
| 569 | CXMVECTOR Q3, |
||
| 570 | FLOAT t |
||
| 571 | ) |
||
| 572 | { |
||
| 573 | XMVECTOR T = XMVectorReplicate(t); |
||
| 574 | return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); |
||
| 575 | } |
||
| 576 | |||
| 577 | //------------------------------------------------------------------------------ |
||
| 578 | |||
| 579 | XMFINLINE XMVECTOR XMQuaternionSquadV |
||
| 580 | ( |
||
| 581 | FXMVECTOR Q0, |
||
| 582 | FXMVECTOR Q1, |
||
| 583 | FXMVECTOR Q2, |
||
| 584 | CXMVECTOR Q3, |
||
| 585 | CXMVECTOR T |
||
| 586 | ) |
||
| 587 | { |
||
| 588 | XMVECTOR Q03; |
||
| 589 | XMVECTOR Q12; |
||
| 590 | XMVECTOR TP; |
||
| 591 | XMVECTOR Two; |
||
| 592 | XMVECTOR Result; |
||
| 593 | |||
| 594 | XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) ); |
||
| 595 | |||
| 596 | TP = T; |
||
| 597 | Two = XMVectorSplatConstant(2, 0); |
||
| 598 | |||
| 599 | Q03 = XMQuaternionSlerpV(Q0, Q3, T); |
||
| 600 | Q12 = XMQuaternionSlerpV(Q1, Q2, T); |
||
| 601 | |||
| 602 | TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); |
||
| 603 | TP = XMVectorMultiply(TP, Two); |
||
| 604 | |||
| 605 | Result = XMQuaternionSlerpV(Q03, Q12, TP); |
||
| 606 | |||
| 607 | return Result; |
||
| 608 | |||
| 609 | } |
||
| 610 | |||
| 611 | //------------------------------------------------------------------------------ |
||
| 612 | |||
| 613 | XMINLINE VOID XMQuaternionSquadSetup |
||
| 614 | ( |
||
| 615 | XMVECTOR* pA, |
||
| 616 | XMVECTOR* pB, |
||
| 617 | XMVECTOR* pC, |
||
| 618 | FXMVECTOR Q0, |
||
| 619 | FXMVECTOR Q1, |
||
| 620 | FXMVECTOR Q2, |
||
| 621 | CXMVECTOR Q3 |
||
| 622 | ) |
||
| 623 | { |
||
| 624 | XMVECTOR SQ0, SQ2, SQ3; |
||
| 625 | XMVECTOR InvQ1, InvQ2; |
||
| 626 | XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3; |
||
| 627 | XMVECTOR ExpQ02, ExpQ13; |
||
| 628 | XMVECTOR LS01, LS12, LS23; |
||
| 629 | XMVECTOR LD01, LD12, LD23; |
||
| 630 | XMVECTOR Control0, Control1, Control2; |
||
| 631 | XMVECTOR NegativeOneQuarter; |
||
| 632 | |||
| 633 | XMASSERT(pA); |
||
| 634 | XMASSERT(pB); |
||
| 635 | XMASSERT(pC); |
||
| 636 | |||
| 637 | LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); |
||
| 638 | LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); |
||
| 639 | SQ2 = XMVectorNegate(Q2); |
||
| 640 | |||
| 641 | Control1 = XMVectorLess(LS12, LD12); |
||
| 642 | SQ2 = XMVectorSelect(Q2, SQ2, Control1); |
||
| 643 | |||
| 644 | LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); |
||
| 645 | LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); |
||
| 646 | SQ0 = XMVectorNegate(Q0); |
||
| 647 | |||
| 648 | LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); |
||
| 649 | LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); |
||
| 650 | SQ3 = XMVectorNegate(Q3); |
||
| 651 | |||
| 652 | Control0 = XMVectorLess(LS01, LD01); |
||
| 653 | Control2 = XMVectorLess(LS23, LD23); |
||
| 654 | |||
| 655 | SQ0 = XMVectorSelect(Q0, SQ0, Control0); |
||
| 656 | SQ3 = XMVectorSelect(Q3, SQ3, Control2); |
||
| 657 | |||
| 658 | InvQ1 = XMQuaternionInverse(Q1); |
||
| 659 | InvQ2 = XMQuaternionInverse(SQ2); |
||
| 660 | |||
| 661 | LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); |
||
| 662 | LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); |
||
| 663 | LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); |
||
| 664 | LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); |
||
| 665 | |||
| 666 | NegativeOneQuarter = XMVectorSplatConstant(-1, 2); |
||
| 667 | |||
| 668 | ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); |
||
| 669 | ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); |
||
| 670 | ExpQ02 = XMQuaternionExp(ExpQ02); |
||
| 671 | ExpQ13 = XMQuaternionExp(ExpQ13); |
||
| 672 | |||
| 673 | *pA = XMQuaternionMultiply(Q1, ExpQ02); |
||
| 674 | *pB = XMQuaternionMultiply(SQ2, ExpQ13); |
||
| 675 | *pC = SQ2; |
||
| 676 | } |
||
| 677 | |||
| 678 | //------------------------------------------------------------------------------ |
||
| 679 | |||
| 680 | XMFINLINE XMVECTOR XMQuaternionBaryCentric |
||
| 681 | ( |
||
| 682 | FXMVECTOR Q0, |
||
| 683 | FXMVECTOR Q1, |
||
| 684 | FXMVECTOR Q2, |
||
| 685 | FLOAT f, |
||
| 686 | FLOAT g |
||
| 687 | ) |
||
| 688 | { |
||
| 689 | XMVECTOR Q01; |
||
| 690 | XMVECTOR Q02; |
||
| 691 | FLOAT s; |
||
| 692 | XMVECTOR Result; |
||
| 693 | |||
| 694 | s = f + g; |
||
| 695 | |||
| 696 | if (s < 0.00001f && s > -0.00001f) |
||
| 697 | { |
||
| 698 | Result = Q0; |
||
| 699 | } |
||
| 700 | else |
||
| 701 | { |
||
| 702 | Q01 = XMQuaternionSlerp(Q0, Q1, s); |
||
| 703 | Q02 = XMQuaternionSlerp(Q0, Q2, s); |
||
| 704 | |||
| 705 | Result = XMQuaternionSlerp(Q01, Q02, g / s); |
||
| 706 | } |
||
| 707 | |||
| 708 | return Result; |
||
| 709 | } |
||
| 710 | |||
| 711 | //------------------------------------------------------------------------------ |
||
| 712 | |||
| 713 | XMFINLINE XMVECTOR XMQuaternionBaryCentricV |
||
| 714 | ( |
||
| 715 | FXMVECTOR Q0, |
||
| 716 | FXMVECTOR Q1, |
||
| 717 | FXMVECTOR Q2, |
||
| 718 | CXMVECTOR F, |
||
| 719 | CXMVECTOR G |
||
| 720 | ) |
||
| 721 | { |
||
| 722 | XMVECTOR Q01; |
||
| 723 | XMVECTOR Q02; |
||
| 724 | XMVECTOR S, GS; |
||
| 725 | XMVECTOR Epsilon; |
||
| 726 | XMVECTOR Result; |
||
| 727 | |||
| 728 | XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) ); |
||
| 729 | XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) ); |
||
| 730 | |||
| 731 | Epsilon = XMVectorSplatConstant(1, 16); |
||
| 732 | |||
| 733 | S = XMVectorAdd(F, G); |
||
| 734 | |||
| 735 | if (XMVector4InBounds(S, Epsilon)) |
||
| 736 | { |
||
| 737 | Result = Q0; |
||
| 738 | } |
||
| 739 | else |
||
| 740 | { |
||
| 741 | Q01 = XMQuaternionSlerpV(Q0, Q1, S); |
||
| 742 | Q02 = XMQuaternionSlerpV(Q0, Q2, S); |
||
| 743 | GS = XMVectorReciprocal(S); |
||
| 744 | GS = XMVectorMultiply(G, GS); |
||
| 745 | |||
| 746 | Result = XMQuaternionSlerpV(Q01, Q02, GS); |
||
| 747 | } |
||
| 748 | |||
| 749 | return Result; |
||
| 750 | } |
||
| 751 | |||
| 752 | //------------------------------------------------------------------------------ |
||
| 753 | // Transformation operations |
||
| 754 | //------------------------------------------------------------------------------ |
||
| 755 | |||
| 756 | //------------------------------------------------------------------------------ |
||
| 757 | |||
| 758 | XMFINLINE XMVECTOR XMQuaternionIdentity() |
||
| 759 | { |
||
| 760 | #if defined(_XM_NO_INTRINSICS_) |
||
| 761 | return g_XMIdentityR3.v; |
||
| 762 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 763 | return g_XMIdentityR3; |
||
| 764 | #else // _XM_VMX128_INTRINSICS_ |
||
| 765 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 766 | } |
||
| 767 | |||
| 768 | //------------------------------------------------------------------------------ |
||
| 769 | |||
| 770 | XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw |
||
| 771 | ( |
||
| 772 | FLOAT Pitch, |
||
| 773 | FLOAT Yaw, |
||
| 774 | FLOAT Roll |
||
| 775 | ) |
||
| 776 | { |
||
| 777 | XMVECTOR Angles; |
||
| 778 | XMVECTOR Q; |
||
| 779 | |||
| 780 | Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); |
||
| 781 | Q = XMQuaternionRotationRollPitchYawFromVector(Angles); |
||
| 782 | |||
| 783 | return Q; |
||
| 784 | } |
||
| 785 | |||
| 786 | //------------------------------------------------------------------------------ |
||
| 787 | |||
| 788 | XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector |
||
| 789 | ( |
||
| 790 | FXMVECTOR Angles // <Pitch, Yaw, Roll, 0> |
||
| 791 | ) |
||
| 792 | { |
||
| 793 | #if defined(_XM_NO_INTRINSICS_) |
||
| 794 | |||
| 795 | XMVECTOR Q, Q0, Q1; |
||
| 796 | XMVECTOR P0, P1, Y0, Y1, R0, R1; |
||
| 797 | XMVECTOR HalfAngles; |
||
| 798 | XMVECTOR SinAngles, CosAngles; |
||
| 799 | static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X}; |
||
| 800 | static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y}; |
||
| 801 | static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z}; |
||
| 802 | static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
| 803 | |||
| 804 | HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); |
||
| 805 | XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); |
||
| 806 | |||
| 807 | P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v); |
||
| 808 | Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v); |
||
| 809 | R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v); |
||
| 810 | P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v); |
||
| 811 | Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v); |
||
| 812 | R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v); |
||
| 813 | |||
| 814 | Q1 = XMVectorMultiply(P1, Sign); |
||
| 815 | Q0 = XMVectorMultiply(P0, Y0); |
||
| 816 | Q1 = XMVectorMultiply(Q1, Y1); |
||
| 817 | Q0 = XMVectorMultiply(Q0, R0); |
||
| 818 | Q = XMVectorMultiplyAdd(Q1, R1, Q0); |
||
| 819 | |||
| 820 | return Q; |
||
| 821 | |||
| 822 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 823 | XMVECTOR Q, Q0, Q1; |
||
| 824 | XMVECTOR P0, P1, Y0, Y1, R0, R1; |
||
| 825 | XMVECTOR HalfAngles; |
||
| 826 | XMVECTOR SinAngles, CosAngles; |
||
| 827 | static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X}; |
||
| 828 | static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y}; |
||
| 829 | static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z}; |
||
| 830 | static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
| 831 | |||
| 832 | HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf); |
||
| 833 | XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); |
||
| 834 | |||
| 835 | P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch); |
||
| 836 | Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw); |
||
| 837 | R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll); |
||
| 838 | P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch); |
||
| 839 | Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw); |
||
| 840 | R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll); |
||
| 841 | |||
| 842 | Q1 = _mm_mul_ps(P1, Sign); |
||
| 843 | Q0 = _mm_mul_ps(P0, Y0); |
||
| 844 | Q1 = _mm_mul_ps(Q1, Y1); |
||
| 845 | Q0 = _mm_mul_ps(Q0, R0); |
||
| 846 | Q = _mm_mul_ps(Q1, R1); |
||
| 847 | Q = _mm_add_ps(Q,Q0); |
||
| 848 | return Q; |
||
| 849 | #else // _XM_VMX128_INTRINSICS_ |
||
| 850 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 851 | } |
||
| 852 | |||
| 853 | //------------------------------------------------------------------------------ |
||
| 854 | |||
| 855 | XMFINLINE XMVECTOR XMQuaternionRotationNormal |
||
| 856 | ( |
||
| 857 | FXMVECTOR NormalAxis, |
||
| 858 | FLOAT Angle |
||
| 859 | ) |
||
| 860 | { |
||
| 861 | #if defined(_XM_NO_INTRINSICS_) |
||
| 862 | |||
| 863 | XMVECTOR Q; |
||
| 864 | XMVECTOR N; |
||
| 865 | XMVECTOR Scale; |
||
| 866 | |||
| 867 | N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); |
||
| 868 | |||
| 869 | XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle); |
||
| 870 | |||
| 871 | Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2]; |
||
| 872 | |||
| 873 | Q = XMVectorMultiply(N, Scale); |
||
| 874 | |||
| 875 | return Q; |
||
| 876 | |||
| 877 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 878 | XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3); |
||
| 879 | N = _mm_or_ps(N,g_XMIdentityR3); |
||
| 880 | XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); |
||
| 881 | XMVECTOR vSine; |
||
| 882 | XMVECTOR vCosine; |
||
| 883 | XMVectorSinCos(&vSine,&vCosine,Scale); |
||
| 884 | Scale = _mm_and_ps(vSine,g_XMMask3); |
||
| 885 | vCosine = _mm_and_ps(vCosine,g_XMMaskW); |
||
| 886 | Scale = _mm_or_ps(Scale,vCosine); |
||
| 887 | N = _mm_mul_ps(N,Scale); |
||
| 888 | return N; |
||
| 889 | #else // _XM_VMX128_INTRINSICS_ |
||
| 890 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 891 | } |
||
| 892 | |||
| 893 | //------------------------------------------------------------------------------ |
||
| 894 | |||
| 895 | XMFINLINE XMVECTOR XMQuaternionRotationAxis |
||
| 896 | ( |
||
| 897 | FXMVECTOR Axis, |
||
| 898 | FLOAT Angle |
||
| 899 | ) |
||
| 900 | { |
||
| 901 | #if defined(_XM_NO_INTRINSICS_) |
||
| 902 | |||
| 903 | XMVECTOR Normal; |
||
| 904 | XMVECTOR Q; |
||
| 905 | |||
| 906 | XMASSERT(!XMVector3Equal(Axis, XMVectorZero())); |
||
| 907 | XMASSERT(!XMVector3IsInfinite(Axis)); |
||
| 908 | |||
| 909 | Normal = XMVector3Normalize(Axis); |
||
| 910 | Q = XMQuaternionRotationNormal(Normal, Angle); |
||
| 911 | |||
| 912 | return Q; |
||
| 913 | |||
| 914 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 915 | XMVECTOR Normal; |
||
| 916 | XMVECTOR Q; |
||
| 917 | |||
| 918 | XMASSERT(!XMVector3Equal(Axis, XMVectorZero())); |
||
| 919 | XMASSERT(!XMVector3IsInfinite(Axis)); |
||
| 920 | |||
| 921 | Normal = XMVector3Normalize(Axis); |
||
| 922 | Q = XMQuaternionRotationNormal(Normal, Angle); |
||
| 923 | return Q; |
||
| 924 | #else // _XM_VMX128_INTRINSICS_ |
||
| 925 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 926 | } |
||
| 927 | |||
| 928 | //------------------------------------------------------------------------------ |
||
| 929 | |||
| 930 | XMINLINE XMVECTOR XMQuaternionRotationMatrix |
||
| 931 | ( |
||
| 932 | CXMMATRIX M |
||
| 933 | ) |
||
| 934 | { |
||
| 935 | #if defined(_XM_NO_INTRINSICS_) |
||
| 936 | |||
| 937 | XMVECTOR Q0, Q1, Q2; |
||
| 938 | XMVECTOR M00, M11, M22; |
||
| 939 | XMVECTOR CQ0, CQ1, C; |
||
| 940 | XMVECTOR CX, CY, CZ, CW; |
||
| 941 | XMVECTOR SQ1, Scale; |
||
| 942 | XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select; |
||
| 943 | XMVECTOR A, B, P; |
||
| 944 | XMVECTOR PermuteSplat, PermuteSplatT; |
||
| 945 | XMVECTOR SignB, SignBT; |
||
| 946 | XMVECTOR PermuteControl, PermuteControlT; |
||
| 947 | XMVECTOR Zero; |
||
| 948 | XMVECTOR Result; |
||
| 949 | static CONST XMVECTOR OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f}; |
||
| 950 | static CONST XMVECTOR SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
| 951 | static CONST XMVECTOR SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f}; |
||
| 952 | static CONST XMVECTOR SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f}; |
||
| 953 | static CONST XMVECTOR SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f}; |
||
| 954 | static CONST XMVECTOR SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f}; |
||
| 955 | static CONST XMVECTOR SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f}; |
||
| 956 | static CONST XMVECTOR SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f}; |
||
| 957 | static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W}; |
||
| 958 | static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W}; |
||
| 959 | static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X}; |
||
| 960 | static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y}; |
||
| 961 | static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z}; |
||
| 962 | static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W}; |
||
| 963 | static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
| 964 | static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W}; |
||
| 965 | static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W}; |
||
| 966 | static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y}; |
||
| 967 | static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z}; |
||
| 968 | static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X}; |
||
| 969 | static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W}; |
||
| 970 | |||
| 971 | M00 = XMVectorSplatX(M.r[0]); |
||
| 972 | M11 = XMVectorSplatY(M.r[1]); |
||
| 973 | M22 = XMVectorSplatZ(M.r[2]); |
||
| 974 | |||
| 975 | Q0 = XMVectorMultiply(SignPNNP, M00); |
||
| 976 | Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0); |
||
| 977 | Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0); |
||
| 978 | |||
| 979 | Q1 = XMVectorAdd(Q0, g_XMOne.v); |
||
| 980 | |||
| 981 | Rsq = XMVectorReciprocalSqrt(Q1); |
||
| 982 | Zero = XMVectorZero(); |
||
| 983 | VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity.v); |
||
| 984 | VEqualsZero = XMVectorEqual(Q1, Zero); |
||
| 985 | Sqrt = XMVectorMultiply(Q1, Rsq); |
||
| 986 | Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); |
||
| 987 | Q1 = XMVectorSelect(Q1, Sqrt, Select); |
||
| 988 | |||
| 989 | Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v); |
||
| 990 | |||
| 991 | SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v); |
||
| 992 | |||
| 993 | CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v); |
||
| 994 | CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W.v); |
||
| 995 | C = XMVectorGreaterOrEqual(CQ0, CQ1); |
||
| 996 | |||
| 997 | CX = XMVectorSplatX(C); |
||
| 998 | CY = XMVectorSplatY(C); |
||
| 999 | CZ = XMVectorSplatZ(C); |
||
| 1000 | CW = XMVectorSplatW(C); |
||
| 1001 | |||
| 1002 | PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ); |
||
| 1003 | SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ); |
||
| 1004 | PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ); |
||
| 1005 | |||
| 1006 | PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX); |
||
| 1007 | SignB = XMVectorSelect(SignB, SignNPPP, CX); |
||
| 1008 | PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX); |
||
| 1009 | |||
| 1010 | PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY); |
||
| 1011 | SignBT = XMVectorSelect(SignB, SignPNPP, CY); |
||
| 1012 | PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY); |
||
| 1013 | |||
| 1014 | PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX); |
||
| 1015 | SignB = XMVectorSelect(SignB, SignBT, CX); |
||
| 1016 | PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX); |
||
| 1017 | |||
| 1018 | PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW); |
||
| 1019 | SignB = XMVectorSelect(SignB, SignNNNX, CW); |
||
| 1020 | PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW); |
||
| 1021 | |||
| 1022 | Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat); |
||
| 1023 | |||
| 1024 | P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21} |
||
| 1025 | A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03} |
||
| 1026 | B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03} |
||
| 1027 | |||
| 1028 | Q2 = XMVectorMultiplyAdd(SignB, B, A); |
||
| 1029 | Q2 = XMVectorMultiply(Q2, Scale); |
||
| 1030 | |||
| 1031 | Result = XMVectorPermute(Q1, Q2, PermuteControl); |
||
| 1032 | |||
| 1033 | return Result; |
||
| 1034 | |||
| 1035 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1036 | XMVECTOR Q0, Q1, Q2; |
||
| 1037 | XMVECTOR M00, M11, M22; |
||
| 1038 | XMVECTOR CQ0, CQ1, C; |
||
| 1039 | XMVECTOR CX, CY, CZ, CW; |
||
| 1040 | XMVECTOR SQ1, Scale; |
||
| 1041 | XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select; |
||
| 1042 | XMVECTOR A, B, P; |
||
| 1043 | XMVECTOR PermuteSplat, PermuteSplatT; |
||
| 1044 | XMVECTOR SignB, SignBT; |
||
| 1045 | XMVECTOR PermuteControl, PermuteControlT; |
||
| 1046 | XMVECTOR Zero; |
||
| 1047 | XMVECTOR Result; |
||
| 1048 | static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f}; |
||
| 1049 | static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f}; |
||
| 1050 | static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f}; |
||
| 1051 | static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f}; |
||
| 1052 | static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f}; |
||
| 1053 | static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f}; |
||
| 1054 | static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f}; |
||
| 1055 | static CONST XMVECTORF32 SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f}; |
||
| 1056 | static CONST XMVECTORI32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W}; |
||
| 1057 | static CONST XMVECTORI32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W}; |
||
| 1058 | static CONST XMVECTORI32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X}; |
||
| 1059 | static CONST XMVECTORI32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y}; |
||
| 1060 | static CONST XMVECTORI32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z}; |
||
| 1061 | static CONST XMVECTORI32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W}; |
||
| 1062 | static CONST XMVECTORI32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y}; |
||
| 1063 | static CONST XMVECTORI32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W}; |
||
| 1064 | static CONST XMVECTORI32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W}; |
||
| 1065 | static CONST XMVECTORI32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y}; |
||
| 1066 | static CONST XMVECTORI32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z}; |
||
| 1067 | static CONST XMVECTORI32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X}; |
||
| 1068 | static CONST XMVECTORI32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W}; |
||
| 1069 | |||
| 1070 | M00 = XMVectorSplatX(M.r[0]); |
||
| 1071 | M11 = XMVectorSplatY(M.r[1]); |
||
| 1072 | M22 = XMVectorSplatZ(M.r[2]); |
||
| 1073 | |||
| 1074 | Q0 = XMVectorMultiply(SignPNNP, M00); |
||
| 1075 | Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0); |
||
| 1076 | Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0); |
||
| 1077 | |||
| 1078 | Q1 = XMVectorAdd(Q0, g_XMOne); |
||
| 1079 | |||
| 1080 | Rsq = XMVectorReciprocalSqrt(Q1); |
||
| 1081 | Zero = XMVectorZero(); |
||
| 1082 | VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity); |
||
| 1083 | VEqualsZero = XMVectorEqual(Q1, Zero); |
||
| 1084 | Sqrt = XMVectorMultiply(Q1, Rsq); |
||
| 1085 | Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); |
||
| 1086 | Q1 = XMVectorSelect(Q1, Sqrt, Select); |
||
| 1087 | |||
| 1088 | Q1 = XMVectorMultiply(Q1, g_XMOneHalf); |
||
| 1089 | |||
| 1090 | SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf); |
||
| 1091 | |||
| 1092 | CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W); |
||
| 1093 | CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W); |
||
| 1094 | C = XMVectorGreaterOrEqual(CQ0, CQ1); |
||
| 1095 | |||
| 1096 | CX = XMVectorSplatX(C); |
||
| 1097 | CY = XMVectorSplatY(C); |
||
| 1098 | CZ = XMVectorSplatZ(C); |
||
| 1099 | CW = XMVectorSplatW(C); |
||
| 1100 | |||
| 1101 | PermuteSplat = XMVectorSelect(SplatZ, SplatY, CZ); |
||
| 1102 | SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ); |
||
| 1103 | PermuteControl = XMVectorSelect(Permute2, Permute1, CZ); |
||
| 1104 | |||
| 1105 | PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ, CX); |
||
| 1106 | SignB = XMVectorSelect(SignB, SignNPPP, CX); |
||
| 1107 | PermuteControl = XMVectorSelect(PermuteControl, Permute2, CX); |
||
| 1108 | |||
| 1109 | PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX, CY); |
||
| 1110 | SignBT = XMVectorSelect(SignB, SignPNPP, CY); |
||
| 1111 | PermuteControlT = XMVectorSelect(PermuteControl,Permute0, CY); |
||
| 1112 | |||
| 1113 | PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX); |
||
| 1114 | SignB = XMVectorSelect(SignB, SignBT, CX); |
||
| 1115 | PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX); |
||
| 1116 | |||
| 1117 | PermuteSplat = XMVectorSelect(PermuteSplat,SplatW, CW); |
||
| 1118 | SignB = XMVectorSelect(SignB, SignNNNX, CW); |
||
| 1119 | PermuteControl = XMVectorSelect(PermuteControl,Permute3, CW); |
||
| 1120 | |||
| 1121 | Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat); |
||
| 1122 | |||
| 1123 | P = XMVectorPermute(M.r[1], M.r[2],PermuteC); // {M10, M12, M20, M21} |
||
| 1124 | A = XMVectorPermute(M.r[0], P, PermuteA); // {M01, M12, M20, M03} |
||
| 1125 | B = XMVectorPermute(M.r[0], P, PermuteB); // {M10, M21, M02, M03} |
||
| 1126 | |||
| 1127 | Q2 = XMVectorMultiplyAdd(SignB, B, A); |
||
| 1128 | Q2 = XMVectorMultiply(Q2, Scale); |
||
| 1129 | |||
| 1130 | Result = XMVectorPermute(Q1, Q2, PermuteControl); |
||
| 1131 | |||
| 1132 | return Result; |
||
| 1133 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1134 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1135 | } |
||
| 1136 | |||
| 1137 | //------------------------------------------------------------------------------ |
||
| 1138 | // Conversion operations |
||
| 1139 | //------------------------------------------------------------------------------ |
||
| 1140 | |||
| 1141 | //------------------------------------------------------------------------------ |
||
| 1142 | |||
| 1143 | XMFINLINE VOID XMQuaternionToAxisAngle |
||
| 1144 | ( |
||
| 1145 | XMVECTOR* pAxis, |
||
| 1146 | FLOAT* pAngle, |
||
| 1147 | FXMVECTOR Q |
||
| 1148 | ) |
||
| 1149 | { |
||
| 1150 | XMASSERT(pAxis); |
||
| 1151 | XMASSERT(pAngle); |
||
| 1152 | |||
| 1153 | *pAxis = Q; |
||
| 1154 | |||
| 1155 | #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) |
||
| 1156 | *pAngle = 2.0f * acosf(XMVectorGetW(Q)); |
||
| 1157 | #else |
||
| 1158 | *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); |
||
| 1159 | #endif |
||
| 1160 | } |
||
| 1161 | |||
| 1162 | /**************************************************************************** |
||
| 1163 | * |
||
| 1164 | * Plane |
||
| 1165 | * |
||
| 1166 | ****************************************************************************/ |
||
| 1167 | |||
| 1168 | //------------------------------------------------------------------------------ |
||
| 1169 | // Comparison operations |
||
| 1170 | //------------------------------------------------------------------------------ |
||
| 1171 | |||
| 1172 | //------------------------------------------------------------------------------ |
||
| 1173 | |||
| 1174 | XMFINLINE BOOL XMPlaneEqual |
||
| 1175 | ( |
||
| 1176 | FXMVECTOR P1, |
||
| 1177 | FXMVECTOR P2 |
||
| 1178 | ) |
||
| 1179 | { |
||
| 1180 | return XMVector4Equal(P1, P2); |
||
| 1181 | } |
||
| 1182 | |||
| 1183 | //------------------------------------------------------------------------------ |
||
| 1184 | |||
| 1185 | XMFINLINE BOOL XMPlaneNearEqual |
||
| 1186 | ( |
||
| 1187 | FXMVECTOR P1, |
||
| 1188 | FXMVECTOR P2, |
||
| 1189 | FXMVECTOR Epsilon |
||
| 1190 | ) |
||
| 1191 | { |
||
| 1192 | XMVECTOR NP1 = XMPlaneNormalize(P1); |
||
| 1193 | XMVECTOR NP2 = XMPlaneNormalize(P2); |
||
| 1194 | return XMVector4NearEqual(NP1, NP2, Epsilon); |
||
| 1195 | } |
||
| 1196 | |||
| 1197 | //------------------------------------------------------------------------------ |
||
| 1198 | |||
| 1199 | XMFINLINE BOOL XMPlaneNotEqual |
||
| 1200 | ( |
||
| 1201 | FXMVECTOR P1, |
||
| 1202 | FXMVECTOR P2 |
||
| 1203 | ) |
||
| 1204 | { |
||
| 1205 | return XMVector4NotEqual(P1, P2); |
||
| 1206 | } |
||
| 1207 | |||
| 1208 | //------------------------------------------------------------------------------ |
||
| 1209 | |||
| 1210 | XMFINLINE BOOL XMPlaneIsNaN |
||
| 1211 | ( |
||
| 1212 | FXMVECTOR P |
||
| 1213 | ) |
||
| 1214 | { |
||
| 1215 | return XMVector4IsNaN(P); |
||
| 1216 | } |
||
| 1217 | |||
| 1218 | //------------------------------------------------------------------------------ |
||
| 1219 | |||
| 1220 | XMFINLINE BOOL XMPlaneIsInfinite |
||
| 1221 | ( |
||
| 1222 | FXMVECTOR P |
||
| 1223 | ) |
||
| 1224 | { |
||
| 1225 | return XMVector4IsInfinite(P); |
||
| 1226 | } |
||
| 1227 | |||
| 1228 | //------------------------------------------------------------------------------ |
||
| 1229 | // Computation operations |
||
| 1230 | //------------------------------------------------------------------------------ |
||
| 1231 | |||
| 1232 | //------------------------------------------------------------------------------ |
||
| 1233 | |||
| 1234 | XMFINLINE XMVECTOR XMPlaneDot |
||
| 1235 | ( |
||
| 1236 | FXMVECTOR P, |
||
| 1237 | FXMVECTOR V |
||
| 1238 | ) |
||
| 1239 | { |
||
| 1240 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1241 | |||
| 1242 | return XMVector4Dot(P, V); |
||
| 1243 | |||
| 1244 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1245 | __m128 vTemp2 = V; |
||
| 1246 | __m128 vTemp = _mm_mul_ps(P,vTemp2); |
||
| 1247 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
| 1248 | vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W; |
||
| 1249 | vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
| 1250 | vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together |
||
| 1251 | return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
| 1252 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1253 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1254 | } |
||
| 1255 | |||
| 1256 | //------------------------------------------------------------------------------ |
||
| 1257 | |||
| 1258 | XMFINLINE XMVECTOR XMPlaneDotCoord |
||
| 1259 | ( |
||
| 1260 | FXMVECTOR P, |
||
| 1261 | FXMVECTOR V |
||
| 1262 | ) |
||
| 1263 | { |
||
| 1264 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1265 | |||
| 1266 | XMVECTOR V3; |
||
| 1267 | XMVECTOR Result; |
||
| 1268 | |||
| 1269 | // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] |
||
| 1270 | V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); |
||
| 1271 | Result = XMVector4Dot(P, V3); |
||
| 1272 | |||
| 1273 | return Result; |
||
| 1274 | |||
| 1275 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1276 | XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3); |
||
| 1277 | vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3); |
||
| 1278 | XMVECTOR vTemp = _mm_mul_ps(P,vTemp2); |
||
| 1279 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
| 1280 | vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W; |
||
| 1281 | vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
| 1282 | vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together |
||
| 1283 | return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
| 1284 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1285 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1286 | } |
||
| 1287 | |||
| 1288 | //------------------------------------------------------------------------------ |
||
| 1289 | |||
| 1290 | XMFINLINE XMVECTOR XMPlaneDotNormal |
||
| 1291 | ( |
||
| 1292 | FXMVECTOR P, |
||
| 1293 | FXMVECTOR V |
||
| 1294 | ) |
||
| 1295 | { |
||
| 1296 | return XMVector3Dot(P, V); |
||
| 1297 | } |
||
| 1298 | |||
| 1299 | //------------------------------------------------------------------------------ |
||
| 1300 | // XMPlaneNormalizeEst uses a reciprocal estimate and |
||
| 1301 | // returns QNaN on zero and infinite vectors. |
||
| 1302 | |||
| 1303 | XMFINLINE XMVECTOR XMPlaneNormalizeEst |
||
| 1304 | ( |
||
| 1305 | FXMVECTOR P |
||
| 1306 | ) |
||
| 1307 | { |
||
| 1308 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1309 | |||
| 1310 | XMVECTOR Result; |
||
| 1311 | Result = XMVector3ReciprocalLength(P); |
||
| 1312 | Result = XMVectorMultiply(P, Result); |
||
| 1313 | return Result; |
||
| 1314 | |||
| 1315 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1316 | // Perform the dot product |
||
| 1317 | XMVECTOR vDot = _mm_mul_ps(P,P); |
||
| 1318 | // x=Dot.y, y=Dot.z |
||
| 1319 | XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1)); |
||
| 1320 | // Result.x = x+y |
||
| 1321 | vDot = _mm_add_ss(vDot,vTemp); |
||
| 1322 | // x=Dot.z |
||
| 1323 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1)); |
||
| 1324 | // Result.x = (x+y)+z |
||
| 1325 | vDot = _mm_add_ss(vDot,vTemp); |
||
| 1326 | // Splat x |
||
| 1327 | vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0)); |
||
| 1328 | // Get the reciprocal |
||
| 1329 | vDot = _mm_rsqrt_ps(vDot); |
||
| 1330 | // Get the reciprocal |
||
| 1331 | vDot = _mm_mul_ps(vDot,P); |
||
| 1332 | return vDot; |
||
| 1333 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1334 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1335 | } |
||
| 1336 | |||
| 1337 | //------------------------------------------------------------------------------ |
||
| 1338 | |||
| 1339 | XMFINLINE XMVECTOR XMPlaneNormalize |
||
| 1340 | ( |
||
| 1341 | FXMVECTOR P |
||
| 1342 | ) |
||
| 1343 | { |
||
| 1344 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1345 | FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2])); |
||
| 1346 | // Prevent divide by zero |
||
| 1347 | if (fLengthSq) { |
||
| 1348 | fLengthSq = 1.0f/fLengthSq; |
||
| 1349 | } |
||
| 1350 | { |
||
| 1351 | XMVECTOR vResult = { |
||
| 1352 | P.vector4_f32[0]*fLengthSq, |
||
| 1353 | P.vector4_f32[1]*fLengthSq, |
||
| 1354 | P.vector4_f32[2]*fLengthSq, |
||
| 1355 | P.vector4_f32[3]*fLengthSq |
||
| 1356 | }; |
||
| 1357 | return vResult; |
||
| 1358 | } |
||
| 1359 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1360 | // Perform the dot product on x,y and z only |
||
| 1361 | XMVECTOR vLengthSq = _mm_mul_ps(P,P); |
||
| 1362 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1)); |
||
| 1363 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
| 1364 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1)); |
||
| 1365 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
| 1366 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
| 1367 | // Prepare for the division |
||
| 1368 | XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); |
||
| 1369 | // Failsafe on zero (Or epsilon) length planes |
||
| 1370 | // If the length is infinity, set the elements to zero |
||
| 1371 | vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); |
||
| 1372 | // Reciprocal mul to perform the normalization |
||
| 1373 | vResult = _mm_div_ps(P,vResult); |
||
| 1374 | // Any that are infinity, set to zero |
||
| 1375 | vResult = _mm_and_ps(vResult,vLengthSq); |
||
| 1376 | return vResult; |
||
| 1377 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1378 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1379 | } |
||
| 1380 | |||
| 1381 | //------------------------------------------------------------------------------ |
||
| 1382 | |||
| 1383 | XMFINLINE XMVECTOR XMPlaneIntersectLine |
||
| 1384 | ( |
||
| 1385 | FXMVECTOR P, |
||
| 1386 | FXMVECTOR LinePoint1, |
||
| 1387 | FXMVECTOR LinePoint2 |
||
| 1388 | ) |
||
| 1389 | { |
||
| 1390 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1391 | |||
| 1392 | XMVECTOR V1; |
||
| 1393 | XMVECTOR V2; |
||
| 1394 | XMVECTOR D; |
||
| 1395 | XMVECTOR ReciprocalD; |
||
| 1396 | XMVECTOR VT; |
||
| 1397 | XMVECTOR Point; |
||
| 1398 | XMVECTOR Zero; |
||
| 1399 | XMVECTOR Control; |
||
| 1400 | XMVECTOR Result; |
||
| 1401 | |||
| 1402 | V1 = XMVector3Dot(P, LinePoint1); |
||
| 1403 | V2 = XMVector3Dot(P, LinePoint2); |
||
| 1404 | D = XMVectorSubtract(V1, V2); |
||
| 1405 | |||
| 1406 | ReciprocalD = XMVectorReciprocal(D); |
||
| 1407 | VT = XMPlaneDotCoord(P, LinePoint1); |
||
| 1408 | VT = XMVectorMultiply(VT, ReciprocalD); |
||
| 1409 | |||
| 1410 | Point = XMVectorSubtract(LinePoint2, LinePoint1); |
||
| 1411 | Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); |
||
| 1412 | |||
| 1413 | Zero = XMVectorZero(); |
||
| 1414 | Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); |
||
| 1415 | |||
| 1416 | Result = XMVectorSelect(Point, g_XMQNaN.v, Control); |
||
| 1417 | |||
| 1418 | return Result; |
||
| 1419 | |||
| 1420 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1421 | XMVECTOR V1; |
||
| 1422 | XMVECTOR V2; |
||
| 1423 | XMVECTOR D; |
||
| 1424 | XMVECTOR VT; |
||
| 1425 | XMVECTOR Point; |
||
| 1426 | XMVECTOR Zero; |
||
| 1427 | XMVECTOR Control; |
||
| 1428 | XMVECTOR Result; |
||
| 1429 | |||
| 1430 | V1 = XMVector3Dot(P, LinePoint1); |
||
| 1431 | V2 = XMVector3Dot(P, LinePoint2); |
||
| 1432 | D = _mm_sub_ps(V1, V2); |
||
| 1433 | |||
| 1434 | VT = XMPlaneDotCoord(P, LinePoint1); |
||
| 1435 | VT = _mm_div_ps(VT, D); |
||
| 1436 | |||
| 1437 | Point = _mm_sub_ps(LinePoint2, LinePoint1); |
||
| 1438 | Point = _mm_mul_ps(Point,VT); |
||
| 1439 | Point = _mm_add_ps(Point,LinePoint1); |
||
| 1440 | Zero = XMVectorZero(); |
||
| 1441 | Control = XMVectorNearEqual(D, Zero, g_XMEpsilon); |
||
| 1442 | Result = XMVectorSelect(Point, g_XMQNaN, Control); |
||
| 1443 | return Result; |
||
| 1444 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1445 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1446 | } |
||
| 1447 | |||
| 1448 | //------------------------------------------------------------------------------ |
||
| 1449 | |||
| 1450 | XMINLINE VOID XMPlaneIntersectPlane |
||
| 1451 | ( |
||
| 1452 | XMVECTOR* pLinePoint1, |
||
| 1453 | XMVECTOR* pLinePoint2, |
||
| 1454 | FXMVECTOR P1, |
||
| 1455 | FXMVECTOR P2 |
||
| 1456 | ) |
||
| 1457 | { |
||
| 1458 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1459 | |||
| 1460 | XMVECTOR V1; |
||
| 1461 | XMVECTOR V2; |
||
| 1462 | XMVECTOR V3; |
||
| 1463 | XMVECTOR LengthSq; |
||
| 1464 | XMVECTOR RcpLengthSq; |
||
| 1465 | XMVECTOR Point; |
||
| 1466 | XMVECTOR P1W; |
||
| 1467 | XMVECTOR P2W; |
||
| 1468 | XMVECTOR Control; |
||
| 1469 | XMVECTOR LinePoint1; |
||
| 1470 | XMVECTOR LinePoint2; |
||
| 1471 | |||
| 1472 | XMASSERT(pLinePoint1); |
||
| 1473 | XMASSERT(pLinePoint2); |
||
| 1474 | |||
| 1475 | V1 = XMVector3Cross(P2, P1); |
||
| 1476 | |||
| 1477 | LengthSq = XMVector3LengthSq(V1); |
||
| 1478 | |||
| 1479 | V2 = XMVector3Cross(P2, V1); |
||
| 1480 | |||
| 1481 | P1W = XMVectorSplatW(P1); |
||
| 1482 | Point = XMVectorMultiply(V2, P1W); |
||
| 1483 | |||
| 1484 | V3 = XMVector3Cross(V1, P1); |
||
| 1485 | |||
| 1486 | P2W = XMVectorSplatW(P2); |
||
| 1487 | Point = XMVectorMultiplyAdd(V3, P2W, Point); |
||
| 1488 | |||
| 1489 | RcpLengthSq = XMVectorReciprocal(LengthSq); |
||
| 1490 | LinePoint1 = XMVectorMultiply(Point, RcpLengthSq); |
||
| 1491 | |||
| 1492 | LinePoint2 = XMVectorAdd(LinePoint1, V1); |
||
| 1493 | |||
| 1494 | Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); |
||
| 1495 | *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control); |
||
| 1496 | *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control); |
||
| 1497 | |||
| 1498 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1499 | XMASSERT(pLinePoint1); |
||
| 1500 | XMASSERT(pLinePoint2); |
||
| 1501 | XMVECTOR V1; |
||
| 1502 | XMVECTOR V2; |
||
| 1503 | XMVECTOR V3; |
||
| 1504 | XMVECTOR LengthSq; |
||
| 1505 | XMVECTOR Point; |
||
| 1506 | XMVECTOR P1W; |
||
| 1507 | XMVECTOR P2W; |
||
| 1508 | XMVECTOR Control; |
||
| 1509 | XMVECTOR LinePoint1; |
||
| 1510 | XMVECTOR LinePoint2; |
||
| 1511 | |||
| 1512 | V1 = XMVector3Cross(P2, P1); |
||
| 1513 | |||
| 1514 | LengthSq = XMVector3LengthSq(V1); |
||
| 1515 | |||
| 1516 | V2 = XMVector3Cross(P2, V1); |
||
| 1517 | |||
| 1518 | P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3)); |
||
| 1519 | Point = _mm_mul_ps(V2, P1W); |
||
| 1520 | |||
| 1521 | V3 = XMVector3Cross(V1, P1); |
||
| 1522 | |||
| 1523 | P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3)); |
||
| 1524 | V3 = _mm_mul_ps(V3,P2W); |
||
| 1525 | Point = _mm_add_ps(Point,V3); |
||
| 1526 | LinePoint1 = _mm_div_ps(Point,LengthSq); |
||
| 1527 | |||
| 1528 | LinePoint2 = _mm_add_ps(LinePoint1, V1); |
||
| 1529 | |||
| 1530 | Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon); |
||
| 1531 | *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control); |
||
| 1532 | *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control); |
||
| 1533 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1534 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1535 | } |
||
| 1536 | |||
| 1537 | //------------------------------------------------------------------------------ |
||
| 1538 | |||
| 1539 | XMFINLINE XMVECTOR XMPlaneTransform |
||
| 1540 | ( |
||
| 1541 | FXMVECTOR P, |
||
| 1542 | CXMMATRIX M |
||
| 1543 | ) |
||
| 1544 | { |
||
| 1545 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1546 | |||
| 1547 | XMVECTOR X; |
||
| 1548 | XMVECTOR Y; |
||
| 1549 | XMVECTOR Z; |
||
| 1550 | XMVECTOR W; |
||
| 1551 | XMVECTOR Result; |
||
| 1552 | |||
| 1553 | W = XMVectorSplatW(P); |
||
| 1554 | Z = XMVectorSplatZ(P); |
||
| 1555 | Y = XMVectorSplatY(P); |
||
| 1556 | X = XMVectorSplatX(P); |
||
| 1557 | |||
| 1558 | Result = XMVectorMultiply(W, M.r[3]); |
||
| 1559 | Result = XMVectorMultiplyAdd(Z, M.r[2], Result); |
||
| 1560 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
| 1561 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
| 1562 | |||
| 1563 | return Result; |
||
| 1564 | |||
| 1565 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1566 | XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0)); |
||
| 1567 | XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1)); |
||
| 1568 | XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2)); |
||
| 1569 | XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3)); |
||
| 1570 | X = _mm_mul_ps(X, M.r[0]); |
||
| 1571 | Y = _mm_mul_ps(Y, M.r[1]); |
||
| 1572 | Z = _mm_mul_ps(Z, M.r[2]); |
||
| 1573 | W = _mm_mul_ps(W, M.r[3]); |
||
| 1574 | X = _mm_add_ps(X,Z); |
||
| 1575 | Y = _mm_add_ps(Y,W); |
||
| 1576 | X = _mm_add_ps(X,Y); |
||
| 1577 | return X; |
||
| 1578 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1579 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1580 | } |
||
| 1581 | |||
| 1582 | //------------------------------------------------------------------------------ |
||
| 1583 | |||
| 1584 | XMFINLINE XMFLOAT4* XMPlaneTransformStream |
||
| 1585 | ( |
||
| 1586 | XMFLOAT4* pOutputStream, |
||
| 1587 | UINT OutputStride, |
||
| 1588 | CONST XMFLOAT4* pInputStream, |
||
| 1589 | UINT InputStride, |
||
| 1590 | UINT PlaneCount, |
||
| 1591 | CXMMATRIX M |
||
| 1592 | ) |
||
| 1593 | { |
||
| 1594 | return XMVector4TransformStream(pOutputStream, |
||
| 1595 | OutputStride, |
||
| 1596 | pInputStream, |
||
| 1597 | InputStride, |
||
| 1598 | PlaneCount, |
||
| 1599 | M); |
||
| 1600 | } |
||
| 1601 | |||
| 1602 | //------------------------------------------------------------------------------ |
||
| 1603 | // Conversion operations |
||
| 1604 | //------------------------------------------------------------------------------ |
||
| 1605 | |||
| 1606 | //------------------------------------------------------------------------------ |
||
| 1607 | |||
| 1608 | XMFINLINE XMVECTOR XMPlaneFromPointNormal |
||
| 1609 | ( |
||
| 1610 | FXMVECTOR Point, |
||
| 1611 | FXMVECTOR Normal |
||
| 1612 | ) |
||
| 1613 | { |
||
| 1614 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1615 | |||
| 1616 | XMVECTOR W; |
||
| 1617 | XMVECTOR Result; |
||
| 1618 | |||
| 1619 | W = XMVector3Dot(Point, Normal); |
||
| 1620 | W = XMVectorNegate(W); |
||
| 1621 | Result = XMVectorSelect(W, Normal, g_XMSelect1110.v); |
||
| 1622 | |||
| 1623 | return Result; |
||
| 1624 | |||
| 1625 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1626 | XMVECTOR W; |
||
| 1627 | XMVECTOR Result; |
||
| 1628 | W = XMVector3Dot(Point,Normal); |
||
| 1629 | W = _mm_mul_ps(W,g_XMNegativeOne); |
||
| 1630 | Result = _mm_and_ps(Normal,g_XMMask3); |
||
| 1631 | W = _mm_and_ps(W,g_XMMaskW); |
||
| 1632 | Result = _mm_or_ps(Result,W); |
||
| 1633 | return Result; |
||
| 1634 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1635 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1636 | } |
||
| 1637 | |||
| 1638 | //------------------------------------------------------------------------------ |
||
| 1639 | |||
| 1640 | XMFINLINE XMVECTOR XMPlaneFromPoints |
||
| 1641 | ( |
||
| 1642 | FXMVECTOR Point1, |
||
| 1643 | FXMVECTOR Point2, |
||
| 1644 | FXMVECTOR Point3 |
||
| 1645 | ) |
||
| 1646 | { |
||
| 1647 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1648 | |||
| 1649 | XMVECTOR N; |
||
| 1650 | XMVECTOR D; |
||
| 1651 | XMVECTOR V21; |
||
| 1652 | XMVECTOR V31; |
||
| 1653 | XMVECTOR Result; |
||
| 1654 | |||
| 1655 | V21 = XMVectorSubtract(Point1, Point2); |
||
| 1656 | V31 = XMVectorSubtract(Point1, Point3); |
||
| 1657 | |||
| 1658 | N = XMVector3Cross(V21, V31); |
||
| 1659 | N = XMVector3Normalize(N); |
||
| 1660 | |||
| 1661 | D = XMPlaneDotNormal(N, Point1); |
||
| 1662 | D = XMVectorNegate(D); |
||
| 1663 | |||
| 1664 | Result = XMVectorSelect(D, N, g_XMSelect1110.v); |
||
| 1665 | |||
| 1666 | return Result; |
||
| 1667 | |||
| 1668 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1669 | XMVECTOR N; |
||
| 1670 | XMVECTOR D; |
||
| 1671 | XMVECTOR V21; |
||
| 1672 | XMVECTOR V31; |
||
| 1673 | XMVECTOR Result; |
||
| 1674 | |||
| 1675 | V21 = _mm_sub_ps(Point1, Point2); |
||
| 1676 | V31 = _mm_sub_ps(Point1, Point3); |
||
| 1677 | |||
| 1678 | N = XMVector3Cross(V21, V31); |
||
| 1679 | N = XMVector3Normalize(N); |
||
| 1680 | |||
| 1681 | D = XMPlaneDotNormal(N, Point1); |
||
| 1682 | D = _mm_mul_ps(D,g_XMNegativeOne); |
||
| 1683 | N = _mm_and_ps(N,g_XMMask3); |
||
| 1684 | D = _mm_and_ps(D,g_XMMaskW); |
||
| 1685 | Result = _mm_or_ps(D,N); |
||
| 1686 | return Result; |
||
| 1687 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1688 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1689 | } |
||
| 1690 | |||
| 1691 | /**************************************************************************** |
||
| 1692 | * |
||
| 1693 | * Color |
||
| 1694 | * |
||
| 1695 | ****************************************************************************/ |
||
| 1696 | |||
| 1697 | //------------------------------------------------------------------------------ |
||
| 1698 | // Comparison operations |
||
| 1699 | //------------------------------------------------------------------------------ |
||
| 1700 | |||
| 1701 | //------------------------------------------------------------------------------ |
||
| 1702 | |||
| 1703 | XMFINLINE BOOL XMColorEqual |
||
| 1704 | ( |
||
| 1705 | FXMVECTOR C1, |
||
| 1706 | FXMVECTOR C2 |
||
| 1707 | ) |
||
| 1708 | { |
||
| 1709 | return XMVector4Equal(C1, C2); |
||
| 1710 | } |
||
| 1711 | |||
| 1712 | //------------------------------------------------------------------------------ |
||
| 1713 | |||
| 1714 | XMFINLINE BOOL XMColorNotEqual |
||
| 1715 | ( |
||
| 1716 | FXMVECTOR C1, |
||
| 1717 | FXMVECTOR C2 |
||
| 1718 | ) |
||
| 1719 | { |
||
| 1720 | return XMVector4NotEqual(C1, C2); |
||
| 1721 | } |
||
| 1722 | |||
| 1723 | //------------------------------------------------------------------------------ |
||
| 1724 | |||
| 1725 | XMFINLINE BOOL XMColorGreater |
||
| 1726 | ( |
||
| 1727 | FXMVECTOR C1, |
||
| 1728 | FXMVECTOR C2 |
||
| 1729 | ) |
||
| 1730 | { |
||
| 1731 | return XMVector4Greater(C1, C2); |
||
| 1732 | } |
||
| 1733 | |||
| 1734 | //------------------------------------------------------------------------------ |
||
| 1735 | |||
| 1736 | XMFINLINE BOOL XMColorGreaterOrEqual |
||
| 1737 | ( |
||
| 1738 | FXMVECTOR C1, |
||
| 1739 | FXMVECTOR C2 |
||
| 1740 | ) |
||
| 1741 | { |
||
| 1742 | return XMVector4GreaterOrEqual(C1, C2); |
||
| 1743 | } |
||
| 1744 | |||
| 1745 | //------------------------------------------------------------------------------ |
||
| 1746 | |||
| 1747 | XMFINLINE BOOL XMColorLess |
||
| 1748 | ( |
||
| 1749 | FXMVECTOR C1, |
||
| 1750 | FXMVECTOR C2 |
||
| 1751 | ) |
||
| 1752 | { |
||
| 1753 | return XMVector4Less(C1, C2); |
||
| 1754 | } |
||
| 1755 | |||
| 1756 | //------------------------------------------------------------------------------ |
||
| 1757 | |||
| 1758 | XMFINLINE BOOL XMColorLessOrEqual |
||
| 1759 | ( |
||
| 1760 | FXMVECTOR C1, |
||
| 1761 | FXMVECTOR C2 |
||
| 1762 | ) |
||
| 1763 | { |
||
| 1764 | return XMVector4LessOrEqual(C1, C2); |
||
| 1765 | } |
||
| 1766 | |||
| 1767 | //------------------------------------------------------------------------------ |
||
| 1768 | |||
| 1769 | XMFINLINE BOOL XMColorIsNaN |
||
| 1770 | ( |
||
| 1771 | FXMVECTOR C |
||
| 1772 | ) |
||
| 1773 | { |
||
| 1774 | return XMVector4IsNaN(C); |
||
| 1775 | } |
||
| 1776 | |||
| 1777 | //------------------------------------------------------------------------------ |
||
| 1778 | |||
| 1779 | XMFINLINE BOOL XMColorIsInfinite |
||
| 1780 | ( |
||
| 1781 | FXMVECTOR C |
||
| 1782 | ) |
||
| 1783 | { |
||
| 1784 | return XMVector4IsInfinite(C); |
||
| 1785 | } |
||
| 1786 | |||
| 1787 | //------------------------------------------------------------------------------ |
||
| 1788 | // Computation operations |
||
| 1789 | //------------------------------------------------------------------------------ |
||
| 1790 | |||
| 1791 | //------------------------------------------------------------------------------ |
||
| 1792 | |||
| 1793 | XMFINLINE XMVECTOR XMColorNegative |
||
| 1794 | ( |
||
| 1795 | FXMVECTOR vColor |
||
| 1796 | ) |
||
| 1797 | { |
||
| 1798 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1799 | // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f))); |
||
| 1800 | // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f))); |
||
| 1801 | XMVECTOR vResult = { |
||
| 1802 | 1.0f - vColor.vector4_f32[0], |
||
| 1803 | 1.0f - vColor.vector4_f32[1], |
||
| 1804 | 1.0f - vColor.vector4_f32[2], |
||
| 1805 | vColor.vector4_f32[3] |
||
| 1806 | }; |
||
| 1807 | return vResult; |
||
| 1808 | |||
| 1809 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1810 | // Negate only x,y and z. |
||
| 1811 | XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3); |
||
| 1812 | // Add 1,1,1,0 to -x,-y,-z,w |
||
| 1813 | return _mm_add_ps(vTemp,g_XMOne3); |
||
| 1814 | #else // _XM_VMX128_INTRINSICS_ |
||
| 1815 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1816 | } |
||
| 1817 | |||
| 1818 | //------------------------------------------------------------------------------ |
||
| 1819 | |||
| 1820 | XMFINLINE XMVECTOR XMColorModulate |
||
| 1821 | ( |
||
| 1822 | FXMVECTOR C1, |
||
| 1823 | FXMVECTOR C2 |
||
| 1824 | ) |
||
| 1825 | { |
||
| 1826 | return XMVectorMultiply(C1, C2); |
||
| 1827 | } |
||
| 1828 | |||
| 1829 | //------------------------------------------------------------------------------ |
||
| 1830 | |||
| 1831 | XMFINLINE XMVECTOR XMColorAdjustSaturation |
||
| 1832 | ( |
||
| 1833 | FXMVECTOR vColor, |
||
| 1834 | FLOAT fSaturation |
||
| 1835 | ) |
||
| 1836 | { |
||
| 1837 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1838 | CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; |
||
| 1839 | |||
| 1840 | // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; |
||
| 1841 | // Result = (C - Luminance) * Saturation + Luminance; |
||
| 1842 | |||
| 1843 | FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]); |
||
| 1844 | XMVECTOR vResult = { |
||
| 1845 | ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance, |
||
| 1846 | ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance, |
||
| 1847 | ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance, |
||
| 1848 | vColor.vector4_f32[3]}; |
||
| 1849 | return vResult; |
||
| 1850 | |||
| 1851 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1852 | static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; |
||
| 1853 | // Mul RGB by intensity constants |
||
| 1854 | XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance); |
||
| 1855 | // vResult.x = vLuminance.y, vResult.y = vLuminance.y, |
||
| 1856 | // vResult.z = vLuminance.z, vResult.w = vLuminance.z |
||
| 1857 | XMVECTOR vResult = vLuminance; |
||
| 1858 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1)); |
||
| 1859 | // vLuminance.x += vLuminance.y |
||
| 1860 | vLuminance = _mm_add_ss(vLuminance,vResult); |
||
| 1861 | // Splat vLuminance.z |
||
| 1862 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2)); |
||
| 1863 | // vLuminance.x += vLuminance.z (Dot product) |
||
| 1864 | vLuminance = _mm_add_ss(vLuminance,vResult); |
||
| 1865 | // Splat vLuminance |
||
| 1866 | vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0)); |
||
| 1867 | // Splat fSaturation |
||
| 1868 | XMVECTOR vSaturation = _mm_set_ps1(fSaturation); |
||
| 1869 | // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; |
||
| 1870 | vResult = _mm_sub_ps(vColor,vLuminance); |
||
| 1871 | vResult = _mm_mul_ps(vResult,vSaturation); |
||
| 1872 | vResult = _mm_add_ps(vResult,vLuminance); |
||
| 1873 | // Retain w from the source color |
||
| 1874 | vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w |
||
| 1875 | vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w |
||
| 1876 | return vResult; |
||
| 1877 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
| 1878 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1879 | } |
||
| 1880 | |||
| 1881 | //------------------------------------------------------------------------------ |
||
| 1882 | |||
| 1883 | XMFINLINE XMVECTOR XMColorAdjustContrast |
||
| 1884 | ( |
||
| 1885 | FXMVECTOR vColor, |
||
| 1886 | FLOAT fContrast |
||
| 1887 | ) |
||
| 1888 | { |
||
| 1889 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1890 | // Result = (vColor - 0.5f) * fContrast + 0.5f; |
||
| 1891 | XMVECTOR vResult = { |
||
| 1892 | ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f, |
||
| 1893 | ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f, |
||
| 1894 | ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f, |
||
| 1895 | vColor.vector4_f32[3] // Leave W untouched |
||
| 1896 | }; |
||
| 1897 | return vResult; |
||
| 1898 | |||
| 1899 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 1900 | XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale |
||
| 1901 | XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source) |
||
| 1902 | vResult = _mm_mul_ps(vResult,vScale); // Mul by scale |
||
| 1903 | vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f |
||
| 1904 | // Retain w from the source color |
||
| 1905 | vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w |
||
| 1906 | vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w |
||
| 1907 | return vResult; |
||
| 1908 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
| 1909 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 1910 | } |
||
| 1911 | |||
| 1912 | /**************************************************************************** |
||
| 1913 | * |
||
| 1914 | * Miscellaneous |
||
| 1915 | * |
||
| 1916 | ****************************************************************************/ |
||
| 1917 | |||
| 1918 | //------------------------------------------------------------------------------ |
||
| 1919 | |||
| 1920 | XMINLINE BOOL XMVerifyCPUSupport() |
||
| 1921 | { |
||
| 1922 | #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_) |
||
| 1923 | return TRUE; |
||
| 1924 | #else // _XM_SSE_INTRINSICS_ |
||
| 1925 | // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail |
||
| 1926 | // Detecting SSE2 on older versions of Windows would require using cpuid directly |
||
| 1927 | return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) ); |
||
| 1928 | #endif |
||
| 1929 | } |
||
| 1930 | |||
| 1931 | |||
| 1932 | //------------------------------------------------------------------------------ |
||
| 1933 | |||
| 1934 | #define XMASSERT_LINE_STRING_SIZE 16 |
||
| 1935 | |||
| 1936 | XMINLINE VOID XMAssert |
||
| 1937 | ( |
||
| 1938 | CONST CHAR* pExpression, |
||
| 1939 | CONST CHAR* pFileName, |
||
| 1940 | UINT LineNumber |
||
| 1941 | ) |
||
| 1942 | { |
||
| 1943 | CHAR aLineString[XMASSERT_LINE_STRING_SIZE]; |
||
| 1944 | CHAR* pLineString; |
||
| 1945 | UINT Line; |
||
| 1946 | |||
| 1947 | aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0'; |
||
| 1948 | aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0'; |
||
| 1949 | for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2; |
||
| 1950 | Line != 0 && pLineString >= aLineString; |
||
| 1951 | Line /= 10, pLineString--) |
||
| 1952 | { |
||
| 1953 | *pLineString = (CHAR)('0' + (Line % 10)); |
||
| 1954 | } |
||
| 1955 | |||
| 1956 | #ifndef NO_OUTPUT_DEBUG_STRING |
||
| 1957 | OutputDebugStringA("Assertion failed: "); |
||
| 1958 | OutputDebugStringA(pExpression); |
||
| 1959 | OutputDebugStringA(", file "); |
||
| 1960 | OutputDebugStringA(pFileName); |
||
| 1961 | OutputDebugStringA(", line "); |
||
| 1962 | OutputDebugStringA(pLineString + 1); |
||
| 1963 | OutputDebugStringA("\r\n"); |
||
| 1964 | #else |
||
| 1965 | DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber); |
||
| 1966 | #endif |
||
| 1967 | |||
| 1968 | __debugbreak(); |
||
| 1969 | } |
||
| 1970 | |||
| 1971 | //------------------------------------------------------------------------------ |
||
| 1972 | |||
| 1973 | XMFINLINE XMVECTOR XMFresnelTerm |
||
| 1974 | ( |
||
| 1975 | FXMVECTOR CosIncidentAngle, |
||
| 1976 | FXMVECTOR RefractionIndex |
||
| 1977 | ) |
||
| 1978 | { |
||
| 1979 | #if defined(_XM_NO_INTRINSICS_) |
||
| 1980 | |||
| 1981 | XMVECTOR G; |
||
| 1982 | XMVECTOR D, S; |
||
| 1983 | XMVECTOR V0, V1, V2, V3; |
||
| 1984 | XMVECTOR Result; |
||
| 1985 | |||
| 1986 | // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where |
||
| 1987 | // c = CosIncidentAngle |
||
| 1988 | // g = sqrt(c^2 + RefractionIndex^2 - 1) |
||
| 1989 | |||
| 1990 | XMASSERT(!XMVector4IsInfinite(CosIncidentAngle)); |
||
| 1991 | |||
| 1992 | G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); |
||
| 1993 | G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); |
||
| 1994 | G = XMVectorAbs(G); |
||
| 1995 | G = XMVectorSqrt(G); |
||
| 1996 | |||
| 1997 | S = XMVectorAdd(G, CosIncidentAngle); |
||
| 1998 | D = XMVectorSubtract(G, CosIncidentAngle); |
||
| 1999 | |||
| 2000 | V0 = XMVectorMultiply(D, D); |
||
| 2001 | V1 = XMVectorMultiply(S, S); |
||
| 2002 | V1 = XMVectorReciprocal(V1); |
||
| 2003 | V0 = XMVectorMultiply(g_XMOneHalf.v, V0); |
||
| 2004 | V0 = XMVectorMultiply(V0, V1); |
||
| 2005 | |||
| 2006 | V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); |
||
| 2007 | V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); |
||
| 2008 | V2 = XMVectorMultiply(V2, V2); |
||
| 2009 | V3 = XMVectorMultiply(V3, V3); |
||
| 2010 | V3 = XMVectorReciprocal(V3); |
||
| 2011 | V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); |
||
| 2012 | |||
| 2013 | Result = XMVectorMultiply(V0, V2); |
||
| 2014 | |||
| 2015 | Result = XMVectorSaturate(Result); |
||
| 2016 | |||
| 2017 | return Result; |
||
| 2018 | |||
| 2019 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2020 | // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where |
||
| 2021 | // c = CosIncidentAngle |
||
| 2022 | // g = sqrt(c^2 + RefractionIndex^2 - 1) |
||
| 2023 | |||
| 2024 | XMASSERT(!XMVector4IsInfinite(CosIncidentAngle)); |
||
| 2025 | |||
| 2026 | // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) |
||
| 2027 | XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex); |
||
| 2028 | XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle); |
||
| 2029 | G = _mm_sub_ps(G,g_XMOne); |
||
| 2030 | vTemp = _mm_add_ps(vTemp,G); |
||
| 2031 | // max((0-vTemp),vTemp) == abs(vTemp) |
||
| 2032 | // The abs is needed to deal with refraction and cosine being zero |
||
| 2033 | G = _mm_setzero_ps(); |
||
| 2034 | G = _mm_sub_ps(G,vTemp); |
||
| 2035 | G = _mm_max_ps(G,vTemp); |
||
| 2036 | // Last operation, the sqrt() |
||
| 2037 | G = _mm_sqrt_ps(G); |
||
| 2038 | |||
| 2039 | // Calc G-C and G+C |
||
| 2040 | XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle); |
||
| 2041 | XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle); |
||
| 2042 | // Perform the term (0.5f *(g - c)^2) / (g + c)^2 |
||
| 2043 | XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC); |
||
| 2044 | vTemp = _mm_mul_ps(GAddC,GAddC); |
||
| 2045 | vResult = _mm_mul_ps(vResult,g_XMOneHalf); |
||
| 2046 | vResult = _mm_div_ps(vResult,vTemp); |
||
| 2047 | // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) |
||
| 2048 | GAddC = _mm_mul_ps(GAddC,CosIncidentAngle); |
||
| 2049 | GSubC = _mm_mul_ps(GSubC,CosIncidentAngle); |
||
| 2050 | GAddC = _mm_sub_ps(GAddC,g_XMOne); |
||
| 2051 | GSubC = _mm_add_ps(GSubC,g_XMOne); |
||
| 2052 | GAddC = _mm_mul_ps(GAddC,GAddC); |
||
| 2053 | GSubC = _mm_mul_ps(GSubC,GSubC); |
||
| 2054 | GAddC = _mm_div_ps(GAddC,GSubC); |
||
| 2055 | GAddC = _mm_add_ps(GAddC,g_XMOne); |
||
| 2056 | // Multiply the two term parts |
||
| 2057 | vResult = _mm_mul_ps(vResult,GAddC); |
||
| 2058 | // Clamp to 0.0 - 1.0f |
||
| 2059 | vResult = _mm_max_ps(vResult,g_XMZero); |
||
| 2060 | vResult = _mm_min_ps(vResult,g_XMOne); |
||
| 2061 | return vResult; |
||
| 2062 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2063 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2064 | } |
||
| 2065 | |||
| 2066 | //------------------------------------------------------------------------------ |
||
| 2067 | |||
| 2068 | XMFINLINE BOOL XMScalarNearEqual |
||
| 2069 | ( |
||
| 2070 | FLOAT S1, |
||
| 2071 | FLOAT S2, |
||
| 2072 | FLOAT Epsilon |
||
| 2073 | ) |
||
| 2074 | { |
||
| 2075 | FLOAT Delta = S1 - S2; |
||
| 2076 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2077 | UINT AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF; |
||
| 2078 | return (*(FLOAT*)&AbsDelta <= Epsilon); |
||
| 2079 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2080 | return (fabsf(Delta) <= Epsilon); |
||
| 2081 | #else |
||
| 2082 | return (__fabs(Delta) <= Epsilon); |
||
| 2083 | #endif |
||
| 2084 | } |
||
| 2085 | |||
| 2086 | //------------------------------------------------------------------------------ |
||
| 2087 | // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI |
||
| 2088 | XMFINLINE FLOAT XMScalarModAngle |
||
| 2089 | ( |
||
| 2090 | FLOAT Angle |
||
| 2091 | ) |
||
| 2092 | { |
||
| 2093 | // Note: The modulo is performed with unsigned math only to work |
||
| 2094 | // around a precision error on numbers that are close to PI |
||
| 2095 | float fTemp; |
||
| 2096 | #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_) |
||
| 2097 | // Normalize the range from 0.0f to XM_2PI |
||
| 2098 | Angle = Angle + XM_PI; |
||
| 2099 | // Perform the modulo, unsigned |
||
| 2100 | fTemp = fabsf(Angle); |
||
| 2101 | fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI))); |
||
| 2102 | // Restore the number to the range of -XM_PI to XM_PI-epsilon |
||
| 2103 | fTemp = fTemp - XM_PI; |
||
| 2104 | // If the modulo'd value was negative, restore negation |
||
| 2105 | if (Angle<0.0f) { |
||
| 2106 | fTemp = -fTemp; |
||
| 2107 | } |
||
| 2108 | return fTemp; |
||
| 2109 | #else |
||
| 2110 | #endif |
||
| 2111 | } |
||
| 2112 | |||
| 2113 | //------------------------------------------------------------------------------ |
||
| 2114 | |||
| 2115 | XMINLINE FLOAT XMScalarSin |
||
| 2116 | ( |
||
| 2117 | FLOAT Value |
||
| 2118 | ) |
||
| 2119 | { |
||
| 2120 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2121 | |||
| 2122 | FLOAT ValueMod; |
||
| 2123 | FLOAT ValueSq; |
||
| 2124 | XMVECTOR V0123, V0246, V1357, V9111315, V17192123; |
||
| 2125 | XMVECTOR V1, V7, V8; |
||
| 2126 | XMVECTOR R0, R1, R2; |
||
| 2127 | |||
| 2128 | ValueMod = XMScalarModAngle(Value); |
||
| 2129 | |||
| 2130 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! + |
||
| 2131 | // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) |
||
| 2132 | |||
| 2133 | ValueSq = ValueMod * ValueMod; |
||
| 2134 | |||
| 2135 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
| 2136 | V1 = XMVectorSplatY(V0123); |
||
| 2137 | V0246 = XMVectorMultiply(V0123, V0123); |
||
| 2138 | V1357 = XMVectorMultiply(V0246, V1); |
||
| 2139 | V7 = XMVectorSplatW(V1357); |
||
| 2140 | V8 = XMVectorMultiply(V7, V1); |
||
| 2141 | V9111315 = XMVectorMultiply(V1357, V8); |
||
| 2142 | V17192123 = XMVectorMultiply(V9111315, V8); |
||
| 2143 | |||
| 2144 | R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v); |
||
| 2145 | R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v); |
||
| 2146 | R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v); |
||
| 2147 | |||
| 2148 | return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0]; |
||
| 2149 | |||
| 2150 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2151 | return sinf( Value ); |
||
| 2152 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2153 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2154 | } |
||
| 2155 | |||
| 2156 | //------------------------------------------------------------------------------ |
||
| 2157 | |||
| 2158 | XMINLINE FLOAT XMScalarCos |
||
| 2159 | ( |
||
| 2160 | FLOAT Value |
||
| 2161 | ) |
||
| 2162 | { |
||
| 2163 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2164 | |||
| 2165 | FLOAT ValueMod; |
||
| 2166 | FLOAT ValueSq; |
||
| 2167 | XMVECTOR V0123, V0246, V8101214, V16182022; |
||
| 2168 | XMVECTOR V2, V6, V8; |
||
| 2169 | XMVECTOR R0, R1, R2; |
||
| 2170 | |||
| 2171 | ValueMod = XMScalarModAngle(Value); |
||
| 2172 | |||
| 2173 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + |
||
| 2174 | // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) |
||
| 2175 | |||
| 2176 | ValueSq = ValueMod * ValueMod; |
||
| 2177 | |||
| 2178 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
| 2179 | V0246 = XMVectorMultiply(V0123, V0123); |
||
| 2180 | |||
| 2181 | V2 = XMVectorSplatZ(V0123); |
||
| 2182 | V6 = XMVectorSplatW(V0246); |
||
| 2183 | V8 = XMVectorMultiply(V6, V2); |
||
| 2184 | |||
| 2185 | V8101214 = XMVectorMultiply(V0246, V8); |
||
| 2186 | V16182022 = XMVectorMultiply(V8101214, V8); |
||
| 2187 | |||
| 2188 | R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v); |
||
| 2189 | R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v); |
||
| 2190 | R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v); |
||
| 2191 | |||
| 2192 | return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0]; |
||
| 2193 | |||
| 2194 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2195 | return cosf(Value); |
||
| 2196 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2197 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2198 | } |
||
| 2199 | |||
| 2200 | //------------------------------------------------------------------------------ |
||
| 2201 | |||
| 2202 | XMINLINE VOID XMScalarSinCos |
||
| 2203 | ( |
||
| 2204 | FLOAT* pSin, |
||
| 2205 | FLOAT* pCos, |
||
| 2206 | FLOAT Value |
||
| 2207 | ) |
||
| 2208 | { |
||
| 2209 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2210 | |||
| 2211 | FLOAT ValueMod; |
||
| 2212 | FLOAT ValueSq; |
||
| 2213 | XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123; |
||
| 2214 | XMVECTOR V1, V2, V6, V8; |
||
| 2215 | XMVECTOR S0, S1, S2, C0, C1, C2; |
||
| 2216 | |||
| 2217 | XMASSERT(pSin); |
||
| 2218 | XMASSERT(pCos); |
||
| 2219 | |||
| 2220 | ValueMod = XMScalarModAngle(Value); |
||
| 2221 | |||
| 2222 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! + |
||
| 2223 | // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) |
||
| 2224 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + |
||
| 2225 | // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) |
||
| 2226 | |||
| 2227 | ValueSq = ValueMod * ValueMod; |
||
| 2228 | |||
| 2229 | V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod); |
||
| 2230 | |||
| 2231 | V1 = XMVectorSplatY(V0123); |
||
| 2232 | V2 = XMVectorSplatZ(V0123); |
||
| 2233 | |||
| 2234 | V0246 = XMVectorMultiply(V0123, V0123); |
||
| 2235 | V1357 = XMVectorMultiply(V0246, V1); |
||
| 2236 | |||
| 2237 | V6 = XMVectorSplatW(V0246); |
||
| 2238 | V8 = XMVectorMultiply(V6, V2); |
||
| 2239 | |||
| 2240 | V8101214 = XMVectorMultiply(V0246, V8); |
||
| 2241 | V9111315 = XMVectorMultiply(V1357, V8); |
||
| 2242 | V16182022 = XMVectorMultiply(V8101214, V8); |
||
| 2243 | V17192123 = XMVectorMultiply(V9111315, V8); |
||
| 2244 | |||
| 2245 | C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v); |
||
| 2246 | S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v); |
||
| 2247 | C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v); |
||
| 2248 | S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v); |
||
| 2249 | C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v); |
||
| 2250 | S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v); |
||
| 2251 | |||
| 2252 | *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0]; |
||
| 2253 | *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0]; |
||
| 2254 | |||
| 2255 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2256 | XMASSERT(pSin); |
||
| 2257 | XMASSERT(pCos); |
||
| 2258 | |||
| 2259 | *pSin = sinf(Value); |
||
| 2260 | *pCos = cosf(Value); |
||
| 2261 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2262 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2263 | } |
||
| 2264 | |||
| 2265 | //------------------------------------------------------------------------------ |
||
| 2266 | |||
| 2267 | XMINLINE FLOAT XMScalarASin |
||
| 2268 | ( |
||
| 2269 | FLOAT Value |
||
| 2270 | ) |
||
| 2271 | { |
||
| 2272 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2273 | |||
| 2274 | FLOAT AbsValue, Value2, Value3, D; |
||
| 2275 | XMVECTOR AbsV, R0, R1, Result; |
||
| 2276 | XMVECTOR V3; |
||
| 2277 | |||
| 2278 | *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF; |
||
| 2279 | |||
| 2280 | Value2 = Value * AbsValue; |
||
| 2281 | Value3 = Value * Value2; |
||
| 2282 | D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue); |
||
| 2283 | |||
| 2284 | AbsV = XMVectorReplicate(AbsValue); |
||
| 2285 | |||
| 2286 | V3.vector4_f32[0] = Value3; |
||
| 2287 | V3.vector4_f32[1] = 1.0f; |
||
| 2288 | V3.vector4_f32[2] = Value3; |
||
| 2289 | V3.vector4_f32[3] = 1.0f; |
||
| 2290 | |||
| 2291 | R1 = XMVectorSet(D, D, Value, Value); |
||
| 2292 | R1 = XMVectorMultiply(R1, V3); |
||
| 2293 | |||
| 2294 | R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v); |
||
| 2295 | R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v); |
||
| 2296 | |||
| 2297 | Result = XMVector4Dot(R0, R1); |
||
| 2298 | |||
| 2299 | return Result.vector4_f32[0]; |
||
| 2300 | |||
| 2301 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2302 | return asinf(Value); |
||
| 2303 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2304 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2305 | } |
||
| 2306 | |||
| 2307 | //------------------------------------------------------------------------------ |
||
| 2308 | |||
| 2309 | XMINLINE FLOAT XMScalarACos |
||
| 2310 | ( |
||
| 2311 | FLOAT Value |
||
| 2312 | ) |
||
| 2313 | { |
||
| 2314 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2315 | |||
| 2316 | return XM_PIDIV2 - XMScalarASin(Value); |
||
| 2317 | |||
| 2318 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2319 | return acosf(Value); |
||
| 2320 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2321 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2322 | } |
||
| 2323 | |||
| 2324 | //------------------------------------------------------------------------------ |
||
| 2325 | |||
| 2326 | XMFINLINE FLOAT XMScalarSinEst |
||
| 2327 | ( |
||
| 2328 | FLOAT Value |
||
| 2329 | ) |
||
| 2330 | { |
||
| 2331 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2332 | |||
| 2333 | FLOAT ValueSq; |
||
| 2334 | XMVECTOR V; |
||
| 2335 | XMVECTOR Y; |
||
| 2336 | XMVECTOR Result; |
||
| 2337 | |||
| 2338 | XMASSERT(Value >= -XM_PI); |
||
| 2339 | XMASSERT(Value < XM_PI); |
||
| 2340 | |||
| 2341 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
| 2342 | |||
| 2343 | ValueSq = Value * Value; |
||
| 2344 | |||
| 2345 | V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value); |
||
| 2346 | Y = XMVectorSplatY(V); |
||
| 2347 | V = XMVectorMultiply(V, V); |
||
| 2348 | V = XMVectorMultiply(V, Y); |
||
| 2349 | |||
| 2350 | Result = XMVector4Dot(V, g_XMSinEstCoefficients.v); |
||
| 2351 | |||
| 2352 | return Result.vector4_f32[0]; |
||
| 2353 | |||
| 2354 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2355 | XMASSERT(Value >= -XM_PI); |
||
| 2356 | XMASSERT(Value < XM_PI); |
||
| 2357 | float ValueSq = Value*Value; |
||
| 2358 | XMVECTOR vValue = _mm_set_ps1(Value); |
||
| 2359 | XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); |
||
| 2360 | vTemp = _mm_mul_ps(vTemp,vTemp); |
||
| 2361 | vTemp = _mm_mul_ps(vTemp,vValue); |
||
| 2362 | // vTemp = Value,Value^3,Value^5,Value^7 |
||
| 2363 | vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients); |
||
| 2364 | vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
| 2365 | vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W; |
||
| 2366 | vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
| 2367 | vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together |
||
| 2368 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
| 2369 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
| 2370 | return _mm_cvtss_f32(vTemp); |
||
| 2371 | #else |
||
| 2372 | return vTemp.m128_f32[0]; |
||
| 2373 | #endif |
||
| 2374 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2375 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2376 | } |
||
| 2377 | |||
| 2378 | //------------------------------------------------------------------------------ |
||
| 2379 | |||
| 2380 | XMFINLINE FLOAT XMScalarCosEst |
||
| 2381 | ( |
||
| 2382 | FLOAT Value |
||
| 2383 | ) |
||
| 2384 | { |
||
| 2385 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2386 | FLOAT ValueSq; |
||
| 2387 | XMVECTOR V; |
||
| 2388 | XMVECTOR Result; |
||
| 2389 | XMASSERT(Value >= -XM_PI); |
||
| 2390 | XMASSERT(Value < XM_PI); |
||
| 2391 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
| 2392 | ValueSq = Value * Value; |
||
| 2393 | V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value); |
||
| 2394 | V = XMVectorMultiply(V, V); |
||
| 2395 | Result = XMVector4Dot(V, g_XMCosEstCoefficients.v); |
||
| 2396 | return Result.vector4_f32[0]; |
||
| 2397 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2398 | XMASSERT(Value >= -XM_PI); |
||
| 2399 | XMASSERT(Value < XM_PI); |
||
| 2400 | float ValueSq = Value*Value; |
||
| 2401 | XMVECTOR vValue = _mm_setzero_ps(); |
||
| 2402 | XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f); |
||
| 2403 | vTemp = _mm_mul_ps(vTemp,vTemp); |
||
| 2404 | // vTemp = 1.0f,Value^2,Value^4,Value^6 |
||
| 2405 | vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients); |
||
| 2406 | vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position |
||
| 2407 | vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W; |
||
| 2408 | vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position |
||
| 2409 | vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together |
||
| 2410 | vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return |
||
| 2411 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
| 2412 | return _mm_cvtss_f32(vTemp); |
||
| 2413 | #else |
||
| 2414 | return vTemp.m128_f32[0]; |
||
| 2415 | #endif |
||
| 2416 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2417 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2418 | } |
||
| 2419 | |||
| 2420 | //------------------------------------------------------------------------------ |
||
| 2421 | |||
| 2422 | XMFINLINE VOID XMScalarSinCosEst |
||
| 2423 | ( |
||
| 2424 | FLOAT* pSin, |
||
| 2425 | FLOAT* pCos, |
||
| 2426 | FLOAT Value |
||
| 2427 | ) |
||
| 2428 | { |
||
| 2429 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2430 | |||
| 2431 | FLOAT ValueSq; |
||
| 2432 | XMVECTOR V, Sin, Cos; |
||
| 2433 | XMVECTOR Y; |
||
| 2434 | |||
| 2435 | XMASSERT(pSin); |
||
| 2436 | XMASSERT(pCos); |
||
| 2437 | XMASSERT(Value >= -XM_PI); |
||
| 2438 | XMASSERT(Value < XM_PI); |
||
| 2439 | |||
| 2440 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
| 2441 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
| 2442 | |||
| 2443 | ValueSq = Value * Value; |
||
| 2444 | V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq); |
||
| 2445 | Y = XMVectorSplatY(V); |
||
| 2446 | Cos = XMVectorMultiply(V, V); |
||
| 2447 | Sin = XMVectorMultiply(Cos, Y); |
||
| 2448 | |||
| 2449 | Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v); |
||
| 2450 | Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v); |
||
| 2451 | |||
| 2452 | *pCos = Cos.vector4_f32[0]; |
||
| 2453 | *pSin = Sin.vector4_f32[0]; |
||
| 2454 | |||
| 2455 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2456 | XMASSERT(pSin); |
||
| 2457 | XMASSERT(pCos); |
||
| 2458 | XMASSERT(Value >= -XM_PI); |
||
| 2459 | XMASSERT(Value < XM_PI); |
||
| 2460 | float ValueSq = Value * Value; |
||
| 2461 | XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f); |
||
| 2462 | XMVECTOR Sin = _mm_set_ps1(Value); |
||
| 2463 | Cos = _mm_mul_ps(Cos,Cos); |
||
| 2464 | Sin = _mm_mul_ps(Sin,Cos); |
||
| 2465 | // Cos = 1.0f,Value^2,Value^4,Value^6 |
||
| 2466 | Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients); |
||
| 2467 | _mm_store_ss(pCos,Cos); |
||
| 2468 | // Sin = Value,Value^3,Value^5,Value^7 |
||
| 2469 | Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients); |
||
| 2470 | _mm_store_ss(pSin,Sin); |
||
| 2471 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2472 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2473 | } |
||
| 2474 | |||
| 2475 | //------------------------------------------------------------------------------ |
||
| 2476 | |||
| 2477 | XMFINLINE FLOAT XMScalarASinEst |
||
| 2478 | ( |
||
| 2479 | FLOAT Value |
||
| 2480 | ) |
||
| 2481 | { |
||
| 2482 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2483 | |||
| 2484 | XMVECTOR VR, CR, CS; |
||
| 2485 | XMVECTOR Result; |
||
| 2486 | FLOAT AbsV, V2, D; |
||
| 2487 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
| 2488 | |||
| 2489 | *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF; |
||
| 2490 | V2 = Value * AbsV; |
||
| 2491 | D = OnePlusEps - AbsV; |
||
| 2492 | |||
| 2493 | CS = XMVectorSet(Value, 1.0f, 1.0f, V2); |
||
| 2494 | VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV); |
||
| 2495 | CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v); |
||
| 2496 | |||
| 2497 | Result = XMVector4Dot(VR, CR); |
||
| 2498 | |||
| 2499 | return Result.vector4_f32[0]; |
||
| 2500 | |||
| 2501 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2502 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
| 2503 | FLOAT AbsV = fabsf(Value); |
||
| 2504 | FLOAT V2 = Value * AbsV; // Square with sign retained |
||
| 2505 | FLOAT D = OnePlusEps - AbsV; |
||
| 2506 | |||
| 2507 | XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value); |
||
| 2508 | XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D)); |
||
| 2509 | Result = _mm_mul_ps(Result, g_XMASinEstCoefficients); |
||
| 2510 | Result = XMVector4Dot(VR,Result); |
||
| 2511 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
| 2512 | return _mm_cvtss_f32(Result); |
||
| 2513 | #else |
||
| 2514 | return Result.m128_f32[0]; |
||
| 2515 | #endif |
||
| 2516 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2517 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2518 | } |
||
| 2519 | |||
| 2520 | //------------------------------------------------------------------------------ |
||
| 2521 | |||
| 2522 | XMFINLINE FLOAT XMScalarACosEst |
||
| 2523 | ( |
||
| 2524 | FLOAT Value |
||
| 2525 | ) |
||
| 2526 | { |
||
| 2527 | #if defined(_XM_NO_INTRINSICS_) |
||
| 2528 | |||
| 2529 | XMVECTOR VR, CR, CS; |
||
| 2530 | XMVECTOR Result; |
||
| 2531 | FLOAT AbsV, V2, D; |
||
| 2532 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
| 2533 | |||
| 2534 | // return XM_PIDIV2 - XMScalarASin(Value); |
||
| 2535 | |||
| 2536 | *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF; |
||
| 2537 | V2 = Value * AbsV; |
||
| 2538 | D = OnePlusEps - AbsV; |
||
| 2539 | |||
| 2540 | CS = XMVectorSet(Value, 1.0f, 1.0f, V2); |
||
| 2541 | VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV); |
||
| 2542 | CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v); |
||
| 2543 | |||
| 2544 | Result = XMVector4Dot(VR, CR); |
||
| 2545 | |||
| 2546 | return XM_PIDIV2 - Result.vector4_f32[0]; |
||
| 2547 | |||
| 2548 | #elif defined(_XM_SSE_INTRINSICS_) |
||
| 2549 | CONST FLOAT OnePlusEps = 1.00000011921f; |
||
| 2550 | FLOAT AbsV = fabsf(Value); |
||
| 2551 | FLOAT V2 = Value * AbsV; // Value^2 retaining sign |
||
| 2552 | FLOAT D = OnePlusEps - AbsV; |
||
| 2553 | XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value); |
||
| 2554 | XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D)); |
||
| 2555 | Result = _mm_mul_ps(Result,g_XMASinEstCoefficients); |
||
| 2556 | Result = XMVector4Dot(VR,Result); |
||
| 2557 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
| 2558 | return XM_PIDIV2 - _mm_cvtss_f32(Result); |
||
| 2559 | #else |
||
| 2560 | return XM_PIDIV2 - Result.m128_f32[0]; |
||
| 2561 | #endif |
||
| 2562 | #else // _XM_VMX128_INTRINSICS_ |
||
| 2563 | #endif // _XM_VMX128_INTRINSICS_ |
||
| 2564 | } |
||
| 2565 | |||
| 2566 | #endif // __XNAMATHMISC_INL__ |
||
| 2567 |