Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1 | pmbaty | 1 | /*++ |
2 | |||
3 | Copyright (c) Microsoft Corporation. All rights reserved. |
||
4 | |||
5 | Module Name: |
||
6 | |||
7 | xnamathvector.inl |
||
8 | |||
9 | Abstract: |
||
10 | |||
11 | XNA math library for Windows and Xbox 360: Vector functions |
||
12 | --*/ |
||
13 | |||
#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__

#if defined(_XM_NO_INTRINSICS_)
// Bit-level float classification helpers used by the no-intrinsics code paths.
// A float is NaN when its exponent bits are all ones and the mantissa is
// non-zero; it is infinite when exponent bits are all ones and the mantissa
// is zero.
// NOTE(review): these macros type-pun the float through a UINT pointer cast,
// which is technically undefined behavior under strict aliasing rules;
// presumably relied upon as an MSVC-tolerated idiom — confirm for other
// compilers.
#define XMISNAN(x)  ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x)  ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
||
25 | |||
26 | /**************************************************************************** |
||
27 | * |
||
28 | * General Vector |
||
29 | * |
||
30 | ****************************************************************************/ |
||
31 | |||
32 | //------------------------------------------------------------------------------ |
||
33 | // Assignment operations |
||
34 | //------------------------------------------------------------------------------ |
||
35 | |||
//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
    // VMX128 (Xbox 360) implementation intentionally absent from this
    // distribution of the header.
#endif // _XM_VMX128_INTRINSICS_
}
||
48 | |||
//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
// (x,y,z,w map to components 0..3 of the result).
XMFINLINE XMVECTOR XMVectorSet
(
    FLOAT x,
    FLOAT y,
    FLOAT z,
    FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // _mm_set_ps orders its arguments from the highest lane to the lowest,
    // hence the reversed w,z,y,x order here.
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
67 | |||
//------------------------------------------------------------------------------
// Initialize a vector with four integer values
// (x,y,z,w map to the 32-bit integer lanes 0..3 of the result).
XMFINLINE XMVECTOR XMVectorSetInt
(
    UINT x,
    UINT y,
    UINT z,
    UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // _mm_set_epi32 orders arguments from highest lane to lowest.
    __m128i V = _mm_set_epi32( w, z, y, x );
    // Reinterpret the integer register as float without conversion.
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
87 | |||
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
// (all four components set to Value).
XMFINLINE XMVECTOR XMVectorReplicate
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
103 | |||
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer.
// pValue must point to a readable float; no null check is performed here.
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
    CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    FLOAT Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Loads the scalar and broadcasts it to all four lanes.
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
120 | |||
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
// (all four 32-bit lanes set to Value).
XMFINLINE XMVECTOR XMVectorReplicateInt
(
    UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_set1_epi32( Value );
    // Reinterpret the integer register as float without conversion.
    return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
137 | |||
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer.
// pValue must point to readable 32-bit storage; no null check is performed.
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
    CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    UINT Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // The 32-bit pattern is loaded as-is through a float broadcast; the bits
    // are preserved, not converted.
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
154 | |||
//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // -1 replicated into every 32-bit lane yields all-ones.
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
168 | |||
//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask).
// Identical bit pattern to XMVectorZero; kept separate for readability of intent.
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
181 | |||
//------------------------------------------------------------------------------
// Replicate the x component of the vector into all four components.
XMFINLINE XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[0];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle selecting lane 0 for every output lane.
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
201 | |||
//------------------------------------------------------------------------------
// Replicate the y component of the vector into all four components.
XMFINLINE XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[1];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle selecting lane 1 for every output lane.
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
221 | |||
//------------------------------------------------------------------------------
// Replicate the z component of the vector into all four components.
XMFINLINE XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[2];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle selecting lane 2 for every output lane.
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
241 | |||
//------------------------------------------------------------------------------
// Replicate the w component of the vector into all four components.
XMFINLINE XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[3];
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle selecting lane 3 for every output lane.
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
261 | |||
//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = 1.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Library-provided global constant; avoids rebuilding the value each call.
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
278 | |||
//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
// 0x7F800000 is the IEEE-754 single-precision bit pattern for +infinity.
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
295 | |||
//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
// 0x7FC00000 is the IEEE-754 single-precision quiet-NaN bit pattern.
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
312 | |||
//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
// 0x34000000 is the bit pattern of FLT_EPSILON (2^-23).
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x34000000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
329 | |||
//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
// Useful as a mask for flipping or extracting float sign bits.
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x80000000U;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32( 0x80000000 );
    // Reinterpret the integer register as float without conversion.
    return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
347 | |||
348 | //------------------------------------------------------------------------------ |
||
349 | // Return a floating point value via an index. This is not a recommended |
||
350 | // function to use due to performance loss. |
||
351 | XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i) |
||
352 | { |
||
353 | XMASSERT( i <= 3 ); |
||
354 | #if defined(_XM_NO_INTRINSICS_) |
||
355 | return V.vector4_f32[i]; |
||
356 | #elif defined(_XM_SSE_INTRINSICS_) |
||
357 | return V.m128_f32[i]; |
||
358 | #else // _XM_VMX128_INTRINSICS_ |
||
359 | #endif // _XM_VMX128_INTRINSICS_ |
||
360 | } |
||
361 | |||
//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    // VS2008+ provides _mm_cvtss_f32 to extract lane 0 directly.
    return _mm_cvtss_f32(V);
#else
    return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
378 | |||
// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    // Broadcast lane 1 to lane 0, then extract it (VS2008+).
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
395 | |||
396 | // Return the Z component in an FPU register. |
||
397 | // This causes Load/Hit/Store on VMX targets |
||
398 | XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V) |
||
399 | { |
||
400 | #if defined(_XM_NO_INTRINSICS_) |
||
401 | return V.vector4_f32[2]; |
||
402 | #elif defined(_XM_SSE_INTRINSICS_) |
||
403 | #if defined(_MSC_VER) && (_MSC_VER>=1500) |
||
404 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2)); |
||
405 | return _mm_cvtss_f32(vTemp); |
||
406 | #else |
||
407 | return V.m128_f32[2]; |
||
408 | #endif |
||
409 | #else // _XM_VMX128_INTRINSICS_ |
||
410 | #endif // _XM_VMX128_INTRINSICS_ |
||
411 | } |
||
412 | |||
// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    // Broadcast lane 3 to lane 0, then extract it (VS2008+).
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
429 | |||
//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
// f must be non-null and i in [0,3]; both asserted, not checked in release.
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
    XMASSERT( f != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
445 | |||
//------------------------------------------------------------------------------

// Store the X component into a 32 bit float location in memory.
// x must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    // Stores lane 0 only.
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
459 | |||
// Store the Y component into a 32 bit float location in memory.
// y must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 1 into lane 0, then store that single float.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
472 | |||
// Store the Z component into a 32 bit float location in memory.
// z must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 2 into lane 0, then store that single float.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
485 | |||
// Store the W component into a 32 bit float location in memory.
// w must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 3 into lane 0, then store that single float.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
498 | |||
//------------------------------------------------------------------------------

// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
// i must be in [0,3]; asserted, not range-checked in release builds.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    // Pre-VS2005 __m128 lacks m128_u32; go through the union type instead.
    XMVECTORU32 tmp;
    tmp.v = V;
    return tmp.u[i];
#else
    return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
519 | |||
//------------------------------------------------------------------------------

// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    // Reinterpret the float register as integer and extract lane 0.
    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
533 | |||
// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 1 to lane 0 in the integer domain, then extract it.
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
546 | |||
// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 2 to lane 0 in the integer domain, then extract it.
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
559 | |||
// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 3 to lane 0 in the integer domain, then extract it.
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
572 | |||
//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
// x must be non-null and i in [0,3]; both asserted, not checked in release.
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
    XMASSERT( x != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    // Pre-VS2005 __m128 lacks m128_u32; go through the union type instead.
    XMVECTORU32 tmp;
    tmp.v = V;
    *x = tmp.u[i];
#else
    *x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
594 | |||
//------------------------------------------------------------------------------

// Store the X component into a 32 bit integer location in memory.
// x must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
    // The 32-bit pattern of lane 0 is stored as-is; no conversion occurs.
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
608 | |||
// Store the Y component into a 32 bit integer location in memory.
// y must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 1 into lane 0, then store the raw 32-bit pattern.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
621 | |||
// Store the Z component into a 32 bit integer location in memory.
// z must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 2 into lane 0, then store the raw 32-bit pattern.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
634 | |||
// Store the W component into a 32 bit integer location in memory.
// w must be non-null; asserted, not checked in release builds.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 3 into lane 0, then store the raw 32-bit pattern.
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
647 | |||
648 | //------------------------------------------------------------------------------ |
||
649 | |||
650 | // Set a single indexed floating point component |
||
651 | // This causes Load/Hit/Store on VMX targets |
||
652 | XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i) |
||
653 | { |
||
654 | #if defined(_XM_NO_INTRINSICS_) |
||
655 | XMVECTOR U; |
||
656 | XMASSERT( i <= 3 ); |
||
657 | U = V; |
||
658 | U.vector4_f32[i] = f; |
||
659 | return U; |
||
660 | #elif defined(_XM_SSE_INTRINSICS_) |
||
661 | XMASSERT( i <= 3 ); |
||
662 | XMVECTOR U = V; |
||
663 | U.m128_f32[i] = f; |
||
664 | return U; |
||
665 | #else // _XM_VMX128_INTRINSICS_ |
||
666 | #endif // _XM_VMX128_INTRINSICS_ |
||
667 | } |
||
668 | |||
//------------------------------------------------------------------------------

// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
// Returns a copy of V with x replaced; V itself is unchanged.
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    // VS2005 codegen works better with direct lane access.
    XMVECTOR vResult = V;
    vResult.m128_f32[0] = x;
    return vResult;
#else
    // Build (x,0,0,0) then merge its lane 0 into V.
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
695 | |||
// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
// Returns a copy of V with y replaced; V itself is unchanged.
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    // VS2005 codegen works better with direct lane access.
    XMVECTOR vResult = V;
    vResult.m128_f32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
// Returns a copy of V with z replaced; V itself is unchanged.
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    // VS2005 codegen works better with direct lane access.
    XMVECTOR vResult = V;
    vResult.m128_f32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
756 | |||
// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
// Returns a copy of V with w replaced; V itself is unchanged.
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    // VS2005 codegen works better with direct lane access.
    XMVECTOR vResult = V;
    vResult.m128_f32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
787 | |||
788 | //------------------------------------------------------------------------------ |
||
789 | |||
790 | // Sets a component of a vector to a floating point value passed by pointer |
||
791 | // This causes Load/Hit/Store on VMX targets |
||
792 | XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i) |
||
793 | { |
||
794 | #if defined(_XM_NO_INTRINSICS_) |
||
795 | XMVECTOR U; |
||
796 | XMASSERT( f != 0 ); |
||
797 | XMASSERT( i <= 3 ); |
||
798 | U = V; |
||
799 | U.vector4_f32[i] = *f; |
||
800 | return U; |
||
801 | #elif defined(_XM_SSE_INTRINSICS_) |
||
802 | XMASSERT( f != 0 ); |
||
803 | XMASSERT( i <= 3 ); |
||
804 | XMVECTOR U = V; |
||
805 | U.m128_f32[i] = *f; |
||
806 | return U; |
||
807 | #else // _XM_VMX128_INTRINSICS_ |
||
808 | #endif // _XM_VMX128_INTRINSICS_ |
||
809 | } |
||
810 | |||
//------------------------------------------------------------------------------

// Sets the X component of a vector to a floating point value passed by pointer
// x must be non-null; asserted, not checked in release builds.
// Returns a copy of V with x replaced; V itself is unchanged.
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.vector4_f32[0] = *x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    // Load (*x,0,0,0) then merge its lane 0 into V.
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
832 | |||
833 | // Sets the Y component of a vector to a floating point value passed by pointer |
||
834 | XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y) |
||
835 | { |
||
836 | #if defined(_XM_NO_INTRINSICS_) |
||
837 | XMVECTOR U; |
||
838 | XMASSERT( y != 0 ); |
||
839 | U.vector4_f32[0] = V.vector4_f32[0]; |
||
840 | U.vector4_f32[1] = *y; |
||
841 | U.vector4_f32[2] = V.vector4_f32[2]; |
||
842 | U.vector4_f32[3] = V.vector4_f32[3]; |
||
843 | return U; |
||
844 | #elif defined(_XM_SSE_INTRINSICS_) |
||
845 | XMASSERT( y != 0 ); |
||
846 | // Swap y and x |
||
847 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1)); |
||
848 | // Convert input to vector |
||
849 | XMVECTOR vTemp = _mm_load_ss(y); |
||
850 | // Replace the x component |
||
851 | vResult = _mm_move_ss(vResult,vTemp); |
||
852 | // Swap y and x again |
||
853 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1)); |
||
854 | return vResult; |
||
855 | #else // _XM_VMX128_INTRINSICS_ |
||
856 | #endif // _XM_VMX128_INTRINSICS_ |
||
857 | } |
||
858 | |||
859 | // Sets the Z component of a vector to a floating point value passed by pointer |
||
860 | XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z) |
||
861 | { |
||
862 | #if defined(_XM_NO_INTRINSICS_) |
||
863 | XMVECTOR U; |
||
864 | XMASSERT( z != 0 ); |
||
865 | U.vector4_f32[0] = V.vector4_f32[0]; |
||
866 | U.vector4_f32[1] = V.vector4_f32[1]; |
||
867 | U.vector4_f32[2] = *z; |
||
868 | U.vector4_f32[3] = V.vector4_f32[3]; |
||
869 | return U; |
||
870 | #elif defined(_XM_SSE_INTRINSICS_) |
||
871 | XMASSERT( z != 0 ); |
||
872 | // Swap z and x |
||
873 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2)); |
||
874 | // Convert input to vector |
||
875 | XMVECTOR vTemp = _mm_load_ss(z); |
||
876 | // Replace the x component |
||
877 | vResult = _mm_move_ss(vResult,vTemp); |
||
878 | // Swap z and x again |
||
879 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2)); |
||
880 | return vResult; |
||
881 | #else // _XM_VMX128_INTRINSICS_ |
||
882 | #endif // _XM_VMX128_INTRINSICS_ |
||
883 | } |
||
884 | |||
885 | // Sets the W component of a vector to a floating point value passed by pointer |
||
886 | XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w) |
||
887 | { |
||
888 | #if defined(_XM_NO_INTRINSICS_) |
||
889 | XMVECTOR U; |
||
890 | XMASSERT( w != 0 ); |
||
891 | U.vector4_f32[0] = V.vector4_f32[0]; |
||
892 | U.vector4_f32[1] = V.vector4_f32[1]; |
||
893 | U.vector4_f32[2] = V.vector4_f32[2]; |
||
894 | U.vector4_f32[3] = *w; |
||
895 | return U; |
||
896 | #elif defined(_XM_SSE_INTRINSICS_) |
||
897 | XMASSERT( w != 0 ); |
||
898 | // Swap w and x |
||
899 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3)); |
||
900 | // Convert input to vector |
||
901 | XMVECTOR vTemp = _mm_load_ss(w); |
||
902 | // Replace the x component |
||
903 | vResult = _mm_move_ss(vResult,vTemp); |
||
904 | // Swap w and x again |
||
905 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3)); |
||
906 | return vResult; |
||
907 | #else // _XM_VMX128_INTRINSICS_ |
||
908 | #endif // _XM_VMX128_INTRINSICS_ |
||
909 | } |
||
910 | |||
911 | //------------------------------------------------------------------------------ |
||
912 | |||
913 | // Sets a component of a vector to an integer passed by value |
||
914 | // This causes Load/Hit/Store on VMX targets |
||
915 | XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i) |
||
916 | { |
||
917 | #if defined(_XM_NO_INTRINSICS_) |
||
918 | XMVECTOR U; |
||
919 | XMASSERT( i <= 3 ); |
||
920 | U = V; |
||
921 | U.vector4_u32[i] = x; |
||
922 | return U; |
||
923 | #elif defined(_XM_SSE_INTRINSICS_) |
||
924 | XMASSERT( i <= 3 ); |
||
925 | XMVECTORU32 tmp; |
||
926 | tmp.v = V; |
||
927 | tmp.u[i] = x; |
||
928 | return tmp; |
||
929 | #else // _XM_VMX128_INTRINSICS_ |
||
930 | #endif // _XM_VMX128_INTRINSICS_ |
||
931 | } |
||
932 | |||
933 | //------------------------------------------------------------------------------ |
||
934 | |||
935 | // Sets the X component of a vector to an integer passed by value |
||
936 | // This causes Load/Hit/Store on VMX targets |
||
937 | XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x) |
||
938 | { |
||
939 | #if defined(_XM_NO_INTRINSICS_) |
||
940 | XMVECTOR U; |
||
941 | U.vector4_u32[0] = x; |
||
942 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
943 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
944 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
945 | return U; |
||
946 | #elif defined(_XM_SSE_INTRINSICS_) |
||
947 | #if defined(_XM_ISVS2005_) |
||
948 | XMVECTOR vResult = V; |
||
949 | vResult.m128_i32[0] = x; |
||
950 | return vResult; |
||
951 | #else |
||
952 | __m128i vTemp = _mm_cvtsi32_si128(x); |
||
953 | XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]); |
||
954 | return vResult; |
||
955 | #endif // _XM_ISVS2005_ |
||
956 | #else // _XM_VMX128_INTRINSICS_ |
||
957 | #endif // _XM_VMX128_INTRINSICS_ |
||
958 | } |
||
959 | |||
960 | // Sets the Y component of a vector to an integer passed by value |
||
961 | // This causes Load/Hit/Store on VMX targets |
||
962 | XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y) |
||
963 | { |
||
964 | #if defined(_XM_NO_INTRINSICS_) |
||
965 | XMVECTOR U; |
||
966 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
967 | U.vector4_u32[1] = y; |
||
968 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
969 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
970 | return U; |
||
971 | #elif defined(_XM_SSE_INTRINSICS_) |
||
972 | #if defined(_XM_ISVS2005_) |
||
973 | XMVECTOR vResult = V; |
||
974 | vResult.m128_i32[1] = y; |
||
975 | return vResult; |
||
976 | #else // Swap y and x |
||
977 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1)); |
||
978 | // Convert input to vector |
||
979 | __m128i vTemp = _mm_cvtsi32_si128(y); |
||
980 | // Replace the x component |
||
981 | vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]); |
||
982 | // Swap y and x again |
||
983 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1)); |
||
984 | return vResult; |
||
985 | #endif // _XM_ISVS2005_ |
||
986 | #else // _XM_VMX128_INTRINSICS_ |
||
987 | #endif // _XM_VMX128_INTRINSICS_ |
||
988 | } |
||
989 | |||
990 | // Sets the Z component of a vector to an integer passed by value |
||
991 | // This causes Load/Hit/Store on VMX targets |
||
992 | XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z) |
||
993 | { |
||
994 | #if defined(_XM_NO_INTRINSICS_) |
||
995 | XMVECTOR U; |
||
996 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
997 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
998 | U.vector4_u32[2] = z; |
||
999 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
1000 | return U; |
||
1001 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1002 | #if defined(_XM_ISVS2005_) |
||
1003 | XMVECTOR vResult = V; |
||
1004 | vResult.m128_i32[2] = z; |
||
1005 | return vResult; |
||
1006 | #else |
||
1007 | // Swap z and x |
||
1008 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2)); |
||
1009 | // Convert input to vector |
||
1010 | __m128i vTemp = _mm_cvtsi32_si128(z); |
||
1011 | // Replace the x component |
||
1012 | vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]); |
||
1013 | // Swap z and x again |
||
1014 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2)); |
||
1015 | return vResult; |
||
1016 | #endif // _XM_ISVS2005_ |
||
1017 | #else // _XM_VMX128_INTRINSICS_ |
||
1018 | #endif // _XM_VMX128_INTRINSICS_ |
||
1019 | } |
||
1020 | |||
1021 | // Sets the W component of a vector to an integer passed by value |
||
1022 | // This causes Load/Hit/Store on VMX targets |
||
1023 | XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w) |
||
1024 | { |
||
1025 | #if defined(_XM_NO_INTRINSICS_) |
||
1026 | XMVECTOR U; |
||
1027 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
1028 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
1029 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
1030 | U.vector4_u32[3] = w; |
||
1031 | return U; |
||
1032 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1033 | #if defined(_XM_ISVS2005_) |
||
1034 | XMVECTOR vResult = V; |
||
1035 | vResult.m128_i32[3] = w; |
||
1036 | return vResult; |
||
1037 | #else |
||
1038 | // Swap w and x |
||
1039 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3)); |
||
1040 | // Convert input to vector |
||
1041 | __m128i vTemp = _mm_cvtsi32_si128(w); |
||
1042 | // Replace the x component |
||
1043 | vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]); |
||
1044 | // Swap w and x again |
||
1045 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3)); |
||
1046 | return vResult; |
||
1047 | #endif // _XM_ISVS2005_ |
||
1048 | #else // _XM_VMX128_INTRINSICS_ |
||
1049 | #endif // _XM_VMX128_INTRINSICS_ |
||
1050 | } |
||
1051 | |||
1052 | //------------------------------------------------------------------------------ |
||
1053 | |||
1054 | // Sets a component of a vector to an integer value passed by pointer |
||
1055 | // This causes Load/Hit/Store on VMX targets |
||
1056 | XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i) |
||
1057 | { |
||
1058 | #if defined(_XM_NO_INTRINSICS_) |
||
1059 | XMVECTOR U; |
||
1060 | XMASSERT( x != 0 ); |
||
1061 | XMASSERT( i <= 3 ); |
||
1062 | U = V; |
||
1063 | U.vector4_u32[i] = *x; |
||
1064 | return U; |
||
1065 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1066 | XMASSERT( x != 0 ); |
||
1067 | XMASSERT( i <= 3 ); |
||
1068 | XMVECTORU32 tmp; |
||
1069 | tmp.v = V; |
||
1070 | tmp.u[i] = *x; |
||
1071 | return tmp; |
||
1072 | #else // _XM_VMX128_INTRINSICS_ |
||
1073 | #endif // _XM_VMX128_INTRINSICS_ |
||
1074 | } |
||
1075 | |||
1076 | //------------------------------------------------------------------------------ |
||
1077 | |||
1078 | // Sets the X component of a vector to an integer value passed by pointer |
||
1079 | XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x) |
||
1080 | { |
||
1081 | #if defined(_XM_NO_INTRINSICS_) |
||
1082 | XMVECTOR U; |
||
1083 | XMASSERT( x != 0 ); |
||
1084 | U.vector4_u32[0] = *x; |
||
1085 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
1086 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
1087 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
1088 | return U; |
||
1089 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1090 | XMASSERT( x != 0 ); |
||
1091 | XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x)); |
||
1092 | XMVECTOR vResult = _mm_move_ss(V,vTemp); |
||
1093 | return vResult; |
||
1094 | #else // _XM_VMX128_INTRINSICS_ |
||
1095 | #endif // _XM_VMX128_INTRINSICS_ |
||
1096 | } |
||
1097 | |||
1098 | // Sets the Y component of a vector to an integer value passed by pointer |
||
1099 | XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y) |
||
1100 | { |
||
1101 | #if defined(_XM_NO_INTRINSICS_) |
||
1102 | XMVECTOR U; |
||
1103 | XMASSERT( y != 0 ); |
||
1104 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
1105 | U.vector4_u32[1] = *y; |
||
1106 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
1107 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
1108 | return U; |
||
1109 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1110 | XMASSERT( y != 0 ); |
||
1111 | // Swap y and x |
||
1112 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1)); |
||
1113 | // Convert input to vector |
||
1114 | XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y)); |
||
1115 | // Replace the x component |
||
1116 | vResult = _mm_move_ss(vResult,vTemp); |
||
1117 | // Swap y and x again |
||
1118 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1)); |
||
1119 | return vResult; |
||
1120 | #else // _XM_VMX128_INTRINSICS_ |
||
1121 | #endif // _XM_VMX128_INTRINSICS_ |
||
1122 | } |
||
1123 | |||
1124 | // Sets the Z component of a vector to an integer value passed by pointer |
||
1125 | XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z) |
||
1126 | { |
||
1127 | #if defined(_XM_NO_INTRINSICS_) |
||
1128 | XMVECTOR U; |
||
1129 | XMASSERT( z != 0 ); |
||
1130 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
1131 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
1132 | U.vector4_u32[2] = *z; |
||
1133 | U.vector4_u32[3] = V.vector4_u32[3]; |
||
1134 | return U; |
||
1135 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1136 | XMASSERT( z != 0 ); |
||
1137 | // Swap z and x |
||
1138 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2)); |
||
1139 | // Convert input to vector |
||
1140 | XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z)); |
||
1141 | // Replace the x component |
||
1142 | vResult = _mm_move_ss(vResult,vTemp); |
||
1143 | // Swap z and x again |
||
1144 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2)); |
||
1145 | return vResult; |
||
1146 | #else // _XM_VMX128_INTRINSICS_ |
||
1147 | #endif // _XM_VMX128_INTRINSICS_ |
||
1148 | } |
||
1149 | |||
1150 | // Sets the W component of a vector to an integer value passed by pointer |
||
1151 | XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w) |
||
1152 | { |
||
1153 | #if defined(_XM_NO_INTRINSICS_) |
||
1154 | XMVECTOR U; |
||
1155 | XMASSERT( w != 0 ); |
||
1156 | U.vector4_u32[0] = V.vector4_u32[0]; |
||
1157 | U.vector4_u32[1] = V.vector4_u32[1]; |
||
1158 | U.vector4_u32[2] = V.vector4_u32[2]; |
||
1159 | U.vector4_u32[3] = *w; |
||
1160 | return U; |
||
1161 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1162 | XMASSERT( w != 0 ); |
||
1163 | // Swap w and x |
||
1164 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3)); |
||
1165 | // Convert input to vector |
||
1166 | XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w)); |
||
1167 | // Replace the x component |
||
1168 | vResult = _mm_move_ss(vResult,vTemp); |
||
1169 | // Swap w and x again |
||
1170 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3)); |
||
1171 | return vResult; |
||
1172 | #else // _XM_VMX128_INTRINSICS_ |
||
1173 | #endif // _XM_VMX128_INTRINSICS_ |
||
1174 | } |
||
1175 | |||
1176 | //------------------------------------------------------------------------------ |
||
1177 | // Define a control vector to be used in XMVectorPermute |
||
1178 | // operations. Visualize the two vectors V1 and V2 given |
||
1179 | // in a permute as arranged back to back in a linear fashion, |
||
1180 | // such that they form an array of 8 floating point values. |
||
1181 | // The four integers specified in XMVectorPermuteControl |
||
1182 | // will serve as indices into the array to select components |
||
1183 | // from the two vectors. ElementIndex0 is used to select |
||
1184 | // an element from the vectors to be placed in the first |
||
1185 | // component of the resulting vector, ElementIndex1 is used |
||
1186 | // to select an element for the second component, etc. |
||
1187 | |||
1188 | XMFINLINE XMVECTOR XMVectorPermuteControl |
||
1189 | ( |
||
1190 | UINT ElementIndex0, |
||
1191 | UINT ElementIndex1, |
||
1192 | UINT ElementIndex2, |
||
1193 | UINT ElementIndex3 |
||
1194 | ) |
||
1195 | { |
||
1196 | #if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_) |
||
1197 | XMVECTORU32 vControl; |
||
1198 | static CONST UINT ControlElement[] = { |
||
1199 | XM_PERMUTE_0X, |
||
1200 | XM_PERMUTE_0Y, |
||
1201 | XM_PERMUTE_0Z, |
||
1202 | XM_PERMUTE_0W, |
||
1203 | XM_PERMUTE_1X, |
||
1204 | XM_PERMUTE_1Y, |
||
1205 | XM_PERMUTE_1Z, |
||
1206 | XM_PERMUTE_1W |
||
1207 | }; |
||
1208 | XMASSERT(ElementIndex0 < 8); |
||
1209 | XMASSERT(ElementIndex1 < 8); |
||
1210 | XMASSERT(ElementIndex2 < 8); |
||
1211 | XMASSERT(ElementIndex3 < 8); |
||
1212 | |||
1213 | vControl.u[0] = ControlElement[ElementIndex0]; |
||
1214 | vControl.u[1] = ControlElement[ElementIndex1]; |
||
1215 | vControl.u[2] = ControlElement[ElementIndex2]; |
||
1216 | vControl.u[3] = ControlElement[ElementIndex3]; |
||
1217 | return vControl.v; |
||
1218 | #else |
||
1219 | #endif |
||
1220 | } |
||
1221 | |||
1222 | //------------------------------------------------------------------------------ |
||
1223 | |||
1224 | // Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte |
||
1225 | // entries into a single 16 byte vector and return it. Index 0-15 = V1, |
||
1226 | // 16-31 = V2 |
||
1227 | XMFINLINE XMVECTOR XMVectorPermute |
||
1228 | ( |
||
1229 | FXMVECTOR V1, |
||
1230 | FXMVECTOR V2, |
||
1231 | FXMVECTOR Control |
||
1232 | ) |
||
1233 | { |
||
1234 | #if defined(_XM_NO_INTRINSICS_) |
||
1235 | const BYTE *aByte[2]; |
||
1236 | XMVECTOR Result; |
||
1237 | UINT i, uIndex, VectorIndex; |
||
1238 | const BYTE *pControl; |
||
1239 | BYTE *pWork; |
||
1240 | |||
1241 | // Indices must be in range from 0 to 31 |
||
1242 | XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0); |
||
1243 | XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0); |
||
1244 | XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0); |
||
1245 | XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0); |
||
1246 | |||
1247 | // 0-15 = V1, 16-31 = V2 |
||
1248 | aByte[0] = (const BYTE*)(&V1); |
||
1249 | aByte[1] = (const BYTE*)(&V2); |
||
1250 | i = 16; |
||
1251 | pControl = (const BYTE *)(&Control); |
||
1252 | pWork = (BYTE *)(&Result); |
||
1253 | do { |
||
1254 | // Get the byte to map from |
||
1255 | uIndex = pControl[0]; |
||
1256 | ++pControl; |
||
1257 | VectorIndex = (uIndex>>4)&1; |
||
1258 | uIndex &= 0x0F; |
||
1259 | #if defined(_XM_X86_) || defined(_XM_X64_) |
||
1260 | uIndex ^= 3; // Swap byte ordering on little endian machines |
||
1261 | #endif |
||
1262 | pWork[0] = aByte[VectorIndex][uIndex]; |
||
1263 | ++pWork; |
||
1264 | } while (--i); |
||
1265 | return Result; |
||
1266 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1267 | #if defined(_PREFAST_) || defined(XMDEBUG) |
||
1268 | // Indices must be in range from 0 to 31 |
||
1269 | static const XMVECTORI32 PremuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0}; |
||
1270 | XMVECTOR vAssert = _mm_and_ps(Control,PremuteTest); |
||
1271 | __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero); |
||
1272 | XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf); |
||
1273 | #endif |
||
1274 | // Store the vectors onto local memory on the stack |
||
1275 | XMVECTOR Array[2]; |
||
1276 | Array[0] = V1; |
||
1277 | Array[1] = V2; |
||
1278 | // Output vector, on the stack |
||
1279 | XMVECTORU8 vResult; |
||
1280 | // Get pointer to the two vectors on the stack |
||
1281 | const BYTE *pInput = reinterpret_cast<const BYTE *>(Array); |
||
1282 | // Store the Control vector on the stack to access the bytes |
||
1283 | // don't use Control, it can cause a register variable to spill on the stack. |
||
1284 | XMVECTORU8 vControl; |
||
1285 | vControl.v = Control; // Write to memory |
||
1286 | UINT i = 0; |
||
1287 | do { |
||
1288 | UINT ComponentIndex = vControl.u[i] & 0x1FU; |
||
1289 | ComponentIndex ^= 3; // Swap byte ordering |
||
1290 | vResult.u[i] = pInput[ComponentIndex]; |
||
1291 | } while (++i<16); |
||
1292 | return vResult; |
||
1293 | #else // _XM_SSE_INTRINSICS_ |
||
1294 | #endif // _XM_VMX128_INTRINSICS_ |
||
1295 | } |
||
1296 | |||
1297 | //------------------------------------------------------------------------------ |
||
1298 | // Define a control vector to be used in XMVectorSelect |
||
1299 | // operations. The four integers specified in XMVectorSelectControl |
||
1300 | // serve as indices to select between components in two vectors. |
||
1301 | // The first index controls selection for the first component of |
||
1302 | // the vectors involved in a select operation, the second index |
||
1303 | // controls selection for the second component etc. A value of |
||
1304 | // zero for an index causes the corresponding component from the first |
||
1305 | // vector to be selected whereas a one causes the component from the |
||
1306 | // second vector to be selected instead. |
||
1307 | |||
1308 | XMFINLINE XMVECTOR XMVectorSelectControl |
||
1309 | ( |
||
1310 | UINT VectorIndex0, |
||
1311 | UINT VectorIndex1, |
||
1312 | UINT VectorIndex2, |
||
1313 | UINT VectorIndex3 |
||
1314 | ) |
||
1315 | { |
||
1316 | #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) |
||
1317 | // x=Index0,y=Index1,z=Index2,w=Index3 |
||
1318 | __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0); |
||
1319 | // Any non-zero entries become 0xFFFFFFFF else 0 |
||
1320 | vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero); |
||
1321 | return reinterpret_cast<__m128 *>(&vTemp)[0]; |
||
1322 | #else |
||
1323 | XMVECTOR ControlVector; |
||
1324 | CONST UINT ControlElement[] = |
||
1325 | { |
||
1326 | XM_SELECT_0, |
||
1327 | XM_SELECT_1 |
||
1328 | }; |
||
1329 | |||
1330 | XMASSERT(VectorIndex0 < 2); |
||
1331 | XMASSERT(VectorIndex1 < 2); |
||
1332 | XMASSERT(VectorIndex2 < 2); |
||
1333 | XMASSERT(VectorIndex3 < 2); |
||
1334 | |||
1335 | ControlVector.vector4_u32[0] = ControlElement[VectorIndex0]; |
||
1336 | ControlVector.vector4_u32[1] = ControlElement[VectorIndex1]; |
||
1337 | ControlVector.vector4_u32[2] = ControlElement[VectorIndex2]; |
||
1338 | ControlVector.vector4_u32[3] = ControlElement[VectorIndex3]; |
||
1339 | |||
1340 | return ControlVector; |
||
1341 | |||
1342 | #endif |
||
1343 | } |
||
1344 | |||
1345 | //------------------------------------------------------------------------------ |
||
1346 | |||
1347 | XMFINLINE XMVECTOR XMVectorSelect |
||
1348 | ( |
||
1349 | FXMVECTOR V1, |
||
1350 | FXMVECTOR V2, |
||
1351 | FXMVECTOR Control |
||
1352 | ) |
||
1353 | { |
||
1354 | #if defined(_XM_NO_INTRINSICS_) |
||
1355 | |||
1356 | XMVECTOR Result; |
||
1357 | |||
1358 | Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]); |
||
1359 | Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]); |
||
1360 | Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]); |
||
1361 | Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]); |
||
1362 | |||
1363 | return Result; |
||
1364 | |||
1365 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1366 | XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1); |
||
1367 | XMVECTOR vTemp2 = _mm_and_ps(V2,Control); |
||
1368 | return _mm_or_ps(vTemp1,vTemp2); |
||
1369 | #else // _XM_VMX128_INTRINSICS_ |
||
1370 | #endif // _XM_VMX128_INTRINSICS_ |
||
1371 | } |
||
1372 | |||
1373 | //------------------------------------------------------------------------------ |
||
1374 | |||
1375 | XMFINLINE XMVECTOR XMVectorMergeXY |
||
1376 | ( |
||
1377 | FXMVECTOR V1, |
||
1378 | FXMVECTOR V2 |
||
1379 | ) |
||
1380 | { |
||
1381 | #if defined(_XM_NO_INTRINSICS_) |
||
1382 | |||
1383 | XMVECTOR Result; |
||
1384 | |||
1385 | Result.vector4_u32[0] = V1.vector4_u32[0]; |
||
1386 | Result.vector4_u32[1] = V2.vector4_u32[0]; |
||
1387 | Result.vector4_u32[2] = V1.vector4_u32[1]; |
||
1388 | Result.vector4_u32[3] = V2.vector4_u32[1]; |
||
1389 | |||
1390 | return Result; |
||
1391 | |||
1392 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1393 | return _mm_unpacklo_ps( V1, V2 ); |
||
1394 | #else // _XM_VMX128_INTRINSICS_ |
||
1395 | #endif // _XM_VMX128_INTRINSICS_ |
||
1396 | } |
||
1397 | |||
1398 | //------------------------------------------------------------------------------ |
||
1399 | |||
1400 | XMFINLINE XMVECTOR XMVectorMergeZW |
||
1401 | ( |
||
1402 | FXMVECTOR V1, |
||
1403 | FXMVECTOR V2 |
||
1404 | ) |
||
1405 | { |
||
1406 | #if defined(_XM_NO_INTRINSICS_) |
||
1407 | |||
1408 | XMVECTOR Result; |
||
1409 | |||
1410 | Result.vector4_u32[0] = V1.vector4_u32[2]; |
||
1411 | Result.vector4_u32[1] = V2.vector4_u32[2]; |
||
1412 | Result.vector4_u32[2] = V1.vector4_u32[3]; |
||
1413 | Result.vector4_u32[3] = V2.vector4_u32[3]; |
||
1414 | |||
1415 | return Result; |
||
1416 | |||
1417 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1418 | return _mm_unpackhi_ps( V1, V2 ); |
||
1419 | #else // _XM_VMX128_INTRINSICS_ |
||
1420 | #endif // _XM_VMX128_INTRINSICS_ |
||
1421 | } |
||
1422 | |||
1423 | //------------------------------------------------------------------------------ |
||
1424 | // Comparison operations |
||
1425 | //------------------------------------------------------------------------------ |
||
1426 | |||
1427 | //------------------------------------------------------------------------------ |
||
1428 | |||
1429 | XMFINLINE XMVECTOR XMVectorEqual |
||
1430 | ( |
||
1431 | FXMVECTOR V1, |
||
1432 | FXMVECTOR V2 |
||
1433 | ) |
||
1434 | { |
||
1435 | #if defined(_XM_NO_INTRINSICS_) |
||
1436 | |||
1437 | XMVECTOR Control; |
||
1438 | |||
1439 | Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1440 | Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1441 | Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1442 | Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1443 | |||
1444 | return Control; |
||
1445 | |||
1446 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1447 | return _mm_cmpeq_ps( V1, V2 ); |
||
1448 | #else // _XM_VMX128_INTRINSICS_ |
||
1449 | #endif // _XM_VMX128_INTRINSICS_ |
||
1450 | } |
||
1451 | |||
1452 | //------------------------------------------------------------------------------ |
||
1453 | |||
1454 | XMFINLINE XMVECTOR XMVectorEqualR |
||
1455 | ( |
||
1456 | UINT* pCR, |
||
1457 | FXMVECTOR V1, |
||
1458 | FXMVECTOR V2 |
||
1459 | ) |
||
1460 | { |
||
1461 | #if defined(_XM_NO_INTRINSICS_) |
||
1462 | UINT ux, uy, uz, uw, CR; |
||
1463 | XMVECTOR Control; |
||
1464 | |||
1465 | XMASSERT( pCR ); |
||
1466 | |||
1467 | ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; |
||
1468 | uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; |
||
1469 | uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; |
||
1470 | uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0; |
||
1471 | CR = 0; |
||
1472 | if (ux&uy&uz&uw) |
||
1473 | { |
||
1474 | // All elements are greater |
||
1475 | CR = XM_CRMASK_CR6TRUE; |
||
1476 | } |
||
1477 | else if (!(ux|uy|uz|uw)) |
||
1478 | { |
||
1479 | // All elements are not greater |
||
1480 | CR = XM_CRMASK_CR6FALSE; |
||
1481 | } |
||
1482 | *pCR = CR; |
||
1483 | Control.vector4_u32[0] = ux; |
||
1484 | Control.vector4_u32[1] = uy; |
||
1485 | Control.vector4_u32[2] = uz; |
||
1486 | Control.vector4_u32[3] = uw; |
||
1487 | return Control; |
||
1488 | |||
1489 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1490 | XMASSERT( pCR ); |
||
1491 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
1492 | UINT CR = 0; |
||
1493 | int iTest = _mm_movemask_ps(vTemp); |
||
1494 | if (iTest==0xf) |
||
1495 | { |
||
1496 | CR = XM_CRMASK_CR6TRUE; |
||
1497 | } |
||
1498 | else if (!iTest) |
||
1499 | { |
||
1500 | // All elements are not greater |
||
1501 | CR = XM_CRMASK_CR6FALSE; |
||
1502 | } |
||
1503 | *pCR = CR; |
||
1504 | return vTemp; |
||
1505 | #else // _XM_VMX128_INTRINSICS_ |
||
1506 | #endif // _XM_VMX128_INTRINSICS_ |
||
1507 | } |
||
1508 | |||
1509 | //------------------------------------------------------------------------------ |
||
1510 | // Treat the components of the vectors as unsigned integers and |
||
1511 | // compare individual bits between the two. This is useful for |
||
1512 | // comparing control vectors and result vectors returned from |
||
1513 | // other comparison operations. |
||
1514 | |||
1515 | XMFINLINE XMVECTOR XMVectorEqualInt |
||
1516 | ( |
||
1517 | FXMVECTOR V1, |
||
1518 | FXMVECTOR V2 |
||
1519 | ) |
||
1520 | { |
||
1521 | #if defined(_XM_NO_INTRINSICS_) |
||
1522 | |||
1523 | XMVECTOR Control; |
||
1524 | |||
1525 | Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0; |
||
1526 | Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0; |
||
1527 | Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0; |
||
1528 | Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0; |
||
1529 | |||
1530 | return Control; |
||
1531 | |||
1532 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1533 | __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] ); |
||
1534 | return reinterpret_cast<__m128 *>(&V)[0]; |
||
1535 | #else // _XM_VMX128_INTRINSICS_ |
||
1536 | #endif // _XM_VMX128_INTRINSICS_ |
||
1537 | } |
||
1538 | |||
1539 | //------------------------------------------------------------------------------ |
||
1540 | |||
1541 | XMFINLINE XMVECTOR XMVectorEqualIntR |
||
1542 | ( |
||
1543 | UINT* pCR, |
||
1544 | FXMVECTOR V1, |
||
1545 | FXMVECTOR V2 |
||
1546 | ) |
||
1547 | { |
||
1548 | #if defined(_XM_NO_INTRINSICS_) |
||
1549 | |||
1550 | XMVECTOR Control; |
||
1551 | |||
1552 | XMASSERT(pCR); |
||
1553 | |||
1554 | Control = XMVectorEqualInt(V1, V2); |
||
1555 | |||
1556 | *pCR = 0; |
||
1557 | |||
1558 | if (XMVector4EqualInt(Control, XMVectorTrueInt())) |
||
1559 | { |
||
1560 | // All elements are equal |
||
1561 | *pCR |= XM_CRMASK_CR6TRUE; |
||
1562 | } |
||
1563 | else if (XMVector4EqualInt(Control, XMVectorFalseInt())) |
||
1564 | { |
||
1565 | // All elements are not equal |
||
1566 | *pCR |= XM_CRMASK_CR6FALSE; |
||
1567 | } |
||
1568 | |||
1569 | return Control; |
||
1570 | |||
1571 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1572 | XMASSERT(pCR); |
||
1573 | __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] ); |
||
1574 | int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]); |
||
1575 | UINT CR = 0; |
||
1576 | if (iTemp==0x0F) |
||
1577 | { |
||
1578 | CR = XM_CRMASK_CR6TRUE; |
||
1579 | } |
||
1580 | else if (!iTemp) |
||
1581 | { |
||
1582 | CR = XM_CRMASK_CR6FALSE; |
||
1583 | } |
||
1584 | *pCR = CR; |
||
1585 | return reinterpret_cast<__m128 *>(&V)[0]; |
||
1586 | #else // _XM_VMX128_INTRINSICS_ |
||
1587 | #endif // _XM_VMX128_INTRINSICS_ |
||
1588 | } |
||
1589 | |||
1590 | //------------------------------------------------------------------------------ |
||
1591 | |||
1592 | XMFINLINE XMVECTOR XMVectorNearEqual |
||
1593 | ( |
||
1594 | FXMVECTOR V1, |
||
1595 | FXMVECTOR V2, |
||
1596 | FXMVECTOR Epsilon |
||
1597 | ) |
||
1598 | { |
||
1599 | #if defined(_XM_NO_INTRINSICS_) |
||
1600 | |||
1601 | FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw; |
||
1602 | XMVECTOR Control; |
||
1603 | |||
1604 | fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0]; |
||
1605 | fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1]; |
||
1606 | fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2]; |
||
1607 | fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3]; |
||
1608 | |||
1609 | fDeltax = fabsf(fDeltax); |
||
1610 | fDeltay = fabsf(fDeltay); |
||
1611 | fDeltaz = fabsf(fDeltaz); |
||
1612 | fDeltaw = fabsf(fDeltaw); |
||
1613 | |||
1614 | Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0; |
||
1615 | Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0; |
||
1616 | Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0; |
||
1617 | Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0; |
||
1618 | |||
1619 | return Control; |
||
1620 | |||
1621 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1622 | // Get the difference |
||
1623 | XMVECTOR vDelta = _mm_sub_ps(V1,V2); |
||
1624 | // Get the absolute value of the difference |
||
1625 | XMVECTOR vTemp = _mm_setzero_ps(); |
||
1626 | vTemp = _mm_sub_ps(vTemp,vDelta); |
||
1627 | vTemp = _mm_max_ps(vTemp,vDelta); |
||
1628 | vTemp = _mm_cmple_ps(vTemp,Epsilon); |
||
1629 | return vTemp; |
||
1630 | #else // _XM_VMX128_INTRINSICS_ |
||
1631 | #endif // _XM_VMX128_INTRINSICS_ |
||
1632 | } |
||
1633 | |||
1634 | //------------------------------------------------------------------------------ |
||
1635 | |||
1636 | XMFINLINE XMVECTOR XMVectorNotEqual |
||
1637 | ( |
||
1638 | FXMVECTOR V1, |
||
1639 | FXMVECTOR V2 |
||
1640 | ) |
||
1641 | { |
||
1642 | #if defined(_XM_NO_INTRINSICS_) |
||
1643 | |||
1644 | XMVECTOR Control; |
||
1645 | Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1646 | Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1647 | Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1648 | Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1649 | return Control; |
||
1650 | |||
1651 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1652 | return _mm_cmpneq_ps( V1, V2 ); |
||
1653 | #else // _XM_VMX128_INTRINSICS_ |
||
1654 | #endif // _XM_VMX128_INTRINSICS_ |
||
1655 | } |
||
1656 | |||
1657 | //------------------------------------------------------------------------------ |
||
1658 | |||
// Per-component integer inequality: each result lane is all-ones where the
// 32-bit lanes of V1 and V2 differ (treated as raw bits), zero where equal.
XMFINLINE XMVECTOR XMVectorNotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // SSE2 has no integer not-equal compare: test for equality, then invert
    // the mask by XORing with all-ones (g_XMNegOneMask).
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
1680 | |||
1681 | //------------------------------------------------------------------------------ |
||
1682 | |||
1683 | XMFINLINE XMVECTOR XMVectorGreater |
||
1684 | ( |
||
1685 | FXMVECTOR V1, |
||
1686 | FXMVECTOR V2 |
||
1687 | ) |
||
1688 | { |
||
1689 | #if defined(_XM_NO_INTRINSICS_) |
||
1690 | |||
1691 | XMVECTOR Control; |
||
1692 | Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1693 | Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1694 | Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1695 | Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1696 | return Control; |
||
1697 | |||
1698 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1699 | return _mm_cmpgt_ps( V1, V2 ); |
||
1700 | #else // _XM_VMX128_INTRINSICS_ |
||
1701 | #endif // _XM_VMX128_INTRINSICS_ |
||
1702 | } |
||
1703 | |||
1704 | //------------------------------------------------------------------------------ |
||
1705 | |||
// Per-component greater-than compare that also produces a CR6-style summary
// flag in *pCR: XM_CRMASK_CR6TRUE if EVERY lane of V1 is greater than V2,
// XM_CRMASK_CR6FALSE if NO lane is, otherwise 0 (mixed result).
// Returns the per-lane all-ones/zero mask.
XMFINLINE XMVECTOR XMVectorGreaterR
(
    UINT* pCR,      // [out] receives the CR6 summary; must not be NULL
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    // Per-lane masks: all-ones where V1 > V2, zero otherwise.
    ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // All elements are not greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    // movemask packs the four lane sign bits into bits 0-3; 0xf means
    // every lane compared true, 0 means none did.
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        // All elements are greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // All elements are not greater
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
1760 | |||
1761 | //------------------------------------------------------------------------------ |
||
1762 | |||
1763 | XMFINLINE XMVECTOR XMVectorGreaterOrEqual |
||
1764 | ( |
||
1765 | FXMVECTOR V1, |
||
1766 | FXMVECTOR V2 |
||
1767 | ) |
||
1768 | { |
||
1769 | #if defined(_XM_NO_INTRINSICS_) |
||
1770 | |||
1771 | XMVECTOR Control; |
||
1772 | Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1773 | Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1774 | Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1775 | Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1776 | return Control; |
||
1777 | |||
1778 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1779 | return _mm_cmpge_ps( V1, V2 ); |
||
1780 | #else // _XM_VMX128_INTRINSICS_ |
||
1781 | #endif // _XM_VMX128_INTRINSICS_ |
||
1782 | } |
||
1783 | |||
1784 | //------------------------------------------------------------------------------ |
||
1785 | |||
// Per-component greater-or-equal compare that also produces a CR6-style
// summary flag in *pCR: XM_CRMASK_CR6TRUE if EVERY lane of V1 is >= V2,
// XM_CRMASK_CR6FALSE if NO lane is, otherwise 0 (mixed result).
// Returns the per-lane all-ones/zero mask.
XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
(
    UINT* pCR,      // [out] receives the CR6 summary; must not be NULL
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR );

    // Per-lane masks: all-ones where V1 >= V2, zero otherwise.
    ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are greater-or-equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // No element is greater-or-equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    // movemask packs the four lane sign bits into bits 0-3; 0xf means
    // every lane compared true, 0 means none did.
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        // All elements are greater-or-equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // No element is greater-or-equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
1840 | |||
1841 | //------------------------------------------------------------------------------ |
||
1842 | |||
1843 | XMFINLINE XMVECTOR XMVectorLess |
||
1844 | ( |
||
1845 | FXMVECTOR V1, |
||
1846 | FXMVECTOR V2 |
||
1847 | ) |
||
1848 | { |
||
1849 | #if defined(_XM_NO_INTRINSICS_) |
||
1850 | |||
1851 | XMVECTOR Control; |
||
1852 | Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1853 | Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1854 | Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1855 | Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1856 | return Control; |
||
1857 | |||
1858 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1859 | return _mm_cmplt_ps( V1, V2 ); |
||
1860 | #else // _XM_VMX128_INTRINSICS_ |
||
1861 | #endif // _XM_VMX128_INTRINSICS_ |
||
1862 | } |
||
1863 | |||
1864 | //------------------------------------------------------------------------------ |
||
1865 | |||
1866 | XMFINLINE XMVECTOR XMVectorLessOrEqual |
||
1867 | ( |
||
1868 | FXMVECTOR V1, |
||
1869 | FXMVECTOR V2 |
||
1870 | ) |
||
1871 | { |
||
1872 | #if defined(_XM_NO_INTRINSICS_) |
||
1873 | |||
1874 | XMVECTOR Control; |
||
1875 | Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1876 | Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1877 | Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1878 | Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1879 | return Control; |
||
1880 | |||
1881 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1882 | return _mm_cmple_ps( V1, V2 ); |
||
1883 | #else // _XM_VMX128_INTRINSICS_ |
||
1884 | #endif // _XM_VMX128_INTRINSICS_ |
||
1885 | } |
||
1886 | |||
1887 | //------------------------------------------------------------------------------ |
||
1888 | |||
1889 | XMFINLINE XMVECTOR XMVectorInBounds |
||
1890 | ( |
||
1891 | FXMVECTOR V, |
||
1892 | FXMVECTOR Bounds |
||
1893 | ) |
||
1894 | { |
||
1895 | #if defined(_XM_NO_INTRINSICS_) |
||
1896 | |||
1897 | XMVECTOR Control; |
||
1898 | Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0; |
||
1899 | Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0; |
||
1900 | Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0; |
||
1901 | Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0; |
||
1902 | return Control; |
||
1903 | |||
1904 | #elif defined(_XM_SSE_INTRINSICS_) |
||
1905 | // Test if less than or equal |
||
1906 | XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); |
||
1907 | // Negate the bounds |
||
1908 | XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); |
||
1909 | // Test if greater or equal (Reversed) |
||
1910 | vTemp2 = _mm_cmple_ps(vTemp2,V); |
||
1911 | // Blend answers |
||
1912 | vTemp1 = _mm_and_ps(vTemp1,vTemp2); |
||
1913 | return vTemp1; |
||
1914 | #else // _XM_VMX128_INTRINSICS_ |
||
1915 | #endif // _XM_VMX128_INTRINSICS_ |
||
1916 | } |
||
1917 | |||
1918 | //------------------------------------------------------------------------------ |
||
1919 | |||
// Per-component bounds test (-Bounds <= V <= Bounds) that also produces a
// CR6-style summary in *pCR: XM_CRMASK_CR6BOUNDS when EVERY lane is in
// bounds, otherwise 0. Returns the per-lane all-ones/zero mask.
XMFINLINE XMVECTOR XMVectorInBoundsR
(
    UINT* pCR,      // [out] receives the CR6 summary; must not be NULL
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;

    XMASSERT( pCR != 0 );

    // Per-lane masks: all-ones where -Bounds <= V <= Bounds.
    ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;

    CR = 0;

    if (ux&uy&uz&uw)
    {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    Control.vector4_u32[0] = ux;
    Control.vector4_u32[1] = uy;
    Control.vector4_u32[2] = uz;
    Control.vector4_u32[3] = uw;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR != 0 );
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers: a lane is in bounds only when both tests pass
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);

    UINT CR = 0;
    // movemask packs the four lane sign bits; 0xf means every lane passed.
    if (_mm_movemask_ps(vTemp1)==0xf) {
        // All elements are in bounds
        CR = XM_CRMASK_CR6BOUNDS;
    }
    *pCR = CR;
    return vTemp1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
1973 | |||
1974 | //------------------------------------------------------------------------------ |
||
1975 | |||
// Per-component NaN test: each result lane is all-ones where that lane of V
// is NaN (exponent all-ones AND mantissa non-zero), zero otherwise.
XMFINLINE XMVECTOR XMVectorIsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Control;
    Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
    Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
    return Control;

#elif defined(_XM_SSE_INTRINSICS_)
    // NaN is detected entirely with integer ops so the test cannot itself
    // raise floating-point exceptions on signaling NaNs.
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // andnot computes (~a) & b: invert the "mantissa is zero" mask and AND
    // with the "exponent is all-ones" mask, leaving true only for NaN lanes.
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If any are NaN, the signs are true after the merge above
    return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2006 | |||
2007 | //------------------------------------------------------------------------------ |
||
2008 | |||
2009 | XMFINLINE XMVECTOR XMVectorIsInfinite |
||
2010 | ( |
||
2011 | FXMVECTOR V |
||
2012 | ) |
||
2013 | { |
||
2014 | #if defined(_XM_NO_INTRINSICS_) |
||
2015 | |||
2016 | XMVECTOR Control; |
||
2017 | Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0; |
||
2018 | Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0; |
||
2019 | Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0; |
||
2020 | Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0; |
||
2021 | return Control; |
||
2022 | |||
2023 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2024 | // Mask off the sign bit |
||
2025 | __m128 vTemp = _mm_and_ps(V,g_XMAbsMask); |
||
2026 | // Compare to infinity |
||
2027 | vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity); |
||
2028 | // If any are infinity, the signs are true. |
||
2029 | return vTemp; |
||
2030 | #else // _XM_VMX128_INTRINSICS_ |
||
2031 | #endif // _XM_VMX128_INTRINSICS_ |
||
2032 | } |
||
2033 | |||
2034 | //------------------------------------------------------------------------------ |
||
2035 | // Rounding and clamping operations |
||
2036 | //------------------------------------------------------------------------------ |
||
2037 | |||
2038 | //------------------------------------------------------------------------------ |
||
2039 | |||
2040 | XMFINLINE XMVECTOR XMVectorMin |
||
2041 | ( |
||
2042 | FXMVECTOR V1, |
||
2043 | FXMVECTOR V2 |
||
2044 | ) |
||
2045 | { |
||
2046 | #if defined(_XM_NO_INTRINSICS_) |
||
2047 | |||
2048 | XMVECTOR Result; |
||
2049 | Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0]; |
||
2050 | Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1]; |
||
2051 | Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2]; |
||
2052 | Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3]; |
||
2053 | return Result; |
||
2054 | |||
2055 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2056 | return _mm_min_ps( V1, V2 ); |
||
2057 | #else // _XM_VMX128_INTRINSICS_ |
||
2058 | #endif // _XM_VMX128_INTRINSICS_ |
||
2059 | } |
||
2060 | |||
2061 | //------------------------------------------------------------------------------ |
||
2062 | |||
2063 | XMFINLINE XMVECTOR XMVectorMax |
||
2064 | ( |
||
2065 | FXMVECTOR V1, |
||
2066 | FXMVECTOR V2 |
||
2067 | ) |
||
2068 | { |
||
2069 | #if defined(_XM_NO_INTRINSICS_) |
||
2070 | |||
2071 | XMVECTOR Result; |
||
2072 | Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0]; |
||
2073 | Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1]; |
||
2074 | Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2]; |
||
2075 | Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3]; |
||
2076 | return Result; |
||
2077 | |||
2078 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2079 | return _mm_max_ps( V1, V2 ); |
||
2080 | #else // _XM_VMX128_INTRINSICS_ |
||
2081 | #endif // _XM_VMX128_INTRINSICS_ |
||
2082 | } |
||
2083 | |||
2084 | //------------------------------------------------------------------------------ |
||
2085 | |||
// Round each component to the nearest integer value (half-way cases biased
// away from zero in the scalar path).
XMFINLINE XMVECTOR XMVectorRound
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    XMVECTOR Bias;
    CONST XMVECTOR Zero = XMVectorZero();
    CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
    CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);

    // Choose +0.5 or -0.5 per lane based on sign, add it, then truncate
    // toward zero — classic round-half-away-from-zero.
    Bias = XMVectorLess(V, Zero);
    Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
    Result = XMVectorAdd(V, Bias);
    Result = XMVectorTruncate(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // (cvtps_epi32 is only valid for values that fit in a 32-bit int).
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23 have no fractional part;
    // this also excludes NAN and INF)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding
    __m128i vInt = _mm_cvtps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2125 | |||
2126 | //------------------------------------------------------------------------------ |
||
2127 | |||
// Truncate each component toward zero to an integer value.
XMFINLINE XMVECTOR XMVectorTruncate
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    // C float->int conversion truncates toward zero by definition.
    Result.vector4_f32[0] = (FLOAT)((INT)V.vector4_f32[0]);
    Result.vector4_f32[1] = (FLOAT)((INT)V.vector4_f32[1]);
    Result.vector4_f32[2] = (FLOAT)((INT)V.vector4_f32[2]);
    Result.vector4_f32[3] = (FLOAT)((INT)V.vector4_f32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // To handle NAN, INF and numbers greater than 8388608, use masking
    // (cvttps_epi32 is only valid for values that fit in a 32-bit int).
    // Get the abs value
    __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
    // Test for less than 8388608 (all floats >= 2^23 have no fractional part;
    // this also excludes NAN and INF)
    vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
    // Convert to int and back to float for rounding with truncation
    __m128i vInt = _mm_cvttps_epi32(V);
    // Convert back to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
    // All numbers less than 8388608 will use the round to int
    vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    // All others, use the ORIGINAL value
    vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
    vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2161 | |||
2162 | //------------------------------------------------------------------------------ |
||
2163 | |||
// Per-component floor (round toward negative infinity).
XMFINLINE XMVECTOR XMVectorFloor
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR vResult = {
        floorf(V.vector4_f32[0]),
        floorf(V.vector4_f32[1]),
        floorf(V.vector4_f32[2]),
        floorf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Bias down by just under 0.5 then round-to-nearest via cvtps_epi32.
    // NOTE(review): this is an approximation of floor — it presumably relies
    // on the default round-to-nearest mode and on |V| being small enough to
    // fit in a 32-bit int (no g_XMNoFraction masking here, unlike
    // XMVectorRound/Truncate) — confirm callers' input ranges.
    XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2187 | |||
2188 | //------------------------------------------------------------------------------ |
||
2189 | |||
// Per-component ceiling (round toward positive infinity).
XMFINLINE XMVECTOR XMVectorCeiling
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        ceilf(V.vector4_f32[0]),
        ceilf(V.vector4_f32[1]),
        ceilf(V.vector4_f32[2]),
        ceilf(V.vector4_f32[3])
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Bias up by just under 0.5 then round-to-nearest via cvtps_epi32.
    // NOTE(review): approximation of ceil — mirrors XMVectorFloor and has the
    // same assumptions (default rounding mode, int32-representable range);
    // confirm callers' input ranges.
    XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
    __m128i vInt = _mm_cvtps_epi32(vResult);
    vResult = _mm_cvtepi32_ps(vInt);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2212 | |||
2213 | //------------------------------------------------------------------------------ |
||
2214 | |||
2215 | XMFINLINE XMVECTOR XMVectorClamp |
||
2216 | ( |
||
2217 | FXMVECTOR V, |
||
2218 | FXMVECTOR Min, |
||
2219 | FXMVECTOR Max |
||
2220 | ) |
||
2221 | { |
||
2222 | #if defined(_XM_NO_INTRINSICS_) |
||
2223 | |||
2224 | XMVECTOR Result; |
||
2225 | |||
2226 | XMASSERT(XMVector4LessOrEqual(Min, Max)); |
||
2227 | |||
2228 | Result = XMVectorMax(Min, V); |
||
2229 | Result = XMVectorMin(Max, Result); |
||
2230 | |||
2231 | return Result; |
||
2232 | |||
2233 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2234 | XMVECTOR vResult; |
||
2235 | XMASSERT(XMVector4LessOrEqual(Min, Max)); |
||
2236 | vResult = _mm_max_ps(Min,V); |
||
2237 | vResult = _mm_min_ps(vResult,Max); |
||
2238 | return vResult; |
||
2239 | #else // _XM_VMX128_INTRINSICS_ |
||
2240 | #endif // _XM_VMX128_INTRINSICS_ |
||
2241 | } |
||
2242 | |||
2243 | //------------------------------------------------------------------------------ |
||
2244 | |||
2245 | XMFINLINE XMVECTOR XMVectorSaturate |
||
2246 | ( |
||
2247 | FXMVECTOR V |
||
2248 | ) |
||
2249 | { |
||
2250 | #if defined(_XM_NO_INTRINSICS_) |
||
2251 | |||
2252 | CONST XMVECTOR Zero = XMVectorZero(); |
||
2253 | |||
2254 | return XMVectorClamp(V, Zero, g_XMOne.v); |
||
2255 | |||
2256 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2257 | // Set <0 to 0 |
||
2258 | XMVECTOR vResult = _mm_max_ps(V,g_XMZero); |
||
2259 | // Set>1 to 1 |
||
2260 | return _mm_min_ps(vResult,g_XMOne); |
||
2261 | #else // _XM_VMX128_INTRINSICS_ |
||
2262 | #endif // _XM_VMX128_INTRINSICS_ |
||
2263 | } |
||
2264 | |||
2265 | //------------------------------------------------------------------------------ |
||
2266 | // Bitwise logical operations |
||
2267 | //------------------------------------------------------------------------------ |
||
2268 | |||
2269 | XMFINLINE XMVECTOR XMVectorAndInt |
||
2270 | ( |
||
2271 | FXMVECTOR V1, |
||
2272 | FXMVECTOR V2 |
||
2273 | ) |
||
2274 | { |
||
2275 | #if defined(_XM_NO_INTRINSICS_) |
||
2276 | |||
2277 | XMVECTOR Result; |
||
2278 | |||
2279 | Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0]; |
||
2280 | Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1]; |
||
2281 | Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2]; |
||
2282 | Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3]; |
||
2283 | return Result; |
||
2284 | |||
2285 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2286 | return _mm_and_ps(V1,V2); |
||
2287 | #else // _XM_VMX128_INTRINSICS_ |
||
2288 | #endif // _XM_VMX128_INTRINSICS_ |
||
2289 | } |
||
2290 | |||
2291 | //------------------------------------------------------------------------------ |
||
2292 | |||
// Bitwise AND-with-complement: computes V1 & ~V2 per 32-bit lane,
// treating each lane as raw integer data.
XMFINLINE XMVECTOR XMVectorAndCInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
    Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
    Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
    Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // _mm_andnot_si128(a,b) computes (~a) & b, so V2 must be passed FIRST
    // to produce V1 & ~V2.
    __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2316 | |||
2317 | //------------------------------------------------------------------------------ |
||
2318 | |||
2319 | XMFINLINE XMVECTOR XMVectorOrInt |
||
2320 | ( |
||
2321 | FXMVECTOR V1, |
||
2322 | FXMVECTOR V2 |
||
2323 | ) |
||
2324 | { |
||
2325 | #if defined(_XM_NO_INTRINSICS_) |
||
2326 | |||
2327 | XMVECTOR Result; |
||
2328 | |||
2329 | Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0]; |
||
2330 | Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1]; |
||
2331 | Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2]; |
||
2332 | Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3]; |
||
2333 | |||
2334 | return Result; |
||
2335 | |||
2336 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2337 | __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] ); |
||
2338 | return reinterpret_cast<__m128 *>(&V)[0]; |
||
2339 | #else // _XM_VMX128_INTRINSICS_ |
||
2340 | #endif // _XM_VMX128_INTRINSICS_ |
||
2341 | } |
||
2342 | |||
2343 | //------------------------------------------------------------------------------ |
||
2344 | |||
// Bitwise NOR: computes ~(V1 | V2) per 32-bit lane, treating each lane as
// raw integer data.
XMFINLINE XMVECTOR XMVectorNorInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
    Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
    Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
    Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // SSE2 has no NOT instruction: OR the inputs, then invert via
    // andnot against all-ones (g_XMNegOneMask), since (~x) & ~0 == ~x.
    __m128i Result;
    Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
    Result = _mm_andnot_si128( Result,g_XMNegOneMask);
    return reinterpret_cast<__m128 *>(&Result)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2370 | |||
2371 | //------------------------------------------------------------------------------ |
||
2372 | |||
2373 | XMFINLINE XMVECTOR XMVectorXorInt |
||
2374 | ( |
||
2375 | FXMVECTOR V1, |
||
2376 | FXMVECTOR V2 |
||
2377 | ) |
||
2378 | { |
||
2379 | #if defined(_XM_NO_INTRINSICS_) |
||
2380 | |||
2381 | XMVECTOR Result; |
||
2382 | |||
2383 | Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0]; |
||
2384 | Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1]; |
||
2385 | Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2]; |
||
2386 | Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3]; |
||
2387 | |||
2388 | return Result; |
||
2389 | |||
2390 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2391 | __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] ); |
||
2392 | return reinterpret_cast<__m128 *>(&V)[0]; |
||
2393 | #else // _XM_VMX128_INTRINSICS_ |
||
2394 | #endif // _XM_VMX128_INTRINSICS_ |
||
2395 | } |
||
2396 | |||
2397 | //------------------------------------------------------------------------------ |
||
2398 | // Computation operations |
||
2399 | //------------------------------------------------------------------------------ |
||
2400 | |||
2401 | //------------------------------------------------------------------------------ |
||
2402 | |||
2403 | XMFINLINE XMVECTOR XMVectorNegate |
||
2404 | ( |
||
2405 | FXMVECTOR V |
||
2406 | ) |
||
2407 | { |
||
2408 | #if defined(_XM_NO_INTRINSICS_) |
||
2409 | |||
2410 | XMVECTOR Result; |
||
2411 | |||
2412 | Result.vector4_f32[0] = -V.vector4_f32[0]; |
||
2413 | Result.vector4_f32[1] = -V.vector4_f32[1]; |
||
2414 | Result.vector4_f32[2] = -V.vector4_f32[2]; |
||
2415 | Result.vector4_f32[3] = -V.vector4_f32[3]; |
||
2416 | |||
2417 | return Result; |
||
2418 | |||
2419 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2420 | XMVECTOR Z; |
||
2421 | |||
2422 | Z = _mm_setzero_ps(); |
||
2423 | |||
2424 | return _mm_sub_ps( Z, V ); |
||
2425 | #else // _XM_VMX128_INTRINSICS_ |
||
2426 | #endif // _XM_VMX128_INTRINSICS_ |
||
2427 | } |
||
2428 | |||
2429 | //------------------------------------------------------------------------------ |
||
2430 | |||
2431 | XMFINLINE XMVECTOR XMVectorAdd |
||
2432 | ( |
||
2433 | FXMVECTOR V1, |
||
2434 | FXMVECTOR V2 |
||
2435 | ) |
||
2436 | { |
||
2437 | #if defined(_XM_NO_INTRINSICS_) |
||
2438 | |||
2439 | XMVECTOR Result; |
||
2440 | |||
2441 | Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0]; |
||
2442 | Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1]; |
||
2443 | Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2]; |
||
2444 | Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3]; |
||
2445 | |||
2446 | return Result; |
||
2447 | |||
2448 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2449 | return _mm_add_ps( V1, V2 ); |
||
2450 | #else // _XM_VMX128_INTRINSICS_ |
||
2451 | #endif // _XM_VMX128_INTRINSICS_ |
||
2452 | } |
||
2453 | |||
2454 | //------------------------------------------------------------------------------ |
||
2455 | |||
// Add two vectors of angles component-wise and wrap the result back into
// the half-open interval [-Pi, Pi).
XMFINLINE XMVECTOR XMVectorAddAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Mask;
    XMVECTOR Offset;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Add the given angles together. If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorAdd(V1, V2);

    // Lanes that wrapped below -Pi get a +2Pi correction.
    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    // Lanes that wrapped to >= Pi get a -2Pi correction instead.
    // (Under the stated input ranges a lane cannot need both.)
    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    // Apply whichever correction (possibly zero) each lane selected.
    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_add_ps(V1,V2);
    // Less than Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    // (Note: unlike the scalar path, the SSE path applies the +2Pi fix
    // before testing the upper bound; the result is the same for inputs
    // in the documented ranges.)
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Sub 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2502 | |||
2503 | //------------------------------------------------------------------------------ |
||
2504 | |||
2505 | XMFINLINE XMVECTOR XMVectorSubtract |
||
2506 | ( |
||
2507 | FXMVECTOR V1, |
||
2508 | FXMVECTOR V2 |
||
2509 | ) |
||
2510 | { |
||
2511 | #if defined(_XM_NO_INTRINSICS_) |
||
2512 | |||
2513 | XMVECTOR Result; |
||
2514 | |||
2515 | Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0]; |
||
2516 | Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1]; |
||
2517 | Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2]; |
||
2518 | Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3]; |
||
2519 | |||
2520 | return Result; |
||
2521 | |||
2522 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2523 | return _mm_sub_ps( V1, V2 ); |
||
2524 | #else // _XM_VMX128_INTRINSICS_ |
||
2525 | #endif // _XM_VMX128_INTRINSICS_ |
||
2526 | } |
||
2527 | |||
2528 | //------------------------------------------------------------------------------ |
||
2529 | |||
// Subtract two vectors of angles component-wise and wrap the result back
// into the half-open interval [-Pi, Pi).
XMFINLINE XMVECTOR XMVectorSubtractAngles
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Mask;
    XMVECTOR Offset;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // Subtract the given angles. If the range of V1 is such
    // that -Pi <= V1 < Pi and the range of V2 is such that
    // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
    // will be -Pi <= Result < Pi.
    Result = XMVectorSubtract(V1, V2);

    // Lanes that wrapped below -Pi get a +2Pi correction.
    Mask = XMVectorLess(Result, g_XMNegativePi.v);
    Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);

    // Lanes that wrapped to >= Pi get a -2Pi correction instead.
    // (Under the stated input ranges a lane cannot need both.)
    Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
    Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);

    // Apply whichever correction (possibly zero) each lane selected.
    Result = XMVectorAdd(Result, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Adjust the angles
    XMVECTOR vResult = _mm_sub_ps(V1,V2);
    // Less than Pi?
    XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Add 2Pi to all entries less than -Pi
    vResult = _mm_add_ps(vResult,vOffset);
    // Greater than or equal to Pi?
    // (Note: unlike the scalar path, the SSE path applies the +2Pi fix
    // before testing the upper bound; the result is the same for inputs
    // in the documented ranges.)
    vOffset = _mm_cmpge_ps(vResult,g_XMPi);
    vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
    // Sub 2Pi from all entries greater than or equal to Pi
    vResult = _mm_sub_ps(vResult,vOffset);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2576 | |||
2577 | //------------------------------------------------------------------------------ |
||
2578 | |||
2579 | XMFINLINE XMVECTOR XMVectorMultiply |
||
2580 | ( |
||
2581 | FXMVECTOR V1, |
||
2582 | FXMVECTOR V2 |
||
2583 | ) |
||
2584 | { |
||
2585 | #if defined(_XM_NO_INTRINSICS_) |
||
2586 | XMVECTOR Result = { |
||
2587 | V1.vector4_f32[0] * V2.vector4_f32[0], |
||
2588 | V1.vector4_f32[1] * V2.vector4_f32[1], |
||
2589 | V1.vector4_f32[2] * V2.vector4_f32[2], |
||
2590 | V1.vector4_f32[3] * V2.vector4_f32[3] |
||
2591 | }; |
||
2592 | return Result; |
||
2593 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2594 | return _mm_mul_ps( V1, V2 ); |
||
2595 | #else // _XM_VMX128_INTRINSICS_ |
||
2596 | #endif // _XM_VMX128_INTRINSICS_ |
||
2597 | } |
||
2598 | |||
2599 | //------------------------------------------------------------------------------ |
||
2600 | |||
2601 | XMFINLINE XMVECTOR XMVectorMultiplyAdd |
||
2602 | ( |
||
2603 | FXMVECTOR V1, |
||
2604 | FXMVECTOR V2, |
||
2605 | FXMVECTOR V3 |
||
2606 | ) |
||
2607 | { |
||
2608 | #if defined(_XM_NO_INTRINSICS_) |
||
2609 | XMVECTOR vResult = { |
||
2610 | (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0], |
||
2611 | (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1], |
||
2612 | (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2], |
||
2613 | (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3] |
||
2614 | }; |
||
2615 | return vResult; |
||
2616 | |||
2617 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2618 | XMVECTOR vResult = _mm_mul_ps( V1, V2 ); |
||
2619 | return _mm_add_ps(vResult, V3 ); |
||
2620 | #else // _XM_VMX128_INTRINSICS_ |
||
2621 | #endif // _XM_VMX128_INTRINSICS_ |
||
2622 | } |
||
2623 | |||
2624 | //------------------------------------------------------------------------------ |
||
2625 | |||
2626 | XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract |
||
2627 | ( |
||
2628 | FXMVECTOR V1, |
||
2629 | FXMVECTOR V2, |
||
2630 | FXMVECTOR V3 |
||
2631 | ) |
||
2632 | { |
||
2633 | #if defined(_XM_NO_INTRINSICS_) |
||
2634 | |||
2635 | XMVECTOR vResult = { |
||
2636 | V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]), |
||
2637 | V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]), |
||
2638 | V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]), |
||
2639 | V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3]) |
||
2640 | }; |
||
2641 | return vResult; |
||
2642 | |||
2643 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2644 | XMVECTOR R = _mm_mul_ps( V1, V2 ); |
||
2645 | return _mm_sub_ps( V3, R ); |
||
2646 | #else // _XM_VMX128_INTRINSICS_ |
||
2647 | #endif // _XM_VMX128_INTRINSICS_ |
||
2648 | } |
||
2649 | |||
2650 | //------------------------------------------------------------------------------ |
||
2651 | |||
2652 | XMFINLINE XMVECTOR XMVectorScale |
||
2653 | ( |
||
2654 | FXMVECTOR V, |
||
2655 | FLOAT ScaleFactor |
||
2656 | ) |
||
2657 | { |
||
2658 | #if defined(_XM_NO_INTRINSICS_) |
||
2659 | XMVECTOR vResult = { |
||
2660 | V.vector4_f32[0] * ScaleFactor, |
||
2661 | V.vector4_f32[1] * ScaleFactor, |
||
2662 | V.vector4_f32[2] * ScaleFactor, |
||
2663 | V.vector4_f32[3] * ScaleFactor |
||
2664 | }; |
||
2665 | return vResult; |
||
2666 | |||
2667 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2668 | XMVECTOR vResult = _mm_set_ps1(ScaleFactor); |
||
2669 | return _mm_mul_ps(vResult,V); |
||
2670 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
2671 | #endif // _XM_VMX128_INTRINSICS_ |
||
2672 | } |
||
2673 | |||
2674 | //------------------------------------------------------------------------------ |
||
2675 | |||
// Estimate the component-wise reciprocal 1/V.
// IEEE-754 special cases are handled explicitly in the scalar path:
//   +/-Infinity -> +/-0, -0 -> -Infinity, +0 -> +Infinity.
XMFINLINE XMVECTOR XMVectorReciprocalEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701 (potentially uninitialized local)
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (XMISINF(V.vector4_f32[i]))
        {
            // 1/(+/-Inf) is a signed zero.
            Result.vector4_f32[i] = (V.vector4_f32[i] < 0.0f) ? -0.0f : 0.0f;
        }
        else if (V.vector4_f32[i] == -0.0f)
        {
            // 0xFF800000 is the bit pattern of -Infinity.
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_f32[i] == 0.0f)
        {
            // 0x7F800000 is the bit pattern of +Infinity.
            Result.vector4_u32[i] = 0x7F800000;
        }
        else
        {
            // Normal case: a true divide (the "estimate" is exact here).
            Result.vector4_f32[i] = 1.0f / V.vector4_f32[i];
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Hardware reciprocal estimate (~12 bits of precision).
    return _mm_rcp_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2716 | |||
2717 | //------------------------------------------------------------------------------ |
||
2718 | |||
// Compute the component-wise reciprocal 1/V at full precision.
XMFINLINE XMVECTOR XMVectorReciprocal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    // The scalar "estimate" already uses a true divide, so it is reused here.
    return XMVectorReciprocalEst(V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Full-precision divide (unlike _mm_rcp_ps used by the Est variant).
    return _mm_div_ps(g_XMOne,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2731 | |||
2732 | //------------------------------------------------------------------------------ |
||
2733 | // Return an estimated square root |
||
// Return an estimated square root
XMFINLINE XMVECTOR XMVectorSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Select;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    // sqrt(V) is computed as V * (1/sqrt(V)).
    XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    // Select is all-ones in lanes where the two masks agree, i.e. lanes
    // where V is neither +Infinity nor zero; those lanes take the computed
    // value, while zero/infinity lanes pass V through unchanged.
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Hardware square root handles the special cases directly.
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2761 | |||
2762 | //------------------------------------------------------------------------------ |
||
2763 | |||
// Compute the component-wise square root at full precision.
XMFINLINE XMVECTOR XMVectorSqrt
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Zero;
    XMVECTOR VEqualsInfinity, VEqualsZero;
    XMVECTOR Select;
    XMVECTOR Result;

    // if (x == +Infinity)  sqrt(x) = +Infinity
    // if (x == +0.0f)      sqrt(x) = +0.0f
    // if (x == -0.0f)      sqrt(x) = -0.0f
    // if (x < -0.0f)       sqrt(x) = QNaN

    // sqrt(V) is computed as V * (1/sqrt(V)).
    Result = XMVectorReciprocalSqrt(V);
    Zero = XMVectorZero();
    VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
    VEqualsZero = XMVectorEqual(V, Zero);
    Result = XMVectorMultiply(V, Result);
    // Select is all-ones in lanes where the two masks agree, i.e. lanes
    // where V is neither +Infinity nor zero; those lanes take the computed
    // value, while zero/infinity lanes pass V through unchanged.
    Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
    Result = XMVectorSelect(V, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Hardware square root handles the special cases directly.
    return _mm_sqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2796 | |||
2797 | //------------------------------------------------------------------------------ |
||
2798 | |||
// Estimate the component-wise reciprocal square root 1/sqrt(V).
// IEEE-754 special cases handled by the scalar path:
//   +0 -> +Infinity, -0 -> -Infinity, x < 0 -> QNaN, +Infinity -> 0.
XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    UINT     i;

    // Avoid C4701 (potentially uninitialized local)
    Result.vector4_f32[0] = 0.0f;

    for (i = 0; i < 4; i++)
    {
        if (V.vector4_f32[i] == 0.0f)
        {
            // 0x7F800000 is the bit pattern of +Infinity.
            Result.vector4_u32[i] = 0x7F800000;
        }
        else if (V.vector4_f32[i] == -0.0f)
        {
            // 0xFF800000 is the bit pattern of -Infinity.
            Result.vector4_u32[i] = 0xFF800000;
        }
        else if (V.vector4_f32[i] < 0.0f)
        {
            // 0x7FFFFFFF is a QNaN bit pattern.
            Result.vector4_u32[i] = 0x7FFFFFFF;
        }
        else if (XMISINF(V.vector4_f32[i]))
        {
            // 1/sqrt(+Inf) is zero.
            Result.vector4_f32[i] = 0.0f;
        }
        else
        {
            // Normal case: full-precision computation (the "estimate"
            // is exact in the scalar path).
            Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
        }
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Hardware reciprocal square root estimate (~12 bits of precision).
    return _mm_rsqrt_ps(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2843 | |||
2844 | //------------------------------------------------------------------------------ |
||
2845 | |||
2846 | XMFINLINE XMVECTOR XMVectorReciprocalSqrt |
||
2847 | ( |
||
2848 | FXMVECTOR V |
||
2849 | ) |
||
2850 | { |
||
2851 | #if defined(_XM_NO_INTRINSICS_) |
||
2852 | |||
2853 | return XMVectorReciprocalSqrtEst(V); |
||
2854 | |||
2855 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2856 | XMVECTOR vResult = _mm_sqrt_ps(V); |
||
2857 | vResult = _mm_div_ps(g_XMOne,vResult); |
||
2858 | return vResult; |
||
2859 | #else // _XM_VMX128_INTRINSICS_ |
||
2860 | #endif // _XM_VMX128_INTRINSICS_ |
||
2861 | } |
||
2862 | |||
2863 | //------------------------------------------------------------------------------ |
||
2864 | |||
2865 | XMFINLINE XMVECTOR XMVectorExpEst |
||
2866 | ( |
||
2867 | FXMVECTOR V |
||
2868 | ) |
||
2869 | { |
||
2870 | #if defined(_XM_NO_INTRINSICS_) |
||
2871 | |||
2872 | XMVECTOR Result; |
||
2873 | Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]); |
||
2874 | Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]); |
||
2875 | Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]); |
||
2876 | Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]); |
||
2877 | return Result; |
||
2878 | |||
2879 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2880 | XMVECTOR vResult = _mm_setr_ps( |
||
2881 | powf(2.0f,XMVectorGetX(V)), |
||
2882 | powf(2.0f,XMVectorGetY(V)), |
||
2883 | powf(2.0f,XMVectorGetZ(V)), |
||
2884 | powf(2.0f,XMVectorGetW(V))); |
||
2885 | return vResult; |
||
2886 | #else // _XM_VMX128_INTRINSICS_ |
||
2887 | #endif // _XM_VMX128_INTRINSICS_ |
||
2888 | } |
||
2889 | |||
2890 | //------------------------------------------------------------------------------ |
||
2891 | |||
// Compute 2^V per component.  The value is split as
// 2^V = 2^floor(V) * 2^frac(V): the integer part goes through
// XMVectorExpEst (exact for integers), and the fractional part is
// approximated with a degree-7 minimax polynomial whose coefficients
// evaluate 2^(-R); the reciprocal of that polynomial gives 2^R.
XMINLINE XMVECTOR XMVectorExp
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR E, S;
    XMVECTOR R, R2, R3, R4;
    XMVECTOR V0, V1;
    XMVECTOR C0X, C0Y, C0Z, C0W;
    XMVECTOR C1X, C1Y, C1Z, C1W;
    XMVECTOR Result;
    // Polynomial coefficients for 2^(-x) on [0,1); C0 holds degrees 0-3,
    // C1 holds degrees 4-7.
    static CONST XMVECTOR C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTOR C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    // E = 2^floor(V); R = frac(V) in [0,1).
    R = XMVectorFloor(V);
    E = XMVectorExpEst(R);
    R = XMVectorSubtract(V, R);
    R2 = XMVectorMultiply(R, R);
    R3 = XMVectorMultiply(R, R2);
    R4 = XMVectorMultiply(R2, R2);

    C0X = XMVectorSplatX(C0);
    C0Y = XMVectorSplatY(C0);
    C0Z = XMVectorSplatZ(C0);
    C0W = XMVectorSplatW(C0);

    C1X = XMVectorSplatX(C1);
    C1Y = XMVectorSplatY(C1);
    C1Z = XMVectorSplatZ(C1);
    C1W = XMVectorSplatW(C1);

    // Evaluate the low-order half of the polynomial in V0...
    V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
    V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
    V0 = XMVectorMultiplyAdd(R3, C0W, V0);

    // ...and the high-order half in V1.
    V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
    V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
    V1 = XMVectorMultiplyAdd(R3, C1W, V1);

    // Combine: S ~= 2^(-R).
    S = XMVectorMultiplyAdd(R4, V1, V0);

    // 2^R = 1 / 2^(-R); final result 2^floor(V) * 2^frac(V).
    S = XMVectorReciprocal(S);
    Result = XMVectorMultiply(E, S);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Polynomial coefficients for 2^(-x) on [0,1); C0 holds degrees 0-3,
    // C1 holds degrees 4-7.
    static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
    static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};

    // Get the integer of the input
    XMVECTOR R = XMVectorFloor(V);
    // Get the exponent estimate
    XMVECTOR E = XMVectorExpEst(R);
    // Get the fractional only
    R = _mm_sub_ps(V,R);
    // Get R^2
    XMVECTOR R2 = _mm_mul_ps(R,R);
    // And R^3
    XMVECTOR R3 = _mm_mul_ps(R,R2);

    // Low-order half of the polynomial.
    XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
    V0 = _mm_mul_ps(V0,R);
    XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V0 = _mm_add_ps(V0,vConstants);
    vConstants = _mm_load_ps1(&C0.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V0 = _mm_add_ps(V0,vConstants);

    // High-order half of the polynomial.
    XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
    V1 = _mm_mul_ps(V1,R);
    vConstants = _mm_load_ps1(&C1.f[0]);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[2]);
    vConstants = _mm_mul_ps(vConstants,R2);
    V1 = _mm_add_ps(V1,vConstants);
    vConstants = _mm_load_ps1(&C1.f[3]);
    vConstants = _mm_mul_ps(vConstants,R3);
    V1 = _mm_add_ps(V1,vConstants);
    // R2 = R^4
    R2 = _mm_mul_ps(R2,R2);
    R2 = _mm_mul_ps(R2,V1);
    R2 = _mm_add_ps(R2,V0);
    // R2 ~= 2^(-frac); divide instead of reciprocal+multiply for precision.
    E = _mm_div_ps(E,R2);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
2985 | |||
2986 | //------------------------------------------------------------------------------ |
||
2987 | |||
2988 | XMFINLINE XMVECTOR XMVectorLogEst |
||
2989 | ( |
||
2990 | FXMVECTOR V |
||
2991 | ) |
||
2992 | { |
||
2993 | #if defined(_XM_NO_INTRINSICS_) |
||
2994 | |||
2995 | FLOAT fScale = (1.0f / logf(2.0f)); |
||
2996 | XMVECTOR Result; |
||
2997 | |||
2998 | Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale; |
||
2999 | Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale; |
||
3000 | Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale; |
||
3001 | Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale; |
||
3002 | return Result; |
||
3003 | |||
3004 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3005 | XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f)); |
||
3006 | XMVECTOR vResult = _mm_setr_ps( |
||
3007 | logf(XMVectorGetX(V)), |
||
3008 | logf(XMVectorGetY(V)), |
||
3009 | logf(XMVectorGetZ(V)), |
||
3010 | logf(XMVectorGetW(V))); |
||
3011 | vResult = _mm_mul_ps(vResult,vScale); |
||
3012 | return vResult; |
||
3013 | #else // _XM_VMX128_INTRINSICS_ |
||
3014 | #endif // _XM_VMX128_INTRINSICS_ |
||
3015 | } |
||
3016 | |||
3017 | //------------------------------------------------------------------------------ |
||
3018 | |||
3019 | XMINLINE XMVECTOR XMVectorLog |
||
3020 | ( |
||
3021 | FXMVECTOR V |
||
3022 | ) |
||
3023 | { |
||
3024 | #if defined(_XM_NO_INTRINSICS_) |
||
3025 | FLOAT fScale = (1.0f / logf(2.0f)); |
||
3026 | XMVECTOR Result; |
||
3027 | |||
3028 | Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale; |
||
3029 | Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale; |
||
3030 | Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale; |
||
3031 | Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale; |
||
3032 | return Result; |
||
3033 | |||
3034 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3035 | XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f)); |
||
3036 | XMVECTOR vResult = _mm_setr_ps( |
||
3037 | logf(XMVectorGetX(V)), |
||
3038 | logf(XMVectorGetY(V)), |
||
3039 | logf(XMVectorGetZ(V)), |
||
3040 | logf(XMVectorGetW(V))); |
||
3041 | vResult = _mm_mul_ps(vResult,vScale); |
||
3042 | return vResult; |
||
3043 | #else // _XM_VMX128_INTRINSICS_ |
||
3044 | #endif // _XM_VMX128_INTRINSICS_ |
||
3045 | } |
||
3046 | |||
3047 | //------------------------------------------------------------------------------ |
||
3048 | |||
3049 | XMFINLINE XMVECTOR XMVectorPowEst |
||
3050 | ( |
||
3051 | FXMVECTOR V1, |
||
3052 | FXMVECTOR V2 |
||
3053 | ) |
||
3054 | { |
||
3055 | #if defined(_XM_NO_INTRINSICS_) |
||
3056 | |||
3057 | XMVECTOR Result; |
||
3058 | |||
3059 | Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]); |
||
3060 | Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]); |
||
3061 | Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]); |
||
3062 | Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]); |
||
3063 | |||
3064 | return Result; |
||
3065 | |||
3066 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3067 | XMVECTOR vResult = _mm_setr_ps( |
||
3068 | powf(XMVectorGetX(V1),XMVectorGetX(V2)), |
||
3069 | powf(XMVectorGetY(V1),XMVectorGetY(V2)), |
||
3070 | powf(XMVectorGetZ(V1),XMVectorGetZ(V2)), |
||
3071 | powf(XMVectorGetW(V1),XMVectorGetW(V2))); |
||
3072 | return vResult; |
||
3073 | #else // _XM_VMX128_INTRINSICS_ |
||
3074 | #endif // _XM_VMX128_INTRINSICS_ |
||
3075 | } |
||
3076 | |||
3077 | //------------------------------------------------------------------------------ |
||
3078 | |||
// Raise each component of V1 to the power of the matching component of V2.
XMFINLINE XMVECTOR XMVectorPow
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    // No higher-precision path exists on these targets; the "estimate"
    // already computes powf() per component.
    return XMVectorPowEst(V1, V2);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3092 | |||
3093 | //------------------------------------------------------------------------------ |
||
3094 | |||
3095 | XMFINLINE XMVECTOR XMVectorAbs |
||
3096 | ( |
||
3097 | FXMVECTOR V |
||
3098 | ) |
||
3099 | { |
||
3100 | #if defined(_XM_NO_INTRINSICS_) |
||
3101 | XMVECTOR vResult = { |
||
3102 | fabsf(V.vector4_f32[0]), |
||
3103 | fabsf(V.vector4_f32[1]), |
||
3104 | fabsf(V.vector4_f32[2]), |
||
3105 | fabsf(V.vector4_f32[3]) |
||
3106 | }; |
||
3107 | return vResult; |
||
3108 | |||
3109 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3110 | XMVECTOR vResult = _mm_setzero_ps(); |
||
3111 | vResult = _mm_sub_ps(vResult,V); |
||
3112 | vResult = _mm_max_ps(vResult,V); |
||
3113 | return vResult; |
||
3114 | #else // _XM_VMX128_INTRINSICS_ |
||
3115 | #endif // _XM_VMX128_INTRINSICS_ |
||
3116 | } |
||
3117 | |||
3118 | //------------------------------------------------------------------------------ |
||
3119 | |||
// Compute the component-wise floating-point remainder of V1 / V2
// (truncated-division semantics, like fmodf).
XMFINLINE XMVECTOR XMVectorMod
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Reciprocal;
    XMVECTOR Quotient;
    XMVECTOR Result;

    // V1 % V2 = V1 - V2 * truncate(V1 / V2)
    // NOTE(review): the scalar path divides via XMVectorReciprocal (a
    // multiply by 1/V2) while the SSE path below uses a true divide, so
    // results can differ slightly in the last bits between paths.
    Reciprocal = XMVectorReciprocal(V2);
    Quotient = XMVectorMultiply(V1, Reciprocal);
    Quotient = XMVectorTruncate(Quotient);
    Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // V1 % V2 = V1 - V2 * truncate(V1 / V2), with a full-precision divide.
    XMVECTOR vResult = _mm_div_ps(V1, V2);
    vResult = XMVectorTruncate(vResult);
    vResult = _mm_mul_ps(vResult,V2);
    vResult = _mm_sub_ps(V1,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3149 | |||
3150 | //------------------------------------------------------------------------------ |
||
3151 | |||
// Reduce each component of Angles into the range [-XM_PI, XM_PI) by
// subtracting the nearest multiple of 2*Pi:
//   Result = Angles - TwoPi * round(Angles / TwoPi)
XMFINLINE XMVECTOR XMVectorModAngles
(
    FXMVECTOR Angles
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR Result;

    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
    // Round to the nearest whole number of revolutions.
    V = XMVectorRound(V);
    // Angles - TwoPi * revolutions.
    Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
    XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
    // Use the inline function due to complexity for rounding
    vResult = XMVectorRound(vResult);
    vResult = _mm_mul_ps(vResult,g_XMTwoPi);
    vResult = _mm_sub_ps(Angles,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3180 | |||
3181 | //------------------------------------------------------------------------------ |
||
3182 | |||
// Compute the sine of each component of V (angles in radians).  The input
// is first wrapped into [-Pi, Pi) and the result is then evaluated with
// an 11-term Taylor-series expansion.
XMINLINE XMVECTOR XMVectorSin
(
    FXMVECTOR V
)
{

#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR Result;

    // Wrap the angle into [-Pi, Pi) so the series converges.
    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    //           V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // Build the odd powers of V needed by the series.
    V2  = XMVectorMultiply(V1, V1);
    V3  = XMVectorMultiply(V2, V1);
    V5  = XMVectorMultiply(V3, V2);
    V7  = XMVectorMultiply(V5, V2);
    V9  = XMVectorMultiply(V7, V2);
    V11 = XMVectorMultiply(V9, V2);
    V13 = XMVectorMultiply(V11, V2);
    V15 = XMVectorMultiply(V13, V2);
    V17 = XMVectorMultiply(V15, V2);
    V19 = XMVectorMultiply(V17, V2);
    V21 = XMVectorMultiply(V19, V2);
    V23 = XMVectorMultiply(V21, V2);

    // Broadcast the 11 series coefficients (signed reciprocal factorials)
    // out of the three packed constant vectors.
    S1  = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2  = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3  = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4  = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5  = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6  = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7  = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8  = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9  = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    // Accumulate the series one term at a time, starting from V.
    Result = XMVectorMultiplyAdd(S1, V3, V1);
    Result = XMVectorMultiplyAdd(S2, V5, Result);
    Result = XMVectorMultiplyAdd(S3, V7, Result);
    Result = XMVectorMultiplyAdd(S4, V9, Result);
    Result = XMVectorMultiplyAdd(S5, V11, Result);
    Result = XMVectorMultiplyAdd(S6, V13, Result);
    Result = XMVectorMultiplyAdd(S7, V15, Result);
    Result = XMVectorMultiplyAdd(S8, V17, Result);
    Result = XMVectorMultiplyAdd(S9, V19, Result);
    Result = XMVectorMultiplyAdd(S10, V21, Result);
    Result = XMVectorMultiplyAdd(S11, V23, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR vResult = XMVectorModAngles(V);
    // Each power below is V raised to the indicated exponent.
    // V2 = V1^2
    XMVECTOR V2 = _mm_mul_ps(vResult,vResult);
    // V1^3
    XMVECTOR vPower = _mm_mul_ps(vResult,V2);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^5
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^7
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^9
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^11
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^13
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^15
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^17
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^19
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^21
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^23
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3313 | |||
3314 | //------------------------------------------------------------------------------ |
||
3315 | |||
// Computes the per-component cosine of a vector of angles given in radians.
// Each angle is first wrapped into [-PI, PI) by XMVectorModAngles, then the
// cosine is approximated with the even-power Taylor series up to V^22.
XMINLINE XMVECTOR XMVectorCos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Result;

    // Wrap each angle into [-PI, PI) so the truncated series stays accurate.
    V1 = XMVectorModAngles(V);

    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    // Build every even power of the wrapped angle up front.
    V2 = XMVectorMultiply(V1, V1);
    V4 = XMVectorMultiply(V2, V2);
    V6 = XMVectorMultiply(V4, V2);
    V8 = XMVectorMultiply(V4, V4);
    V10 = XMVectorMultiply(V6, V4);
    V12 = XMVectorMultiply(V6, V6);
    V14 = XMVectorMultiply(V8, V6);
    V16 = XMVectorMultiply(V8, V8);
    V18 = XMVectorMultiply(V10, V8);
    V20 = XMVectorMultiply(V10, V10);
    V22 = XMVectorMultiply(V12, V10);

    // Broadcast the series coefficients (stored four per constant vector in
    // the library's g_XMCosCoefficients* tables).
    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    // Accumulate the series term by term, starting from the constant 1.
    Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Result = XMVectorMultiplyAdd(C2, V4, Result);
    Result = XMVectorMultiplyAdd(C3, V6, Result);
    Result = XMVectorMultiplyAdd(C4, V8, Result);
    Result = XMVectorMultiplyAdd(C5, V10, Result);
    Result = XMVectorMultiplyAdd(C6, V12, Result);
    Result = XMVectorMultiplyAdd(C7, V14, Result);
    Result = XMVectorMultiplyAdd(C8, V16, Result);
    Result = XMVectorMultiplyAdd(C9, V18, Result);
    Result = XMVectorMultiplyAdd(C10, V20, Result);
    Result = XMVectorMultiplyAdd(C11, V22, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR V2 = XMVectorModAngles(V);
    // V2 is reused: after this it holds the SQUARE of the wrapped angle, and
    // each vPower below is the next even power built from it.
    V2 = _mm_mul_ps(V2,V2);
    // V^2 term
    XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    vConstants = _mm_mul_ps(vConstants,V2);
    // Series starts from the constant term 1.
    XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);

    // V^4
    XMVECTOR vPower = _mm_mul_ps(V2,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^6
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^8
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^10
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^12
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^14
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^16
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^18
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^20
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);

    // V^22
    vPower = _mm_mul_ps(vPower,V2);
    vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
    vConstants = _mm_mul_ps(vConstants,vPower);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3443 | |||
3444 | //------------------------------------------------------------------------------ |
||
3445 | |||
// Computes both the sine and the cosine of each component of V (radians) in a
// single call, writing the results through pSin and pCos.  The angles are
// wrapped into [-PI, PI) once, and both results are approximated with their
// Taylor series, sharing a single table of powers V^2..V^23.
// pSin / pCos must be non-NULL (asserted) and may not alias usefully with
// each other; each receives one full XMVECTOR result.
XMINLINE VOID XMVectorSinCos
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    XMASSERT(pSin);
    XMASSERT(pCos);

    // Wrap each angle into [-PI, PI) so both truncated series stay accurate.
    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    // Build every power of the wrapped angle from V^2 to V^23; odd powers
    // feed the sine series, even powers feed the cosine series.
    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    // Broadcast the sine coefficients (four per constant vector).
    S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
    S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
    S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
    S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
    S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
    S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
    S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
    S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
    S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
    S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
    S11 = XMVectorSplatW(g_XMSinCoefficients2.v);

    // Broadcast the cosine coefficients.
    C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
    C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
    C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
    C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
    C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
    C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
    C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
    C11 = XMVectorSplatW(g_XMCosCoefficients2.v);

    // Sine series: starts from the linear term V1 itself.
    Sin = XMVectorMultiplyAdd(S1, V3, V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    // Cosine series: starts from the constant term 1.
    Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
    XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
    XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
    XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR Sin, Cos;

    // Wrap each angle into [-PI, PI).
    V1 = XMVectorModAngles(V);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
    // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
    // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    // Build the shared table of powers V^2..V^23.
    V2 = XMVectorMultiply(V1, V1);
    V3 = XMVectorMultiply(V2, V1);
    V4 = XMVectorMultiply(V2, V2);
    V5 = XMVectorMultiply(V3, V2);
    V6 = XMVectorMultiply(V3, V3);
    V7 = XMVectorMultiply(V4, V3);
    V8 = XMVectorMultiply(V4, V4);
    V9 = XMVectorMultiply(V5, V4);
    V10 = XMVectorMultiply(V5, V5);
    V11 = XMVectorMultiply(V6, V5);
    V12 = XMVectorMultiply(V6, V6);
    V13 = XMVectorMultiply(V7, V6);
    V14 = XMVectorMultiply(V7, V7);
    V15 = XMVectorMultiply(V8, V7);
    V16 = XMVectorMultiply(V8, V8);
    V17 = XMVectorMultiply(V9, V8);
    V18 = XMVectorMultiply(V9, V9);
    V19 = XMVectorMultiply(V10, V9);
    V20 = XMVectorMultiply(V10, V10);
    V21 = XMVectorMultiply(V11, V10);
    V22 = XMVectorMultiply(V11, V11);
    V23 = XMVectorMultiply(V12, V11);

    // Splat each scalar coefficient across all four lanes.
    S1 = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
    S2 = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
    S3 = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
    S4 = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
    S5 = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
    S6 = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
    S7 = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
    S8 = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
    S9 = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
    S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
    S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);

    C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
    C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
    C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
    C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
    C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
    C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
    C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
    C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
    C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
    C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
    C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);

    // First sine term done with raw intrinsics; the rest accumulate
    // term by term through XMVectorMultiplyAdd.
    S1 = _mm_mul_ps(S1,V3);
    Sin = _mm_add_ps(S1,V1);
    Sin = XMVectorMultiplyAdd(S2, V5, Sin);
    Sin = XMVectorMultiplyAdd(S3, V7, Sin);
    Sin = XMVectorMultiplyAdd(S4, V9, Sin);
    Sin = XMVectorMultiplyAdd(S5, V11, Sin);
    Sin = XMVectorMultiplyAdd(S6, V13, Sin);
    Sin = XMVectorMultiplyAdd(S7, V15, Sin);
    Sin = XMVectorMultiplyAdd(S8, V17, Sin);
    Sin = XMVectorMultiplyAdd(S9, V19, Sin);
    Sin = XMVectorMultiplyAdd(S10, V21, Sin);
    Sin = XMVectorMultiplyAdd(S11, V23, Sin);

    // Cosine series: constant term 1 plus the even-power terms.
    Cos = _mm_mul_ps(C1,V2);
    Cos = _mm_add_ps(Cos,g_XMOne);
    Cos = XMVectorMultiplyAdd(C2, V4, Cos);
    Cos = XMVectorMultiplyAdd(C3, V6, Cos);
    Cos = XMVectorMultiplyAdd(C4, V8, Cos);
    Cos = XMVectorMultiplyAdd(C5, V10, Cos);
    Cos = XMVectorMultiplyAdd(C6, V12, Cos);
    Cos = XMVectorMultiplyAdd(C7, V14, Cos);
    Cos = XMVectorMultiplyAdd(C8, V16, Cos);
    Cos = XMVectorMultiplyAdd(C9, V18, Cos);
    Cos = XMVectorMultiplyAdd(C10, V20, Cos);
    Cos = XMVectorMultiplyAdd(C11, V22, Cos);

    *pSin = Sin;
    *pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3639 | |||
3640 | //------------------------------------------------------------------------------ |
||
3641 | |||
// Computes the per-component tangent of V (radians) using the Cody & Waite
// approach: the angle is reduced by the nearest multiple of pi/2 (the
// reduction constant is split into a large part C0 and a small correction C1
// to preserve precision), a rational approximation N/D is evaluated on the
// reduced argument, and the parity of the multiple selects between
// tan = N/D (even) and the reflected form D/(-N) (odd).
XMINLINE XMVECTOR XMVectorTan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    UINT i;
    // Numerator / denominator polynomial coefficients.
    static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    // x = high part of pi/2, y = low-order correction, z = near-zero epsilon,
    // w = 2/pi used to find the nearest multiple of pi/2.
    static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    // VA = round(V * 2/pi): the (signed) number of pi/2 steps in V.
    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    // VC = V - VA * (C0 + C1): two-step reduction keeps the small remainder
    // accurate even though pi/2 is not exactly representable.
    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    // Truncate |VA| to an integer, per lane, for the parity test below.
    for (i = 0; i < 4; i++)
    {
        VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
    }

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    // Even multiple of pi/2 <=> low bit of the truncated quotient is clear.
    VBIsEven = XMVectorAndInt(VB, Mask.v);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    // Evaluate numerator N and denominator D (Horner form in VC2),
    // interleaved to overlap the dependency chains.
    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    // For a reduced argument within +/-Epsilon of zero, fall back to
    // N = VC, D = 1 (tan(x) ~= x there).
    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne.v, VCNearZero);

    // R1 = N/D (tangent); R0 = D/(-N) (the odd-quadrant reflection).
    R0 = XMVectorNegate(N);
    R1 = XMVectorReciprocal(D);
    R0 = XMVectorReciprocal(R0);
    R1 = XMVectorMultiply(N, R1);
    R0 = XMVectorMultiply(D, R0);

    VIsZero = XMVectorEqual(V, Zero);

    Result = XMVectorSelect(R0, R1, VBIsEven);

    // Exactly-zero inputs return exactly zero.
    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Cody and Waite algorithm to compute tangent.

    XMVECTOR VA, VB, VC, VC2;
    XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
    XMVECTOR C0, C1, TwoDivPi, Epsilon;
    XMVECTOR N, D;
    XMVECTOR R0, R1;
    XMVECTOR VIsZero, VCNearZero, VBIsEven;
    XMVECTOR Zero;
    XMVECTOR Result;
    // Same constants as the no-intrinsics path (see comments there).
    static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
    static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};

    TwoDivPi = XMVectorSplatW(TanConstants);

    Zero = XMVectorZero();

    C0 = XMVectorSplatX(TanConstants);
    C1 = XMVectorSplatY(TanConstants);
    Epsilon = XMVectorSplatZ(TanConstants);

    // Nearest multiple of pi/2.
    VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    // Two-step Cody-Waite reduction: VC = V - VA*C0 - VA*C1.
    VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    // Truncate |VA| to integers in place, reinterpreting VB's bits as the
    // integer quotient for the parity test.
    reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);

    VC2 = XMVectorMultiply(VC, VC);

    T7 = XMVectorSplatW(TanCoefficients1);
    T6 = XMVectorSplatZ(TanCoefficients1);
    T4 = XMVectorSplatX(TanCoefficients1);
    T3 = XMVectorSplatW(TanCoefficients0);
    T5 = XMVectorSplatY(TanCoefficients1);
    T2 = XMVectorSplatZ(TanCoefficients0);
    T1 = XMVectorSplatY(TanCoefficients0);
    T0 = XMVectorSplatX(TanCoefficients0);

    // Even multiple of pi/2 <=> low bit of the quotient is clear.
    VBIsEven = XMVectorAndInt(VB,Mask);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    // Numerator N and denominator D of the rational approximation.
    N = XMVectorMultiplyAdd(VC2, T7, T6);
    D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    // Near zero: N = VC, D = 1.
    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne, VCNearZero);
    // R1 = N/D (tangent); R0 = D/(-N) for the odd-quadrant reflection.
    R0 = XMVectorNegate(N);
    R1 = _mm_div_ps(N,D);
    R0 = _mm_div_ps(D,R0);
    VIsZero = XMVectorEqual(V, Zero);
    Result = XMVectorSelect(R0, R1, VBIsEven);
    // Exactly-zero inputs return exactly zero.
    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
3803 | |||
3804 | //------------------------------------------------------------------------------ |
||
3805 | |||
3806 | XMINLINE XMVECTOR XMVectorSinH |
||
3807 | ( |
||
3808 | FXMVECTOR V |
||
3809 | ) |
||
3810 | { |
||
3811 | #if defined(_XM_NO_INTRINSICS_) |
||
3812 | |||
3813 | XMVECTOR V1, V2; |
||
3814 | XMVECTOR E1, E2; |
||
3815 | XMVECTOR Result; |
||
3816 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
3817 | |||
3818 | V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v); |
||
3819 | V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v); |
||
3820 | |||
3821 | E1 = XMVectorExp(V1); |
||
3822 | E2 = XMVectorExp(V2); |
||
3823 | |||
3824 | Result = XMVectorSubtract(E1, E2); |
||
3825 | |||
3826 | return Result; |
||
3827 | |||
3828 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3829 | XMVECTOR V1, V2; |
||
3830 | XMVECTOR E1, E2; |
||
3831 | XMVECTOR Result; |
||
3832 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
3833 | |||
3834 | V1 = _mm_mul_ps(V, Scale); |
||
3835 | V1 = _mm_add_ps(V1,g_XMNegativeOne); |
||
3836 | V2 = _mm_mul_ps(V, Scale); |
||
3837 | V2 = _mm_sub_ps(g_XMNegativeOne,V2); |
||
3838 | E1 = XMVectorExp(V1); |
||
3839 | E2 = XMVectorExp(V2); |
||
3840 | |||
3841 | Result = _mm_sub_ps(E1, E2); |
||
3842 | |||
3843 | return Result; |
||
3844 | #else // _XM_VMX128_INTRINSICS_ |
||
3845 | #endif // _XM_VMX128_INTRINSICS_ |
||
3846 | } |
||
3847 | |||
3848 | //------------------------------------------------------------------------------ |
||
3849 | |||
3850 | XMINLINE XMVECTOR XMVectorCosH |
||
3851 | ( |
||
3852 | FXMVECTOR V |
||
3853 | ) |
||
3854 | { |
||
3855 | #if defined(_XM_NO_INTRINSICS_) |
||
3856 | |||
3857 | XMVECTOR V1, V2; |
||
3858 | XMVECTOR E1, E2; |
||
3859 | XMVECTOR Result; |
||
3860 | static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
3861 | |||
3862 | V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v); |
||
3863 | V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v); |
||
3864 | |||
3865 | E1 = XMVectorExp(V1); |
||
3866 | E2 = XMVectorExp(V2); |
||
3867 | |||
3868 | Result = XMVectorAdd(E1, E2); |
||
3869 | |||
3870 | return Result; |
||
3871 | |||
3872 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3873 | XMVECTOR V1, V2; |
||
3874 | XMVECTOR E1, E2; |
||
3875 | XMVECTOR Result; |
||
3876 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
3877 | |||
3878 | V1 = _mm_mul_ps(V,Scale); |
||
3879 | V1 = _mm_add_ps(V1,g_XMNegativeOne); |
||
3880 | V2 = _mm_mul_ps(V, Scale); |
||
3881 | V2 = _mm_sub_ps(g_XMNegativeOne,V2); |
||
3882 | E1 = XMVectorExp(V1); |
||
3883 | E2 = XMVectorExp(V2); |
||
3884 | Result = _mm_add_ps(E1, E2); |
||
3885 | return Result; |
||
3886 | #else // _XM_VMX128_INTRINSICS_ |
||
3887 | #endif // _XM_VMX128_INTRINSICS_ |
||
3888 | } |
||
3889 | |||
3890 | //------------------------------------------------------------------------------ |
||
3891 | |||
3892 | XMINLINE XMVECTOR XMVectorTanH |
||
3893 | ( |
||
3894 | FXMVECTOR V |
||
3895 | ) |
||
3896 | { |
||
3897 | #if defined(_XM_NO_INTRINSICS_) |
||
3898 | |||
3899 | XMVECTOR E; |
||
3900 | XMVECTOR Result; |
||
3901 | static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f) |
||
3902 | |||
3903 | E = XMVectorMultiply(V, Scale.v); |
||
3904 | E = XMVectorExp(E); |
||
3905 | E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v); |
||
3906 | E = XMVectorReciprocal(E); |
||
3907 | |||
3908 | Result = XMVectorSubtract(g_XMOne.v, E); |
||
3909 | |||
3910 | return Result; |
||
3911 | |||
3912 | #elif defined(_XM_SSE_INTRINSICS_) |
||
3913 | static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f) |
||
3914 | |||
3915 | XMVECTOR E = _mm_mul_ps(V, Scale); |
||
3916 | E = XMVectorExp(E); |
||
3917 | E = _mm_mul_ps(E,g_XMOneHalf); |
||
3918 | E = _mm_add_ps(E,g_XMOneHalf); |
||
3919 | E = XMVectorReciprocal(E); |
||
3920 | E = _mm_sub_ps(g_XMOne, E); |
||
3921 | return E; |
||
3922 | #else // _XM_VMX128_INTRINSICS_ |
||
3923 | #endif // _XM_VMX128_INTRINSICS_ |
||
3924 | } |
||
3925 | |||
3926 | //------------------------------------------------------------------------------ |
||
3927 | |||
// Computes the per-component arcsine of V (results in radians).
// Two degree-5 polynomials in |V| are evaluated: one is scaled by V, the
// other by V * (1 - |V|) / sqrt(1 - |V|); their sum is the result.  The
// coefficients live in g_XMASinCoefficients0..2.  Inputs are expected in
// [-1, 1] -- TODO confirm behavior outside that range is unspecified.
XMINLINE XMVECTOR XMVectorASin
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    // 1 + 2^-23: keeps (OnePlusEpsilon - |V|) strictly positive for the
    // reciprocal square root when |V| == 1.
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    // V3 = V^2 * |V|: a cubed magnitude that keeps V's sign via the factors below.
    V3 = XMVectorMultiply(V2, AbsV);

    // R4 = V - |V| * V = V * (1 - |V|).
    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    // Broadcast the twelve polynomial coefficients.
    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    // Evaluate four partial polynomials in |V|, interleaved to overlap the
    // floating-point dependency chains...
    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    // ...then fold the higher-order halves in via the V^3 factor.
    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    // First polynomial scaled by V; second scaled by V * (1 - |V|).
    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    // Combine: result = R0 + R1 / sqrt(1 - |V|).
    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // 1 + 2^-23: keeps the sqrt argument strictly positive at |V| == 1.
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
    // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
    // Get abs(V) as max(-V, V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    // Four partial polynomials in |V|, each seeded from one coefficient pair.
    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1, vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2, vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3, vConstants);

    // One more Horner step on each partial, pulling in the last coefficients.
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_mul_ps(R0,vAbsV);
    R0 = _mm_add_ps(R0,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_mul_ps(R1,vAbsV);
    R1 = _mm_add_ps(R1,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_mul_ps(R2,vAbsV);
    R2 = _mm_add_ps(R2,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_mul_ps(R3,vAbsV);
    R3 = _mm_add_ps(R3,vConstants);

    // V3 = V^2 * |V| (cubed magnitude, sign carried by the factors below)
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants, vAbsV);
    // Mul by V^3
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Merge the results
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    // First polynomial scaled by V.
    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V^2 retaining sign) = V * (1 - |V|)
    vConstants = _mm_mul_ps(vAbsV, V);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
    // Do NOT use rsqrt/mul. This needs the precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    // result = R0 + R1 / sqrt(1 - |V|)
    R0 = _mm_add_ps(R0,R1);
    return R0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
4065 | |||
4066 | //------------------------------------------------------------------------------ |
||
4067 | |||
//------------------------------------------------------------------------------
// Compute the arccosine of each component of V, via the identity
// acos(V) = Pi/2 - asin(V), using the same split polynomial approximation
// as XMVectorASin. Valid for inputs in [-1, 1]; results are in [0, Pi].
XMINLINE XMVECTOR XMVectorACos
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V2, V3, AbsV;
    XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
    XMVECTOR R0, R1, R2, R3, R4;
    XMVECTOR OneMinusAbsV;
    XMVECTOR Rsq;
    XMVECTOR Result;
    // Slightly above 1.0 so that 1 - |V| stays non-negative for V == +/-1
    // despite rounding, keeping the sqrt argument valid.
    static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    V2 = XMVectorMultiply(V, V);
    // V3 = V^2 * |V| -- cubic term with the sign handling folded into |V|
    V3 = XMVectorMultiply(V2, AbsV);

    // R4 = V - V*|V|  (retains the sign of V)
    R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);

    OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
    Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);

    C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
    C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
    C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
    C3 = XMVectorSplatW(g_XMASinCoefficients0.v);

    C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
    C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
    C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
    C7 = XMVectorSplatW(g_XMASinCoefficients1.v);

    C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
    C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
    C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
    C11 = XMVectorSplatW(g_XMASinCoefficients2.v);

    // Evaluate the polynomial in four interleaved strands (R0..R3) so that
    // terms of similar magnitude are summed together, preserving precision.
    R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
    R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
    R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
    R3 = XMVectorMultiplyAdd(C0, AbsV, C4);

    R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
    R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
    R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
    R3 = XMVectorMultiplyAdd(R3, AbsV, C8);

    // Fold the cubic-weighted strands into the linear-weighted ones.
    R0 = XMVectorMultiplyAdd(R2, V3, R0);
    R1 = XMVectorMultiplyAdd(R3, V3, R1);

    R0 = XMVectorMultiply(V, R0);
    R1 = XMVectorMultiply(R4, R1);

    // asin estimate = R0 + R1 / sqrt(1 - |V|)
    Result = XMVectorMultiplyAdd(R1, Rsq, R0);

    // acos(V) = Pi/2 - asin(V)
    Result = XMVectorSubtract(g_XMHalfPi.v, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Slightly above 1.0 so 1 - |V| never goes negative for V == +/-1.
    static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
    // Uses only 6 registers for good code on x86 targets
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V) via max(-V, V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Perform the series in precision groups to
    // retain precision across 20 bits. (3 bits of imprecision due to operations)
    XMVECTOR R0 = vAbsV;
    XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
    R0 = _mm_mul_ps(R0,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
    R0 = _mm_add_ps(R0,vConstants);
    R0 = _mm_mul_ps(R0,vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
    R0 = _mm_add_ps(R0,vConstants);

    XMVECTOR R1 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
    R1 = _mm_mul_ps(R1,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
    R1 = _mm_add_ps(R1,vConstants);
    R1 = _mm_mul_ps(R1, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
    R1 = _mm_add_ps(R1,vConstants);

    XMVECTOR R2 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
    R2 = _mm_mul_ps(R2,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
    R2 = _mm_add_ps(R2,vConstants);
    R2 = _mm_mul_ps(R2, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
    R2 = _mm_add_ps(R2,vConstants);

    XMVECTOR R3 = vAbsV;
    vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
    R3 = _mm_mul_ps(R3,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
    R3 = _mm_add_ps(R3,vConstants);
    R3 = _mm_mul_ps(R3, vAbsV);
    vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
    R3 = _mm_add_ps(R3,vConstants);

    // vConstants = V^2 * |V| (signed cubic term)
    vConstants = _mm_mul_ps(V,V);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    R2 = _mm_mul_ps(R2,vConstants);
    R3 = _mm_mul_ps(R3,vConstants);
    // Add the pair of values together here to retain
    // as much precision as possible
    R0 = _mm_add_ps(R0,R2);
    R1 = _mm_add_ps(R1,R3);

    R0 = _mm_mul_ps(R0,V);
    // vConstants = V-(V*abs(V))
    vConstants = _mm_mul_ps(V,vAbsV);
    vConstants = _mm_sub_ps(V,vConstants);
    R1 = _mm_mul_ps(R1,vConstants);
    // Episilon exists to allow 1.0 as an answer
    vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
    // Use sqrt instead of rsqrt for precision
    vConstants = _mm_sqrt_ps(vConstants);
    R1 = _mm_div_ps(R1,vConstants);
    R1 = _mm_add_ps(R1,R0);
    // acos(V) = Pi/2 - asin(V)
    vConstants = _mm_sub_ps(g_XMHalfPi,R1);
    return vConstants;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
4204 | |||
4205 | //------------------------------------------------------------------------------ |
||
4206 | |||
//------------------------------------------------------------------------------
// Compute the arctangent of each component of V using the Cody and Waite
// argument-reduction scheme: reduce |V| into [0, 2-sqrt(3)) via reciprocal
// and tan-subtraction identities, evaluate a rational approximation, then
// undo the reductions with the recorded angle offsets and sign.
// Results are in (-Pi/2, Pi/2); inputs beyond +/-MaxV clamp to +/-Pi/2.
XMINLINE XMVECTOR XMVectorATan
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Cody and Waite algorithm to compute inverse tangent.

    XMVECTOR N, D;
    XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
    XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
    XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
    XMVECTOR Zero;
    XMVECTOR NegativeHalfPi;
    XMVECTOR Angle1, Angle2;
    XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
    XMVECTOR NegativeResult, Result;
    XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
    // Numerator (P) and denominator (Q) coefficients of the rational minimax fit.
    static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    Zero = XMVectorZero();

    P0 = XMVectorSplatX(ATanConstants0);
    P1 = XMVectorSplatY(ATanConstants0);
    P2 = XMVectorSplatZ(ATanConstants0);
    P3 = XMVectorSplatW(ATanConstants0);

    Q0 = XMVectorSplatX(ATanConstants1);
    Q1 = XMVectorSplatY(ATanConstants1);
    Q2 = XMVectorSplatZ(ATanConstants1);
    Q3 = XMVectorSplatW(ATanConstants1);

    Sqrt3 = XMVectorSplatX(ATanConstants2);
    Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
    TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
    Epsilon = XMVectorSplatW(ATanConstants2);

    HalfPi = XMVectorSplatX(ATanConstants3);
    OneThirdPi = XMVectorSplatY(ATanConstants3);
    OneSixthPi = XMVectorSplatZ(ATanConstants3);
    MaxV = XMVectorSplatW(ATanConstants3);

    VF = XMVectorAbs(V);
    ReciprocalF = XMVectorReciprocal(VF);

    F_GT_One = XMVectorGreater(VF, g_XMOne.v);

    // Reduction 1: for |V| > 1 use atan(x) = Pi/2 - atan(1/x).
    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);

    F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);

    // Reduction 2: for f > 2-sqrt(3) use
    // atan(f) = Pi/6 + atan((f*sqrt(3) - 1) / (f + sqrt(3))).
    FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
    FA = XMVectorAdd(FA, g_XMNegativeOne.v);
    FB = XMVectorAdd(VF, Sqrt3);
    FB = XMVectorReciprocal(FB);
    FA = XMVectorMultiply(FA, FB);

    VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
    Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);

    AbsF = XMVectorAbs(VF);
    AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);

    // Rational approximation atan(f) ~= f + f * g * N(g) / D(g), g = f^2.
    G = XMVectorMultiply(VF, VF);

    D = XMVectorAdd(G, Q3);
    D = XMVectorMultiplyAdd(D, G, Q2);
    D = XMVectorMultiplyAdd(D, G, Q1);
    D = XMVectorMultiplyAdd(D, G, Q0);
    D = XMVectorReciprocal(D);

    N = XMVectorMultiplyAdd(P3, G, P2);
    N = XMVectorMultiplyAdd(N, G, P1);
    N = XMVectorMultiplyAdd(N, G, P0);
    N = XMVectorMultiply(N, G);
    Result = XMVectorMultiply(N, D);

    Result = XMVectorMultiplyAdd(Result, VF, VF);

    // For tiny f the approximation degenerates; atan(f) ~= f is exact enough.
    Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);

    // Undo reduction 1 (Pi/2 - x needs the inner result negated first).
    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, F_GT_One);

    Result = XMVectorAdd(Result, Angle1);

    // Restore the sign of the original input.
    V_LT_Zero = XMVectorLess(V, Zero);
    NegativeResult = XMVectorNegate(Result);
    Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);

    // Clamp very large magnitudes straight to +/-Pi/2.
    MinV = XMVectorNegate(MaxV);
    NegativeHalfPi = XMVectorNegate(HalfPi);
    V_GT_MaxV = XMVectorGreater(V, MaxV);
    V_LT_MinV = XMVectorLess(V, MinV);
    Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
    Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
    static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
    static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
    static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>

    XMVECTOR VF = XMVectorAbs(V);
    XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
    XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
    // Reduction 1: for |V| > 1 use atan(x) = Pi/2 - atan(1/x).
    VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
    XMVECTOR Zero = XMVectorZero();
    XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
    XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
    // Pi/3
    XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
    // Pi/6
    XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
    Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);

    // Reduction 2: FA = (f*(sqrt(3)-1) + f - 1) / (f + sqrt(3))
    //                 = (f*sqrt(3) - 1) / (f + sqrt(3))
    // sqrt(3)-1
    XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
    FA = _mm_mul_ps(FA,VF);
    FA = _mm_add_ps(FA,VF);
    FA = _mm_add_ps(FA,g_XMNegativeOne);
    // sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
    vConstants = _mm_add_ps(vConstants,VF);
    FA = _mm_div_ps(FA,vConstants);

    // 2-sqrt(3)
    vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
    // >2-sqrt(3)?
    vConstants = _mm_cmpgt_ps(VF,vConstants);
    VF = XMVectorSelect(VF, FA, vConstants);
    Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);

    XMVECTOR AbsF = XMVectorAbs(VF);

    // Rational approximation atan(f) ~= f + f * g * N(g) / D(g), g = f^2.
    XMVECTOR G = _mm_mul_ps(VF,VF);
    XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
    D = _mm_add_ps(D,G);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
    D = _mm_add_ps(D,vConstants);
    D = _mm_mul_ps(D,G);
    vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
    D = _mm_add_ps(D,vConstants);

    XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
    N = _mm_add_ps(N,vConstants);
    N = _mm_mul_ps(N,G);
    XMVECTOR Result = _mm_div_ps(N,D);

    Result = _mm_mul_ps(Result,VF);
    Result = _mm_add_ps(Result,VF);
    // Epsilon: for tiny f, atan(f) ~= f is exact enough.
    vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
    vConstants = _mm_cmpge_ps(vConstants,AbsF);
    Result = XMVectorSelect(Result,VF,vConstants);

    // Undo reduction 1 (negate before adding Pi/2).
    XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
    Result = _mm_add_ps(Result,Angle1);

    // Restore the sign of the original input (negate where V <= 0).
    Zero = _mm_cmpge_ps(Zero,V);
    NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
    Result = XMVectorSelect(Result,NegativeResult,Zero);

    // Clamp very large magnitudes straight to +/-Pi/2.
    XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
    XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
    // Negate HalfPi
    HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
    MaxV = _mm_cmple_ps(MaxV,V);
    MinV = _mm_cmpge_ps(MinV,V);
    Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
    // HalfPi = -HalfPi
    Result = XMVectorSelect(Result,HalfPi,MinV);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
4404 | |||
4405 | //------------------------------------------------------------------------------ |
||
4406 | |||
//------------------------------------------------------------------------------
// Compute the four-quadrant arctangent of Y/X per component, in (-Pi, Pi].
// Special cases (zeroes and infinities) are resolved with a cascade of
// vector selects before falling back to XMVectorATan for ordinary inputs.
XMINLINE XMVECTOR XMVectorATan2
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)

    // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:

    //     Y == 0 and X is Negative         -> Pi with the sign of Y
    //     Y == 0 and X is Positive         -> 0 with the sign of Y
    //     Y != 0 and X == 0                -> Pi / 2 with the sign of Y
    //     X == -Infinity and Finite Y > 0  -> Pi with the sign of Y
    //     X == +Infinity and Finite Y > 0  -> 0 with the sign of Y
    //     Y == Infinity and X is Finite    -> Pi / 2 with the sign of Y
    //     Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
    //     Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
    // TODO: Return Y / X if the result underflows

    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};

    Zero = XMVectorZero();
    // All-ones mask marks lanes that still need the real atan computation.
    ATanResultValid = XMVectorTrueInt();

    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);

    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    // XIsPositive: isolate X's sign bit, then test it against zero.
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    // Finite Y > 0: greater-than mask with infinite-Y lanes cleared.
    FiniteYGreaterZero = XMVectorGreater(Y, Zero);
    FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);

    // Stamp the sign of Y onto each special-case constant.
    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);

    // Resolve the special cases; lanes left equal to ATanResultValid
    // (all ones) fall through to the ordinary atan path below.
    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
    R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
    Result = XMVectorSelect(R6, R7, XEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);

    Reciprocal = XMVectorReciprocal(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATan(V);

    Result = XMVectorSelect(Result, R0, ATanResultValid);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask if Y>0 && Y!=INF
    XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
    // Get the sign of (Y&0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    R1 = _mm_or_ps(R1,YSign);
    R1 = XMVectorSelect(R1,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    // Lanes left at g_XMNegOneMask (all ones) fall through to XMVectorATan.
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);

    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
    // At this point, any entry that's zero will get the result
    // from XMVectorATan(), otherwise, return the failsafe value
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF, are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    vConstants = _mm_div_ps(Y,X);
    vConstants = XMVectorATan(vConstants);
    // Discard entries that have been declared void
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
4531 | |||
4532 | //------------------------------------------------------------------------------ |
||
4533 | |||
4534 | XMFINLINE XMVECTOR XMVectorSinEst |
||
4535 | ( |
||
4536 | FXMVECTOR V |
||
4537 | ) |
||
4538 | { |
||
4539 | #if defined(_XM_NO_INTRINSICS_) |
||
4540 | |||
4541 | XMVECTOR V2, V3, V5, V7; |
||
4542 | XMVECTOR S1, S2, S3; |
||
4543 | XMVECTOR Result; |
||
4544 | |||
4545 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
4546 | V2 = XMVectorMultiply(V, V); |
||
4547 | V3 = XMVectorMultiply(V2, V); |
||
4548 | V5 = XMVectorMultiply(V3, V2); |
||
4549 | V7 = XMVectorMultiply(V5, V2); |
||
4550 | |||
4551 | S1 = XMVectorSplatY(g_XMSinEstCoefficients.v); |
||
4552 | S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v); |
||
4553 | S3 = XMVectorSplatW(g_XMSinEstCoefficients.v); |
||
4554 | |||
4555 | Result = XMVectorMultiplyAdd(S1, V3, V); |
||
4556 | Result = XMVectorMultiplyAdd(S2, V5, Result); |
||
4557 | Result = XMVectorMultiplyAdd(S3, V7, Result); |
||
4558 | |||
4559 | return Result; |
||
4560 | |||
4561 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4562 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
4563 | XMVECTOR V2 = _mm_mul_ps(V,V); |
||
4564 | XMVECTOR V3 = _mm_mul_ps(V2,V); |
||
4565 | XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]); |
||
4566 | vResult = _mm_mul_ps(vResult,V3); |
||
4567 | vResult = _mm_add_ps(vResult,V); |
||
4568 | XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]); |
||
4569 | // V^5 |
||
4570 | V3 = _mm_mul_ps(V3,V2); |
||
4571 | vConstants = _mm_mul_ps(vConstants,V3); |
||
4572 | vResult = _mm_add_ps(vResult,vConstants); |
||
4573 | vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]); |
||
4574 | // V^7 |
||
4575 | V3 = _mm_mul_ps(V3,V2); |
||
4576 | vConstants = _mm_mul_ps(vConstants,V3); |
||
4577 | vResult = _mm_add_ps(vResult,vConstants); |
||
4578 | return vResult; |
||
4579 | #else // _XM_VMX128_INTRINSICS_ |
||
4580 | #endif // _XM_VMX128_INTRINSICS_ |
||
4581 | } |
||
4582 | |||
4583 | //------------------------------------------------------------------------------ |
||
4584 | |||
4585 | XMFINLINE XMVECTOR XMVectorCosEst |
||
4586 | ( |
||
4587 | FXMVECTOR V |
||
4588 | ) |
||
4589 | { |
||
4590 | #if defined(_XM_NO_INTRINSICS_) |
||
4591 | |||
4592 | XMVECTOR V2, V4, V6; |
||
4593 | XMVECTOR C0, C1, C2, C3; |
||
4594 | XMVECTOR Result; |
||
4595 | |||
4596 | V2 = XMVectorMultiply(V, V); |
||
4597 | V4 = XMVectorMultiply(V2, V2); |
||
4598 | V6 = XMVectorMultiply(V4, V2); |
||
4599 | |||
4600 | C0 = XMVectorSplatX(g_XMCosEstCoefficients.v); |
||
4601 | C1 = XMVectorSplatY(g_XMCosEstCoefficients.v); |
||
4602 | C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v); |
||
4603 | C3 = XMVectorSplatW(g_XMCosEstCoefficients.v); |
||
4604 | |||
4605 | Result = XMVectorMultiplyAdd(C1, V2, C0); |
||
4606 | Result = XMVectorMultiplyAdd(C2, V4, Result); |
||
4607 | Result = XMVectorMultiplyAdd(C3, V6, Result); |
||
4608 | |||
4609 | return Result; |
||
4610 | |||
4611 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4612 | // Get V^2 |
||
4613 | XMVECTOR V2 = _mm_mul_ps(V,V); |
||
4614 | XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]); |
||
4615 | vResult = _mm_mul_ps(vResult,V2); |
||
4616 | XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]); |
||
4617 | vResult = _mm_add_ps(vResult,vConstants); |
||
4618 | vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]); |
||
4619 | // Get V^4 |
||
4620 | XMVECTOR V4 = _mm_mul_ps(V2, V2); |
||
4621 | vConstants = _mm_mul_ps(vConstants,V4); |
||
4622 | vResult = _mm_add_ps(vResult,vConstants); |
||
4623 | vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]); |
||
4624 | // It's really V^6 |
||
4625 | V4 = _mm_mul_ps(V4,V2); |
||
4626 | vConstants = _mm_mul_ps(vConstants,V4); |
||
4627 | vResult = _mm_add_ps(vResult,vConstants); |
||
4628 | return vResult; |
||
4629 | #else // _XM_VMX128_INTRINSICS_ |
||
4630 | #endif // _XM_VMX128_INTRINSICS_ |
||
4631 | } |
||
4632 | |||
4633 | //------------------------------------------------------------------------------ |
||
4634 | |||
4635 | XMFINLINE VOID XMVectorSinCosEst |
||
4636 | ( |
||
4637 | XMVECTOR* pSin, |
||
4638 | XMVECTOR* pCos, |
||
4639 | FXMVECTOR V |
||
4640 | ) |
||
4641 | { |
||
4642 | #if defined(_XM_NO_INTRINSICS_) |
||
4643 | |||
4644 | XMVECTOR V2, V3, V4, V5, V6, V7; |
||
4645 | XMVECTOR S1, S2, S3; |
||
4646 | XMVECTOR C0, C1, C2, C3; |
||
4647 | XMVECTOR Sin, Cos; |
||
4648 | |||
4649 | XMASSERT(pSin); |
||
4650 | XMASSERT(pCos); |
||
4651 | |||
4652 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
4653 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
4654 | V2 = XMVectorMultiply(V, V); |
||
4655 | V3 = XMVectorMultiply(V2, V); |
||
4656 | V4 = XMVectorMultiply(V2, V2); |
||
4657 | V5 = XMVectorMultiply(V3, V2); |
||
4658 | V6 = XMVectorMultiply(V3, V3); |
||
4659 | V7 = XMVectorMultiply(V4, V3); |
||
4660 | |||
4661 | S1 = XMVectorSplatY(g_XMSinEstCoefficients.v); |
||
4662 | S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v); |
||
4663 | S3 = XMVectorSplatW(g_XMSinEstCoefficients.v); |
||
4664 | |||
4665 | C0 = XMVectorSplatX(g_XMCosEstCoefficients.v); |
||
4666 | C1 = XMVectorSplatY(g_XMCosEstCoefficients.v); |
||
4667 | C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v); |
||
4668 | C3 = XMVectorSplatW(g_XMCosEstCoefficients.v); |
||
4669 | |||
4670 | Sin = XMVectorMultiplyAdd(S1, V3, V); |
||
4671 | Sin = XMVectorMultiplyAdd(S2, V5, Sin); |
||
4672 | Sin = XMVectorMultiplyAdd(S3, V7, Sin); |
||
4673 | |||
4674 | Cos = XMVectorMultiplyAdd(C1, V2, C0); |
||
4675 | Cos = XMVectorMultiplyAdd(C2, V4, Cos); |
||
4676 | Cos = XMVectorMultiplyAdd(C3, V6, Cos); |
||
4677 | |||
4678 | *pSin = Sin; |
||
4679 | *pCos = Cos; |
||
4680 | |||
4681 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4682 | XMASSERT(pSin); |
||
4683 | XMASSERT(pCos); |
||
4684 | XMVECTOR V2, V3, V4, V5, V6, V7; |
||
4685 | XMVECTOR S1, S2, S3; |
||
4686 | XMVECTOR C0, C1, C2, C3; |
||
4687 | XMVECTOR Sin, Cos; |
||
4688 | |||
4689 | // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI) |
||
4690 | // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI) |
||
4691 | V2 = XMVectorMultiply(V, V); |
||
4692 | V3 = XMVectorMultiply(V2, V); |
||
4693 | V4 = XMVectorMultiply(V2, V2); |
||
4694 | V5 = XMVectorMultiply(V3, V2); |
||
4695 | V6 = XMVectorMultiply(V3, V3); |
||
4696 | V7 = XMVectorMultiply(V4, V3); |
||
4697 | |||
4698 | S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]); |
||
4699 | S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]); |
||
4700 | S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]); |
||
4701 | |||
4702 | C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]); |
||
4703 | C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]); |
||
4704 | C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]); |
||
4705 | C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]); |
||
4706 | |||
4707 | Sin = XMVectorMultiplyAdd(S1, V3, V); |
||
4708 | Sin = XMVectorMultiplyAdd(S2, V5, Sin); |
||
4709 | Sin = XMVectorMultiplyAdd(S3, V7, Sin); |
||
4710 | |||
4711 | Cos = XMVectorMultiplyAdd(C1, V2, C0); |
||
4712 | Cos = XMVectorMultiplyAdd(C2, V4, Cos); |
||
4713 | Cos = XMVectorMultiplyAdd(C3, V6, Cos); |
||
4714 | |||
4715 | *pSin = Sin; |
||
4716 | *pCos = Cos; |
||
4717 | #else // _XM_VMX128_INTRINSICS_ |
||
4718 | #endif // _XM_VMX128_INTRINSICS_ |
||
4719 | } |
||
4720 | |||
4721 | //------------------------------------------------------------------------------ |
||
4722 | |||
4723 | XMFINLINE XMVECTOR XMVectorTanEst |
||
4724 | ( |
||
4725 | FXMVECTOR V |
||
4726 | ) |
||
4727 | { |
||
4728 | #if defined(_XM_NO_INTRINSICS_) |
||
4729 | |||
4730 | XMVECTOR V1, V2, V1T0, V1T1, V2T2; |
||
4731 | XMVECTOR T0, T1, T2; |
||
4732 | XMVECTOR N, D; |
||
4733 | XMVECTOR OneOverPi; |
||
4734 | XMVECTOR Result; |
||
4735 | |||
4736 | OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v); |
||
4737 | |||
4738 | V1 = XMVectorMultiply(V, OneOverPi); |
||
4739 | V1 = XMVectorRound(V1); |
||
4740 | |||
4741 | V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V); |
||
4742 | |||
4743 | T0 = XMVectorSplatX(g_XMTanEstCoefficients.v); |
||
4744 | T1 = XMVectorSplatY(g_XMTanEstCoefficients.v); |
||
4745 | T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v); |
||
4746 | |||
4747 | V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2); |
||
4748 | V2 = XMVectorMultiply(V1, V1); |
||
4749 | V1T0 = XMVectorMultiply(V1, T0); |
||
4750 | V1T1 = XMVectorMultiply(V1, T1); |
||
4751 | |||
4752 | D = XMVectorReciprocalEst(V2T2); |
||
4753 | N = XMVectorMultiplyAdd(V2, V1T1, V1T0); |
||
4754 | |||
4755 | Result = XMVectorMultiply(N, D); |
||
4756 | |||
4757 | return Result; |
||
4758 | |||
4759 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4760 | XMVECTOR V1, V2, V1T0, V1T1, V2T2; |
||
4761 | XMVECTOR T0, T1, T2; |
||
4762 | XMVECTOR N, D; |
||
4763 | XMVECTOR OneOverPi; |
||
4764 | XMVECTOR Result; |
||
4765 | |||
4766 | OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients); |
||
4767 | |||
4768 | V1 = XMVectorMultiply(V, OneOverPi); |
||
4769 | V1 = XMVectorRound(V1); |
||
4770 | |||
4771 | V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V); |
||
4772 | |||
4773 | T0 = XMVectorSplatX(g_XMTanEstCoefficients); |
||
4774 | T1 = XMVectorSplatY(g_XMTanEstCoefficients); |
||
4775 | T2 = XMVectorSplatZ(g_XMTanEstCoefficients); |
||
4776 | |||
4777 | V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2); |
||
4778 | V2 = XMVectorMultiply(V1, V1); |
||
4779 | V1T0 = XMVectorMultiply(V1, T0); |
||
4780 | V1T1 = XMVectorMultiply(V1, T1); |
||
4781 | |||
4782 | D = XMVectorReciprocalEst(V2T2); |
||
4783 | N = XMVectorMultiplyAdd(V2, V1T1, V1T0); |
||
4784 | |||
4785 | Result = XMVectorMultiply(N, D); |
||
4786 | |||
4787 | return Result; |
||
4788 | #else // _XM_VMX128_INTRINSICS_ |
||
4789 | #endif // _XM_VMX128_INTRINSICS_ |
||
4790 | } |
||
4791 | |||
4792 | //------------------------------------------------------------------------------ |
||
4793 | |||
4794 | XMFINLINE XMVECTOR XMVectorSinHEst |
||
4795 | ( |
||
4796 | FXMVECTOR V |
||
4797 | ) |
||
4798 | { |
||
4799 | #if defined(_XM_NO_INTRINSICS_) |
||
4800 | |||
4801 | XMVECTOR V1, V2; |
||
4802 | XMVECTOR E1, E2; |
||
4803 | XMVECTOR Result; |
||
4804 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
4805 | |||
4806 | V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v); |
||
4807 | V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v); |
||
4808 | |||
4809 | E1 = XMVectorExpEst(V1); |
||
4810 | E2 = XMVectorExpEst(V2); |
||
4811 | |||
4812 | Result = XMVectorSubtract(E1, E2); |
||
4813 | |||
4814 | return Result; |
||
4815 | |||
4816 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4817 | XMVECTOR V1, V2; |
||
4818 | XMVECTOR E1, E2; |
||
4819 | XMVECTOR Result; |
||
4820 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
4821 | |||
4822 | V1 = _mm_mul_ps(V,Scale); |
||
4823 | V1 = _mm_add_ps(V1,g_XMNegativeOne); |
||
4824 | V2 = _mm_mul_ps(V,Scale); |
||
4825 | V2 = _mm_sub_ps(g_XMNegativeOne,V2); |
||
4826 | E1 = XMVectorExpEst(V1); |
||
4827 | E2 = XMVectorExpEst(V2); |
||
4828 | Result = _mm_sub_ps(E1, E2); |
||
4829 | return Result; |
||
4830 | #else // _XM_VMX128_INTRINSICS_ |
||
4831 | #endif // _XM_VMX128_INTRINSICS_ |
||
4832 | } |
||
4833 | |||
4834 | //------------------------------------------------------------------------------ |
||
4835 | |||
4836 | XMFINLINE XMVECTOR XMVectorCosHEst |
||
4837 | ( |
||
4838 | FXMVECTOR V |
||
4839 | ) |
||
4840 | { |
||
4841 | #if defined(_XM_NO_INTRINSICS_) |
||
4842 | |||
4843 | XMVECTOR V1, V2; |
||
4844 | XMVECTOR E1, E2; |
||
4845 | XMVECTOR Result; |
||
4846 | static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
4847 | |||
4848 | V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v); |
||
4849 | V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v); |
||
4850 | |||
4851 | E1 = XMVectorExpEst(V1); |
||
4852 | E2 = XMVectorExpEst(V2); |
||
4853 | |||
4854 | Result = XMVectorAdd(E1, E2); |
||
4855 | |||
4856 | return Result; |
||
4857 | |||
4858 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4859 | XMVECTOR V1, V2; |
||
4860 | XMVECTOR E1, E2; |
||
4861 | XMVECTOR Result; |
||
4862 | static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f) |
||
4863 | |||
4864 | V1 = _mm_mul_ps(V,Scale); |
||
4865 | V1 = _mm_add_ps(V1,g_XMNegativeOne); |
||
4866 | V2 = _mm_mul_ps(V, Scale); |
||
4867 | V2 = _mm_sub_ps(g_XMNegativeOne,V2); |
||
4868 | E1 = XMVectorExpEst(V1); |
||
4869 | E2 = XMVectorExpEst(V2); |
||
4870 | Result = _mm_add_ps(E1, E2); |
||
4871 | return Result; |
||
4872 | #else // _XM_VMX128_INTRINSICS_ |
||
4873 | #endif // _XM_VMX128_INTRINSICS_ |
||
4874 | } |
||
4875 | |||
4876 | //------------------------------------------------------------------------------ |
||
4877 | |||
4878 | XMFINLINE XMVECTOR XMVectorTanHEst |
||
4879 | ( |
||
4880 | FXMVECTOR V |
||
4881 | ) |
||
4882 | { |
||
4883 | #if defined(_XM_NO_INTRINSICS_) |
||
4884 | |||
4885 | XMVECTOR E; |
||
4886 | XMVECTOR Result; |
||
4887 | static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f) |
||
4888 | |||
4889 | E = XMVectorMultiply(V, Scale); |
||
4890 | E = XMVectorExpEst(E); |
||
4891 | E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v); |
||
4892 | E = XMVectorReciprocalEst(E); |
||
4893 | |||
4894 | Result = XMVectorSubtract(g_XMOne.v, E); |
||
4895 | |||
4896 | return Result; |
||
4897 | |||
4898 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4899 | static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f) |
||
4900 | |||
4901 | XMVECTOR E = _mm_mul_ps(V, Scale); |
||
4902 | E = XMVectorExpEst(E); |
||
4903 | E = _mm_mul_ps(E,g_XMOneHalf); |
||
4904 | E = _mm_add_ps(E,g_XMOneHalf); |
||
4905 | E = XMVectorReciprocalEst(E); |
||
4906 | E = _mm_sub_ps(g_XMOne, E); |
||
4907 | return E; |
||
4908 | #else // _XM_VMX128_INTRINSICS_ |
||
4909 | #endif // _XM_VMX128_INTRINSICS_ |
||
4910 | } |
||
4911 | |||
4912 | //------------------------------------------------------------------------------ |
||
4913 | |||
// Estimate asin(V) per component using a polynomial-plus-sqrt approximation.
// Coefficients C0..C3 come from g_XMASinEstCoefficients; g_XMASinEstConstants.x
// appears to hold a value slightly above 1 so D = const - |V| stays positive
// for inputs at +/-1 (assumption from the name "OnePlusEps" — confirm against
// the constant's definition).
XMFINLINE XMVECTOR XMVectorASinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps;
    XMVECTOR Result;

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    // D = (1+eps) - |V|
    D = XMVectorSubtract(OnePlusEps, AbsV);

    // SqrtD = D * (1/sqrt(D)) ~= sqrt(D); estimate precision is adequate here
    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    // V2 = V * |V| (square of V, retaining the sign of V)
    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    // VD = D * |V|
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    // Result = C1*V + C2*V2 + C3*V2*(D*|V|) + C0*V*sqrt(D)
    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V): no SSE fabs, so use max(-V, V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);

    // D = (1+eps) - |V|
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // Since this is an estimate, rqsrt is okay
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    // SqrtD = D * (1/sqrt(D)) ~= sqrt(D)
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 retaining sign
    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
    // D now holds D * |V|
    D = _mm_mul_ps(D,vAbsV);

    // Result = C1*V + C2*V2 + C3*V2*(D*|V|) + C0*V*sqrt(D)
    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
4987 | |||
4988 | //------------------------------------------------------------------------------ |
||
4989 | |||
// Estimate acos(V) per component via the identity acos(V) = Pi/2 - asin(V);
// the asin body mirrors XMVectorASinEst, then the result is subtracted from
// Pi/2 (g_XMASinEstConstants.y).
XMFINLINE XMVECTOR XMVectorACosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps, HalfPi;
    XMVECTOR Result;

    // acos(V) = PI / 2 - asin(V)

    AbsV = XMVectorAbs(V);

    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);

    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);

    // D = (1+eps) - |V|
    D = XMVectorSubtract(OnePlusEps, AbsV);

    // SqrtD = D * (1/sqrt(D)) ~= sqrt(D); estimate precision is adequate here
    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);

    // V2 = V * |V| (square of V, retaining the sign of V)
    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    // VD = D * |V|
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);

    // asin(V) ~= C1*V + C2*V2 + C3*V2*(D*|V|) + C0*V*sqrt(D)
    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    // acos(V) = Pi/2 - asin(V)
    Result = XMVectorSubtract(HalfPi, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V): no SSE fabs, so use max(-V, V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Calc D = (1+eps) - |V|
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // SqrtD = D * (1/sqrt(D)) ~= sqrt(D), via the rsqrt estimate
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 while retaining sign
    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
    D = _mm_mul_ps(D, vAbsV);

    // asin(V) ~= C1*V + C2*V2 + C3*V2*(D*|V|) + C0*V*sqrt(D)
    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);

    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);

    // acos(V) = Pi/2 - asin(V)
    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
    vResult = _mm_sub_ps(vConstants,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
5072 | |||
5073 | //------------------------------------------------------------------------------ |
||
5074 | |||
5075 | XMFINLINE XMVECTOR XMVectorATanEst |
||
5076 | ( |
||
5077 | FXMVECTOR V |
||
5078 | ) |
||
5079 | { |
||
5080 | #if defined(_XM_NO_INTRINSICS_) |
||
5081 | |||
5082 | XMVECTOR AbsV, V2S2, N, D; |
||
5083 | XMVECTOR S0, S1, S2; |
||
5084 | XMVECTOR HalfPi; |
||
5085 | XMVECTOR Result; |
||
5086 | |||
5087 | S0 = XMVectorSplatX(g_XMATanEstCoefficients.v); |
||
5088 | S1 = XMVectorSplatY(g_XMATanEstCoefficients.v); |
||
5089 | S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v); |
||
5090 | HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v); |
||
5091 | |||
5092 | AbsV = XMVectorAbs(V); |
||
5093 | |||
5094 | V2S2 = XMVectorMultiplyAdd(V, V, S2); |
||
5095 | N = XMVectorMultiplyAdd(AbsV, HalfPi, S0); |
||
5096 | D = XMVectorMultiplyAdd(AbsV, S1, V2S2); |
||
5097 | N = XMVectorMultiply(N, V); |
||
5098 | D = XMVectorReciprocalEst(D); |
||
5099 | |||
5100 | Result = XMVectorMultiply(N, D); |
||
5101 | |||
5102 | return Result; |
||
5103 | |||
5104 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5105 | // Get abs(V) |
||
5106 | XMVECTOR vAbsV = _mm_setzero_ps(); |
||
5107 | vAbsV = _mm_sub_ps(vAbsV,V); |
||
5108 | vAbsV = _mm_max_ps(vAbsV,V); |
||
5109 | |||
5110 | XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]); |
||
5111 | vResult = _mm_mul_ps(vResult,vAbsV); |
||
5112 | XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]); |
||
5113 | vResult = _mm_add_ps(vResult,vConstants); |
||
5114 | vResult = _mm_mul_ps(vResult,V); |
||
5115 | |||
5116 | XMVECTOR D = _mm_mul_ps(V,V); |
||
5117 | vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]); |
||
5118 | D = _mm_add_ps(D,vConstants); |
||
5119 | vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]); |
||
5120 | vConstants = _mm_mul_ps(vConstants,vAbsV); |
||
5121 | D = _mm_add_ps(D,vConstants); |
||
5122 | vResult = _mm_div_ps(vResult,D); |
||
5123 | return vResult; |
||
5124 | #else // _XM_VMX128_INTRINSICS_ |
||
5125 | #endif // _XM_VMX128_INTRINSICS_ |
||
5126 | } |
||
5127 | |||
5128 | //------------------------------------------------------------------------------ |
||
5129 | |||
5130 | XMFINLINE XMVECTOR XMVectorATan2Est |
||
5131 | ( |
||
5132 | FXMVECTOR Y, |
||
5133 | FXMVECTOR X |
||
5134 | ) |
||
5135 | { |
||
5136 | #if defined(_XM_NO_INTRINSICS_) |
||
5137 | |||
5138 | XMVECTOR Reciprocal; |
||
5139 | XMVECTOR V; |
||
5140 | XMVECTOR YSign; |
||
5141 | XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour; |
||
5142 | XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero; |
||
5143 | XMVECTOR ATanResultValid; |
||
5144 | XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7; |
||
5145 | XMVECTOR Zero; |
||
5146 | XMVECTOR Result; |
||
5147 | static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f}; |
||
5148 | |||
5149 | Zero = XMVectorZero(); |
||
5150 | ATanResultValid = XMVectorTrueInt(); |
||
5151 | |||
5152 | Pi = XMVectorSplatX(ATan2Constants); |
||
5153 | PiOverTwo = XMVectorSplatY(ATan2Constants); |
||
5154 | PiOverFour = XMVectorSplatZ(ATan2Constants); |
||
5155 | ThreePiOverFour = XMVectorSplatW(ATan2Constants); |
||
5156 | |||
5157 | YEqualsZero = XMVectorEqual(Y, Zero); |
||
5158 | XEqualsZero = XMVectorEqual(X, Zero); |
||
5159 | XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); |
||
5160 | XIsPositive = XMVectorEqualInt(XIsPositive, Zero); |
||
5161 | YEqualsInfinity = XMVectorIsInfinite(Y); |
||
5162 | XEqualsInfinity = XMVectorIsInfinite(X); |
||
5163 | FiniteYGreaterZero = XMVectorGreater(Y, Zero); |
||
5164 | FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity); |
||
5165 | |||
5166 | YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); |
||
5167 | Pi = XMVectorOrInt(Pi, YSign); |
||
5168 | PiOverTwo = XMVectorOrInt(PiOverTwo, YSign); |
||
5169 | PiOverFour = XMVectorOrInt(PiOverFour, YSign); |
||
5170 | ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign); |
||
5171 | |||
5172 | R1 = XMVectorSelect(Pi, YSign, XIsPositive); |
||
5173 | R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero); |
||
5174 | R3 = XMVectorSelect(R2, R1, YEqualsZero); |
||
5175 | R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); |
||
5176 | R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity); |
||
5177 | R6 = XMVectorSelect(R3, R5, YEqualsInfinity); |
||
5178 | R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero); |
||
5179 | Result = XMVectorSelect(R6, R7, XEqualsInfinity); |
||
5180 | ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); |
||
5181 | |||
5182 | Reciprocal = XMVectorReciprocalEst(X); |
||
5183 | V = XMVectorMultiply(Y, Reciprocal); |
||
5184 | R0 = XMVectorATanEst(V); |
||
5185 | |||
5186 | Result = XMVectorSelect(Result, R0, ATanResultValid); |
||
5187 | |||
5188 | return Result; |
||
5189 | |||
5190 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5191 | static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f}; |
||
5192 | // Mask if Y>0 && Y!=INF |
||
5193 | XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero); |
||
5194 | XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y); |
||
5195 | FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero); |
||
5196 | // Get the sign of (Y&0x80000000) |
||
5197 | XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero); |
||
5198 | // Get the sign bits of X |
||
5199 | XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero); |
||
5200 | // Change them to masks |
||
5201 | XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero); |
||
5202 | // Get Pi |
||
5203 | XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]); |
||
5204 | // Copy the sign of Y |
||
5205 | R1 = _mm_or_ps(R1,YSign); |
||
5206 | R1 = XMVectorSelect(R1,YSign,XIsPositive); |
||
5207 | // Mask for X==0 |
||
5208 | XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero); |
||
5209 | // Get Pi/2 with with sign of Y |
||
5210 | XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]); |
||
5211 | PiOverTwo = _mm_or_ps(PiOverTwo,YSign); |
||
5212 | XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants); |
||
5213 | // Mask for Y==0 |
||
5214 | vConstants = _mm_cmpeq_ps(Y,g_XMZero); |
||
5215 | R2 = XMVectorSelect(R2,R1,vConstants); |
||
5216 | // Get Pi/4 with sign of Y |
||
5217 | XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]); |
||
5218 | PiOverFour = _mm_or_ps(PiOverFour,YSign); |
||
5219 | // Get (Pi*3)/4 with sign of Y |
||
5220 | XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]); |
||
5221 | ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign); |
||
5222 | vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); |
||
5223 | XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X); |
||
5224 | vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity); |
||
5225 | |||
5226 | XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity); |
||
5227 | vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero); |
||
5228 | // At this point, any entry that's zero will get the result |
||
5229 | // from XMVectorATan(), otherwise, return the failsafe value |
||
5230 | vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity); |
||
5231 | // Any entries not 0xFFFFFFFF, are considered precalculated |
||
5232 | XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask); |
||
5233 | // Let's do the ATan2 function |
||
5234 | vConstants = _mm_div_ps(Y,X); |
||
5235 | vConstants = XMVectorATanEst(vConstants); |
||
5236 | // Discard entries that have been declared void |
||
5237 | vResult = XMVectorSelect(vResult,vConstants,ATanResultValid); |
||
5238 | return vResult; |
||
5239 | #else // _XM_VMX128_INTRINSICS_ |
||
5240 | #endif // _XM_VMX128_INTRINSICS_ |
||
5241 | } |
||
5242 | |||
5243 | //------------------------------------------------------------------------------ |
||
5244 | |||
5245 | XMFINLINE XMVECTOR XMVectorLerp |
||
5246 | ( |
||
5247 | FXMVECTOR V0, |
||
5248 | FXMVECTOR V1, |
||
5249 | FLOAT t |
||
5250 | ) |
||
5251 | { |
||
5252 | #if defined(_XM_NO_INTRINSICS_) |
||
5253 | |||
5254 | XMVECTOR Scale; |
||
5255 | XMVECTOR Length; |
||
5256 | XMVECTOR Result; |
||
5257 | |||
5258 | // V0 + t * (V1 - V0) |
||
5259 | Scale = XMVectorReplicate(t); |
||
5260 | Length = XMVectorSubtract(V1, V0); |
||
5261 | Result = XMVectorMultiplyAdd(Length, Scale, V0); |
||
5262 | |||
5263 | return Result; |
||
5264 | |||
5265 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5266 | XMVECTOR L, S; |
||
5267 | XMVECTOR Result; |
||
5268 | |||
5269 | L = _mm_sub_ps( V1, V0 ); |
||
5270 | |||
5271 | S = _mm_set_ps1( t ); |
||
5272 | |||
5273 | Result = _mm_mul_ps( L, S ); |
||
5274 | |||
5275 | return _mm_add_ps( Result, V0 ); |
||
5276 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
5277 | #endif // _XM_VMX128_INTRINSICS_ |
||
5278 | } |
||
5279 | |||
5280 | //------------------------------------------------------------------------------ |
||
5281 | |||
5282 | XMFINLINE XMVECTOR XMVectorLerpV |
||
5283 | ( |
||
5284 | FXMVECTOR V0, |
||
5285 | FXMVECTOR V1, |
||
5286 | FXMVECTOR T |
||
5287 | ) |
||
5288 | { |
||
5289 | #if defined(_XM_NO_INTRINSICS_) |
||
5290 | |||
5291 | XMVECTOR Length; |
||
5292 | XMVECTOR Result; |
||
5293 | |||
5294 | // V0 + T * (V1 - V0) |
||
5295 | Length = XMVectorSubtract(V1, V0); |
||
5296 | Result = XMVectorMultiplyAdd(Length, T, V0); |
||
5297 | |||
5298 | return Result; |
||
5299 | |||
5300 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5301 | XMVECTOR Length; |
||
5302 | XMVECTOR Result; |
||
5303 | |||
5304 | Length = _mm_sub_ps( V1, V0 ); |
||
5305 | |||
5306 | Result = _mm_mul_ps( Length, T ); |
||
5307 | |||
5308 | return _mm_add_ps( Result, V0 ); |
||
5309 | #else // _XM_VMX128_INTRINSICS_ |
||
5310 | #endif // _XM_VMX128_INTRINSICS_ |
||
5311 | } |
||
5312 | |||
5313 | //------------------------------------------------------------------------------ |
||
5314 | |||
5315 | XMFINLINE XMVECTOR XMVectorHermite |
||
5316 | ( |
||
5317 | FXMVECTOR Position0, |
||
5318 | FXMVECTOR Tangent0, |
||
5319 | FXMVECTOR Position1, |
||
5320 | CXMVECTOR Tangent1, |
||
5321 | FLOAT t |
||
5322 | ) |
||
5323 | { |
||
5324 | #if defined(_XM_NO_INTRINSICS_) |
||
5325 | |||
5326 | XMVECTOR P0; |
||
5327 | XMVECTOR T0; |
||
5328 | XMVECTOR P1; |
||
5329 | XMVECTOR T1; |
||
5330 | XMVECTOR Result; |
||
5331 | FLOAT t2; |
||
5332 | FLOAT t3; |
||
5333 | |||
5334 | // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 + |
||
5335 | // (t^3 - 2 * t^2 + t) * Tangent0 + |
||
5336 | // (-2 * t^3 + 3 * t^2) * Position1 + |
||
5337 | // (t^3 - t^2) * Tangent1 |
||
5338 | t2 = t * t; |
||
5339 | t3 = t * t2; |
||
5340 | |||
5341 | P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f); |
||
5342 | T0 = XMVectorReplicate(t3 - 2.0f * t2 + t); |
||
5343 | P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2); |
||
5344 | T1 = XMVectorReplicate(t3 - t2); |
||
5345 | |||
5346 | Result = XMVectorMultiply(P0, Position0); |
||
5347 | Result = XMVectorMultiplyAdd(T0, Tangent0, Result); |
||
5348 | Result = XMVectorMultiplyAdd(P1, Position1, Result); |
||
5349 | Result = XMVectorMultiplyAdd(T1, Tangent1, Result); |
||
5350 | |||
5351 | return Result; |
||
5352 | |||
5353 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5354 | FLOAT t2 = t * t; |
||
5355 | FLOAT t3 = t * t2; |
||
5356 | |||
5357 | XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f); |
||
5358 | XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t); |
||
5359 | XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2); |
||
5360 | XMVECTOR T1 = _mm_set_ps1(t3 - t2); |
||
5361 | |||
5362 | XMVECTOR vResult = _mm_mul_ps(P0, Position0); |
||
5363 | XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0); |
||
5364 | vResult = _mm_add_ps(vResult,vTemp); |
||
5365 | vTemp = _mm_mul_ps(P1, Position1); |
||
5366 | vResult = _mm_add_ps(vResult,vTemp); |
||
5367 | vTemp = _mm_mul_ps(T1, Tangent1); |
||
5368 | vResult = _mm_add_ps(vResult,vTemp); |
||
5369 | return vResult; |
||
5370 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
5371 | #endif // _XM_VMX128_INTRINSICS_ |
||
5372 | } |
||
5373 | |||
5374 | //------------------------------------------------------------------------------ |
||
5375 | |||
// Evaluate a Hermite spline with a per-component parameter vector T.
// Note the unusual contract (visible in the scalar path below): each basis
// coefficient is built from a DIFFERENT lane of T — T.x drives the Position0
// weight, T.y the Tangent0 weight, T.z the Position1 weight, and T.w the
// Tangent1 weight.
XMFINLINE XMVECTOR XMVectorHermiteV
(
    FXMVECTOR Position0,
    FXMVECTOR Tangent0,
    FXMVECTOR Position1,
    CXMVECTOR Tangent1,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    XMVECTOR T2;
    XMVECTOR T3;

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    T2 = XMVectorMultiply(T, T);
    T3 = XMVectorMultiply(T , T2);

    // One basis weight per lane of T (x->P0, y->T0, z->P1, w->T1)
    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);

    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane multipliers for t^2 and t^3: all four basis polynomials are
    // evaluated at once, one polynomial per lane, then splatted individually.
    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};

    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Mul by the constants against t^2
    T2 = _mm_mul_ps(T2,CatMulT2);
    // Mul by the constants against t^3
    T3 = _mm_mul_ps(T3,CatMulT3);
    // T3 now has the pre-result.
    T3 = _mm_add_ps(T3,T2);
    // I need to add t.y only (the Tangent0 polynomial has a bare +t term)
    T2 = _mm_and_ps(T,g_XMMaskY);
    T3 = _mm_add_ps(T3,T2);
    // Add 1.0f to x (the Position0 polynomial has a +1 term)
    T3 = _mm_add_ps(T3,g_XMIdentityR0);
    // Now, I have the constants created
    // Mul the x constant to Position0
    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,Position0);
    // Mul the y constant to Tangent0
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
    T2 = _mm_mul_ps(T2,Tangent0);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the z constant to Position1
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
    T2 = _mm_mul_ps(T2,Position1);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the w constant to Tangent1
    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
    T3 = _mm_mul_ps(T3,Tangent1);
    vResult = _mm_add_ps(vResult,T3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
5455 | |||
5456 | //------------------------------------------------------------------------------ |
||
5457 | |||
5458 | XMFINLINE XMVECTOR XMVectorCatmullRom |
||
5459 | ( |
||
5460 | FXMVECTOR Position0, |
||
5461 | FXMVECTOR Position1, |
||
5462 | FXMVECTOR Position2, |
||
5463 | CXMVECTOR Position3, |
||
5464 | FLOAT t |
||
5465 | ) |
||
5466 | { |
||
5467 | #if defined(_XM_NO_INTRINSICS_) |
||
5468 | |||
5469 | XMVECTOR P0; |
||
5470 | XMVECTOR P1; |
||
5471 | XMVECTOR P2; |
||
5472 | XMVECTOR P3; |
||
5473 | XMVECTOR Result; |
||
5474 | FLOAT t2; |
||
5475 | FLOAT t3; |
||
5476 | |||
5477 | // Result = ((-t^3 + 2 * t^2 - t) * Position0 + |
||
5478 | // (3 * t^3 - 5 * t^2 + 2) * Position1 + |
||
5479 | // (-3 * t^3 + 4 * t^2 + t) * Position2 + |
||
5480 | // (t^3 - t^2) * Position3) * 0.5 |
||
5481 | t2 = t * t; |
||
5482 | t3 = t * t2; |
||
5483 | |||
5484 | P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f); |
||
5485 | P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); |
||
5486 | P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); |
||
5487 | P3 = XMVectorReplicate((t3 - t2) * 0.5f); |
||
5488 | |||
5489 | Result = XMVectorMultiply(P0, Position0); |
||
5490 | Result = XMVectorMultiplyAdd(P1, Position1, Result); |
||
5491 | Result = XMVectorMultiplyAdd(P2, Position2, Result); |
||
5492 | Result = XMVectorMultiplyAdd(P3, Position3, Result); |
||
5493 | |||
5494 | return Result; |
||
5495 | |||
5496 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5497 | FLOAT t2 = t * t; |
||
5498 | FLOAT t3 = t * t2; |
||
5499 | |||
5500 | XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f); |
||
5501 | XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); |
||
5502 | XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); |
||
5503 | XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f); |
||
5504 | |||
5505 | P0 = _mm_mul_ps(P0, Position0); |
||
5506 | P1 = _mm_mul_ps(P1, Position1); |
||
5507 | P2 = _mm_mul_ps(P2, Position2); |
||
5508 | P3 = _mm_mul_ps(P3, Position3); |
||
5509 | P0 = _mm_add_ps(P0,P1); |
||
5510 | P2 = _mm_add_ps(P2,P3); |
||
5511 | P0 = _mm_add_ps(P0,P2); |
||
5512 | return P0; |
||
5513 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
5514 | #endif // _XM_VMX128_INTRINSICS_ |
||
5515 | } |
||
5516 | |||
5517 | //------------------------------------------------------------------------------ |
||
5518 | |||
// Evaluate a Catmull-Rom spline with a per-component parameter vector T:
// each lane of T is used as the parameter for that lane of the four
// control points.
XMFINLINE XMVECTOR XMVectorCatmullRomV
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR Position3,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar expansion of the Catmull-Rom basis, one lane at a time:
    // 0.5 * ((-t^3+2t^2-t)*P0 + (3t^3-5t^2+2)*P1 + (-3t^3+4t^2+t)*P2 + (t^3-t^2)*P3)
    float fx = T.vector4_f32[0];
    float fy = T.vector4_f32[1];
    float fz = T.vector4_f32[2];
    float fw = T.vector4_f32[3];
    XMVECTOR vResult = {
        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
    // Cache T^2 and T^3
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Perform the Position0 term: (-T^3 + 2T^2 - T)
    XMVECTOR vResult = _mm_add_ps(T2,T2);
    vResult = _mm_sub_ps(vResult,T);
    vResult = _mm_sub_ps(vResult,T3);
    vResult = _mm_mul_ps(vResult,Position0);
    // Perform the Position1 term and add: (3T^3 - 5T^2 + 2)
    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,Catmul2);
    vTemp = _mm_mul_ps(vTemp,Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    // Perform the Position2 term and add: (-3T^3 + 4T^2 + T)
    vTemp = _mm_mul_ps(T2,Catmul4);
    vTemp2 = _mm_mul_ps(T3,Catmul3);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,T);
    vTemp = _mm_mul_ps(vTemp,Position2);
    vResult = _mm_add_ps(vResult,vTemp);
    // Position3 is the last term: (T^3 - T^2)
    T3 = _mm_sub_ps(T3,T2);
    T3 = _mm_mul_ps(T3,Position3);
    vResult = _mm_add_ps(vResult,T3);
    // Multiply by 0.5f and exit
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
5589 | |||
5590 | //------------------------------------------------------------------------------ |
||
5591 | |||
5592 | XMFINLINE XMVECTOR XMVectorBaryCentric |
||
5593 | ( |
||
5594 | FXMVECTOR Position0, |
||
5595 | FXMVECTOR Position1, |
||
5596 | FXMVECTOR Position2, |
||
5597 | FLOAT f, |
||
5598 | FLOAT g |
||
5599 | ) |
||
5600 | { |
||
5601 | #if defined(_XM_NO_INTRINSICS_) |
||
5602 | |||
5603 | // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0) |
||
5604 | XMVECTOR P10; |
||
5605 | XMVECTOR P20; |
||
5606 | XMVECTOR ScaleF; |
||
5607 | XMVECTOR ScaleG; |
||
5608 | XMVECTOR Result; |
||
5609 | |||
5610 | P10 = XMVectorSubtract(Position1, Position0); |
||
5611 | ScaleF = XMVectorReplicate(f); |
||
5612 | |||
5613 | P20 = XMVectorSubtract(Position2, Position0); |
||
5614 | ScaleG = XMVectorReplicate(g); |
||
5615 | |||
5616 | Result = XMVectorMultiplyAdd(P10, ScaleF, Position0); |
||
5617 | Result = XMVectorMultiplyAdd(P20, ScaleG, Result); |
||
5618 | |||
5619 | return Result; |
||
5620 | |||
5621 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5622 | XMVECTOR R1 = _mm_sub_ps(Position1,Position0); |
||
5623 | XMVECTOR SF = _mm_set_ps1(f); |
||
5624 | XMVECTOR R2 = _mm_sub_ps(Position2,Position0); |
||
5625 | XMVECTOR SG = _mm_set_ps1(g); |
||
5626 | R1 = _mm_mul_ps(R1,SF); |
||
5627 | R2 = _mm_mul_ps(R2,SG); |
||
5628 | R1 = _mm_add_ps(R1,Position0); |
||
5629 | R1 = _mm_add_ps(R1,R2); |
||
5630 | return R1; |
||
5631 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
5632 | #endif // _XM_VMX128_INTRINSICS_ |
||
5633 | } |
||
5634 | |||
5635 | //------------------------------------------------------------------------------ |
||
5636 | |||
5637 | XMFINLINE XMVECTOR XMVectorBaryCentricV |
||
5638 | ( |
||
5639 | FXMVECTOR Position0, |
||
5640 | FXMVECTOR Position1, |
||
5641 | FXMVECTOR Position2, |
||
5642 | CXMVECTOR F, |
||
5643 | CXMVECTOR G |
||
5644 | ) |
||
5645 | { |
||
5646 | #if defined(_XM_NO_INTRINSICS_) |
||
5647 | |||
5648 | // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0) |
||
5649 | XMVECTOR P10; |
||
5650 | XMVECTOR P20; |
||
5651 | XMVECTOR Result; |
||
5652 | |||
5653 | P10 = XMVectorSubtract(Position1, Position0); |
||
5654 | P20 = XMVectorSubtract(Position2, Position0); |
||
5655 | |||
5656 | Result = XMVectorMultiplyAdd(P10, F, Position0); |
||
5657 | Result = XMVectorMultiplyAdd(P20, G, Result); |
||
5658 | |||
5659 | return Result; |
||
5660 | |||
5661 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5662 | XMVECTOR R1 = _mm_sub_ps(Position1,Position0); |
||
5663 | XMVECTOR R2 = _mm_sub_ps(Position2,Position0); |
||
5664 | R1 = _mm_mul_ps(R1,F); |
||
5665 | R2 = _mm_mul_ps(R2,G); |
||
5666 | R1 = _mm_add_ps(R1,Position0); |
||
5667 | R1 = _mm_add_ps(R1,R2); |
||
5668 | return R1; |
||
5669 | #else // _XM_VMX128_INTRINSICS_ |
||
5670 | #endif // _XM_VMX128_INTRINSICS_ |
||
5671 | } |
||
5672 | |||
5673 | /**************************************************************************** |
||
5674 | * |
||
5675 | * 2D Vector |
||
5676 | * |
||
5677 | ****************************************************************************/ |
||
5678 | |||
5679 | //------------------------------------------------------------------------------ |
||
5680 | // Comparison operations |
||
5681 | //------------------------------------------------------------------------------ |
||
5682 | |||
5683 | //------------------------------------------------------------------------------ |
||
5684 | |||
5685 | XMFINLINE BOOL XMVector2Equal |
||
5686 | ( |
||
5687 | FXMVECTOR V1, |
||
5688 | FXMVECTOR V2 |
||
5689 | ) |
||
5690 | { |
||
5691 | #if defined(_XM_NO_INTRINSICS_) |
||
5692 | return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0); |
||
5693 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5694 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
5695 | // z and w are don't care |
||
5696 | return (((_mm_movemask_ps(vTemp)&3)==3) != 0); |
||
5697 | #else // _XM_VMX128_INTRINSICS_ |
||
5698 | return XMComparisonAllTrue(XMVector2EqualR(V1, V2)); |
||
5699 | #endif |
||
5700 | } |
||
5701 | |||
5702 | |||
5703 | //------------------------------------------------------------------------------ |
||
5704 | |||
5705 | XMFINLINE UINT XMVector2EqualR |
||
5706 | ( |
||
5707 | FXMVECTOR V1, |
||
5708 | FXMVECTOR V2 |
||
5709 | ) |
||
5710 | { |
||
5711 | #if defined(_XM_NO_INTRINSICS_) |
||
5712 | |||
5713 | UINT CR = 0; |
||
5714 | |||
5715 | if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && |
||
5716 | (V1.vector4_f32[1] == V2.vector4_f32[1])) |
||
5717 | { |
||
5718 | CR = XM_CRMASK_CR6TRUE; |
||
5719 | } |
||
5720 | else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && |
||
5721 | (V1.vector4_f32[1] != V2.vector4_f32[1])) |
||
5722 | { |
||
5723 | CR = XM_CRMASK_CR6FALSE; |
||
5724 | } |
||
5725 | return CR; |
||
5726 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5727 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
5728 | // z and w are don't care |
||
5729 | int iTest = _mm_movemask_ps(vTemp)&3; |
||
5730 | UINT CR = 0; |
||
5731 | if (iTest==3) |
||
5732 | { |
||
5733 | CR = XM_CRMASK_CR6TRUE; |
||
5734 | } |
||
5735 | else if (!iTest) |
||
5736 | { |
||
5737 | CR = XM_CRMASK_CR6FALSE; |
||
5738 | } |
||
5739 | return CR; |
||
5740 | #else // _XM_VMX128_INTRINSICS_ |
||
5741 | #endif // _XM_VMX128_INTRINSICS_ |
||
5742 | } |
||
5743 | |||
5744 | //------------------------------------------------------------------------------ |
||
5745 | |||
5746 | XMFINLINE BOOL XMVector2EqualInt |
||
5747 | ( |
||
5748 | FXMVECTOR V1, |
||
5749 | FXMVECTOR V2 |
||
5750 | ) |
||
5751 | { |
||
5752 | #if defined(_XM_NO_INTRINSICS_) |
||
5753 | return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0); |
||
5754 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5755 | __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]); |
||
5756 | return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0); |
||
5757 | #else // _XM_VMX128_INTRINSICS_ |
||
5758 | return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2)); |
||
5759 | #endif |
||
5760 | } |
||
5761 | |||
5762 | //------------------------------------------------------------------------------ |
||
5763 | |||
5764 | XMFINLINE UINT XMVector2EqualIntR |
||
5765 | ( |
||
5766 | FXMVECTOR V1, |
||
5767 | FXMVECTOR V2 |
||
5768 | ) |
||
5769 | { |
||
5770 | #if defined(_XM_NO_INTRINSICS_) |
||
5771 | |||
5772 | UINT CR = 0; |
||
5773 | if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && |
||
5774 | (V1.vector4_u32[1] == V2.vector4_u32[1])) |
||
5775 | { |
||
5776 | CR = XM_CRMASK_CR6TRUE; |
||
5777 | } |
||
5778 | else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && |
||
5779 | (V1.vector4_u32[1] != V2.vector4_u32[1])) |
||
5780 | { |
||
5781 | CR = XM_CRMASK_CR6FALSE; |
||
5782 | } |
||
5783 | return CR; |
||
5784 | |||
5785 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5786 | __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]); |
||
5787 | int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3; |
||
5788 | UINT CR = 0; |
||
5789 | if (iTest==3) |
||
5790 | { |
||
5791 | CR = XM_CRMASK_CR6TRUE; |
||
5792 | } |
||
5793 | else if (!iTest) |
||
5794 | { |
||
5795 | CR = XM_CRMASK_CR6FALSE; |
||
5796 | } |
||
5797 | return CR; |
||
5798 | #else // _XM_VMX128_INTRINSICS_ |
||
5799 | #endif // _XM_VMX128_INTRINSICS_ |
||
5800 | } |
||
5801 | |||
5802 | //------------------------------------------------------------------------------ |
||
5803 | |||
5804 | XMFINLINE BOOL XMVector2NearEqual |
||
5805 | ( |
||
5806 | FXMVECTOR V1, |
||
5807 | FXMVECTOR V2, |
||
5808 | FXMVECTOR Epsilon |
||
5809 | ) |
||
5810 | { |
||
5811 | #if defined(_XM_NO_INTRINSICS_) |
||
5812 | FLOAT dx, dy; |
||
5813 | dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]); |
||
5814 | dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]); |
||
5815 | return ((dx <= Epsilon.vector4_f32[0]) && |
||
5816 | (dy <= Epsilon.vector4_f32[1])); |
||
5817 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5818 | // Get the difference |
||
5819 | XMVECTOR vDelta = _mm_sub_ps(V1,V2); |
||
5820 | // Get the absolute value of the difference |
||
5821 | XMVECTOR vTemp = _mm_setzero_ps(); |
||
5822 | vTemp = _mm_sub_ps(vTemp,vDelta); |
||
5823 | vTemp = _mm_max_ps(vTemp,vDelta); |
||
5824 | vTemp = _mm_cmple_ps(vTemp,Epsilon); |
||
5825 | // z and w are don't care |
||
5826 | return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0); |
||
5827 | #else // _XM_VMX128_INTRINSICS_ |
||
5828 | #endif // _XM_VMX128_INTRINSICS_ |
||
5829 | } |
||
5830 | |||
5831 | //------------------------------------------------------------------------------ |
||
5832 | |||
5833 | XMFINLINE BOOL XMVector2NotEqual |
||
5834 | ( |
||
5835 | FXMVECTOR V1, |
||
5836 | FXMVECTOR V2 |
||
5837 | ) |
||
5838 | { |
||
5839 | #if defined(_XM_NO_INTRINSICS_) |
||
5840 | return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0); |
||
5841 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5842 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
5843 | // z and w are don't care |
||
5844 | return (((_mm_movemask_ps(vTemp)&3)!=3) != 0); |
||
5845 | #else // _XM_VMX128_INTRINSICS_ |
||
5846 | return XMComparisonAnyFalse(XMVector2EqualR(V1, V2)); |
||
5847 | #endif |
||
5848 | } |
||
5849 | |||
5850 | //------------------------------------------------------------------------------ |
||
5851 | |||
5852 | XMFINLINE BOOL XMVector2NotEqualInt |
||
5853 | ( |
||
5854 | FXMVECTOR V1, |
||
5855 | FXMVECTOR V2 |
||
5856 | ) |
||
5857 | { |
||
5858 | #if defined(_XM_NO_INTRINSICS_) |
||
5859 | return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0); |
||
5860 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5861 | __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]); |
||
5862 | return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0); |
||
5863 | #else // _XM_VMX128_INTRINSICS_ |
||
5864 | return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2)); |
||
5865 | #endif |
||
5866 | } |
||
5867 | |||
5868 | //------------------------------------------------------------------------------ |
||
5869 | |||
5870 | XMFINLINE BOOL XMVector2Greater |
||
5871 | ( |
||
5872 | FXMVECTOR V1, |
||
5873 | FXMVECTOR V2 |
||
5874 | ) |
||
5875 | { |
||
5876 | #if defined(_XM_NO_INTRINSICS_) |
||
5877 | return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0); |
||
5878 | |||
5879 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5880 | XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2); |
||
5881 | // z and w are don't care |
||
5882 | return (((_mm_movemask_ps(vTemp)&3)==3) != 0); |
||
5883 | #else // _XM_VMX128_INTRINSICS_ |
||
5884 | return XMComparisonAllTrue(XMVector2GreaterR(V1, V2)); |
||
5885 | #endif |
||
5886 | } |
||
5887 | |||
5888 | //------------------------------------------------------------------------------ |
||
5889 | |||
5890 | XMFINLINE UINT XMVector2GreaterR |
||
5891 | ( |
||
5892 | FXMVECTOR V1, |
||
5893 | FXMVECTOR V2 |
||
5894 | ) |
||
5895 | { |
||
5896 | #if defined(_XM_NO_INTRINSICS_) |
||
5897 | |||
5898 | UINT CR = 0; |
||
5899 | if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && |
||
5900 | (V1.vector4_f32[1] > V2.vector4_f32[1])) |
||
5901 | { |
||
5902 | CR = XM_CRMASK_CR6TRUE; |
||
5903 | } |
||
5904 | else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && |
||
5905 | (V1.vector4_f32[1] <= V2.vector4_f32[1])) |
||
5906 | { |
||
5907 | CR = XM_CRMASK_CR6FALSE; |
||
5908 | } |
||
5909 | return CR; |
||
5910 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5911 | XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2); |
||
5912 | int iTest = _mm_movemask_ps(vTemp)&3; |
||
5913 | UINT CR = 0; |
||
5914 | if (iTest==3) |
||
5915 | { |
||
5916 | CR = XM_CRMASK_CR6TRUE; |
||
5917 | } |
||
5918 | else if (!iTest) |
||
5919 | { |
||
5920 | CR = XM_CRMASK_CR6FALSE; |
||
5921 | } |
||
5922 | return CR; |
||
5923 | #else // _XM_VMX128_INTRINSICS_ |
||
5924 | #endif // _XM_VMX128_INTRINSICS_ |
||
5925 | } |
||
5926 | |||
5927 | //------------------------------------------------------------------------------ |
||
5928 | |||
5929 | XMFINLINE BOOL XMVector2GreaterOrEqual |
||
5930 | ( |
||
5931 | FXMVECTOR V1, |
||
5932 | FXMVECTOR V2 |
||
5933 | ) |
||
5934 | { |
||
5935 | #if defined(_XM_NO_INTRINSICS_) |
||
5936 | return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0); |
||
5937 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5938 | XMVECTOR vTemp = _mm_cmpge_ps(V1,V2); |
||
5939 | return (((_mm_movemask_ps(vTemp)&3)==3) != 0); |
||
5940 | #else // _XM_VMX128_INTRINSICS_ |
||
5941 | return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2)); |
||
5942 | #endif |
||
5943 | } |
||
5944 | |||
5945 | //------------------------------------------------------------------------------ |
||
5946 | |||
5947 | XMFINLINE UINT XMVector2GreaterOrEqualR |
||
5948 | ( |
||
5949 | FXMVECTOR V1, |
||
5950 | FXMVECTOR V2 |
||
5951 | ) |
||
5952 | { |
||
5953 | #if defined(_XM_NO_INTRINSICS_) |
||
5954 | UINT CR = 0; |
||
5955 | if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && |
||
5956 | (V1.vector4_f32[1] >= V2.vector4_f32[1])) |
||
5957 | { |
||
5958 | CR = XM_CRMASK_CR6TRUE; |
||
5959 | } |
||
5960 | else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && |
||
5961 | (V1.vector4_f32[1] < V2.vector4_f32[1])) |
||
5962 | { |
||
5963 | CR = XM_CRMASK_CR6FALSE; |
||
5964 | } |
||
5965 | return CR; |
||
5966 | |||
5967 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5968 | XMVECTOR vTemp = _mm_cmpge_ps(V1,V2); |
||
5969 | int iTest = _mm_movemask_ps(vTemp)&3; |
||
5970 | UINT CR = 0; |
||
5971 | if (iTest == 3) |
||
5972 | { |
||
5973 | CR = XM_CRMASK_CR6TRUE; |
||
5974 | } |
||
5975 | else if (!iTest) |
||
5976 | { |
||
5977 | CR = XM_CRMASK_CR6FALSE; |
||
5978 | } |
||
5979 | return CR; |
||
5980 | #else // _XM_VMX128_INTRINSICS_ |
||
5981 | #endif // _XM_VMX128_INTRINSICS_ |
||
5982 | } |
||
5983 | |||
5984 | //------------------------------------------------------------------------------ |
||
5985 | |||
5986 | XMFINLINE BOOL XMVector2Less |
||
5987 | ( |
||
5988 | FXMVECTOR V1, |
||
5989 | FXMVECTOR V2 |
||
5990 | ) |
||
5991 | { |
||
5992 | #if defined(_XM_NO_INTRINSICS_) |
||
5993 | return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0); |
||
5994 | #elif defined(_XM_SSE_INTRINSICS_) |
||
5995 | XMVECTOR vTemp = _mm_cmplt_ps(V1,V2); |
||
5996 | return (((_mm_movemask_ps(vTemp)&3)==3) != 0); |
||
5997 | #else // _XM_VMX128_INTRINSICS_ |
||
5998 | return XMComparisonAllTrue(XMVector2GreaterR(V2, V1)); |
||
5999 | #endif |
||
6000 | } |
||
6001 | |||
6002 | //------------------------------------------------------------------------------ |
||
6003 | |||
6004 | XMFINLINE BOOL XMVector2LessOrEqual |
||
6005 | ( |
||
6006 | FXMVECTOR V1, |
||
6007 | FXMVECTOR V2 |
||
6008 | ) |
||
6009 | { |
||
6010 | #if defined(_XM_NO_INTRINSICS_) |
||
6011 | return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0); |
||
6012 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6013 | XMVECTOR vTemp = _mm_cmple_ps(V1,V2); |
||
6014 | return (((_mm_movemask_ps(vTemp)&3)==3) != 0); |
||
6015 | #else // _XM_VMX128_INTRINSICS_ |
||
6016 | return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1)); |
||
6017 | #endif |
||
6018 | } |
||
6019 | |||
6020 | //------------------------------------------------------------------------------ |
||
6021 | |||
6022 | XMFINLINE BOOL XMVector2InBounds |
||
6023 | ( |
||
6024 | FXMVECTOR V, |
||
6025 | FXMVECTOR Bounds |
||
6026 | ) |
||
6027 | { |
||
6028 | #if defined(_XM_NO_INTRINSICS_) |
||
6029 | return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && |
||
6030 | (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0); |
||
6031 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6032 | // Test if less than or equal |
||
6033 | XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); |
||
6034 | // Negate the bounds |
||
6035 | XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); |
||
6036 | // Test if greater or equal (Reversed) |
||
6037 | vTemp2 = _mm_cmple_ps(vTemp2,V); |
||
6038 | // Blend answers |
||
6039 | vTemp1 = _mm_and_ps(vTemp1,vTemp2); |
||
6040 | // x and y in bounds? (z and w are don't care) |
||
6041 | return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0); |
||
6042 | #else // _XM_VMX128_INTRINSICS_ |
||
6043 | return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds)); |
||
6044 | #endif |
||
6045 | } |
||
6046 | |||
6047 | //------------------------------------------------------------------------------ |
||
6048 | |||
6049 | XMFINLINE UINT XMVector2InBoundsR |
||
6050 | ( |
||
6051 | FXMVECTOR V, |
||
6052 | FXMVECTOR Bounds |
||
6053 | ) |
||
6054 | { |
||
6055 | #if defined(_XM_NO_INTRINSICS_) |
||
6056 | UINT CR = 0; |
||
6057 | if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && |
||
6058 | (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) |
||
6059 | { |
||
6060 | CR = XM_CRMASK_CR6BOUNDS; |
||
6061 | } |
||
6062 | return CR; |
||
6063 | |||
6064 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6065 | // Test if less than or equal |
||
6066 | XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); |
||
6067 | // Negate the bounds |
||
6068 | XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); |
||
6069 | // Test if greater or equal (Reversed) |
||
6070 | vTemp2 = _mm_cmple_ps(vTemp2,V); |
||
6071 | // Blend answers |
||
6072 | vTemp1 = _mm_and_ps(vTemp1,vTemp2); |
||
6073 | // x and y in bounds? (z and w are don't care) |
||
6074 | return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0; |
||
6075 | #else // _XM_VMX128_INTRINSICS_ |
||
6076 | #endif // _XM_VMX128_INTRINSICS_ |
||
6077 | } |
||
6078 | |||
6079 | //------------------------------------------------------------------------------ |
||
6080 | |||
6081 | XMFINLINE BOOL XMVector2IsNaN |
||
6082 | ( |
||
6083 | FXMVECTOR V |
||
6084 | ) |
||
6085 | { |
||
6086 | #if defined(_XM_NO_INTRINSICS_) |
||
6087 | return (XMISNAN(V.vector4_f32[0]) || |
||
6088 | XMISNAN(V.vector4_f32[1])); |
||
6089 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6090 | // Mask off the exponent |
||
6091 | __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity); |
||
6092 | // Mask off the mantissa |
||
6093 | __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest); |
||
6094 | // Are any of the exponents == 0x7F800000? |
||
6095 | vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity); |
||
6096 | // Are any of the mantissa's zero? (SSE2 doesn't have a neq test) |
||
6097 | vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero); |
||
6098 | // Perform a not on the NaN test to be true on NON-zero mantissas |
||
6099 | vTempNan = _mm_andnot_si128(vTempNan,vTempInf); |
||
6100 | // If x or y are NaN, the signs are true after the merge above |
||
6101 | return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0); |
||
6102 | #else // _XM_VMX128_INTRINSICS_ |
||
6103 | #endif // _XM_VMX128_INTRINSICS_ |
||
6104 | } |
||
6105 | |||
6106 | //------------------------------------------------------------------------------ |
||
6107 | |||
6108 | XMFINLINE BOOL XMVector2IsInfinite |
||
6109 | ( |
||
6110 | FXMVECTOR V |
||
6111 | ) |
||
6112 | { |
||
6113 | #if defined(_XM_NO_INTRINSICS_) |
||
6114 | |||
6115 | return (XMISINF(V.vector4_f32[0]) || |
||
6116 | XMISINF(V.vector4_f32[1])); |
||
6117 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6118 | // Mask off the sign bit |
||
6119 | __m128 vTemp = _mm_and_ps(V,g_XMAbsMask); |
||
6120 | // Compare to infinity |
||
6121 | vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity); |
||
6122 | // If x or z are infinity, the signs are true. |
||
6123 | return ((_mm_movemask_ps(vTemp)&3) != 0); |
||
6124 | #else // _XM_VMX128_INTRINSICS_ |
||
6125 | #endif // _XM_VMX128_INTRINSICS_ |
||
6126 | } |
||
6127 | |||
6128 | //------------------------------------------------------------------------------ |
||
6129 | // Computation operations |
||
6130 | //------------------------------------------------------------------------------ |
||
6131 | |||
6132 | //------------------------------------------------------------------------------ |
||
6133 | |||
6134 | XMFINLINE XMVECTOR XMVector2Dot |
||
6135 | ( |
||
6136 | FXMVECTOR V1, |
||
6137 | FXMVECTOR V2 |
||
6138 | ) |
||
6139 | { |
||
6140 | #if defined(_XM_NO_INTRINSICS_) |
||
6141 | |||
6142 | XMVECTOR Result; |
||
6143 | |||
6144 | Result.vector4_f32[0] = |
||
6145 | Result.vector4_f32[1] = |
||
6146 | Result.vector4_f32[2] = |
||
6147 | Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1]; |
||
6148 | |||
6149 | return Result; |
||
6150 | |||
6151 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6152 | // Perform the dot product on x and y |
||
6153 | XMVECTOR vLengthSq = _mm_mul_ps(V1,V2); |
||
6154 | // vTemp has y splatted |
||
6155 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6156 | // x+y |
||
6157 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6158 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6159 | return vLengthSq; |
||
6160 | #else // _XM_VMX128_INTRINSICS_ |
||
6161 | #endif // _XM_VMX128_INTRINSICS_ |
||
6162 | } |
||
6163 | |||
6164 | //------------------------------------------------------------------------------ |
||
6165 | |||
6166 | XMFINLINE XMVECTOR XMVector2Cross |
||
6167 | ( |
||
6168 | FXMVECTOR V1, |
||
6169 | FXMVECTOR V2 |
||
6170 | ) |
||
6171 | { |
||
6172 | #if defined(_XM_NO_INTRINSICS_) |
||
6173 | FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]); |
||
6174 | XMVECTOR vResult = { |
||
6175 | fCross, |
||
6176 | fCross, |
||
6177 | fCross, |
||
6178 | fCross |
||
6179 | }; |
||
6180 | return vResult; |
||
6181 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6182 | // Swap x and y |
||
6183 | XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1)); |
||
6184 | // Perform the muls |
||
6185 | vResult = _mm_mul_ps(vResult,V1); |
||
6186 | // Splat y |
||
6187 | XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1)); |
||
6188 | // Sub the values |
||
6189 | vResult = _mm_sub_ss(vResult,vTemp); |
||
6190 | // Splat the cross product |
||
6191 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0)); |
||
6192 | return vResult; |
||
6193 | #else // _XM_VMX128_INTRINSICS_ |
||
6194 | #endif // _XM_VMX128_INTRINSICS_ |
||
6195 | } |
||
6196 | |||
6197 | //------------------------------------------------------------------------------ |
||
6198 | |||
6199 | XMFINLINE XMVECTOR XMVector2LengthSq |
||
6200 | ( |
||
6201 | FXMVECTOR V |
||
6202 | ) |
||
6203 | { |
||
6204 | #if defined(_XM_NO_INTRINSICS_) |
||
6205 | return XMVector2Dot(V, V); |
||
6206 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6207 | // Perform the dot product on x and y |
||
6208 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6209 | // vTemp has y splatted |
||
6210 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6211 | // x+y |
||
6212 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6213 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6214 | return vLengthSq; |
||
6215 | #else |
||
6216 | return XMVector2Dot(V, V); |
||
6217 | #endif |
||
6218 | } |
||
6219 | |||
6220 | //------------------------------------------------------------------------------ |
||
6221 | |||
6222 | XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst |
||
6223 | ( |
||
6224 | FXMVECTOR V |
||
6225 | ) |
||
6226 | { |
||
6227 | #if defined(_XM_NO_INTRINSICS_) |
||
6228 | |||
6229 | XMVECTOR Result; |
||
6230 | |||
6231 | Result = XMVector2LengthSq(V); |
||
6232 | Result = XMVectorReciprocalSqrtEst(Result); |
||
6233 | |||
6234 | return Result; |
||
6235 | |||
6236 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6237 | // Perform the dot product on x and y |
||
6238 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6239 | // vTemp has y splatted |
||
6240 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6241 | // x+y |
||
6242 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6243 | vLengthSq = _mm_rsqrt_ss(vLengthSq); |
||
6244 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6245 | return vLengthSq; |
||
6246 | #else // _XM_VMX128_INTRINSICS_ |
||
6247 | #endif // _XM_VMX128_INTRINSICS_ |
||
6248 | } |
||
6249 | |||
6250 | //------------------------------------------------------------------------------ |
||
6251 | |||
6252 | XMFINLINE XMVECTOR XMVector2ReciprocalLength |
||
6253 | ( |
||
6254 | FXMVECTOR V |
||
6255 | ) |
||
6256 | { |
||
6257 | #if defined(_XM_NO_INTRINSICS_) |
||
6258 | |||
6259 | XMVECTOR Result; |
||
6260 | |||
6261 | Result = XMVector2LengthSq(V); |
||
6262 | Result = XMVectorReciprocalSqrt(Result); |
||
6263 | |||
6264 | return Result; |
||
6265 | |||
6266 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6267 | // Perform the dot product on x and y |
||
6268 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6269 | // vTemp has y splatted |
||
6270 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6271 | // x+y |
||
6272 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6273 | vLengthSq = _mm_sqrt_ss(vLengthSq); |
||
6274 | vLengthSq = _mm_div_ss(g_XMOne,vLengthSq); |
||
6275 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6276 | return vLengthSq; |
||
6277 | #else // _XM_VMX128_INTRINSICS_ |
||
6278 | #endif // _XM_VMX128_INTRINSICS_ |
||
6279 | } |
||
6280 | |||
6281 | //------------------------------------------------------------------------------ |
||
6282 | |||
6283 | XMFINLINE XMVECTOR XMVector2LengthEst |
||
6284 | ( |
||
6285 | FXMVECTOR V |
||
6286 | ) |
||
6287 | { |
||
6288 | #if defined(_XM_NO_INTRINSICS_) |
||
6289 | XMVECTOR Result; |
||
6290 | Result = XMVector2LengthSq(V); |
||
6291 | Result = XMVectorSqrtEst(Result); |
||
6292 | return Result; |
||
6293 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6294 | // Perform the dot product on x and y |
||
6295 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6296 | // vTemp has y splatted |
||
6297 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6298 | // x+y |
||
6299 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6300 | vLengthSq = _mm_sqrt_ss(vLengthSq); |
||
6301 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6302 | return vLengthSq; |
||
6303 | #else // _XM_VMX128_INTRINSICS_ |
||
6304 | #endif // _XM_VMX128_INTRINSICS_ |
||
6305 | } |
||
6306 | |||
6307 | //------------------------------------------------------------------------------ |
||
6308 | |||
6309 | XMFINLINE XMVECTOR XMVector2Length |
||
6310 | ( |
||
6311 | FXMVECTOR V |
||
6312 | ) |
||
6313 | { |
||
6314 | #if defined(_XM_NO_INTRINSICS_) |
||
6315 | |||
6316 | XMVECTOR Result; |
||
6317 | Result = XMVector2LengthSq(V); |
||
6318 | Result = XMVectorSqrt(Result); |
||
6319 | return Result; |
||
6320 | |||
6321 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6322 | // Perform the dot product on x and y |
||
6323 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6324 | // vTemp has y splatted |
||
6325 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6326 | // x+y |
||
6327 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6328 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6329 | vLengthSq = _mm_sqrt_ps(vLengthSq); |
||
6330 | return vLengthSq; |
||
6331 | #else // _XM_VMX128_INTRINSICS_ |
||
6332 | #endif // _XM_VMX128_INTRINSICS_ |
||
6333 | } |
||
6334 | |||
6335 | //------------------------------------------------------------------------------ |
||
6336 | // XMVector2NormalizeEst uses a reciprocal estimate and |
||
6337 | // returns QNaN on zero and infinite vectors. |
||
6338 | |||
6339 | XMFINLINE XMVECTOR XMVector2NormalizeEst |
||
6340 | ( |
||
6341 | FXMVECTOR V |
||
6342 | ) |
||
6343 | { |
||
6344 | #if defined(_XM_NO_INTRINSICS_) |
||
6345 | |||
6346 | XMVECTOR Result; |
||
6347 | Result = XMVector2ReciprocalLength(V); |
||
6348 | Result = XMVectorMultiply(V, Result); |
||
6349 | return Result; |
||
6350 | |||
6351 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6352 | // Perform the dot product on x and y |
||
6353 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6354 | // vTemp has y splatted |
||
6355 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6356 | // x+y |
||
6357 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6358 | vLengthSq = _mm_rsqrt_ss(vLengthSq); |
||
6359 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6360 | vLengthSq = _mm_mul_ps(vLengthSq,V); |
||
6361 | return vLengthSq; |
||
6362 | #else // _XM_VMX128_INTRINSICS_ |
||
6363 | #endif // _XM_VMX128_INTRINSICS_ |
||
6364 | } |
||
6365 | |||
6366 | //------------------------------------------------------------------------------ |
||
6367 | |||
6368 | XMFINLINE XMVECTOR XMVector2Normalize |
||
6369 | ( |
||
6370 | FXMVECTOR V |
||
6371 | ) |
||
6372 | { |
||
6373 | #if defined(_XM_NO_INTRINSICS_) |
||
6374 | |||
6375 | XMVECTOR LengthSq; |
||
6376 | XMVECTOR Zero; |
||
6377 | XMVECTOR InfiniteLength; |
||
6378 | XMVECTOR ZeroLength; |
||
6379 | XMVECTOR Select; |
||
6380 | XMVECTOR Result; |
||
6381 | |||
6382 | LengthSq = XMVector2LengthSq(V); |
||
6383 | Zero = XMVectorZero(); |
||
6384 | Result = XMVectorReciprocalSqrt(LengthSq); |
||
6385 | InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); |
||
6386 | ZeroLength = XMVectorEqual(LengthSq, Zero); |
||
6387 | Result = XMVectorMultiply(V, Result); |
||
6388 | Select = XMVectorEqualInt(InfiniteLength, ZeroLength); |
||
6389 | Result = XMVectorSelect(LengthSq, Result, Select); |
||
6390 | |||
6391 | return Result; |
||
6392 | |||
6393 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6394 | // Perform the dot product on x and y only |
||
6395 | XMVECTOR vLengthSq = _mm_mul_ps(V,V); |
||
6396 | XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1)); |
||
6397 | vLengthSq = _mm_add_ss(vLengthSq,vTemp); |
||
6398 | vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0)); |
||
6399 | // Prepare for the division |
||
6400 | XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); |
||
6401 | // Failsafe on zero (Or epsilon) length planes |
||
6402 | // If the length is infinity, set the elements to zero |
||
6403 | vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); |
||
6404 | // Reciprocal mul to perform the normalization |
||
6405 | vResult = _mm_div_ps(V,vResult); |
||
6406 | // Any that are infinity, set to zero |
||
6407 | vResult = _mm_and_ps(vResult,vLengthSq); |
||
6408 | return vResult; |
||
6409 | #else // _XM_VMX128_INTRINSICS_ |
||
6410 | #endif // _XM_VMX128_INTRINSICS_ |
||
6411 | } |
||
6412 | |||
6413 | //------------------------------------------------------------------------------ |
||
6414 | |||
6415 | XMFINLINE XMVECTOR XMVector2ClampLength |
||
6416 | ( |
||
6417 | FXMVECTOR V, |
||
6418 | FLOAT LengthMin, |
||
6419 | FLOAT LengthMax |
||
6420 | ) |
||
6421 | { |
||
6422 | #if defined(_XM_NO_INTRINSICS_) |
||
6423 | |||
6424 | XMVECTOR ClampMax; |
||
6425 | XMVECTOR ClampMin; |
||
6426 | |||
6427 | ClampMax = XMVectorReplicate(LengthMax); |
||
6428 | ClampMin = XMVectorReplicate(LengthMin); |
||
6429 | |||
6430 | return XMVector2ClampLengthV(V, ClampMin, ClampMax); |
||
6431 | |||
6432 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6433 | XMVECTOR ClampMax = _mm_set_ps1(LengthMax); |
||
6434 | XMVECTOR ClampMin = _mm_set_ps1(LengthMin); |
||
6435 | return XMVector2ClampLengthV(V, ClampMin, ClampMax); |
||
6436 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
6437 | #endif // _XM_VMX128_INTRINSICS_ |
||
6438 | } |
||
6439 | |||
6440 | //------------------------------------------------------------------------------ |
||
6441 | |||
//------------------------------------------------------------------------------

// Clamp the length of 2D vector V so it lies within [LengthMin, LengthMax].
// LengthMin and LengthMax are splatted vector bounds: their x and y lanes
// must be equal, non-negative, and ordered LengthMax >= LengthMin (all
// enforced by the asserts below). If the length is already in range, the
// original vector is returned unchanged (no precision loss).
XMFINLINE XMVECTOR XMVector2ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // Bounds must be splatted scalars in x/y, non-negative, and ordered.
    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector2LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    // Flag the two special cases (they are mutually exclusive).
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    // Length = LengthSq * (1 / sqrt(LengthSq)) == sqrt(LengthSq)
    Length = XMVectorMultiply(LengthSq, RcpLength);

    Normal = XMVectorMultiply(V, RcpLength);

    // Select mask is all-ones only when neither special case is set; for a
    // zero or infinite length, fall back to LengthSq (0 or +infinity) so the
    // reciprocal-sqrt garbage never propagates.
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    // Per-lane masks: length above the max, length below the min.
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    // Rebuild the vector at the clamped length.
    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    // (both masks false => equal => select V).
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // Same preconditions as the scalar path.
    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
    // sqrt(LengthSq) via LengthSq * rsqrt(LengthSq)
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Normal = _mm_mul_ps(V, RcpLength);
    // Fall back to LengthSq when the length is zero or infinite (see scalar path).
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
6542 | |||
6543 | //------------------------------------------------------------------------------ |
||
6544 | |||
6545 | XMFINLINE XMVECTOR XMVector2Reflect |
||
6546 | ( |
||
6547 | FXMVECTOR Incident, |
||
6548 | FXMVECTOR Normal |
||
6549 | ) |
||
6550 | { |
||
6551 | #if defined(_XM_NO_INTRINSICS_) |
||
6552 | |||
6553 | XMVECTOR Result; |
||
6554 | |||
6555 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
6556 | Result = XMVector2Dot(Incident, Normal); |
||
6557 | Result = XMVectorAdd(Result, Result); |
||
6558 | Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); |
||
6559 | |||
6560 | return Result; |
||
6561 | |||
6562 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6563 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
6564 | XMVECTOR Result = XMVector2Dot(Incident,Normal); |
||
6565 | Result = _mm_add_ps(Result, Result); |
||
6566 | Result = _mm_mul_ps(Result, Normal); |
||
6567 | Result = _mm_sub_ps(Incident,Result); |
||
6568 | return Result; |
||
6569 | #else // _XM_VMX128_INTRINSICS_ |
||
6570 | #endif // _XM_VMX128_INTRINSICS_ |
||
6571 | } |
||
6572 | |||
6573 | //------------------------------------------------------------------------------ |
||
6574 | |||
6575 | XMFINLINE XMVECTOR XMVector2Refract |
||
6576 | ( |
||
6577 | FXMVECTOR Incident, |
||
6578 | FXMVECTOR Normal, |
||
6579 | FLOAT RefractionIndex |
||
6580 | ) |
||
6581 | { |
||
6582 | #if defined(_XM_NO_INTRINSICS_) |
||
6583 | XMVECTOR Index; |
||
6584 | Index = XMVectorReplicate(RefractionIndex); |
||
6585 | return XMVector2RefractV(Incident, Normal, Index); |
||
6586 | |||
6587 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6588 | XMVECTOR Index = _mm_set_ps1(RefractionIndex); |
||
6589 | return XMVector2RefractV(Incident,Normal,Index); |
||
6590 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
6591 | #endif // _XM_VMX128_INTRINSICS_ |
||
6592 | } |
||
6593 | |||
6594 | //------------------------------------------------------------------------------ |
||
6595 | |||
6596 | // Return the refraction of a 2D vector |
||
// Return the refraction of a 2D vector
// Computes, per lane i in {x, y}:
//   Result = RefractionIndex * Incident
//          - Normal * (RefractionIndex * dot2(I,N) + sqrt(1 - RI^2 * (1 - dot2(I,N)^2)))
// and returns zero in lanes where the radicand is negative (total internal
// reflection). RefractionIndex is a per-lane vector; z and w of the result
// are zeroed in the scalar path.
XMFINLINE XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    float IDotN;
    float RX,RY;
    XMVECTOR vResult;
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    // RY temporarily holds (1 - IDotN^2), shared by both lanes.
    RY = 1.0f-(IDotN*IDotN);
    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    // Negative radicand => total internal reflection => zero that lane.
    // NOTE(review): this path refracts when the radicand is exactly 0, while
    // the SSE path below (_mm_cmpgt_ps) zeroes it — confirm which is intended.
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
    IDotN = _mm_add_ss(IDotN,vTemp);
    // Splat the dot product into all four lanes.
    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any terms are <=0, sqrt() will fail, punt to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    // Zero out lanes that failed the radicand test (total internal reflection).
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
6659 | |||
6660 | //------------------------------------------------------------------------------ |
||
6661 | |||
6662 | XMFINLINE XMVECTOR XMVector2Orthogonal |
||
6663 | ( |
||
6664 | FXMVECTOR V |
||
6665 | ) |
||
6666 | { |
||
6667 | #if defined(_XM_NO_INTRINSICS_) |
||
6668 | |||
6669 | XMVECTOR Result; |
||
6670 | |||
6671 | Result.vector4_f32[0] = -V.vector4_f32[1]; |
||
6672 | Result.vector4_f32[1] = V.vector4_f32[0]; |
||
6673 | |||
6674 | return Result; |
||
6675 | |||
6676 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6677 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1)); |
||
6678 | vResult = _mm_mul_ps(vResult,g_XMNegateX); |
||
6679 | return vResult; |
||
6680 | #else // _XM_VMX128_INTRINSICS_ |
||
6681 | #endif // _XM_VMX128_INTRINSICS_ |
||
6682 | } |
||
6683 | |||
6684 | //------------------------------------------------------------------------------ |
||
6685 | |||
6686 | XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst |
||
6687 | ( |
||
6688 | FXMVECTOR N1, |
||
6689 | FXMVECTOR N2 |
||
6690 | ) |
||
6691 | { |
||
6692 | #if defined(_XM_NO_INTRINSICS_) |
||
6693 | |||
6694 | XMVECTOR NegativeOne; |
||
6695 | XMVECTOR One; |
||
6696 | XMVECTOR Result; |
||
6697 | |||
6698 | Result = XMVector2Dot(N1, N2); |
||
6699 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
6700 | One = XMVectorSplatOne(); |
||
6701 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
6702 | Result = XMVectorACosEst(Result); |
||
6703 | |||
6704 | return Result; |
||
6705 | |||
6706 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6707 | XMVECTOR vResult = XMVector2Dot(N1,N2); |
||
6708 | // Clamp to -1.0f to 1.0f |
||
6709 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
6710 | vResult = _mm_min_ps(vResult,g_XMOne);; |
||
6711 | vResult = XMVectorACosEst(vResult); |
||
6712 | return vResult; |
||
6713 | #else // _XM_VMX128_INTRINSICS_ |
||
6714 | #endif // _XM_VMX128_INTRINSICS_ |
||
6715 | } |
||
6716 | |||
6717 | //------------------------------------------------------------------------------ |
||
6718 | |||
6719 | XMFINLINE XMVECTOR XMVector2AngleBetweenNormals |
||
6720 | ( |
||
6721 | FXMVECTOR N1, |
||
6722 | FXMVECTOR N2 |
||
6723 | ) |
||
6724 | { |
||
6725 | #if defined(_XM_NO_INTRINSICS_) |
||
6726 | |||
6727 | XMVECTOR NegativeOne; |
||
6728 | XMVECTOR One; |
||
6729 | XMVECTOR Result; |
||
6730 | |||
6731 | Result = XMVector2Dot(N1, N2); |
||
6732 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
6733 | One = XMVectorSplatOne(); |
||
6734 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
6735 | Result = XMVectorACos(Result); |
||
6736 | |||
6737 | return Result; |
||
6738 | |||
6739 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6740 | XMVECTOR vResult = XMVector2Dot(N1,N2); |
||
6741 | // Clamp to -1.0f to 1.0f |
||
6742 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
6743 | vResult = _mm_min_ps(vResult,g_XMOne);; |
||
6744 | vResult = XMVectorACos(vResult); |
||
6745 | return vResult; |
||
6746 | #else // _XM_VMX128_INTRINSICS_ |
||
6747 | #endif // _XM_VMX128_INTRINSICS_ |
||
6748 | } |
||
6749 | |||
6750 | //------------------------------------------------------------------------------ |
||
6751 | |||
6752 | XMFINLINE XMVECTOR XMVector2AngleBetweenVectors |
||
6753 | ( |
||
6754 | FXMVECTOR V1, |
||
6755 | FXMVECTOR V2 |
||
6756 | ) |
||
6757 | { |
||
6758 | #if defined(_XM_NO_INTRINSICS_) |
||
6759 | |||
6760 | XMVECTOR L1; |
||
6761 | XMVECTOR L2; |
||
6762 | XMVECTOR Dot; |
||
6763 | XMVECTOR CosAngle; |
||
6764 | XMVECTOR NegativeOne; |
||
6765 | XMVECTOR One; |
||
6766 | XMVECTOR Result; |
||
6767 | |||
6768 | L1 = XMVector2ReciprocalLength(V1); |
||
6769 | L2 = XMVector2ReciprocalLength(V2); |
||
6770 | |||
6771 | Dot = XMVector2Dot(V1, V2); |
||
6772 | |||
6773 | L1 = XMVectorMultiply(L1, L2); |
||
6774 | |||
6775 | CosAngle = XMVectorMultiply(Dot, L1); |
||
6776 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
6777 | One = XMVectorSplatOne(); |
||
6778 | CosAngle = XMVectorClamp(CosAngle, NegativeOne, One); |
||
6779 | |||
6780 | Result = XMVectorACos(CosAngle); |
||
6781 | |||
6782 | return Result; |
||
6783 | |||
6784 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6785 | XMVECTOR L1; |
||
6786 | XMVECTOR L2; |
||
6787 | XMVECTOR Dot; |
||
6788 | XMVECTOR CosAngle; |
||
6789 | XMVECTOR Result; |
||
6790 | L1 = XMVector2ReciprocalLength(V1); |
||
6791 | L2 = XMVector2ReciprocalLength(V2); |
||
6792 | Dot = XMVector2Dot(V1, V2); |
||
6793 | L1 = _mm_mul_ps(L1, L2); |
||
6794 | CosAngle = _mm_mul_ps(Dot, L1); |
||
6795 | CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne); |
||
6796 | Result = XMVectorACos(CosAngle); |
||
6797 | return Result; |
||
6798 | #else // _XM_VMX128_INTRINSICS_ |
||
6799 | #endif // _XM_VMX128_INTRINSICS_ |
||
6800 | } |
||
6801 | |||
6802 | //------------------------------------------------------------------------------ |
||
6803 | |||
//------------------------------------------------------------------------------

// Return the (splatted) distance from Point to the infinite line through
// LinePoint1 and LinePoint2, computed by subtracting Point's projection onto
// the line and taking the length of the remainder.
// NOTE(review): if LinePoint1 == LinePoint2 the length-squared is zero and
// the division/reciprocal below divides by zero — callers must pass two
// distinct line points.
XMFINLINE XMVECTOR XMVector2LinePointDistance
(
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2,
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);

    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);

    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);

    // DistanceVector = PointVector minus its projection onto the line,
    // i.e. the perpendicular component.
    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);

    Result = XMVector2Length(DistanceVector);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    // Despite the name, this path divides by the length-squared directly
    // rather than multiplying by a reciprocal.
    XMVECTOR ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,ReciprocalLengthSq);
    // Perpendicular component = PointVector - projection
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector2Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
6856 | |||
6857 | //------------------------------------------------------------------------------ |
||
6858 | |||
//------------------------------------------------------------------------------

// Return the intersection point of two infinite 2D lines, each given by two
// points. If the lines are coincident the result is +infinity in every lane;
// if they are parallel but distinct the result is QNaN.
XMFINLINE XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1,
    FXMVECTOR Line1Point2,
    FXMVECTOR Line2Point1,
    CXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR V3;
    XMVECTOR C1;
    XMVECTOR C2;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();

    // V1/V2 are the line directions; V3 joins the two lines' base points.
    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    V3 = XMVectorSubtract(Line1Point1, Line2Point1);

    // C1 == 0 => the lines are parallel; C2 == 0 => Line1Point1 lies on line 2.
    C1 = XMVector2Cross(V1, V2);
    C2 = XMVector2Cross(V2, V3);

    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale;
        Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    // (abs(C1) computed branchlessly as max(-C1, C1))
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
    // (abs(C2) via the same negate/max trick)
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    // abs(C2) <= epsilon => coincident => INFINITY, else parallel => QNaN
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
6942 | |||
6943 | //------------------------------------------------------------------------------ |
||
6944 | |||
6945 | XMFINLINE XMVECTOR XMVector2Transform |
||
6946 | ( |
||
6947 | FXMVECTOR V, |
||
6948 | CXMMATRIX M |
||
6949 | ) |
||
6950 | { |
||
6951 | #if defined(_XM_NO_INTRINSICS_) |
||
6952 | |||
6953 | XMVECTOR X; |
||
6954 | XMVECTOR Y; |
||
6955 | XMVECTOR Result; |
||
6956 | |||
6957 | Y = XMVectorSplatY(V); |
||
6958 | X = XMVectorSplatX(V); |
||
6959 | |||
6960 | Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); |
||
6961 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
6962 | |||
6963 | return Result; |
||
6964 | |||
6965 | #elif defined(_XM_SSE_INTRINSICS_) |
||
6966 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
6967 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
6968 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
6969 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
6970 | vResult = _mm_add_ps(vResult,vTemp); |
||
6971 | vResult = _mm_add_ps(vResult,M.r[3]); |
||
6972 | return vResult; |
||
6973 | #else // _XM_VMX128_INTRINSICS_ |
||
6974 | #endif // _XM_VMX128_INTRINSICS_ |
||
6975 | } |
||
6976 | |||
6977 | //------------------------------------------------------------------------------ |
||
6978 | |||
//------------------------------------------------------------------------------

// Transform a stream of 2D vectors (as points, implicit z = 0, w = 1) by
// matrix M, writing 4-component results.
//   pOutputStream - destination XMFLOAT4 array (returned)
//   OutputStride  - bytes between consecutive output elements
//   pInputStream  - source XMFLOAT2 array
//   InputStride   - bytes between consecutive input elements
//   VectorCount   - number of vectors to transform
// Input and output may use arbitrary (including overlapping-free unaligned)
// strides; each element is processed independently.
XMINLINE XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    // Byte pointers so the user-supplied strides can be applied directly.
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        // Result = X * M.r[0] + Y * M.r[1] + M.r[3]
        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        // Broadcast-load x and y; scalar loads tolerate unaligned input.
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        // Unaligned 16-byte store: output elements need not be aligned.
        _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
7044 | |||
7045 | //------------------------------------------------------------------------------ |
||
7046 | |||
7047 | XMINLINE XMFLOAT4* XMVector2TransformStreamNC |
||
7048 | ( |
||
7049 | XMFLOAT4* pOutputStream, |
||
7050 | UINT OutputStride, |
||
7051 | CONST XMFLOAT2* pInputStream, |
||
7052 | UINT InputStride, |
||
7053 | UINT VectorCount, |
||
7054 | CXMMATRIX M |
||
7055 | ) |
||
7056 | { |
||
7057 | #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_) |
||
7058 | return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M ); |
||
7059 | #else // _XM_VMX128_INTRINSICS_ |
||
7060 | #endif // _XM_VMX128_INTRINSICS_ |
||
7061 | } |
||
7062 | |||
7063 | //------------------------------------------------------------------------------ |
||
7064 | |||
7065 | XMFINLINE XMVECTOR XMVector2TransformCoord |
||
7066 | ( |
||
7067 | FXMVECTOR V, |
||
7068 | CXMMATRIX M |
||
7069 | ) |
||
7070 | { |
||
7071 | #if defined(_XM_NO_INTRINSICS_) |
||
7072 | |||
7073 | XMVECTOR X; |
||
7074 | XMVECTOR Y; |
||
7075 | XMVECTOR InverseW; |
||
7076 | XMVECTOR Result; |
||
7077 | |||
7078 | Y = XMVectorSplatY(V); |
||
7079 | X = XMVectorSplatX(V); |
||
7080 | |||
7081 | Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); |
||
7082 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
7083 | |||
7084 | InverseW = XMVectorSplatW(Result); |
||
7085 | InverseW = XMVectorReciprocal(InverseW); |
||
7086 | |||
7087 | Result = XMVectorMultiply(Result, InverseW); |
||
7088 | |||
7089 | return Result; |
||
7090 | |||
7091 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7092 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
7093 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
7094 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
7095 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
7096 | vResult = _mm_add_ps(vResult,vTemp); |
||
7097 | vResult = _mm_add_ps(vResult,M.r[3]); |
||
7098 | vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); |
||
7099 | vResult = _mm_div_ps(vResult,vTemp); |
||
7100 | return vResult; |
||
7101 | #else // _XM_VMX128_INTRINSICS_ |
||
7102 | #endif // _XM_VMX128_INTRINSICS_ |
||
7103 | } |
||
7104 | |||
7105 | //------------------------------------------------------------------------------ |
||
7106 | |||
//------------------------------------------------------------------------------

// Transform a stream of 2D points by matrix M (implicit z = 0, w = 1),
// divide each result by its w, and write back the projected x,y pair.
//   pOutputStream - destination XMFLOAT2 array (returned)
//   OutputStride  - bytes between consecutive output elements
//   pInputStream  - source XMFLOAT2 array
//   InputStride   - bytes between consecutive input elements
//   VectorCount   - number of points to transform
XMINLINE XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT i;
    // Byte pointers so the user-supplied strides can be applied directly.
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        // Result = X * M.r[0] + Y * M.r[1] + M.r[3]
        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        // Homogeneous divide: scale by 1/w.
        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);

        Result = XMVectorMultiply(Result, InverseW);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        // Broadcast-load x and y; scalar loads tolerate unaligned input.
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        // Splat w and divide (this path uses a true divide, not a reciprocal
        // estimate like the scalar path's XMVectorReciprocal).
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        // Store only the low 8 bytes (x,y) by reinterpreting as a double.
        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
7180 | |||
7181 | //------------------------------------------------------------------------------ |
||
7182 | |||
7183 | XMFINLINE XMVECTOR XMVector2TransformNormal |
||
7184 | ( |
||
7185 | FXMVECTOR V, |
||
7186 | CXMMATRIX M |
||
7187 | ) |
||
7188 | { |
||
7189 | #if defined(_XM_NO_INTRINSICS_) |
||
7190 | |||
7191 | XMVECTOR X; |
||
7192 | XMVECTOR Y; |
||
7193 | XMVECTOR Result; |
||
7194 | |||
7195 | Y = XMVectorSplatY(V); |
||
7196 | X = XMVectorSplatX(V); |
||
7197 | |||
7198 | Result = XMVectorMultiply(Y, M.r[1]); |
||
7199 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
7200 | |||
7201 | return Result; |
||
7202 | |||
7203 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7204 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
7205 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
7206 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
7207 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
7208 | vResult = _mm_add_ps(vResult,vTemp); |
||
7209 | return vResult; |
||
7210 | #else // _XM_VMX128_INTRINSICS_ |
||
7211 | #endif // _XM_VMX128_INTRINSICS_ |
||
7212 | } |
||
7213 | |||
7214 | //------------------------------------------------------------------------------ |
||
7215 | |||
//------------------------------------------------------------------------------

// Transform a stream of 2D directions by matrix M (implicit z = 0, w = 0;
// the translation row is ignored), writing back x,y pairs.
//   pOutputStream - destination XMFLOAT2 array (returned)
//   OutputStride  - bytes between consecutive output elements
//   pInputStream  - source XMFLOAT2 array
//   InputStride   - bytes between consecutive input elements
//   VectorCount   - number of vectors to transform
XMINLINE XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    // Byte pointers so the user-supplied strides can be applied directly.
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);

        // Result = X * M.r[0] + Y * M.r[1]  (no translation term)
        Result = XMVectorMultiply(Y, M.r[1]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE*pInputVector = (const BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        // Broadcast-load x and y; scalar loads tolerate unaligned input.
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        // Store only the low 8 bytes (x,y) by reinterpreting as a double.
        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
7281 | |||
7282 | /**************************************************************************** |
||
7283 | * |
||
7284 | * 3D Vector |
||
7285 | * |
||
7286 | ****************************************************************************/ |
||
7287 | |||
7288 | //------------------------------------------------------------------------------ |
||
7289 | // Comparison operations |
||
7290 | //------------------------------------------------------------------------------ |
||
7291 | |||
7292 | //------------------------------------------------------------------------------ |
||
7293 | |||
7294 | XMFINLINE BOOL XMVector3Equal |
||
7295 | ( |
||
7296 | FXMVECTOR V1, |
||
7297 | FXMVECTOR V2 |
||
7298 | ) |
||
7299 | { |
||
7300 | #if defined(_XM_NO_INTRINSICS_) |
||
7301 | return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0); |
||
7302 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7303 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
7304 | return (((_mm_movemask_ps(vTemp)&7)==7) != 0); |
||
7305 | #else // _XM_VMX128_INTRINSICS_ |
||
7306 | return XMComparisonAllTrue(XMVector3EqualR(V1, V2)); |
||
7307 | #endif |
||
7308 | } |
||
7309 | |||
7310 | //------------------------------------------------------------------------------ |
||
7311 | |||
// Compare the x, y and z components of V1 and V2 and return a CR6-style
// record: XM_CRMASK_CR6TRUE when all three are equal, XM_CRMASK_CR6FALSE
// when all three differ, and 0 for a mixed result. w is ignored.
XMFINLINE UINT XMVector3EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    // Low three movemask bits hold the per-component equality results
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&7;
    UINT CR = 0;
    if (iTest==7)
    {
        // All three equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // None equal
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7349 | |||
7350 | //------------------------------------------------------------------------------ |
||
7351 | |||
// Return TRUE when the x, y and z components of V1 and V2 are bitwise
// (integer) equal; w is ignored. Unlike XMVector3Equal this also treats
// identical NaN bit patterns as equal.
XMFINLINE BOOL XMVector3EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Reinterpret as integer lanes for a bit-exact comparison
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    // All of the low three sign bits must be set
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}
||
7367 | |||
7368 | //------------------------------------------------------------------------------ |
||
7369 | |||
// Bitwise-compare the x, y and z components of V1 and V2 and return a
// CR6-style record: XM_CRMASK_CR6TRUE when all three match exactly,
// XM_CRMASK_CR6FALSE when all three differ, 0 for a mixed result.
XMFINLINE UINT XMVector3EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    // Integer (bit-exact) comparison of all four lanes; w is masked off below
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
    UINT CR = 0;
    if (iTemp==7)
    {
        // All three lanes match
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        // No lane matches
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7407 | |||
7408 | //------------------------------------------------------------------------------ |
||
7409 | |||
// Return TRUE when |V1 - V2| <= Epsilon component-wise for x, y and z
// (w is ignored). Epsilon supplies a per-component tolerance.
XMFINLINE BOOL XMVector3NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference: abs(d) = max(-d, d)
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7439 | |||
7440 | //------------------------------------------------------------------------------ |
||
7441 | |||
7442 | XMFINLINE BOOL XMVector3NotEqual |
||
7443 | ( |
||
7444 | FXMVECTOR V1, |
||
7445 | FXMVECTOR V2 |
||
7446 | ) |
||
7447 | { |
||
7448 | #if defined(_XM_NO_INTRINSICS_) |
||
7449 | return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0); |
||
7450 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7451 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
7452 | return (((_mm_movemask_ps(vTemp)&7)!=7) != 0); |
||
7453 | #else // _XM_VMX128_INTRINSICS_ |
||
7454 | return XMComparisonAnyFalse(XMVector3EqualR(V1, V2)); |
||
7455 | #endif |
||
7456 | } |
||
7457 | |||
7458 | //------------------------------------------------------------------------------ |
||
7459 | |||
// Return TRUE when the x, y or z components of V1 and V2 differ in their
// bit patterns (integer comparison); w is ignored.
XMFINLINE BOOL XMVector3NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Bit-exact equality of all lanes, then require that any of the low
    // three mask bits is clear
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}
||
7475 | |||
7476 | //------------------------------------------------------------------------------ |
||
7477 | |||
7478 | XMFINLINE BOOL XMVector3Greater |
||
7479 | ( |
||
7480 | FXMVECTOR V1, |
||
7481 | FXMVECTOR V2 |
||
7482 | ) |
||
7483 | { |
||
7484 | #if defined(_XM_NO_INTRINSICS_) |
||
7485 | return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0); |
||
7486 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7487 | XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2); |
||
7488 | return (((_mm_movemask_ps(vTemp)&7)==7) != 0); |
||
7489 | #else // _XM_VMX128_INTRINSICS_ |
||
7490 | return XMComparisonAllTrue(XMVector3GreaterR(V1, V2)); |
||
7491 | #endif |
||
7492 | } |
||
7493 | |||
7494 | //------------------------------------------------------------------------------ |
||
7495 | |||
// Compare the x, y and z components of V1 and V2 and return a CR6-style
// record: XM_CRMASK_CR6TRUE when V1 > V2 for all three components,
// XM_CRMASK_CR6FALSE when V1 <= V2 for all three, and 0 otherwise.
XMFINLINE UINT XMVector3GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Low three movemask bits hold the per-component greater-than results
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        // All three strictly greater
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // None greater
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7534 | |||
7535 | //------------------------------------------------------------------------------ |
||
7536 | |||
7537 | XMFINLINE BOOL XMVector3GreaterOrEqual |
||
7538 | ( |
||
7539 | FXMVECTOR V1, |
||
7540 | FXMVECTOR V2 |
||
7541 | ) |
||
7542 | { |
||
7543 | #if defined(_XM_NO_INTRINSICS_) |
||
7544 | return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0); |
||
7545 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7546 | XMVECTOR vTemp = _mm_cmpge_ps(V1,V2); |
||
7547 | return (((_mm_movemask_ps(vTemp)&7)==7) != 0); |
||
7548 | #else // _XM_VMX128_INTRINSICS_ |
||
7549 | return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2)); |
||
7550 | #endif |
||
7551 | } |
||
7552 | |||
7553 | //------------------------------------------------------------------------------ |
||
7554 | |||
// Compare the x, y and z components of V1 and V2 and return a CR6-style
// record: XM_CRMASK_CR6TRUE when V1 >= V2 for all three components,
// XM_CRMASK_CR6FALSE when V1 < V2 for all three, and 0 otherwise.
XMFINLINE UINT XMVector3GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Low three movemask bits hold the per-component >= results
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        // All three greater-or-equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // None greater-or-equal
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7594 | |||
7595 | //------------------------------------------------------------------------------ |
||
7596 | |||
7597 | XMFINLINE BOOL XMVector3Less |
||
7598 | ( |
||
7599 | FXMVECTOR V1, |
||
7600 | FXMVECTOR V2 |
||
7601 | ) |
||
7602 | { |
||
7603 | #if defined(_XM_NO_INTRINSICS_) |
||
7604 | return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0); |
||
7605 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7606 | XMVECTOR vTemp = _mm_cmplt_ps(V1,V2); |
||
7607 | return (((_mm_movemask_ps(vTemp)&7)==7) != 0); |
||
7608 | #else // _XM_VMX128_INTRINSICS_ |
||
7609 | return XMComparisonAllTrue(XMVector3GreaterR(V2, V1)); |
||
7610 | #endif |
||
7611 | } |
||
7612 | |||
7613 | //------------------------------------------------------------------------------ |
||
7614 | |||
7615 | XMFINLINE BOOL XMVector3LessOrEqual |
||
7616 | ( |
||
7617 | FXMVECTOR V1, |
||
7618 | FXMVECTOR V2 |
||
7619 | ) |
||
7620 | { |
||
7621 | #if defined(_XM_NO_INTRINSICS_) |
||
7622 | return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0); |
||
7623 | #elif defined(_XM_SSE_INTRINSICS_) |
||
7624 | XMVECTOR vTemp = _mm_cmple_ps(V1,V2); |
||
7625 | return (((_mm_movemask_ps(vTemp)&7)==7) != 0); |
||
7626 | #else // _XM_VMX128_INTRINSICS_ |
||
7627 | return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1)); |
||
7628 | #endif |
||
7629 | } |
||
7630 | |||
7631 | //------------------------------------------------------------------------------ |
||
7632 | |||
// Return TRUE when the x, y and z components of V all lie within
// [-Bounds, +Bounds] component-wise; w is ignored. Bounds is expected to
// hold non-negative values.
XMFINLINE BOOL XMVector3InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}
||
7658 | |||
7659 | //------------------------------------------------------------------------------ |
||
7660 | |||
// Test the x, y and z components of V against [-Bounds, +Bounds] and return
// XM_CRMASK_CR6BOUNDS when all three are in range, otherwise 0. w is ignored.
XMFINLINE UINT XMVector3InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7691 | |||
7692 | //------------------------------------------------------------------------------ |
||
7693 | |||
// Return TRUE when any of the x, y or z components of V is NaN
// (exponent all ones and a non-zero mantissa); w is ignored.
XMFINLINE BOOL XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]));

#elif defined(_XM_SSE_INTRINSICS_)
    // NaN = exponent bits all set AND mantissa non-zero; test both halves
    // with integer ops so the comparison itself cannot trap on NaN.
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissa's zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x, y or z are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7721 | |||
7722 | //------------------------------------------------------------------------------ |
||
7723 | |||
// Return TRUE when any of the x, y or z components of V is +infinity or
// -infinity; w is ignored.
XMFINLINE BOOL XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit so +inf and -inf compare the same
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7743 | |||
7744 | //------------------------------------------------------------------------------ |
||
7745 | // Computation operations |
||
7746 | //------------------------------------------------------------------------------ |
||
7747 | |||
7748 | //------------------------------------------------------------------------------ |
||
7749 | |||
// Compute the 3D dot product of V1 and V2 (w components are ignored) and
// replicate the scalar result into all four components of the return vector.
XMFINLINE XMVECTOR XMVector3Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the component-wise products
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // vTemp = (y,z,y,z) of the products
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Low lane: x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // vTemp low lane: z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Low lane: (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat the scalar dot product into all four lanes
    return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7782 | |||
7783 | //------------------------------------------------------------------------------ |
||
7784 | |||
// Compute the 3D cross product V1 x V2. The w component of the result is
// set to zero on all code paths.
XMFINLINE XMVECTOR XMVector3Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Classic shuffle formulation: cross = (V1.yzx * V2.zxy) - (V1.zxy * V2.yzx)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from left, and return answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7820 | |||
7821 | //------------------------------------------------------------------------------ |
||
7822 | |||
// Return the squared 3D length of V (|V|^2, w ignored), replicated into all
// four components. Implemented as the dot product of V with itself.
XMFINLINE XMVECTOR XMVector3LengthSq
(
    FXMVECTOR V
)
{
    return XMVector3Dot(V, V);
}
||
7830 | |||
7831 | //------------------------------------------------------------------------------ |
||
7832 | |||
// Estimate 1/|V| for the 3D vector V (w ignored), replicated into all four
// components. Uses the hardware reciprocal-square-root estimate, so the
// result has reduced precision compared to XMVector3ReciprocalLength.
XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Estimate the reciprocal square root
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7866 | |||
7867 | //------------------------------------------------------------------------------ |
||
7868 | |||
// Compute 1/|V| for the 3D vector V (w ignored) at full precision,
// replicated into all four components. A zero-length input produces
// infinity (division by zero).
XMFINLINE XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Take the full-precision square root (length)
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal by dividing 1.0 by the length
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7904 | |||
7905 | //------------------------------------------------------------------------------ |
||
7906 | |||
// Estimate the 3D length |V| (w ignored), replicated into all four
// components. NOTE(review): the SSE path below uses a full-precision
// _mm_sqrt_ps, so on SSE this is as accurate as XMVector3Length; the "Est"
// distinction matters on the scalar/VMX paths which use XMVectorSqrtEst.
XMFINLINE XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7940 | |||
7941 | //------------------------------------------------------------------------------ |
||
7942 | |||
// Compute the 3D length |V| = sqrt(x^2 + y^2 + z^2) (w ignored), replicated
// into all four components of the return vector.
XMFINLINE XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
7976 | |||
7977 | //------------------------------------------------------------------------------ |
||
7978 | // XMVector3NormalizeEst uses a reciprocal estimate and |
||
7979 | // returns QNaN on zero and infinite vectors. |
||
7980 | |||
// XMVector3NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.
// Normalize the 3D vector V using a fast reciprocal-square-root estimate.
// All four components (including w) are scaled by the estimated 1/|V|.
XMFINLINE XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    // NOTE(review): this path calls the full-precision reciprocal length,
    // not the Est variant used elsewhere — confirm intended.
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Estimate the reciprocal square root (1/length)
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8014 | |||
8015 | //------------------------------------------------------------------------------ |
||
8016 | |||
// Normalize the 3D vector V at full precision. All four components
// (including w) are scaled by 1/|V|.
// NOTE(review): the zero-length behavior differs between paths — the scalar
// path returns a zero vector, while the SSE path yields NaN (0/0 survives
// the infinity check). The infinite-length case returns zero on SSE.
XMFINLINE XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Despite its name, fLengthSq holds the length after the sqrtf below,
    // then its reciprocal.
    FLOAT fLengthSq;
    XMVECTOR vResult;

    fLengthSq = sqrtf((V.vector4_f32[0]*V.vector4_f32[0])+(V.vector4_f32[1]*V.vector4_f32[1])+(V.vector4_f32[2]*V.vector4_f32[2]));
    // Prevent divide by zero
    if (fLengthSq) {
        fLengthSq = 1.0f/fLengthSq;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLengthSq;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLengthSq;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLengthSq;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLengthSq;
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8059 | |||
8060 | //------------------------------------------------------------------------------ |
||
8061 | |||
8062 | XMFINLINE XMVECTOR XMVector3ClampLength |
||
8063 | ( |
||
8064 | FXMVECTOR V, |
||
8065 | FLOAT LengthMin, |
||
8066 | FLOAT LengthMax |
||
8067 | ) |
||
8068 | { |
||
8069 | #if defined(_XM_NO_INTRINSICS_) |
||
8070 | |||
8071 | XMVECTOR ClampMax; |
||
8072 | XMVECTOR ClampMin; |
||
8073 | |||
8074 | ClampMax = XMVectorReplicate(LengthMax); |
||
8075 | ClampMin = XMVectorReplicate(LengthMin); |
||
8076 | |||
8077 | return XMVector3ClampLengthV(V, ClampMin, ClampMax); |
||
8078 | |||
8079 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8080 | XMVECTOR ClampMax = _mm_set_ps1(LengthMax); |
||
8081 | XMVECTOR ClampMin = _mm_set_ps1(LengthMin); |
||
8082 | return XMVector3ClampLengthV(V,ClampMin,ClampMax); |
||
8083 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
8084 | #endif // _XM_VMX128_INTRINSICS_ |
||
8085 | } |
||
8086 | |||
8087 | //------------------------------------------------------------------------------ |
||
8088 | |||
// Clamp the length of a 3D vector to the range [LengthMin, LengthMax], where
// both bounds are replicated vectors (all lanes equal; asserted below).
// Degenerate inputs (zero or infinite squared length) bypass the normalization
// via the select chain, and a vector already inside the range is returned
// unchanged to avoid any precision loss.
XMFINLINE XMVECTOR XMVector3ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // Bounds must be replicated, non-negative, and ordered min <= max.
    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    // Masks for the two degenerate cases.
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    // Length = LengthSq * rsqrt(LengthSq) == sqrt(LengthSq)
    Length = XMVectorMultiply(LengthSq, RcpLength);

    // Select is all-ones where the two degenerate masks agree (the common case:
    // neither zero nor infinite).  Where exactly one mask is set, the raw
    // LengthSq is carried through instead of the (NaN/Inf) normalized values.
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    // Clamp the length into [LengthMin, LengthMax].
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // Bounds must be replicated, non-negative, and ordered min <= max.
    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector3LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
    Normal = _mm_mul_ps(V, RcpLength);
    // Length = LengthSq * rsqrt(LengthSq) == sqrt(LengthSq)
    Length = _mm_mul_ps(LengthSq, RcpLength);
    // Carry the raw LengthSq through where the length is zero or infinite.
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8190 | |||
8191 | //------------------------------------------------------------------------------ |
||
8192 | |||
8193 | XMFINLINE XMVECTOR XMVector3Reflect |
||
8194 | ( |
||
8195 | FXMVECTOR Incident, |
||
8196 | FXMVECTOR Normal |
||
8197 | ) |
||
8198 | { |
||
8199 | #if defined(_XM_NO_INTRINSICS_) |
||
8200 | |||
8201 | XMVECTOR Result; |
||
8202 | |||
8203 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
8204 | Result = XMVector3Dot(Incident, Normal); |
||
8205 | Result = XMVectorAdd(Result, Result); |
||
8206 | Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); |
||
8207 | |||
8208 | return Result; |
||
8209 | |||
8210 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8211 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
8212 | XMVECTOR Result = XMVector3Dot(Incident, Normal); |
||
8213 | Result = _mm_add_ps(Result, Result); |
||
8214 | Result = _mm_mul_ps(Result, Normal); |
||
8215 | Result = _mm_sub_ps(Incident,Result); |
||
8216 | return Result; |
||
8217 | #else // _XM_VMX128_INTRINSICS_ |
||
8218 | #endif // _XM_VMX128_INTRINSICS_ |
||
8219 | } |
||
8220 | |||
8221 | //------------------------------------------------------------------------------ |
||
8222 | |||
8223 | XMFINLINE XMVECTOR XMVector3Refract |
||
8224 | ( |
||
8225 | FXMVECTOR Incident, |
||
8226 | FXMVECTOR Normal, |
||
8227 | FLOAT RefractionIndex |
||
8228 | ) |
||
8229 | { |
||
8230 | #if defined(_XM_NO_INTRINSICS_) |
||
8231 | |||
8232 | XMVECTOR Index; |
||
8233 | Index = XMVectorReplicate(RefractionIndex); |
||
8234 | return XMVector3RefractV(Incident, Normal, Index); |
||
8235 | |||
8236 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8237 | XMVECTOR Index = _mm_set_ps1(RefractionIndex); |
||
8238 | return XMVector3RefractV(Incident,Normal,Index); |
||
8239 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
8240 | #endif // _XM_VMX128_INTRINSICS_ |
||
8241 | } |
||
8242 | |||
8243 | //------------------------------------------------------------------------------ |
||
8244 | |||
// Refract an incident 3D vector through a surface with the given normal, with
// the refraction index supplied as a replicated vector.  Returns the zero
// vector on total internal reflection.
//
//   Result = RefractionIndex * Incident
//          - Normal * (RefractionIndex * dot(Incident, Normal)
//                      + sqrt(1 - RefractionIndex^2 * (1 - dot(Incident, Normal)^2)))
XMFINLINE XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR IDotN;
    XMVECTOR R;
    CONST XMVECTOR Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    // A non-positive discriminant means the ray cannot pass through.
    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    // Take the refraction branch only when the discriminant is positive in
    // every lane; otherwise treat as total internal reflection.
    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8320 | |||
8321 | //------------------------------------------------------------------------------ |
||
8322 | |||
// Compute a vector orthogonal to the 3D vector V.  Builds two candidate
// vectors from sums/differences of V's components and selects between them
// per-lane based on the signs of z and (y,z,y,y).
// NOTE(review): the per-lane select can mix lanes from both candidates; the
// orthogonality of every mixed combination is not obvious from this code alone
// -- verify against the library's reference documentation if modifying.
XMFINLINE XMVECTOR XMVector3Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    // Permute controls: 1X = x of the second operand, 0Y/0Z = y/z of the first.
    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);   // (y, z, y, y)

    NegativeV = XMVectorSubtract(Zero, V);

    // Sign masks used to pick between the two candidates.
    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = XMVectorAdd(YZYY, Z);      // e.g. lane 0: y + z
    D = XMVectorSubtract(YZYY, Z); // e.g. lane 0: y - z

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    // R0 = (S.x, -x, -x, -x), R1 = (D.x, x, x, x)
    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);

    // Choose R0 where the signs of z and YZYY agree, R1 where they differ.
    Result = XMVectorSelect(R1, R0, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    // Permute controls: 1X = x of the second operand, 0Y/0Z = y/z of the first.
    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORI32 Permute0Y0Z0Y0Y= {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);     // (y, z, y, y)

    NegativeV = _mm_sub_ps(Zero, V);

    // Sign masks used to pick between the two candidates.
    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);

    S = _mm_add_ps(YZYY, Z);
    D = _mm_sub_ps(YZYY, Z);

    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);

    // R0 = (S.x, -x, -x, -x), R1 = (D.x, x, x, x)
    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
    R1 = XMVectorPermute(V, D,Permute1X0X0X0X);
    // Choose R0 where the signs of z and YZYY agree, R1 where they differ.
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8395 | |||
8396 | //------------------------------------------------------------------------------ |
||
8397 | |||
8398 | XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst |
||
8399 | ( |
||
8400 | FXMVECTOR N1, |
||
8401 | FXMVECTOR N2 |
||
8402 | ) |
||
8403 | { |
||
8404 | #if defined(_XM_NO_INTRINSICS_) |
||
8405 | |||
8406 | XMVECTOR Result; |
||
8407 | XMVECTOR NegativeOne; |
||
8408 | XMVECTOR One; |
||
8409 | |||
8410 | Result = XMVector3Dot(N1, N2); |
||
8411 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
8412 | One = XMVectorSplatOne(); |
||
8413 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
8414 | Result = XMVectorACosEst(Result); |
||
8415 | |||
8416 | return Result; |
||
8417 | |||
8418 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8419 | XMVECTOR vResult = XMVector3Dot(N1,N2); |
||
8420 | // Clamp to -1.0f to 1.0f |
||
8421 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
8422 | vResult = _mm_min_ps(vResult,g_XMOne); |
||
8423 | vResult = XMVectorACosEst(vResult); |
||
8424 | return vResult; |
||
8425 | #else // _XM_VMX128_INTRINSICS_ |
||
8426 | #endif // _XM_VMX128_INTRINSICS_ |
||
8427 | } |
||
8428 | |||
8429 | //------------------------------------------------------------------------------ |
||
8430 | |||
8431 | XMFINLINE XMVECTOR XMVector3AngleBetweenNormals |
||
8432 | ( |
||
8433 | FXMVECTOR N1, |
||
8434 | FXMVECTOR N2 |
||
8435 | ) |
||
8436 | { |
||
8437 | #if defined(_XM_NO_INTRINSICS_) |
||
8438 | |||
8439 | XMVECTOR Result; |
||
8440 | XMVECTOR NegativeOne; |
||
8441 | XMVECTOR One; |
||
8442 | |||
8443 | Result = XMVector3Dot(N1, N2); |
||
8444 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
8445 | One = XMVectorSplatOne(); |
||
8446 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
8447 | Result = XMVectorACos(Result); |
||
8448 | |||
8449 | return Result; |
||
8450 | |||
8451 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8452 | XMVECTOR vResult = XMVector3Dot(N1,N2); |
||
8453 | // Clamp to -1.0f to 1.0f |
||
8454 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
8455 | vResult = _mm_min_ps(vResult,g_XMOne); |
||
8456 | vResult = XMVectorACos(vResult); |
||
8457 | return vResult; |
||
8458 | #else // _XM_VMX128_INTRINSICS_ |
||
8459 | #endif // _XM_VMX128_INTRINSICS_ |
||
8460 | } |
||
8461 | |||
8462 | //------------------------------------------------------------------------------ |
||
8463 | |||
8464 | XMFINLINE XMVECTOR XMVector3AngleBetweenVectors |
||
8465 | ( |
||
8466 | FXMVECTOR V1, |
||
8467 | FXMVECTOR V2 |
||
8468 | ) |
||
8469 | { |
||
8470 | #if defined(_XM_NO_INTRINSICS_) |
||
8471 | |||
8472 | XMVECTOR L1; |
||
8473 | XMVECTOR L2; |
||
8474 | XMVECTOR Dot; |
||
8475 | XMVECTOR CosAngle; |
||
8476 | XMVECTOR NegativeOne; |
||
8477 | XMVECTOR One; |
||
8478 | XMVECTOR Result; |
||
8479 | |||
8480 | L1 = XMVector3ReciprocalLength(V1); |
||
8481 | L2 = XMVector3ReciprocalLength(V2); |
||
8482 | |||
8483 | Dot = XMVector3Dot(V1, V2); |
||
8484 | |||
8485 | L1 = XMVectorMultiply(L1, L2); |
||
8486 | |||
8487 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
8488 | One = XMVectorSplatOne(); |
||
8489 | |||
8490 | CosAngle = XMVectorMultiply(Dot, L1); |
||
8491 | |||
8492 | CosAngle = XMVectorClamp(CosAngle, NegativeOne, One); |
||
8493 | |||
8494 | Result = XMVectorACos(CosAngle); |
||
8495 | |||
8496 | return Result; |
||
8497 | |||
8498 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8499 | XMVECTOR L1; |
||
8500 | XMVECTOR L2; |
||
8501 | XMVECTOR Dot; |
||
8502 | XMVECTOR CosAngle; |
||
8503 | XMVECTOR Result; |
||
8504 | |||
8505 | L1 = XMVector3ReciprocalLength(V1); |
||
8506 | L2 = XMVector3ReciprocalLength(V2); |
||
8507 | Dot = XMVector3Dot(V1, V2); |
||
8508 | L1 = _mm_mul_ps(L1, L2); |
||
8509 | CosAngle = _mm_mul_ps(Dot, L1); |
||
8510 | CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne); |
||
8511 | Result = XMVectorACos(CosAngle); |
||
8512 | return Result; |
||
8513 | #else // _XM_VMX128_INTRINSICS_ |
||
8514 | #endif // _XM_VMX128_INTRINSICS_ |
||
8515 | } |
||
8516 | |||
8517 | //------------------------------------------------------------------------------ |
||
8518 | |||
8519 | XMFINLINE XMVECTOR XMVector3LinePointDistance |
||
8520 | ( |
||
8521 | FXMVECTOR LinePoint1, |
||
8522 | FXMVECTOR LinePoint2, |
||
8523 | FXMVECTOR Point |
||
8524 | ) |
||
8525 | { |
||
8526 | #if defined(_XM_NO_INTRINSICS_) |
||
8527 | |||
8528 | XMVECTOR PointVector; |
||
8529 | XMVECTOR LineVector; |
||
8530 | XMVECTOR ReciprocalLengthSq; |
||
8531 | XMVECTOR PointProjectionScale; |
||
8532 | XMVECTOR DistanceVector; |
||
8533 | XMVECTOR Result; |
||
8534 | |||
8535 | // Given a vector PointVector from LinePoint1 to Point and a vector |
||
8536 | // LineVector from LinePoint1 to LinePoint2, the scaled distance |
||
8537 | // PointProjectionScale from LinePoint1 to the perpendicular projection |
||
8538 | // of PointVector onto the line is defined as: |
||
8539 | // |
||
8540 | // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector) |
||
8541 | |||
8542 | PointVector = XMVectorSubtract(Point, LinePoint1); |
||
8543 | LineVector = XMVectorSubtract(LinePoint2, LinePoint1); |
||
8544 | |||
8545 | ReciprocalLengthSq = XMVector3LengthSq(LineVector); |
||
8546 | ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq); |
||
8547 | |||
8548 | PointProjectionScale = XMVector3Dot(PointVector, LineVector); |
||
8549 | PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq); |
||
8550 | |||
8551 | DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale); |
||
8552 | DistanceVector = XMVectorSubtract(PointVector, DistanceVector); |
||
8553 | |||
8554 | Result = XMVector3Length(DistanceVector); |
||
8555 | |||
8556 | return Result; |
||
8557 | |||
8558 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8559 | XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1); |
||
8560 | XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1); |
||
8561 | XMVECTOR ReciprocalLengthSq = XMVector3LengthSq(LineVector); |
||
8562 | XMVECTOR vResult = XMVector3Dot(PointVector,LineVector); |
||
8563 | vResult = _mm_div_ps(vResult,ReciprocalLengthSq); |
||
8564 | vResult = _mm_mul_ps(vResult,LineVector); |
||
8565 | vResult = _mm_sub_ps(PointVector,vResult); |
||
8566 | vResult = XMVector3Length(vResult); |
||
8567 | return vResult; |
||
8568 | #else // _XM_VMX128_INTRINSICS_ |
||
8569 | #endif // _XM_VMX128_INTRINSICS_ |
||
8570 | } |
||
8571 | |||
8572 | //------------------------------------------------------------------------------ |
||
8573 | |||
8574 | XMFINLINE VOID XMVector3ComponentsFromNormal |
||
8575 | ( |
||
8576 | XMVECTOR* pParallel, |
||
8577 | XMVECTOR* pPerpendicular, |
||
8578 | FXMVECTOR V, |
||
8579 | FXMVECTOR Normal |
||
8580 | ) |
||
8581 | { |
||
8582 | #if defined(_XM_NO_INTRINSICS_) |
||
8583 | |||
8584 | XMVECTOR Parallel; |
||
8585 | XMVECTOR Scale; |
||
8586 | |||
8587 | XMASSERT(pParallel); |
||
8588 | XMASSERT(pPerpendicular); |
||
8589 | |||
8590 | Scale = XMVector3Dot(V, Normal); |
||
8591 | |||
8592 | Parallel = XMVectorMultiply(Normal, Scale); |
||
8593 | |||
8594 | *pParallel = Parallel; |
||
8595 | *pPerpendicular = XMVectorSubtract(V, Parallel); |
||
8596 | |||
8597 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8598 | XMASSERT(pParallel); |
||
8599 | XMASSERT(pPerpendicular); |
||
8600 | XMVECTOR Scale = XMVector3Dot(V, Normal); |
||
8601 | XMVECTOR Parallel = _mm_mul_ps(Normal,Scale); |
||
8602 | *pParallel = Parallel; |
||
8603 | *pPerpendicular = _mm_sub_ps(V,Parallel); |
||
8604 | #else // _XM_VMX128_INTRINSICS_ |
||
8605 | #endif // _XM_VMX128_INTRINSICS_ |
||
8606 | } |
||
8607 | |||
8608 | //------------------------------------------------------------------------------ |
||
8609 | // Transform a vector using a rotation expressed as a unit quaternion |
||
8610 | |||
8611 | XMFINLINE XMVECTOR XMVector3Rotate |
||
8612 | ( |
||
8613 | FXMVECTOR V, |
||
8614 | FXMVECTOR RotationQuaternion |
||
8615 | ) |
||
8616 | { |
||
8617 | #if defined(_XM_NO_INTRINSICS_) |
||
8618 | |||
8619 | XMVECTOR A; |
||
8620 | XMVECTOR Q; |
||
8621 | XMVECTOR Result; |
||
8622 | |||
8623 | A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); |
||
8624 | Q = XMQuaternionConjugate(RotationQuaternion); |
||
8625 | Result = XMQuaternionMultiply(Q, A); |
||
8626 | Result = XMQuaternionMultiply(Result, RotationQuaternion); |
||
8627 | |||
8628 | return Result; |
||
8629 | |||
8630 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8631 | XMVECTOR A; |
||
8632 | XMVECTOR Q; |
||
8633 | XMVECTOR Result; |
||
8634 | |||
8635 | A = _mm_and_ps(V,g_XMMask3); |
||
8636 | Q = XMQuaternionConjugate(RotationQuaternion); |
||
8637 | Result = XMQuaternionMultiply(Q, A); |
||
8638 | Result = XMQuaternionMultiply(Result, RotationQuaternion); |
||
8639 | return Result; |
||
8640 | #else // _XM_VMX128_INTRINSICS_ |
||
8641 | #endif // _XM_VMX128_INTRINSICS_ |
||
8642 | } |
||
8643 | |||
8644 | //------------------------------------------------------------------------------ |
||
8645 | // Transform a vector using the inverse of a rotation expressed as a unit quaternion |
||
8646 | |||
8647 | XMFINLINE XMVECTOR XMVector3InverseRotate |
||
8648 | ( |
||
8649 | FXMVECTOR V, |
||
8650 | FXMVECTOR RotationQuaternion |
||
8651 | ) |
||
8652 | { |
||
8653 | #if defined(_XM_NO_INTRINSICS_) |
||
8654 | |||
8655 | XMVECTOR A; |
||
8656 | XMVECTOR Q; |
||
8657 | XMVECTOR Result; |
||
8658 | |||
8659 | A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); |
||
8660 | Result = XMQuaternionMultiply(RotationQuaternion, A); |
||
8661 | Q = XMQuaternionConjugate(RotationQuaternion); |
||
8662 | Result = XMQuaternionMultiply(Result, Q); |
||
8663 | |||
8664 | return Result; |
||
8665 | |||
8666 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8667 | XMVECTOR A; |
||
8668 | XMVECTOR Q; |
||
8669 | XMVECTOR Result; |
||
8670 | A = _mm_and_ps(V,g_XMMask3); |
||
8671 | Result = XMQuaternionMultiply(RotationQuaternion, A); |
||
8672 | Q = XMQuaternionConjugate(RotationQuaternion); |
||
8673 | Result = XMQuaternionMultiply(Result, Q); |
||
8674 | return Result; |
||
8675 | #else // _XM_VMX128_INTRINSICS_ |
||
8676 | #endif // _XM_VMX128_INTRINSICS_ |
||
8677 | } |
||
8678 | |||
8679 | //------------------------------------------------------------------------------ |
||
8680 | |||
8681 | XMFINLINE XMVECTOR XMVector3Transform |
||
8682 | ( |
||
8683 | FXMVECTOR V, |
||
8684 | CXMMATRIX M |
||
8685 | ) |
||
8686 | { |
||
8687 | #if defined(_XM_NO_INTRINSICS_) |
||
8688 | |||
8689 | XMVECTOR X; |
||
8690 | XMVECTOR Y; |
||
8691 | XMVECTOR Z; |
||
8692 | XMVECTOR Result; |
||
8693 | |||
8694 | Z = XMVectorSplatZ(V); |
||
8695 | Y = XMVectorSplatY(V); |
||
8696 | X = XMVectorSplatX(V); |
||
8697 | |||
8698 | Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); |
||
8699 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
8700 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
8701 | |||
8702 | return Result; |
||
8703 | |||
8704 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8705 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
8706 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
8707 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
8708 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
8709 | vResult = _mm_add_ps(vResult,vTemp); |
||
8710 | vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2)); |
||
8711 | vTemp = _mm_mul_ps(vTemp,M.r[2]); |
||
8712 | vResult = _mm_add_ps(vResult,vTemp); |
||
8713 | vResult = _mm_add_ps(vResult,M.r[3]); |
||
8714 | return vResult; |
||
8715 | #else // _XM_VMX128_INTRINSICS_ |
||
8716 | #endif // _XM_VMX128_INTRINSICS_ |
||
8717 | } |
||
8718 | |||
8719 | //------------------------------------------------------------------------------ |
||
8720 | |||
// Transform a stream of 3D points (implicit w = 1) by a 4x4 matrix, writing
// XMFLOAT4 results (no perspective divide).  Strides are in bytes; input and
// output may use arbitrary strides but the output must have room for
// VectorCount XMFLOAT4 entries.  Returns pOutputStream.
XMINLINE XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT i;
    // Byte pointers so the strides can be applied directly.
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);

        // x*r0 + y*r1 + z*r2 + r3 (translation row supplies w == 1).
        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        // Broadcast each input component into all lanes.
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        // z*r2 + r3 + y*r1 + x*r0
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        // Unaligned store: the output stream need not be 16-byte aligned.
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
8791 | |||
8792 | //------------------------------------------------------------------------------ |
||
8793 | |||
// Non-cached-memory ("NC") variant of XMVector3TransformStream.  On every
// configuration visible here it simply forwards to the regular stream
// transform; only a VMX128 build (code not present in this file) would differ.
XMINLINE XMFLOAT4* XMVector3TransformStreamNC
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
8809 | |||
8810 | //------------------------------------------------------------------------------ |
||
8811 | |||
8812 | XMFINLINE XMVECTOR XMVector3TransformCoord |
||
8813 | ( |
||
8814 | FXMVECTOR V, |
||
8815 | CXMMATRIX M |
||
8816 | ) |
||
8817 | { |
||
8818 | #if defined(_XM_NO_INTRINSICS_) |
||
8819 | |||
8820 | XMVECTOR X; |
||
8821 | XMVECTOR Y; |
||
8822 | XMVECTOR Z; |
||
8823 | XMVECTOR InverseW; |
||
8824 | XMVECTOR Result; |
||
8825 | |||
8826 | Z = XMVectorSplatZ(V); |
||
8827 | Y = XMVectorSplatY(V); |
||
8828 | X = XMVectorSplatX(V); |
||
8829 | |||
8830 | Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); |
||
8831 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
8832 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
8833 | |||
8834 | InverseW = XMVectorSplatW(Result); |
||
8835 | InverseW = XMVectorReciprocal(InverseW); |
||
8836 | |||
8837 | Result = XMVectorMultiply(Result, InverseW); |
||
8838 | |||
8839 | return Result; |
||
8840 | |||
8841 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8842 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
8843 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
8844 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
8845 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
8846 | vResult = _mm_add_ps(vResult,vTemp); |
||
8847 | vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2)); |
||
8848 | vTemp = _mm_mul_ps(vTemp,M.r[2]); |
||
8849 | vResult = _mm_add_ps(vResult,vTemp); |
||
8850 | vResult = _mm_add_ps(vResult,M.r[3]); |
||
8851 | vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); |
||
8852 | vResult = _mm_div_ps(vResult,vTemp); |
||
8853 | return vResult; |
||
8854 | #else // _XM_VMX128_INTRINSICS_ |
||
8855 | #endif // _XM_VMX128_INTRINSICS_ |
||
8856 | } |
||
8857 | |||
8858 | //------------------------------------------------------------------------------ |
||
8859 | |||
8860 | XMINLINE XMFLOAT3* XMVector3TransformCoordStream |
||
8861 | ( |
||
8862 | XMFLOAT3* pOutputStream, |
||
8863 | UINT OutputStride, |
||
8864 | CONST XMFLOAT3* pInputStream, |
||
8865 | UINT InputStride, |
||
8866 | UINT VectorCount, |
||
8867 | CXMMATRIX M |
||
8868 | ) |
||
8869 | { |
||
8870 | #if defined(_XM_NO_INTRINSICS_) |
||
8871 | |||
8872 | XMVECTOR V; |
||
8873 | XMVECTOR X; |
||
8874 | XMVECTOR Y; |
||
8875 | XMVECTOR Z; |
||
8876 | XMVECTOR InverseW; |
||
8877 | XMVECTOR Result; |
||
8878 | UINT i; |
||
8879 | BYTE* pInputVector = (BYTE*)pInputStream; |
||
8880 | BYTE* pOutputVector = (BYTE*)pOutputStream; |
||
8881 | |||
8882 | XMASSERT(pOutputStream); |
||
8883 | XMASSERT(pInputStream); |
||
8884 | |||
8885 | for (i = 0; i < VectorCount; i++) |
||
8886 | { |
||
8887 | V = XMLoadFloat3((XMFLOAT3*)pInputVector); |
||
8888 | Z = XMVectorSplatZ(V); |
||
8889 | Y = XMVectorSplatY(V); |
||
8890 | X = XMVectorSplatX(V); |
||
8891 | // Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z); |
||
8892 | // Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y); |
||
8893 | // X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x); |
||
8894 | |||
8895 | Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); |
||
8896 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
8897 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
8898 | |||
8899 | InverseW = XMVectorSplatW(Result); |
||
8900 | InverseW = XMVectorReciprocal(InverseW); |
||
8901 | |||
8902 | Result = XMVectorMultiply(Result, InverseW); |
||
8903 | |||
8904 | XMStoreFloat3((XMFLOAT3*)pOutputVector, Result); |
||
8905 | |||
8906 | pInputVector += InputStride; |
||
8907 | pOutputVector += OutputStride; |
||
8908 | } |
||
8909 | |||
8910 | return pOutputStream; |
||
8911 | |||
8912 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8913 | XMASSERT(pOutputStream); |
||
8914 | XMASSERT(pInputStream); |
||
8915 | |||
8916 | UINT i; |
||
8917 | const BYTE *pInputVector = (BYTE*)pInputStream; |
||
8918 | BYTE *pOutputVector = (BYTE*)pOutputStream; |
||
8919 | |||
8920 | for (i = 0; i < VectorCount; i++) |
||
8921 | { |
||
8922 | XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x); |
||
8923 | XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y); |
||
8924 | XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z); |
||
8925 | vResult = _mm_mul_ps(vResult,M.r[2]); |
||
8926 | vResult = _mm_add_ps(vResult,M.r[3]); |
||
8927 | Y = _mm_mul_ps(Y,M.r[1]); |
||
8928 | vResult = _mm_add_ps(vResult,Y); |
||
8929 | X = _mm_mul_ps(X,M.r[0]); |
||
8930 | vResult = _mm_add_ps(vResult,X); |
||
8931 | |||
8932 | X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3)); |
||
8933 | vResult = _mm_div_ps(vResult,X); |
||
8934 | _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult); |
||
8935 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1)); |
||
8936 | _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult); |
||
8937 | vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1)); |
||
8938 | _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult); |
||
8939 | pInputVector += InputStride; |
||
8940 | pOutputVector += OutputStride; |
||
8941 | } |
||
8942 | |||
8943 | return pOutputStream; |
||
8944 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
8945 | #endif // _XM_VMX128_INTRINSICS_ |
||
8946 | } |
||
8947 | |||
8948 | //------------------------------------------------------------------------------ |
||
8949 | |||
8950 | XMFINLINE XMVECTOR XMVector3TransformNormal |
||
8951 | ( |
||
8952 | FXMVECTOR V, |
||
8953 | CXMMATRIX M |
||
8954 | ) |
||
8955 | { |
||
8956 | #if defined(_XM_NO_INTRINSICS_) |
||
8957 | |||
8958 | XMVECTOR X; |
||
8959 | XMVECTOR Y; |
||
8960 | XMVECTOR Z; |
||
8961 | XMVECTOR Result; |
||
8962 | |||
8963 | Z = XMVectorSplatZ(V); |
||
8964 | Y = XMVectorSplatY(V); |
||
8965 | X = XMVectorSplatX(V); |
||
8966 | |||
8967 | Result = XMVectorMultiply(Z, M.r[2]); |
||
8968 | Result = XMVectorMultiplyAdd(Y, M.r[1], Result); |
||
8969 | Result = XMVectorMultiplyAdd(X, M.r[0], Result); |
||
8970 | |||
8971 | return Result; |
||
8972 | |||
8973 | #elif defined(_XM_SSE_INTRINSICS_) |
||
8974 | XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0)); |
||
8975 | vResult = _mm_mul_ps(vResult,M.r[0]); |
||
8976 | XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1)); |
||
8977 | vTemp = _mm_mul_ps(vTemp,M.r[1]); |
||
8978 | vResult = _mm_add_ps(vResult,vTemp); |
||
8979 | vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2)); |
||
8980 | vTemp = _mm_mul_ps(vTemp,M.r[2]); |
||
8981 | vResult = _mm_add_ps(vResult,vTemp); |
||
8982 | return vResult; |
||
8983 | #else // _XM_VMX128_INTRINSICS_ |
||
8984 | #endif // _XM_VMX128_INTRINSICS_ |
||
8985 | } |
||
8986 | |||
8987 | //------------------------------------------------------------------------------ |
||
8988 | |||
// Transform a stream of 3D direction vectors by matrix M with w treated as 0:
// the translation row M.r[3] is ignored and no homogeneous divide occurs.
// Strides are in bytes; returns pOutputStream.
XMINLINE XMFLOAT3* XMVector3TransformNormalStream
(
    XMFLOAT3* pOutputStream,        // destination stream; must not be NULL
    UINT OutputStride,              // byte step between output elements
    CONST XMFLOAT3* pInputStream,   // source stream; must not be NULL
    UINT InputStride,               // byte step between input elements
    UINT VectorCount,               // number of vectors to transform
    CXMMATRIX M                     // transform; only rows 0..2 are used
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT     i;
    // Byte pointers so the (byte) strides can be applied directly.
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);

        // Result = X*r0 + Y*r1 + Z*r2 (note: no M.r[3] translation term)
        Result = XMVectorMultiply(Z, M.r[2]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;

    for (i = 0; i < VectorCount; i++)
    {
        // Scalar splat loads: XMFLOAT3 streams are not guaranteed 16-byte
        // aligned, so no full-vector load is used.
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        // Store x, then rotate the vector right so y and z reach lane 0.
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
9066 | |||
9067 | //------------------------------------------------------------------------------ |
||
9068 | |||
// Project a 3D point from object space into screen (viewport) space:
// applies world*view*projection with a homogeneous divide, then maps the
// resulting clip-space [-1,1] x/y (and [0,1] z) into the given viewport
// rectangle and depth range. Y is flipped (screen y grows downward).
XMINLINE XMVECTOR XMVector3Project
(
    FXMVECTOR V,            // object-space point (w ignored; treated as 1)
    FLOAT    ViewportX,     // viewport left edge, in pixels
    FLOAT    ViewportY,     // viewport top edge, in pixels
    FLOAT    ViewportWidth,
    FLOAT    ViewportHeight,
    FLOAT    ViewportMinZ,  // depth-range near value
    FLOAT    ViewportMaxZ,  // depth-range far value
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    // Map x: [-1,1] -> viewport width, y: [-1,1] -> height (negated to flip),
    // z: [0,1] -> [MinZ,MaxZ].
    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    // TransformCoord performs the homogeneous divide.
    Result = XMVector3TransformCoord(V, Transform);

    Result = XMVectorMultiplyAdd(Result, Scale, Offset);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;

    // Same viewport mapping as the scalar path above.
    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Result = XMVector3TransformCoord(V, Transform);
    Result = _mm_mul_ps(Result,Scale);
    Result = _mm_add_ps(Result,Offset);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9137 | |||
9138 | //------------------------------------------------------------------------------ |
||
9139 | |||
// Project a stream of 3D points from object space into screen (viewport)
// space; stream version of XMVector3Project. The combined transform is built
// once outside the loop. Strides are in bytes; returns pOutputStream.
XMINLINE XMFLOAT3* XMVector3ProjectStream
(
    XMFLOAT3* pOutputStream,        // destination stream; must not be NULL
    UINT OutputStride,              // byte step between output elements
    CONST XMFLOAT3* pInputStream,   // source stream; must not be NULL
    UINT InputStride,               // byte step between input elements
    UINT VectorCount,               // number of points to project
    FLOAT    ViewportX,
    FLOAT    ViewportY,
    FLOAT    ViewportWidth,
    FLOAT    ViewportHeight,
    FLOAT    ViewportMinZ,
    FLOAT    ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    // Viewport mapping (w scale of 1.0f is harmless: only x,y,z are stored).
    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    // Build world*view*projection once; reused for every point.
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        // TransformCoord performs the homogeneous divide.
        Result = XMVector3TransformCoord(V, Transform);

        Result = XMVectorMultiplyAdd(Result, Scale, Offset);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT     i;
    FLOAT    HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT    HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;

    // Same viewport mapping as the scalar path above.
    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);

    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVector3TransformCoord(V, Transform);

        Result = _mm_mul_ps(Result,Scale);
        Result = _mm_add_ps(Result,Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
9247 | |||
9248 | //------------------------------------------------------------------------------ |
||
9249 | |||
// Unproject a screen-space point back into object space: the inverse of
// XMVector3Project. Maps viewport coordinates back to clip space
// ([-1,1] x/y with flipped y, [0,1] z), then applies the inverse of
// world*view*projection (with homogeneous divide).
XMFINLINE XMVECTOR XMVector3Unproject
(
    FXMVECTOR V,            // screen-space point (pixel x/y, viewport-range z)
    FLOAT    ViewportX,
    FLOAT    ViewportY,
    FLOAT    ViewportWidth,
    FLOAT    ViewportHeight,
    FLOAT    ViewportMinZ,
    FLOAT    ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    // Clip-space base point: x starts at -1, y at +1 (top of screen).
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    // Reciprocal of the forward viewport scale (w of 1.0f keeps 1/w = 1).
    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    // Offset = (-viewport origin) / scale + D, so V*Scale+Offset lands in clip space.
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    // Determinant is computed but unused; XMMatrixInverse requires the out-param.
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = XMVectorMultiplyAdd(V, Scale, Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    // Clip-space base point: x starts at -1, y at +1 (top of screen).
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    // Determinant is computed but unused; XMMatrixInverse requires the out-param.
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = _mm_mul_ps(V,Scale);
    Result = _mm_add_ps(Result,Offset);

    Result = XMVector3TransformCoord(Result, Transform);

    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9329 | |||
9330 | //------------------------------------------------------------------------------ |
||
9331 | |||
// Unproject a stream of screen-space points back into object space; stream
// version of XMVector3Unproject. The viewport mapping and the inverse of
// world*view*projection are computed once outside the loop. Strides are in
// bytes; returns pOutputStream.
XMINLINE XMFLOAT3* XMVector3UnprojectStream
(
    XMFLOAT3* pOutputStream,        // destination stream; must not be NULL
    UINT OutputStride,              // byte step between output elements
    CONST XMFLOAT3* pInputStream,   // source stream; must not be NULL
    UINT InputStride,               // byte step between input elements
    UINT VectorCount,               // number of points to unproject
    FLOAT    ViewportX,
    FLOAT    ViewportY,
    FLOAT    ViewportWidth,
    FLOAT    ViewportHeight,
    FLOAT    ViewportMinZ,
    FLOAT    ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;
    // Clip-space base point: x starts at -1, y at +1 (top of screen).
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    // Reciprocal of the forward viewport scale (w of 1.0f keeps 1/w = 1).
    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    // Offset = (-viewport origin) / scale + D, so V*Scale+Offset lands in clip space.
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    // Invert world*view*projection once; reused for every point.
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        // TransformCoord performs the homogeneous divide.
        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT     i;
    BYTE*    pInputVector = (BYTE*)pInputStream;
    BYTE*    pOutputVector = (BYTE*)pOutputStream;
    // Clip-space base point: x starts at -1, y at +1 (top of screen).
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);

    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);

    // Invert world*view*projection once; reused for every point.
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);

        Result = XMVectorMultiplyAdd(V, Scale, Offset);

        Result = XMVector3TransformCoord(Result, Transform);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
9446 | |||
9447 | /**************************************************************************** |
||
9448 | * |
||
9449 | * 4D Vector |
||
9450 | * |
||
9451 | ****************************************************************************/ |
||
9452 | |||
9453 | //------------------------------------------------------------------------------ |
||
9454 | // Comparison operations |
||
9455 | //------------------------------------------------------------------------------ |
||
9456 | |||
9457 | //------------------------------------------------------------------------------ |
||
9458 | |||
9459 | XMFINLINE BOOL XMVector4Equal |
||
9460 | ( |
||
9461 | FXMVECTOR V1, |
||
9462 | FXMVECTOR V2 |
||
9463 | ) |
||
9464 | { |
||
9465 | #if defined(_XM_NO_INTRINSICS_) |
||
9466 | return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0); |
||
9467 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9468 | XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2); |
||
9469 | return ((_mm_movemask_ps(vTemp)==0x0f) != 0); |
||
9470 | #else |
||
9471 | return XMComparisonAllTrue(XMVector4EqualR(V1, V2)); |
||
9472 | #endif |
||
9473 | } |
||
9474 | |||
9475 | //------------------------------------------------------------------------------ |
||
9476 | |||
// Compare V1 and V2 for equality and return a CR6-style comparison record:
// XM_CRMASK_CR6TRUE when ALL components are equal, XM_CRMASK_CR6FALSE when
// NONE are equal, and 0 for a mixed result. Test with XMComparisonAllTrue /
// XMComparisonAnyFalse etc. rather than reading the mask directly.
XMFINLINE UINT XMVector4EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    UINT CR = 0;

    // All four equal -> "all true" record bit.
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    // All four unequal -> "all false" record bit; otherwise CR stays 0.
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // One vector compare; movemask yields a 4-bit per-lane equality mask.
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9519 | |||
9520 | //------------------------------------------------------------------------------ |
||
9521 | |||
9522 | XMFINLINE BOOL XMVector4EqualInt |
||
9523 | ( |
||
9524 | FXMVECTOR V1, |
||
9525 | FXMVECTOR V2 |
||
9526 | ) |
||
9527 | { |
||
9528 | #if defined(_XM_NO_INTRINSICS_) |
||
9529 | return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0); |
||
9530 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9531 | __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]); |
||
9532 | return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0); |
||
9533 | #else |
||
9534 | return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2)); |
||
9535 | #endif |
||
9536 | } |
||
9537 | |||
9538 | //------------------------------------------------------------------------------ |
||
9539 | |||
// Bitwise (integer) equality compare returning a CR6-style record:
// XM_CRMASK_CR6TRUE when ALL components match, XM_CRMASK_CR6FALSE when NONE
// match, 0 for a mixed result. Integer variant of XMVector4EqualR.
XMFINLINE UINT XMVector4EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    // All four lanes bit-identical -> "all true" record bit.
    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    // All four lanes differ -> "all false" record bit; otherwise CR stays 0.
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    // Integer compare per 32-bit lane; movemask gives the 4-bit lane mask.
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9580 | |||
// Return TRUE when each component of V1 is within the corresponding
// per-component Epsilon of V2, i.e. |V1[i] - V2[i]| <= Epsilon[i] for all i.
// Epsilon components are expected to be non-negative.
XMFINLINE BOOL XMVector4NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2]) &&
            (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference via max(0 - delta, delta)
    // (SSE has no dedicated fabs instruction).
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    // All four lanes must be within tolerance (mask 0xf).
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9611 | |||
9612 | //------------------------------------------------------------------------------ |
||
9613 | |||
9614 | XMFINLINE BOOL XMVector4NotEqual |
||
9615 | ( |
||
9616 | FXMVECTOR V1, |
||
9617 | FXMVECTOR V2 |
||
9618 | ) |
||
9619 | { |
||
9620 | #if defined(_XM_NO_INTRINSICS_) |
||
9621 | return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0); |
||
9622 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9623 | XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2); |
||
9624 | return ((_mm_movemask_ps(vTemp)) != 0); |
||
9625 | #else |
||
9626 | return XMComparisonAnyFalse(XMVector4EqualR(V1, V2)); |
||
9627 | #endif |
||
9628 | } |
||
9629 | |||
9630 | //------------------------------------------------------------------------------ |
||
9631 | |||
9632 | XMFINLINE BOOL XMVector4NotEqualInt |
||
9633 | ( |
||
9634 | FXMVECTOR V1, |
||
9635 | FXMVECTOR V2 |
||
9636 | ) |
||
9637 | { |
||
9638 | #if defined(_XM_NO_INTRINSICS_) |
||
9639 | return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0); |
||
9640 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9641 | __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]); |
||
9642 | return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0); |
||
9643 | #else |
||
9644 | return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2)); |
||
9645 | #endif |
||
9646 | } |
||
9647 | |||
9648 | //------------------------------------------------------------------------------ |
||
9649 | |||
9650 | XMFINLINE BOOL XMVector4Greater |
||
9651 | ( |
||
9652 | FXMVECTOR V1, |
||
9653 | FXMVECTOR V2 |
||
9654 | ) |
||
9655 | { |
||
9656 | #if defined(_XM_NO_INTRINSICS_) |
||
9657 | return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0); |
||
9658 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9659 | XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2); |
||
9660 | return ((_mm_movemask_ps(vTemp)==0x0f) != 0); |
||
9661 | #else |
||
9662 | return XMComparisonAllTrue(XMVector4GreaterR(V1, V2)); |
||
9663 | #endif |
||
9664 | } |
||
9665 | |||
9666 | //------------------------------------------------------------------------------ |
||
9667 | |||
// Greater-than compare returning a CR6-style record: XM_CRMASK_CR6TRUE when
// ALL components of V1 are > V2, XM_CRMASK_CR6FALSE when NONE are (all <=),
// 0 for a mixed result. Note a NaN lane falls into neither branch, yielding 0.
XMFINLINE UINT XMVector4GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    // All four strictly greater -> "all true" record bit.
    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    // All four less-or-equal -> "all false" record bit; otherwise CR stays 0.
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    // Single vector compare; movemask yields the 4-bit per-lane result.
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf) {   // All greater?
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)    // None greater?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9707 | |||
9708 | //------------------------------------------------------------------------------ |
||
9709 | |||
9710 | XMFINLINE BOOL XMVector4GreaterOrEqual |
||
9711 | ( |
||
9712 | FXMVECTOR V1, |
||
9713 | FXMVECTOR V2 |
||
9714 | ) |
||
9715 | { |
||
9716 | #if defined(_XM_NO_INTRINSICS_) |
||
9717 | return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0); |
||
9718 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9719 | XMVECTOR vTemp = _mm_cmpge_ps(V1,V2); |
||
9720 | return ((_mm_movemask_ps(vTemp)==0x0f) != 0); |
||
9721 | #else |
||
9722 | return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2)); |
||
9723 | #endif |
||
9724 | } |
||
9725 | |||
9726 | //------------------------------------------------------------------------------ |
||
9727 | |||
// Greater-or-equal compare returning a CR6-style record: XM_CRMASK_CR6TRUE
// when ALL components of V1 are >= V2, XM_CRMASK_CR6FALSE when NONE are
// (all strictly less), 0 for a mixed result (a NaN lane also yields 0).
XMFINLINE UINT XMVector4GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    // All four >= -> "all true" record bit.
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    // All four strictly less -> "all false" record bit; otherwise CR stays 0.
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    // Single vector compare; movemask yields the 4-bit per-lane result.
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)    // All >= ?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)    // None >= ?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9768 | |||
9769 | //------------------------------------------------------------------------------ |
||
9770 | |||
9771 | XMFINLINE BOOL XMVector4Less |
||
9772 | ( |
||
9773 | FXMVECTOR V1, |
||
9774 | FXMVECTOR V2 |
||
9775 | ) |
||
9776 | { |
||
9777 | #if defined(_XM_NO_INTRINSICS_) |
||
9778 | return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0); |
||
9779 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9780 | XMVECTOR vTemp = _mm_cmplt_ps(V1,V2); |
||
9781 | return ((_mm_movemask_ps(vTemp)==0x0f) != 0); |
||
9782 | #else |
||
9783 | return XMComparisonAllTrue(XMVector4GreaterR(V2, V1)); |
||
9784 | #endif |
||
9785 | } |
||
9786 | |||
9787 | //------------------------------------------------------------------------------ |
||
9788 | |||
9789 | XMFINLINE BOOL XMVector4LessOrEqual |
||
9790 | ( |
||
9791 | FXMVECTOR V1, |
||
9792 | FXMVECTOR V2 |
||
9793 | ) |
||
9794 | { |
||
9795 | #if defined(_XM_NO_INTRINSICS_) |
||
9796 | return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0); |
||
9797 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9798 | XMVECTOR vTemp = _mm_cmple_ps(V1,V2); |
||
9799 | return ((_mm_movemask_ps(vTemp)==0x0f) != 0); |
||
9800 | #else |
||
9801 | return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1)); |
||
9802 | #endif |
||
9803 | } |
||
9804 | |||
9805 | //------------------------------------------------------------------------------ |
||
9806 | |||
9807 | XMFINLINE BOOL XMVector4InBounds |
||
9808 | ( |
||
9809 | FXMVECTOR V, |
||
9810 | FXMVECTOR Bounds |
||
9811 | ) |
||
9812 | { |
||
9813 | #if defined(_XM_NO_INTRINSICS_) |
||
9814 | return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && |
||
9815 | (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) && |
||
9816 | (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) && |
||
9817 | (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0); |
||
9818 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9819 | // Test if less than or equal |
||
9820 | XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); |
||
9821 | // Negate the bounds |
||
9822 | XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); |
||
9823 | // Test if greater or equal (Reversed) |
||
9824 | vTemp2 = _mm_cmple_ps(vTemp2,V); |
||
9825 | // Blend answers |
||
9826 | vTemp1 = _mm_and_ps(vTemp1,vTemp2); |
||
9827 | // All in bounds? |
||
9828 | return ((_mm_movemask_ps(vTemp1)==0x0f) != 0); |
||
9829 | #else |
||
9830 | return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds)); |
||
9831 | #endif |
||
9832 | } |
||
9833 | |||
9834 | //------------------------------------------------------------------------------ |
||
9835 | |||
9836 | XMFINLINE UINT XMVector4InBoundsR |
||
9837 | ( |
||
9838 | FXMVECTOR V, |
||
9839 | FXMVECTOR Bounds |
||
9840 | ) |
||
9841 | { |
||
9842 | #if defined(_XM_NO_INTRINSICS_) |
||
9843 | |||
9844 | UINT CR = 0; |
||
9845 | if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && |
||
9846 | (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) && |
||
9847 | (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) && |
||
9848 | (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) |
||
9849 | { |
||
9850 | CR = XM_CRMASK_CR6BOUNDS; |
||
9851 | } |
||
9852 | return CR; |
||
9853 | |||
9854 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9855 | // Test if less than or equal |
||
9856 | XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds); |
||
9857 | // Negate the bounds |
||
9858 | XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne); |
||
9859 | // Test if greater or equal (Reversed) |
||
9860 | vTemp2 = _mm_cmple_ps(vTemp2,V); |
||
9861 | // Blend answers |
||
9862 | vTemp1 = _mm_and_ps(vTemp1,vTemp2); |
||
9863 | // All in bounds? |
||
9864 | return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0; |
||
9865 | #else // _XM_VMX128_INTRINSICS_ |
||
9866 | #endif // _XM_VMX128_INTRINSICS_ |
||
9867 | } |
||
9868 | |||
9869 | //------------------------------------------------------------------------------ |
||
9870 | |||
9871 | XMFINLINE BOOL XMVector4IsNaN |
||
9872 | ( |
||
9873 | FXMVECTOR V |
||
9874 | ) |
||
9875 | { |
||
9876 | #if defined(_XM_NO_INTRINSICS_) |
||
9877 | return (XMISNAN(V.vector4_f32[0]) || |
||
9878 | XMISNAN(V.vector4_f32[1]) || |
||
9879 | XMISNAN(V.vector4_f32[2]) || |
||
9880 | XMISNAN(V.vector4_f32[3])); |
||
9881 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9882 | // Test against itself. NaN is always not equal |
||
9883 | XMVECTOR vTempNan = _mm_cmpneq_ps(V,V); |
||
9884 | // If any are NaN, the mask is non-zero |
||
9885 | return (_mm_movemask_ps(vTempNan)!=0); |
||
9886 | #else // _XM_VMX128_INTRINSICS_ |
||
9887 | #endif // _XM_VMX128_INTRINSICS_ |
||
9888 | } |
||
9889 | |||
9890 | //------------------------------------------------------------------------------ |
||
9891 | |||
9892 | XMFINLINE BOOL XMVector4IsInfinite |
||
9893 | ( |
||
9894 | FXMVECTOR V |
||
9895 | ) |
||
9896 | { |
||
9897 | #if defined(_XM_NO_INTRINSICS_) |
||
9898 | |||
9899 | return (XMISINF(V.vector4_f32[0]) || |
||
9900 | XMISINF(V.vector4_f32[1]) || |
||
9901 | XMISINF(V.vector4_f32[2]) || |
||
9902 | XMISINF(V.vector4_f32[3])); |
||
9903 | |||
9904 | #elif defined(_XM_SSE_INTRINSICS_) |
||
9905 | // Mask off the sign bit |
||
9906 | XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask); |
||
9907 | // Compare to infinity |
||
9908 | vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity); |
||
9909 | // If any are infinity, the signs are true. |
||
9910 | return (_mm_movemask_ps(vTemp) != 0); |
||
9911 | #else // _XM_VMX128_INTRINSICS_ |
||
9912 | #endif // _XM_VMX128_INTRINSICS_ |
||
9913 | } |
||
9914 | |||
9915 | //------------------------------------------------------------------------------ |
||
9916 | // Computation operations |
||
9917 | //------------------------------------------------------------------------------ |
||
9918 | |||
9919 | //------------------------------------------------------------------------------ |
||
9920 | |||
// Compute the 4D dot product of V1 and V2, replicated into every component
// of the returned vector.
XMFINLINE XMVECTOR XMVector4Dot
(
    FXMVECTOR V1, 
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    // The scalar dot product is written to all four lanes of the result.
    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    // Per-lane products: x1*x2, y1*y2, z1*z2, w1*w2.
    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
9949 | |||
9950 | //------------------------------------------------------------------------------ |
||
9951 | |||
// Compute the 4D cross product of V1, V2 and V3: each component is a 3x3
// cofactor expansion over the other vectors' components (the determinant
// pattern is visible in the scalar path below).
XMFINLINE XMVECTOR XMVector4Cross
(
    FXMVECTOR V1, 
    FXMVECTOR V2, 
    FXMVECTOR V3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;

    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // The SSE path evaluates the same three cofactor terms for all four
    // lanes at once; the shuffle masks select the component orderings named
    // in the comments (e.g. "V2zwyz" = lanes z,w,y,z of V2).
    // V2zwyz * V3wzwy
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
    XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
    vResult = _mm_mul_ps(vResult,vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp2);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
    vResult = _mm_mul_ps(vResult,vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
    // vResult - temp * V1zzyy
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp1);

    // V2yzxy * V3zxyx
    vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
    vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
    vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
    // vResult + term * V1wwwz
    vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
    vResult = _mm_add_ps(vResult,vTemp3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10013 | |||
10014 | //------------------------------------------------------------------------------ |
||
10015 | |||
// Return the squared length of V (V . V), replicated into every component.
XMFINLINE XMVECTOR XMVector4LengthSq
(
    FXMVECTOR V
)
{
    // |V|^2 == dot(V, V); XMVector4Dot already splats the result.
    return XMVector4Dot(V, V);
}
||
10023 | |||
10024 | //------------------------------------------------------------------------------ |
||
10025 | |||
// Return an estimate of 1 / |V|, replicated into every component.
// The SSE path uses the hardware reciprocal-square-root estimate, so the
// result is approximate; use XMVector4ReciprocalLength for full precision.
XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Hardware reciprocal-square-root estimate of the squared length
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10061 | |||
10062 | //------------------------------------------------------------------------------ |
||
10063 | |||
// Return 1 / |V| at full precision, replicated into every component.
XMFINLINE XMVECTOR XMVector4ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Full-precision square root rather than the rsqrt estimate...
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    // ...followed by an exact divide. Accurate!
    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10101 | |||
10102 | //------------------------------------------------------------------------------ |
||
10103 | |||
// Return an estimate of the length |V|, replicated into every component.
// NOTE(review): the SSE path below actually uses the full-precision
// _mm_sqrt_ps, identical to XMVector4Length — only the scalar path uses an
// estimated square root.
XMFINLINE XMVECTOR XMVector4LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Take the square root of the splatted squared length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10139 | |||
10140 | //------------------------------------------------------------------------------ |
||
10141 | |||
// Return the length |V| at full precision, replicated into every component.
XMFINLINE XMVECTOR XMVector4Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector4LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Take the square root of the splatted squared length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10177 | |||
10178 | //------------------------------------------------------------------------------ |
||
10179 | // XMVector4NormalizeEst uses a reciprocal estimate and |
||
10180 | // returns QNaN on zero and infinite vectors. |
||
10181 | |||
// Normalize V using an estimated reciprocal length (see the note above this
// function). In the SSE path, lanes whose squared length is infinite are
// forced to zero; a zero-length input yields non-finite components.
XMFINLINE XMVECTOR XMVector4NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    // NOTE(review): this path calls the full-precision reciprocal length, not
    // the Est variant, so the scalar build is more accurate than the SSE one
    // — confirm whether that asymmetry is intended.
    Result = XMVector4ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Estimated reciprocal square root of the squared length
    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Reciprocal mul to perform the normalization
    vResult = _mm_mul_ps(vResult,V);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10222 | |||
10223 | //------------------------------------------------------------------------------ |
||
10224 | |||
// Normalize V to unit length at full precision, with explicit handling for
// degenerate (zero- or infinite-length) inputs — see the per-branch comments.
XMFINLINE XMVECTOR XMVector4Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR LengthSq;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR Result;

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    Result = XMVectorReciprocalSqrt(LengthSq);
    // Masks marking the two degenerate cases.
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Result = XMVectorMultiply(V, Result);
    // Where exactly one of the degenerate masks is set, fall back to the
    // squared-length value; otherwise keep the scaled result.
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Result = XMVectorSelect(LengthSq, Result, Select);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y,z and w
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and w
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
    // x+z, y+w
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // x+z,x+z,x+z,y+w
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
    // ??,??,y+w,y+w
    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
    // ??,??,x+z+y+w,??
    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
    // Splat the length
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
    // Full-precision length for the division below
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10278 | |||
10279 | //------------------------------------------------------------------------------ |
||
10280 | |||
10281 | XMFINLINE XMVECTOR XMVector4ClampLength |
||
10282 | ( |
||
10283 | FXMVECTOR V, |
||
10284 | FLOAT LengthMin, |
||
10285 | FLOAT LengthMax |
||
10286 | ) |
||
10287 | { |
||
10288 | #if defined(_XM_NO_INTRINSICS_) |
||
10289 | |||
10290 | XMVECTOR ClampMax; |
||
10291 | XMVECTOR ClampMin; |
||
10292 | |||
10293 | ClampMax = XMVectorReplicate(LengthMax); |
||
10294 | ClampMin = XMVectorReplicate(LengthMin); |
||
10295 | |||
10296 | return XMVector4ClampLengthV(V, ClampMin, ClampMax); |
||
10297 | |||
10298 | #elif defined(_XM_SSE_INTRINSICS_) |
||
10299 | XMVECTOR ClampMax = _mm_set_ps1(LengthMax); |
||
10300 | XMVECTOR ClampMin = _mm_set_ps1(LengthMin); |
||
10301 | return XMVector4ClampLengthV(V, ClampMin, ClampMax); |
||
10302 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
10303 | #endif // _XM_VMX128_INTRINSICS_ |
||
10304 | } |
||
10305 | |||
10306 | //------------------------------------------------------------------------------ |
||
10307 | |||
// Clamp the length of V to the range [LengthMin, LengthMax], where both
// limits are replicated-scalar vectors with 0 <= LengthMin <= LengthMax
// (enforced by the XMASSERTs below). The original vector is returned
// unchanged — with no precision loss — when its length is already in range.
XMFINLINE XMVECTOR XMVector4ClampLengthV
(
    FXMVECTOR V, 
    FXMVECTOR LengthMin, 
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // The limit vectors must be splatted scalars within a valid range.
    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);

    Zero = XMVectorZero();

    RcpLength = XMVectorReciprocalSqrt(LengthSq);

    // Masks marking degenerate (infinite / zero squared length) inputs.
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);

    Normal = XMVectorMultiply(V, RcpLength);

    // Length = LengthSq * (1/sqrt(LengthSq)) == sqrt(LengthSq).
    Length = XMVectorMultiply(LengthSq, RcpLength);

    // Fall back to the raw squared length where the input is degenerate.
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);

    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);

    // Clamp the measured length into [LengthMin, LengthMax].
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);

    Result = XMVectorMultiply(Normal, ClampLength);

    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;

    // The limit vectors must be splatted scalars within a valid range.
    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));

    LengthSq = XMVector4LengthSq(V);
    Zero = XMVectorZero();
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    // Masks marking degenerate (infinite / zero squared length) inputs.
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Normal = _mm_mul_ps(V, RcpLength);
    // Length = LengthSq * (1/sqrt(LengthSq)) == sqrt(LengthSq).
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    // Clamp the measured length into [LengthMin, LengthMax].
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax,ControlMin);
    Result = XMVectorSelect(Result,V,Control);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10412 | |||
10413 | //------------------------------------------------------------------------------ |
||
10414 | |||
10415 | XMFINLINE XMVECTOR XMVector4Reflect |
||
10416 | ( |
||
10417 | FXMVECTOR Incident, |
||
10418 | FXMVECTOR Normal |
||
10419 | ) |
||
10420 | { |
||
10421 | #if defined(_XM_NO_INTRINSICS_) |
||
10422 | |||
10423 | XMVECTOR Result; |
||
10424 | |||
10425 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
10426 | Result = XMVector4Dot(Incident, Normal); |
||
10427 | Result = XMVectorAdd(Result, Result); |
||
10428 | Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); |
||
10429 | |||
10430 | return Result; |
||
10431 | |||
10432 | #elif defined(_XM_SSE_INTRINSICS_) |
||
10433 | // Result = Incident - (2 * dot(Incident, Normal)) * Normal |
||
10434 | XMVECTOR Result = XMVector4Dot(Incident,Normal); |
||
10435 | Result = _mm_add_ps(Result,Result); |
||
10436 | Result = _mm_mul_ps(Result,Normal); |
||
10437 | Result = _mm_sub_ps(Incident,Result); |
||
10438 | return Result; |
||
10439 | #else // _XM_VMX128_INTRINSICS_ |
||
10440 | #endif // _XM_VMX128_INTRINSICS_ |
||
10441 | } |
||
10442 | |||
10443 | //------------------------------------------------------------------------------ |
||
10444 | |||
10445 | XMFINLINE XMVECTOR XMVector4Refract |
||
10446 | ( |
||
10447 | FXMVECTOR Incident, |
||
10448 | FXMVECTOR Normal, |
||
10449 | FLOAT RefractionIndex |
||
10450 | ) |
||
10451 | { |
||
10452 | #if defined(_XM_NO_INTRINSICS_) |
||
10453 | |||
10454 | XMVECTOR Index; |
||
10455 | Index = XMVectorReplicate(RefractionIndex); |
||
10456 | return XMVector4RefractV(Incident, Normal, Index); |
||
10457 | |||
10458 | #elif defined(_XM_SSE_INTRINSICS_) |
||
10459 | XMVECTOR Index = _mm_set_ps1(RefractionIndex); |
||
10460 | return XMVector4RefractV(Incident,Normal,Index); |
||
10461 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
10462 | #endif // _XM_VMX128_INTRINSICS_ |
||
10463 | } |
||
10464 | |||
10465 | //------------------------------------------------------------------------------ |
||
10466 | |||
// Refract Incident through the surface with the given Normal, using a
// per-component refraction-index vector. Returns the zero vector on total
// internal reflection.
XMFINLINE XMVECTOR XMVector4RefractV
(
    FXMVECTOR Incident, 
    FXMVECTOR Normal, 
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR IDotN;
    XMVECTOR R;
    CONST XMVECTOR Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector4Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    // A non-positive discriminant in every lane means the ray cannot exit.
    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;

        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) + 
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    // All four lanes non-positive (movemask 0x0f) => total internal reflection.
    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex, IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10544 | |||
10545 | //------------------------------------------------------------------------------ |
||
10546 | |||
// Return a vector orthogonal to V: (z, w, -x, -y).
// dot(V, Result) = x*z + y*w - z*x - w*y = 0 by construction.
XMFINLINE XMVECTOR XMVector4Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = V.vector4_f32[2];
    Result.vector4_f32[1] = V.vector4_f32[3];
    Result.vector4_f32[2] = -V.vector4_f32[0];
    Result.vector4_f32[3] = -V.vector4_f32[1];
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Swizzle to (z,w,x,y), then negate the last two lanes by multiplication.
    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
    vResult = _mm_mul_ps(vResult,FlipZW);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10569 | |||
10570 | //------------------------------------------------------------------------------ |
||
10571 | |||
10572 | XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst |
||
10573 | ( |
||
10574 | FXMVECTOR N1, |
||
10575 | FXMVECTOR N2 |
||
10576 | ) |
||
10577 | { |
||
10578 | #if defined(_XM_NO_INTRINSICS_) |
||
10579 | |||
10580 | XMVECTOR NegativeOne; |
||
10581 | XMVECTOR One; |
||
10582 | XMVECTOR Result; |
||
10583 | |||
10584 | Result = XMVector4Dot(N1, N2); |
||
10585 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
10586 | One = XMVectorSplatOne(); |
||
10587 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
10588 | Result = XMVectorACosEst(Result); |
||
10589 | |||
10590 | return Result; |
||
10591 | |||
10592 | #elif defined(_XM_SSE_INTRINSICS_) |
||
10593 | XMVECTOR vResult = XMVector4Dot(N1,N2); |
||
10594 | // Clamp to -1.0f to 1.0f |
||
10595 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
10596 | vResult = _mm_min_ps(vResult,g_XMOne);; |
||
10597 | vResult = XMVectorACosEst(vResult); |
||
10598 | return vResult; |
||
10599 | #else // _XM_VMX128_INTRINSICS_ |
||
10600 | #endif // _XM_VMX128_INTRINSICS_ |
||
10601 | } |
||
10602 | |||
10603 | //------------------------------------------------------------------------------ |
||
10604 | |||
10605 | XMFINLINE XMVECTOR XMVector4AngleBetweenNormals |
||
10606 | ( |
||
10607 | FXMVECTOR N1, |
||
10608 | FXMVECTOR N2 |
||
10609 | ) |
||
10610 | { |
||
10611 | #if defined(_XM_NO_INTRINSICS_) |
||
10612 | |||
10613 | XMVECTOR NegativeOne; |
||
10614 | XMVECTOR One; |
||
10615 | XMVECTOR Result; |
||
10616 | |||
10617 | Result = XMVector4Dot(N1, N2); |
||
10618 | NegativeOne = XMVectorSplatConstant(-1, 0); |
||
10619 | One = XMVectorSplatOne(); |
||
10620 | Result = XMVectorClamp(Result, NegativeOne, One); |
||
10621 | Result = XMVectorACos(Result); |
||
10622 | |||
10623 | return Result; |
||
10624 | |||
10625 | #elif defined(_XM_SSE_INTRINSICS_) |
||
10626 | XMVECTOR vResult = XMVector4Dot(N1,N2); |
||
10627 | // Clamp to -1.0f to 1.0f |
||
10628 | vResult = _mm_max_ps(vResult,g_XMNegativeOne); |
||
10629 | vResult = _mm_min_ps(vResult,g_XMOne);; |
||
10630 | vResult = XMVectorACos(vResult); |
||
10631 | return vResult; |
||
10632 | #else // _XM_VMX128_INTRINSICS_ |
||
10633 | #endif // _XM_VMX128_INTRINSICS_ |
||
10634 | } |
||
10635 | |||
10636 | //------------------------------------------------------------------------------ |
||
10637 | |||
// Compute the radian angle between two arbitrary (not necessarily normalized)
// 4D vectors, replicated in all components. Normalizes via reciprocal lengths:
// cos(angle) = dot(V1,V2) / (|V1| * |V2|), clamped to [-1,1] before acos.
XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);

    Dot = XMVector4Dot(V1, V2);

    // L1 = 1 / (|V1| * |V2|)
    L1 = XMVectorMultiply(L1, L2);

    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    // Clamp guards against rounding pushing the cosine outside acos's domain.
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);

    Result = XMVectorACos(CosAngle);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;

    L1 = XMVector4ReciprocalLength(V1);
    L2 = XMVector4ReciprocalLength(V2);
    Dot = XMVector4Dot(V1, V2);
    L1 = _mm_mul_ps(L1,L2);
    CosAngle = _mm_mul_ps(Dot,L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10689 | |||
10690 | //------------------------------------------------------------------------------ |
||
10691 | |||
// Transform a 4D vector by a 4x4 matrix: Result = V * M (row-vector
// convention — V.x scales row 0, V.y row 1, V.z row 2, V.w row 3).
XMFINLINE XMVECTOR XMVector4Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
    FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
    FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
    FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
    XMVECTOR vResult = {
        fX,
        fY,
        fZ,
        fW
    };
    return vResult;

#elif defined(_XM_SSE_INTRINSICS_)
    // Splat x,y,z and w
    XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    // Mul by the matrix
    vTempX = _mm_mul_ps(vTempX,M.r[0]);
    vTempY = _mm_mul_ps(vTempY,M.r[1]);
    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
    vTempW = _mm_mul_ps(vTempW,M.r[3]);
    // Add them all together
    vTempX = _mm_add_ps(vTempX,vTempY);
    vTempZ = _mm_add_ps(vTempZ,vTempW);
    vTempX = _mm_add_ps(vTempX,vTempZ);
    return vTempX;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
||
10730 | |||
10731 | //------------------------------------------------------------------------------ |
||
10732 | |||
// Transform a stream of VectorCount XMFLOAT4 values by matrix M, writing the
// results to pOutputStream. Input/output elements are addressed with the given
// byte strides, so both streams may be interleaved inside larger structures.
// In-place operation (pOutputStream == pInputStream) appears supported when
// OutputStride == InputStride, since each element is fully read before it is
// written — TODO confirm against the library's documented contract.
// Returns pOutputStream.
XMINLINE XMFLOAT4* XMVector4TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT4* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR W;
    XMVECTOR Result;
    UINT i;
    // Byte pointers so the strides can be applied directly.
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat4((XMFLOAT4*)pInputVector);
        W = XMVectorSplatW(V);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//        W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
//        Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
//        Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
//        X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);

        // Result = X*M.r[0] + Y*M.r[1] + Z*M.r[2] + W*M.r[3]
        Result = XMVectorMultiply(W, M.r[3]);
        Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(_XM_SSE_INTRINSICS_)
    UINT i;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    const BYTE*pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
    BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
    for (i = 0; i < VectorCount; i++)
    {
        // Fetch the row and splat it
        // Unaligned loads/stores are used because strided elements inside a
        // caller's structure need not be 16-byte aligned.
        XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
        XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
        XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
        XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
        vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
        vTempx = _mm_mul_ps(vTempx,M.r[0]);
        vTempy = _mm_mul_ps(vTempy,M.r[1]);
        vTempz = _mm_mul_ps(vTempz,M.r[2]);
        vTempw = _mm_mul_ps(vTempw,M.r[3]);
        vTempx = _mm_add_ps(vTempx,vTempy);
        vTempw = _mm_add_ps(vTempw,vTempz);
        vTempw = _mm_add_ps(vTempw,vTempx);
        // Store the transformed vector
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
||
10816 | |||
10817 | #ifdef __cplusplus |
||
10818 | |||
10819 | /**************************************************************************** |
||
10820 | * |
||
10821 | * XMVECTOR operators |
||
10822 | * |
||
10823 | ****************************************************************************/ |
||
10824 | |||
10825 | #ifndef XM_NO_OPERATOR_OVERLOADS |
||
10826 | |||
10827 | //------------------------------------------------------------------------------ |
||
10828 | |||
// Unary plus: identity, returns V unchanged.
XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
{
    return V;
}
||
10833 | |||
10834 | //------------------------------------------------------------------------------ |
||
10835 | |||
// Unary minus: component-wise negation.
XMFINLINE XMVECTOR operator- (FXMVECTOR V)
{
    return XMVectorNegate(V);
}
||
10840 | |||
10841 | //------------------------------------------------------------------------------ |
||
10842 | |||
// Component-wise in-place addition: V1 += V2.
XMFINLINE XMVECTOR& operator+=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorAdd(V1, V2);
    return V1;
}
||
10852 | |||
10853 | //------------------------------------------------------------------------------ |
||
10854 | |||
// Component-wise in-place subtraction: V1 -= V2.
XMFINLINE XMVECTOR& operator-=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorSubtract(V1, V2);
    return V1;
}
||
10864 | |||
10865 | //------------------------------------------------------------------------------ |
||
10866 | |||
// Component-wise in-place multiplication: V1 *= V2.
XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR& V1,
    FXMVECTOR V2
)
{
    V1 = XMVectorMultiply(V1, V2);
    return V1;
}
||
10876 | |||
10877 | //------------------------------------------------------------------------------ |
||
10878 | |||
10879 | XMFINLINE XMVECTOR& operator/= |
||
10880 | ( |
||
10881 | XMVECTOR& V1, |
||
10882 | FXMVECTOR V2 |
||
10883 | ) |
||
10884 | { |
||
10885 | XMVECTOR InvV = XMVectorReciprocal(V2); |
||
10886 | V1 = XMVectorMultiply(V1, InvV); |
||
10887 | return V1; |
||
10888 | } |
||
10889 | |||
10890 | //------------------------------------------------------------------------------ |
||
10891 | |||
// In-place scalar scale: V *= S (every component multiplied by S).
XMFINLINE XMVECTOR& operator*=
(
    XMVECTOR& V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, S);
    return V;
}
||
10901 | |||
10902 | //------------------------------------------------------------------------------ |
||
10903 | |||
// In-place scalar division: V /= S, implemented as a scale by 1/S.
XMFINLINE XMVECTOR& operator/=
(
    XMVECTOR& V,
    CONST FLOAT S
)
{
    V = XMVectorScale(V, 1.0f / S);
    return V;
}
||
10913 | |||
10914 | //------------------------------------------------------------------------------ |
||
10915 | |||
// Component-wise vector addition.
XMFINLINE XMVECTOR operator+
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorAdd(V1, V2);
}
||
10924 | |||
10925 | //------------------------------------------------------------------------------ |
||
10926 | |||
// Component-wise vector subtraction.
XMFINLINE XMVECTOR operator-
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorSubtract(V1, V2);
}
||
10935 | |||
10936 | //------------------------------------------------------------------------------ |
||
10937 | |||
// Component-wise vector multiplication.
XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    return XMVectorMultiply(V1, V2);
}
||
10946 | |||
10947 | //------------------------------------------------------------------------------ |
||
10948 | |||
10949 | XMFINLINE XMVECTOR operator/ |
||
10950 | ( |
||
10951 | FXMVECTOR V1, |
||
10952 | FXMVECTOR V2 |
||
10953 | ) |
||
10954 | { |
||
10955 | XMVECTOR InvV = XMVectorReciprocal(V2); |
||
10956 | return XMVectorMultiply(V1, InvV); |
||
10957 | } |
||
10958 | |||
10959 | //------------------------------------------------------------------------------ |
||
10960 | |||
// Vector times scalar: every component multiplied by S.
XMFINLINE XMVECTOR operator*
(
    FXMVECTOR V,
    CONST FLOAT S
)
{
    return XMVectorScale(V, S);
}
||
10969 | |||
10970 | //------------------------------------------------------------------------------ |
||
10971 | |||
// Vector divided by scalar, implemented as a scale by 1/S.
XMFINLINE XMVECTOR operator/
(
    FXMVECTOR V,
    CONST FLOAT S
)
{
    return XMVectorScale(V, 1.0f / S);
}
||
10980 | |||
10981 | //------------------------------------------------------------------------------ |
||
10982 | |||
// Scalar times vector (commutative form of V * S).
XMFINLINE XMVECTOR operator*
(
    FLOAT S,
    FXMVECTOR V
)
{
    return XMVectorScale(V, S);
}
||
10991 | |||
10992 | #endif // !XM_NO_OPERATOR_OVERLOADS |
||
10993 | |||
10994 | /**************************************************************************** |
||
10995 | * |
||
10996 | * XMFLOAT2 operators |
||
10997 | * |
||
10998 | ****************************************************************************/ |
||
10999 | |||
11000 | //------------------------------------------------------------------------------ |
||
11001 | |||
// Construct from an array of at least two floats: (pArray[0], pArray[1]).
XMFINLINE _XMFLOAT2::_XMFLOAT2
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11010 | |||
11011 | //------------------------------------------------------------------------------ |
||
11012 | |||
// Member-wise copy assignment.
XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
(
    CONST _XMFLOAT2& Float2
)
{
    x = Float2.x;
    y = Float2.y;
    return *this;
}
||
11022 | |||
11023 | /**************************************************************************** |
||
11024 | * |
||
11025 | * XMHALF2 operators |
||
11026 | * |
||
11027 | ****************************************************************************/ |
||
11028 | |||
11029 | //------------------------------------------------------------------------------ |
||
11030 | |||
// Construct from an array of at least two HALF values (copied as-is,
// no float conversion).
XMFINLINE _XMHALF2::_XMHALF2
(
    CONST HALF* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11039 | |||
11040 | //------------------------------------------------------------------------------ |
||
11041 | |||
// Construct from two floats, converting each to half precision.
XMFINLINE _XMHALF2::_XMHALF2
(
    FLOAT _x,
    FLOAT _y
)
{
    x = XMConvertFloatToHalf(_x);
    y = XMConvertFloatToHalf(_y);
}
||
11051 | |||
11052 | //------------------------------------------------------------------------------ |
||
11053 | |||
// Construct from an array of at least two floats, converting each to
// half precision.
XMFINLINE _XMHALF2::_XMHALF2
(
    CONST FLOAT* pArray
)
{
    x = XMConvertFloatToHalf(pArray[0]);
    y = XMConvertFloatToHalf(pArray[1]);
}
||
11062 | |||
11063 | //------------------------------------------------------------------------------ |
||
11064 | |||
// Member-wise copy assignment (raw HALF bits, no conversion).
XMFINLINE _XMHALF2& _XMHALF2::operator=
(
    CONST _XMHALF2& Half2
)
{
    x = Half2.x;
    y = Half2.y;
    return *this;
}
||
11074 | |||
11075 | /**************************************************************************** |
||
11076 | * |
||
11077 | * XMSHORTN2 operators |
||
11078 | * |
||
11079 | ****************************************************************************/ |
||
11080 | |||
11081 | //------------------------------------------------------------------------------ |
||
11082 | |||
// Construct from an array of at least two SHORT values (copied as-is).
XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11091 | |||
11092 | //------------------------------------------------------------------------------ |
||
11093 | |||
// Construct from two floats, packed via XMStoreShortN2 (signed
// normalized 16-bit conversion).
XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}
||
11102 | |||
11103 | //------------------------------------------------------------------------------ |
||
11104 | |||
// Construct from an array of at least two floats, packed via XMStoreShortN2.
XMFINLINE _XMSHORTN2::_XMSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}
||
11112 | |||
11113 | //------------------------------------------------------------------------------ |
||
11114 | |||
// Member-wise copy assignment.
XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
(
    CONST _XMSHORTN2& ShortN2
)
{
    x = ShortN2.x;
    y = ShortN2.y;
    return *this;
}
||
11124 | |||
11125 | /**************************************************************************** |
||
11126 | * |
||
11127 | * XMSHORT2 operators |
||
11128 | * |
||
11129 | ****************************************************************************/ |
||
11130 | |||
11131 | //------------------------------------------------------------------------------ |
||
11132 | |||
// Construct from an array of at least two SHORT values (copied as-is).
XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11141 | |||
11142 | //------------------------------------------------------------------------------ |
||
11143 | |||
// Construct from two floats, packed via XMStoreShort2 (16-bit
// signed integer conversion).
XMFINLINE _XMSHORT2::_XMSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}
||
11152 | |||
11153 | //------------------------------------------------------------------------------ |
||
11154 | |||
// Construct from an array of at least two floats, packed via XMStoreShort2.
XMFINLINE _XMSHORT2::_XMSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}
||
11162 | |||
11163 | //------------------------------------------------------------------------------ |
||
11164 | |||
// Member-wise copy assignment.
XMFINLINE _XMSHORT2& _XMSHORT2::operator=
(
    CONST _XMSHORT2& Short2
)
{
    x = Short2.x;
    y = Short2.y;
    return *this;
}
||
11174 | |||
11175 | /**************************************************************************** |
||
11176 | * |
||
11177 | * XMUSHORTN2 operators |
||
11178 | * |
||
11179 | ****************************************************************************/ |
||
11180 | |||
11181 | //------------------------------------------------------------------------------ |
||
11182 | |||
// Construct from an array of at least two USHORT values (copied as-is).
XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11191 | |||
11192 | //------------------------------------------------------------------------------ |
||
11193 | |||
// Construct from two floats, packed via XMStoreUShortN2 (unsigned
// normalized 16-bit conversion).
XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}
||
11202 | |||
11203 | //------------------------------------------------------------------------------ |
||
11204 | |||
// Construct from an array of at least two floats, packed via XMStoreUShortN2.
XMFINLINE _XMUSHORTN2::_XMUSHORTN2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}
||
11212 | |||
11213 | //------------------------------------------------------------------------------ |
||
11214 | |||
// Member-wise copy assignment.
XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
(
    CONST _XMUSHORTN2& UShortN2
)
{
    x = UShortN2.x;
    y = UShortN2.y;
    return *this;
}
||
11224 | |||
11225 | /**************************************************************************** |
||
11226 | * |
||
11227 | * XMUSHORT2 operators |
||
11228 | * |
||
11229 | ****************************************************************************/ |
||
11230 | |||
11231 | //------------------------------------------------------------------------------ |
||
11232 | |||
// Construct from an array of at least two USHORT values (copied as-is).
XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
}
||
11241 | |||
11242 | //------------------------------------------------------------------------------ |
||
11243 | |||
// Construct from two floats, packed via XMStoreUShort2 (16-bit
// unsigned integer conversion).
XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    FLOAT _x,
    FLOAT _y
)
{
    XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
}
||
11252 | |||
11253 | //------------------------------------------------------------------------------ |
||
11254 | |||
// Construct from an array of at least two floats, packed via XMStoreUShort2.
XMFINLINE _XMUSHORT2::_XMUSHORT2
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
}
||
11262 | |||
11263 | //------------------------------------------------------------------------------ |
||
11264 | |||
// Member-wise copy assignment.
XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
(
    CONST _XMUSHORT2& UShort2
)
{
    x = UShort2.x;
    y = UShort2.y;
    return *this;
}
||
11274 | |||
11275 | /**************************************************************************** |
||
11276 | * |
||
11277 | * XMFLOAT3 operators |
||
11278 | * |
||
11279 | ****************************************************************************/ |
||
11280 | |||
11281 | //------------------------------------------------------------------------------ |
||
11282 | |||
// Construct from an array of at least three floats:
// (pArray[0], pArray[1], pArray[2]).
XMFINLINE _XMFLOAT3::_XMFLOAT3
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}
||
11292 | |||
11293 | //------------------------------------------------------------------------------ |
||
11294 | |||
// Member-wise copy assignment.
XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
(
    CONST _XMFLOAT3& Float3
)
{
    x = Float3.x;
    y = Float3.y;
    z = Float3.z;
    return *this;
}
||
11305 | |||
11306 | /**************************************************************************** |
||
11307 | * |
||
11308 | * XMHENDN3 operators |
||
11309 | * |
||
11310 | ****************************************************************************/ |
||
11311 | |||
11312 | //------------------------------------------------------------------------------ |
||
11313 | |||
// Construct from three floats, packed via XMStoreHenDN3 into the
// struct's single packed UINT.
XMFINLINE _XMHENDN3::_XMHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}
||
11323 | |||
11324 | //------------------------------------------------------------------------------ |
||
11325 | |||
// Construct from an array of at least three floats, packed via XMStoreHenDN3.
XMFINLINE _XMHENDN3::_XMHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}
||
11333 | |||
11334 | //------------------------------------------------------------------------------ |
||
11335 | |||
// Copy assignment: copies the packed UINT representation directly.
XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST _XMHENDN3& HenDN3
)
{
    v = HenDN3.v;
    return *this;
}
||
11344 | |||
11345 | //------------------------------------------------------------------------------ |
||
11346 | |||
// Assign from an already-packed UINT value (no conversion or validation).
XMFINLINE _XMHENDN3& _XMHENDN3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
||
11355 | |||
11356 | /**************************************************************************** |
||
11357 | * |
||
11358 | * XMHEND3 operators |
||
11359 | * |
||
11360 | ****************************************************************************/ |
||
11361 | |||
11362 | //------------------------------------------------------------------------------ |
||
11363 | |||
// Construct from three floats, packed via XMStoreHenD3 into the
// struct's single packed UINT.
XMFINLINE _XMHEND3::_XMHEND3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
}
||
11373 | |||
11374 | //------------------------------------------------------------------------------ |
||
11375 | |||
// Construct from an array of at least three floats, packed via XMStoreHenD3.
XMFINLINE _XMHEND3::_XMHEND3
(
    CONST FLOAT* pArray
)
{
    XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}
||
11383 | |||
11384 | //------------------------------------------------------------------------------ |
||
11385 | |||
// Copy assignment: copies the packed UINT representation directly.
XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST _XMHEND3& HenD3
)
{
    v = HenD3.v;
    return *this;
}
||
11394 | |||
11395 | //------------------------------------------------------------------------------ |
||
11396 | |||
// Assign from an already-packed UINT value (no conversion or validation).
XMFINLINE _XMHEND3& _XMHEND3::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
||
11405 | |||
11406 | /**************************************************************************** |
||
11407 | * |
||
11408 | * XMUHENDN3 operators |
||
11409 | * |
||
11410 | ****************************************************************************/ |
||
11411 | |||
11412 | //------------------------------------------------------------------------------ |
||
11413 | |||
// Construct from three floats, packed via XMStoreUHenDN3 into the
// struct's single packed UINT.
XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
}
||
11423 | |||
11424 | //------------------------------------------------------------------------------ |
||
11425 | |||
// Construct from an array of at least three floats, packed via XMStoreUHenDN3.
XMFINLINE _XMUHENDN3::_XMUHENDN3
(
    CONST FLOAT* pArray
)
{
    XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
}
||
11433 | |||
11434 | //------------------------------------------------------------------------------ |
||
11435 | |||
// Copy assignment: copies the packed UINT representation directly.
XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
(
    CONST _XMUHENDN3& UHenDN3
)
{
    v = UHenDN3.v;
    return *this;
}
||
11444 | |||
11445 | //------------------------------------------------------------------------------ |
||
11446 | |||
11447 | XMFINLINE _XMUHENDN3& _XMUHENDN3::operator= |
||
11448 | ( |
||
11449 | CONST UINT Packed |
||
11450 | ) |
||
11451 | { |
||
11452 | v = Packed; |
||
11453 | return *this; |
||
11454 | } |
||
11455 | |||
11456 | /**************************************************************************** |
||
11457 | * |
||
11458 | * XMUHEND3 operators |
||
11459 | * |
||
11460 | ****************************************************************************/ |
||
11461 | |||
11462 | //------------------------------------------------------------------------------ |
||
11463 | |||
11464 | XMFINLINE _XMUHEND3::_XMUHEND3 |
||
11465 | ( |
||
11466 | FLOAT _x, |
||
11467 | FLOAT _y, |
||
11468 | FLOAT _z |
||
11469 | ) |
||
11470 | { |
||
11471 | XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f)); |
||
11472 | } |
||
11473 | |||
11474 | //------------------------------------------------------------------------------ |
||
11475 | |||
11476 | XMFINLINE _XMUHEND3::_XMUHEND3 |
||
11477 | ( |
||
11478 | CONST FLOAT* pArray |
||
11479 | ) |
||
11480 | { |
||
11481 | XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray)); |
||
11482 | } |
||
11483 | |||
11484 | //------------------------------------------------------------------------------ |
||
11485 | |||
11486 | XMFINLINE _XMUHEND3& _XMUHEND3::operator= |
||
11487 | ( |
||
11488 | CONST _XMUHEND3& UHenD3 |
||
11489 | ) |
||
11490 | { |
||
11491 | v = UHenD3.v; |
||
11492 | return *this; |
||
11493 | } |
||
11494 | |||
11495 | //------------------------------------------------------------------------------ |
||
11496 | |||
11497 | XMFINLINE _XMUHEND3& _XMUHEND3::operator= |
||
11498 | ( |
||
11499 | CONST UINT Packed |
||
11500 | ) |
||
11501 | { |
||
11502 | v = Packed; |
||
11503 | return *this; |
||
11504 | } |
||
11505 | |||
11506 | /**************************************************************************** |
||
11507 | * |
||
11508 | * XMDHENN3 operators |
||
11509 | * |
||
11510 | ****************************************************************************/ |
||
11511 | |||
11512 | //------------------------------------------------------------------------------ |
||
11513 | |||
11514 | XMFINLINE _XMDHENN3::_XMDHENN3 |
||
11515 | ( |
||
11516 | FLOAT _x, |
||
11517 | FLOAT _y, |
||
11518 | FLOAT _z |
||
11519 | ) |
||
11520 | { |
||
11521 | XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f)); |
||
11522 | } |
||
11523 | |||
11524 | //------------------------------------------------------------------------------ |
||
11525 | |||
11526 | XMFINLINE _XMDHENN3::_XMDHENN3 |
||
11527 | ( |
||
11528 | CONST FLOAT* pArray |
||
11529 | ) |
||
11530 | { |
||
11531 | XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); |
||
11532 | } |
||
11533 | |||
11534 | //------------------------------------------------------------------------------ |
||
11535 | |||
11536 | XMFINLINE _XMDHENN3& _XMDHENN3::operator= |
||
11537 | ( |
||
11538 | CONST _XMDHENN3& DHenN3 |
||
11539 | ) |
||
11540 | { |
||
11541 | v = DHenN3.v; |
||
11542 | return *this; |
||
11543 | } |
||
11544 | |||
11545 | //------------------------------------------------------------------------------ |
||
11546 | |||
11547 | XMFINLINE _XMDHENN3& _XMDHENN3::operator= |
||
11548 | ( |
||
11549 | CONST UINT Packed |
||
11550 | ) |
||
11551 | { |
||
11552 | v = Packed; |
||
11553 | return *this; |
||
11554 | } |
||
11555 | |||
11556 | /**************************************************************************** |
||
11557 | * |
||
11558 | * XMDHEN3 operators |
||
11559 | * |
||
11560 | ****************************************************************************/ |
||
11561 | |||
11562 | //------------------------------------------------------------------------------ |
||
11563 | |||
11564 | XMFINLINE _XMDHEN3::_XMDHEN3 |
||
11565 | ( |
||
11566 | FLOAT _x, |
||
11567 | FLOAT _y, |
||
11568 | FLOAT _z |
||
11569 | ) |
||
11570 | { |
||
11571 | XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f)); |
||
11572 | } |
||
11573 | |||
11574 | //------------------------------------------------------------------------------ |
||
11575 | |||
11576 | XMFINLINE _XMDHEN3::_XMDHEN3 |
||
11577 | ( |
||
11578 | CONST FLOAT* pArray |
||
11579 | ) |
||
11580 | { |
||
11581 | XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray)); |
||
11582 | } |
||
11583 | |||
11584 | //------------------------------------------------------------------------------ |
||
11585 | |||
11586 | XMFINLINE _XMDHEN3& _XMDHEN3::operator= |
||
11587 | ( |
||
11588 | CONST _XMDHEN3& DHen3 |
||
11589 | ) |
||
11590 | { |
||
11591 | v = DHen3.v; |
||
11592 | return *this; |
||
11593 | } |
||
11594 | |||
11595 | //------------------------------------------------------------------------------ |
||
11596 | |||
11597 | XMFINLINE _XMDHEN3& _XMDHEN3::operator= |
||
11598 | ( |
||
11599 | CONST UINT Packed |
||
11600 | ) |
||
11601 | { |
||
11602 | v = Packed; |
||
11603 | return *this; |
||
11604 | } |
||
11605 | |||
11606 | /**************************************************************************** |
||
11607 | * |
||
11608 | * XMUDHENN3 operators |
||
11609 | * |
||
11610 | ****************************************************************************/ |
||
11611 | |||
11612 | //------------------------------------------------------------------------------ |
||
11613 | |||
11614 | XMFINLINE _XMUDHENN3::_XMUDHENN3 |
||
11615 | ( |
||
11616 | FLOAT _x, |
||
11617 | FLOAT _y, |
||
11618 | FLOAT _z |
||
11619 | ) |
||
11620 | { |
||
11621 | XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f)); |
||
11622 | } |
||
11623 | |||
11624 | //------------------------------------------------------------------------------ |
||
11625 | |||
11626 | XMFINLINE _XMUDHENN3::_XMUDHENN3 |
||
11627 | ( |
||
11628 | CONST FLOAT* pArray |
||
11629 | ) |
||
11630 | { |
||
11631 | XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray)); |
||
11632 | } |
||
11633 | |||
11634 | //------------------------------------------------------------------------------ |
||
11635 | |||
11636 | XMFINLINE _XMUDHENN3& _XMUDHENN3::operator= |
||
11637 | ( |
||
11638 | CONST _XMUDHENN3& UDHenN3 |
||
11639 | ) |
||
11640 | { |
||
11641 | v = UDHenN3.v; |
||
11642 | return *this; |
||
11643 | } |
||
11644 | |||
11645 | //------------------------------------------------------------------------------ |
||
11646 | |||
11647 | XMFINLINE _XMUDHENN3& _XMUDHENN3::operator= |
||
11648 | ( |
||
11649 | CONST UINT Packed |
||
11650 | ) |
||
11651 | { |
||
11652 | v = Packed; |
||
11653 | return *this; |
||
11654 | } |
||
11655 | |||
11656 | /**************************************************************************** |
||
11657 | * |
||
11658 | * XMUDHEN3 operators |
||
11659 | * |
||
11660 | ****************************************************************************/ |
||
11661 | |||
11662 | //------------------------------------------------------------------------------ |
||
11663 | |||
11664 | XMFINLINE _XMUDHEN3::_XMUDHEN3 |
||
11665 | ( |
||
11666 | FLOAT _x, |
||
11667 | FLOAT _y, |
||
11668 | FLOAT _z |
||
11669 | ) |
||
11670 | { |
||
11671 | XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f)); |
||
11672 | } |
||
11673 | |||
11674 | //------------------------------------------------------------------------------ |
||
11675 | |||
11676 | XMFINLINE _XMUDHEN3::_XMUDHEN3 |
||
11677 | ( |
||
11678 | CONST FLOAT* pArray |
||
11679 | ) |
||
11680 | { |
||
11681 | XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray)); |
||
11682 | } |
||
11683 | |||
11684 | //------------------------------------------------------------------------------ |
||
11685 | |||
11686 | XMFINLINE _XMUDHEN3& _XMUDHEN3::operator= |
||
11687 | ( |
||
11688 | CONST _XMUDHEN3& UDHen3 |
||
11689 | ) |
||
11690 | { |
||
11691 | v = UDHen3.v; |
||
11692 | return *this; |
||
11693 | } |
||
11694 | |||
11695 | //------------------------------------------------------------------------------ |
||
11696 | |||
11697 | XMFINLINE _XMUDHEN3& _XMUDHEN3::operator= |
||
11698 | ( |
||
11699 | CONST UINT Packed |
||
11700 | ) |
||
11701 | { |
||
11702 | v = Packed; |
||
11703 | return *this; |
||
11704 | } |
||
11705 | |||
11706 | /**************************************************************************** |
||
11707 | * |
||
11708 | * XMU565 operators |
||
11709 | * |
||
11710 | ****************************************************************************/ |
||
11711 | |||
11712 | XMFINLINE _XMU565::_XMU565 |
||
11713 | ( |
||
11714 | CONST CHAR *pArray |
||
11715 | ) |
||
11716 | { |
||
11717 | x = pArray[0]; |
||
11718 | y = pArray[1]; |
||
11719 | z = pArray[2]; |
||
11720 | } |
||
11721 | |||
11722 | XMFINLINE _XMU565::_XMU565 |
||
11723 | ( |
||
11724 | FLOAT _x, |
||
11725 | FLOAT _y, |
||
11726 | FLOAT _z |
||
11727 | ) |
||
11728 | { |
||
11729 | XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f )); |
||
11730 | } |
||
11731 | |||
11732 | XMFINLINE _XMU565::_XMU565 |
||
11733 | ( |
||
11734 | CONST FLOAT *pArray |
||
11735 | ) |
||
11736 | { |
||
11737 | XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray )); |
||
11738 | } |
||
11739 | |||
11740 | XMFINLINE _XMU565& _XMU565::operator= |
||
11741 | ( |
||
11742 | CONST _XMU565& U565 |
||
11743 | ) |
||
11744 | { |
||
11745 | v = U565.v; |
||
11746 | return *this; |
||
11747 | } |
||
11748 | |||
11749 | XMFINLINE _XMU565& _XMU565::operator= |
||
11750 | ( |
||
11751 | CONST USHORT Packed |
||
11752 | ) |
||
11753 | { |
||
11754 | v = Packed; |
||
11755 | return *this; |
||
11756 | } |
||
11757 | |||
11758 | /**************************************************************************** |
||
11759 | * |
||
11760 | * XMFLOAT3PK operators |
||
11761 | * |
||
11762 | ****************************************************************************/ |
||
11763 | |||
11764 | XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK |
||
11765 | ( |
||
11766 | FLOAT _x, |
||
11767 | FLOAT _y, |
||
11768 | FLOAT _z |
||
11769 | ) |
||
11770 | { |
||
11771 | XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f )); |
||
11772 | } |
||
11773 | |||
11774 | XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK |
||
11775 | ( |
||
11776 | CONST FLOAT *pArray |
||
11777 | ) |
||
11778 | { |
||
11779 | XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray )); |
||
11780 | } |
||
11781 | |||
11782 | XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator= |
||
11783 | ( |
||
11784 | CONST _XMFLOAT3PK& float3pk |
||
11785 | ) |
||
11786 | { |
||
11787 | v = float3pk.v; |
||
11788 | return *this; |
||
11789 | } |
||
11790 | |||
11791 | XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator= |
||
11792 | ( |
||
11793 | CONST UINT Packed |
||
11794 | ) |
||
11795 | { |
||
11796 | v = Packed; |
||
11797 | return *this; |
||
11798 | } |
||
11799 | |||
11800 | /**************************************************************************** |
||
11801 | * |
||
11802 | * XMFLOAT3SE operators |
||
11803 | * |
||
11804 | ****************************************************************************/ |
||
11805 | |||
11806 | XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE |
||
11807 | ( |
||
11808 | FLOAT _x, |
||
11809 | FLOAT _y, |
||
11810 | FLOAT _z |
||
11811 | ) |
||
11812 | { |
||
11813 | XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f )); |
||
11814 | } |
||
11815 | |||
11816 | XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE |
||
11817 | ( |
||
11818 | CONST FLOAT *pArray |
||
11819 | ) |
||
11820 | { |
||
11821 | XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray )); |
||
11822 | } |
||
11823 | |||
11824 | XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator= |
||
11825 | ( |
||
11826 | CONST _XMFLOAT3SE& float3se |
||
11827 | ) |
||
11828 | { |
||
11829 | v = float3se.v; |
||
11830 | return *this; |
||
11831 | } |
||
11832 | |||
11833 | XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator= |
||
11834 | ( |
||
11835 | CONST UINT Packed |
||
11836 | ) |
||
11837 | { |
||
11838 | v = Packed; |
||
11839 | return *this; |
||
11840 | } |
||
11841 | |||
11842 | /**************************************************************************** |
||
11843 | * |
||
11844 | * XMFLOAT4 operators |
||
11845 | * |
||
11846 | ****************************************************************************/ |
||
11847 | |||
11848 | //------------------------------------------------------------------------------ |
||
11849 | |||
11850 | XMFINLINE _XMFLOAT4::_XMFLOAT4 |
||
11851 | ( |
||
11852 | CONST FLOAT* pArray |
||
11853 | ) |
||
11854 | { |
||
11855 | x = pArray[0]; |
||
11856 | y = pArray[1]; |
||
11857 | z = pArray[2]; |
||
11858 | w = pArray[3]; |
||
11859 | } |
||
11860 | |||
11861 | //------------------------------------------------------------------------------ |
||
11862 | |||
11863 | XMFINLINE _XMFLOAT4& _XMFLOAT4::operator= |
||
11864 | ( |
||
11865 | CONST _XMFLOAT4& Float4 |
||
11866 | ) |
||
11867 | { |
||
11868 | x = Float4.x; |
||
11869 | y = Float4.y; |
||
11870 | z = Float4.z; |
||
11871 | w = Float4.w; |
||
11872 | return *this; |
||
11873 | } |
||
11874 | |||
11875 | /**************************************************************************** |
||
11876 | * |
||
11877 | * XMHALF4 operators |
||
11878 | * |
||
11879 | ****************************************************************************/ |
||
11880 | |||
11881 | //------------------------------------------------------------------------------ |
||
11882 | |||
11883 | XMFINLINE _XMHALF4::_XMHALF4 |
||
11884 | ( |
||
11885 | CONST HALF* pArray |
||
11886 | ) |
||
11887 | { |
||
11888 | x = pArray[0]; |
||
11889 | y = pArray[1]; |
||
11890 | z = pArray[2]; |
||
11891 | w = pArray[3]; |
||
11892 | } |
||
11893 | |||
11894 | //------------------------------------------------------------------------------ |
||
11895 | |||
11896 | XMFINLINE _XMHALF4::_XMHALF4 |
||
11897 | ( |
||
11898 | FLOAT _x, |
||
11899 | FLOAT _y, |
||
11900 | FLOAT _z, |
||
11901 | FLOAT _w |
||
11902 | ) |
||
11903 | { |
||
11904 | x = XMConvertFloatToHalf(_x); |
||
11905 | y = XMConvertFloatToHalf(_y); |
||
11906 | z = XMConvertFloatToHalf(_z); |
||
11907 | w = XMConvertFloatToHalf(_w); |
||
11908 | } |
||
11909 | |||
11910 | //------------------------------------------------------------------------------ |
||
11911 | |||
11912 | XMFINLINE _XMHALF4::_XMHALF4 |
||
11913 | ( |
||
11914 | CONST FLOAT* pArray |
||
11915 | ) |
||
11916 | { |
||
11917 | XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4); |
||
11918 | } |
||
11919 | |||
11920 | //------------------------------------------------------------------------------ |
||
11921 | |||
11922 | XMFINLINE _XMHALF4& _XMHALF4::operator= |
||
11923 | ( |
||
11924 | CONST _XMHALF4& Half4 |
||
11925 | ) |
||
11926 | { |
||
11927 | x = Half4.x; |
||
11928 | y = Half4.y; |
||
11929 | z = Half4.z; |
||
11930 | w = Half4.w; |
||
11931 | return *this; |
||
11932 | } |
||
11933 | |||
11934 | /**************************************************************************** |
||
11935 | * |
||
11936 | * XMSHORTN4 operators |
||
11937 | * |
||
11938 | ****************************************************************************/ |
||
11939 | |||
11940 | //------------------------------------------------------------------------------ |
||
11941 | |||
11942 | XMFINLINE _XMSHORTN4::_XMSHORTN4 |
||
11943 | ( |
||
11944 | CONST SHORT* pArray |
||
11945 | ) |
||
11946 | { |
||
11947 | x = pArray[0]; |
||
11948 | y = pArray[1]; |
||
11949 | z = pArray[2]; |
||
11950 | w = pArray[3]; |
||
11951 | } |
||
11952 | |||
11953 | //------------------------------------------------------------------------------ |
||
11954 | |||
11955 | XMFINLINE _XMSHORTN4::_XMSHORTN4 |
||
11956 | ( |
||
11957 | FLOAT _x, |
||
11958 | FLOAT _y, |
||
11959 | FLOAT _z, |
||
11960 | FLOAT _w |
||
11961 | ) |
||
11962 | { |
||
11963 | XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
11964 | } |
||
11965 | |||
11966 | //------------------------------------------------------------------------------ |
||
11967 | |||
11968 | XMFINLINE _XMSHORTN4::_XMSHORTN4 |
||
11969 | ( |
||
11970 | CONST FLOAT* pArray |
||
11971 | ) |
||
11972 | { |
||
11973 | XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
11974 | } |
||
11975 | |||
11976 | //------------------------------------------------------------------------------ |
||
11977 | |||
11978 | XMFINLINE _XMSHORTN4& _XMSHORTN4::operator= |
||
11979 | ( |
||
11980 | CONST _XMSHORTN4& ShortN4 |
||
11981 | ) |
||
11982 | { |
||
11983 | x = ShortN4.x; |
||
11984 | y = ShortN4.y; |
||
11985 | z = ShortN4.z; |
||
11986 | w = ShortN4.w; |
||
11987 | return *this; |
||
11988 | } |
||
11989 | |||
11990 | /**************************************************************************** |
||
11991 | * |
||
11992 | * XMSHORT4 operators |
||
11993 | * |
||
11994 | ****************************************************************************/ |
||
11995 | |||
11996 | //------------------------------------------------------------------------------ |
||
11997 | |||
11998 | XMFINLINE _XMSHORT4::_XMSHORT4 |
||
11999 | ( |
||
12000 | CONST SHORT* pArray |
||
12001 | ) |
||
12002 | { |
||
12003 | x = pArray[0]; |
||
12004 | y = pArray[1]; |
||
12005 | z = pArray[2]; |
||
12006 | w = pArray[3]; |
||
12007 | } |
||
12008 | |||
12009 | //------------------------------------------------------------------------------ |
||
12010 | |||
12011 | XMFINLINE _XMSHORT4::_XMSHORT4 |
||
12012 | ( |
||
12013 | FLOAT _x, |
||
12014 | FLOAT _y, |
||
12015 | FLOAT _z, |
||
12016 | FLOAT _w |
||
12017 | ) |
||
12018 | { |
||
12019 | XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12020 | } |
||
12021 | |||
12022 | //------------------------------------------------------------------------------ |
||
12023 | |||
12024 | XMFINLINE _XMSHORT4::_XMSHORT4 |
||
12025 | ( |
||
12026 | CONST FLOAT* pArray |
||
12027 | ) |
||
12028 | { |
||
12029 | XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12030 | } |
||
12031 | |||
12032 | //------------------------------------------------------------------------------ |
||
12033 | |||
12034 | XMFINLINE _XMSHORT4& _XMSHORT4::operator= |
||
12035 | ( |
||
12036 | CONST _XMSHORT4& Short4 |
||
12037 | ) |
||
12038 | { |
||
12039 | x = Short4.x; |
||
12040 | y = Short4.y; |
||
12041 | z = Short4.z; |
||
12042 | w = Short4.w; |
||
12043 | return *this; |
||
12044 | } |
||
12045 | |||
12046 | /**************************************************************************** |
||
12047 | * |
||
12048 | * XMUSHORTN4 operators |
||
12049 | * |
||
12050 | ****************************************************************************/ |
||
12051 | |||
12052 | //------------------------------------------------------------------------------ |
||
12053 | |||
12054 | XMFINLINE _XMUSHORTN4::_XMUSHORTN4 |
||
12055 | ( |
||
12056 | CONST USHORT* pArray |
||
12057 | ) |
||
12058 | { |
||
12059 | x = pArray[0]; |
||
12060 | y = pArray[1]; |
||
12061 | z = pArray[2]; |
||
12062 | w = pArray[3]; |
||
12063 | } |
||
12064 | |||
12065 | //------------------------------------------------------------------------------ |
||
12066 | |||
12067 | XMFINLINE _XMUSHORTN4::_XMUSHORTN4 |
||
12068 | ( |
||
12069 | FLOAT _x, |
||
12070 | FLOAT _y, |
||
12071 | FLOAT _z, |
||
12072 | FLOAT _w |
||
12073 | ) |
||
12074 | { |
||
12075 | XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12076 | } |
||
12077 | |||
12078 | //------------------------------------------------------------------------------ |
||
12079 | |||
12080 | XMFINLINE _XMUSHORTN4::_XMUSHORTN4 |
||
12081 | ( |
||
12082 | CONST FLOAT* pArray |
||
12083 | ) |
||
12084 | { |
||
12085 | XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12086 | } |
||
12087 | |||
12088 | //------------------------------------------------------------------------------ |
||
12089 | |||
12090 | XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator= |
||
12091 | ( |
||
12092 | CONST _XMUSHORTN4& UShortN4 |
||
12093 | ) |
||
12094 | { |
||
12095 | x = UShortN4.x; |
||
12096 | y = UShortN4.y; |
||
12097 | z = UShortN4.z; |
||
12098 | w = UShortN4.w; |
||
12099 | return *this; |
||
12100 | } |
||
12101 | |||
12102 | /**************************************************************************** |
||
12103 | * |
||
12104 | * XMUSHORT4 operators |
||
12105 | * |
||
12106 | ****************************************************************************/ |
||
12107 | |||
12108 | //------------------------------------------------------------------------------ |
||
12109 | |||
12110 | XMFINLINE _XMUSHORT4::_XMUSHORT4 |
||
12111 | ( |
||
12112 | CONST USHORT* pArray |
||
12113 | ) |
||
12114 | { |
||
12115 | x = pArray[0]; |
||
12116 | y = pArray[1]; |
||
12117 | z = pArray[2]; |
||
12118 | w = pArray[3]; |
||
12119 | } |
||
12120 | |||
12121 | //------------------------------------------------------------------------------ |
||
12122 | |||
12123 | XMFINLINE _XMUSHORT4::_XMUSHORT4 |
||
12124 | ( |
||
12125 | FLOAT _x, |
||
12126 | FLOAT _y, |
||
12127 | FLOAT _z, |
||
12128 | FLOAT _w |
||
12129 | ) |
||
12130 | { |
||
12131 | XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12132 | } |
||
12133 | |||
12134 | //------------------------------------------------------------------------------ |
||
12135 | |||
12136 | XMFINLINE _XMUSHORT4::_XMUSHORT4 |
||
12137 | ( |
||
12138 | CONST FLOAT* pArray |
||
12139 | ) |
||
12140 | { |
||
12141 | XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12142 | } |
||
12143 | |||
12144 | //------------------------------------------------------------------------------ |
||
12145 | |||
12146 | XMFINLINE _XMUSHORT4& _XMUSHORT4::operator= |
||
12147 | ( |
||
12148 | CONST _XMUSHORT4& UShort4 |
||
12149 | ) |
||
12150 | { |
||
12151 | x = UShort4.x; |
||
12152 | y = UShort4.y; |
||
12153 | z = UShort4.z; |
||
12154 | w = UShort4.w; |
||
12155 | return *this; |
||
12156 | } |
||
12157 | |||
12158 | /**************************************************************************** |
||
12159 | * |
||
12160 | * XMXDECN4 operators |
||
12161 | * |
||
12162 | ****************************************************************************/ |
||
12163 | |||
12164 | //------------------------------------------------------------------------------ |
||
12165 | |||
12166 | XMFINLINE _XMXDECN4::_XMXDECN4 |
||
12167 | ( |
||
12168 | FLOAT _x, |
||
12169 | FLOAT _y, |
||
12170 | FLOAT _z, |
||
12171 | FLOAT _w |
||
12172 | ) |
||
12173 | { |
||
12174 | XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12175 | } |
||
12176 | |||
12177 | //------------------------------------------------------------------------------ |
||
12178 | |||
12179 | XMFINLINE _XMXDECN4::_XMXDECN4 |
||
12180 | ( |
||
12181 | CONST FLOAT* pArray |
||
12182 | ) |
||
12183 | { |
||
12184 | XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12185 | } |
||
12186 | |||
12187 | //------------------------------------------------------------------------------ |
||
12188 | |||
12189 | XMFINLINE _XMXDECN4& _XMXDECN4::operator= |
||
12190 | ( |
||
12191 | CONST _XMXDECN4& XDecN4 |
||
12192 | ) |
||
12193 | { |
||
12194 | v = XDecN4.v; |
||
12195 | return *this; |
||
12196 | } |
||
12197 | |||
12198 | //------------------------------------------------------------------------------ |
||
12199 | |||
12200 | XMFINLINE _XMXDECN4& _XMXDECN4::operator= |
||
12201 | ( |
||
12202 | CONST UINT Packed |
||
12203 | ) |
||
12204 | { |
||
12205 | v = Packed; |
||
12206 | return *this; |
||
12207 | } |
||
12208 | |||
12209 | /**************************************************************************** |
||
12210 | * |
||
12211 | * XMXDEC4 operators |
||
12212 | * |
||
12213 | ****************************************************************************/ |
||
12214 | |||
12215 | //------------------------------------------------------------------------------ |
||
12216 | |||
12217 | XMFINLINE _XMXDEC4::_XMXDEC4 |
||
12218 | ( |
||
12219 | FLOAT _x, |
||
12220 | FLOAT _y, |
||
12221 | FLOAT _z, |
||
12222 | FLOAT _w |
||
12223 | ) |
||
12224 | { |
||
12225 | XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12226 | } |
||
12227 | |||
12228 | //------------------------------------------------------------------------------ |
||
12229 | |||
12230 | XMFINLINE _XMXDEC4::_XMXDEC4 |
||
12231 | ( |
||
12232 | CONST FLOAT* pArray |
||
12233 | ) |
||
12234 | { |
||
12235 | XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12236 | } |
||
12237 | |||
12238 | //------------------------------------------------------------------------------ |
||
12239 | |||
12240 | XMFINLINE _XMXDEC4& _XMXDEC4::operator= |
||
12241 | ( |
||
12242 | CONST _XMXDEC4& XDec4 |
||
12243 | ) |
||
12244 | { |
||
12245 | v = XDec4.v; |
||
12246 | return *this; |
||
12247 | } |
||
12248 | |||
12249 | //------------------------------------------------------------------------------ |
||
12250 | |||
12251 | XMFINLINE _XMXDEC4& _XMXDEC4::operator= |
||
12252 | ( |
||
12253 | CONST UINT Packed |
||
12254 | ) |
||
12255 | { |
||
12256 | v = Packed; |
||
12257 | return *this; |
||
12258 | } |
||
12259 | |||
12260 | /**************************************************************************** |
||
12261 | * |
||
12262 | * XMDECN4 operators |
||
12263 | * |
||
12264 | ****************************************************************************/ |
||
12265 | |||
12266 | //------------------------------------------------------------------------------ |
||
12267 | |||
12268 | XMFINLINE _XMDECN4::_XMDECN4 |
||
12269 | ( |
||
12270 | FLOAT _x, |
||
12271 | FLOAT _y, |
||
12272 | FLOAT _z, |
||
12273 | FLOAT _w |
||
12274 | ) |
||
12275 | { |
||
12276 | XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12277 | } |
||
12278 | |||
12279 | //------------------------------------------------------------------------------ |
||
12280 | |||
12281 | XMFINLINE _XMDECN4::_XMDECN4 |
||
12282 | ( |
||
12283 | CONST FLOAT* pArray |
||
12284 | ) |
||
12285 | { |
||
12286 | XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12287 | } |
||
12288 | |||
12289 | //------------------------------------------------------------------------------ |
||
12290 | |||
12291 | XMFINLINE _XMDECN4& _XMDECN4::operator= |
||
12292 | ( |
||
12293 | CONST _XMDECN4& DecN4 |
||
12294 | ) |
||
12295 | { |
||
12296 | v = DecN4.v; |
||
12297 | return *this; |
||
12298 | } |
||
12299 | |||
12300 | //------------------------------------------------------------------------------ |
||
12301 | |||
12302 | XMFINLINE _XMDECN4& _XMDECN4::operator= |
||
12303 | ( |
||
12304 | CONST UINT Packed |
||
12305 | ) |
||
12306 | { |
||
12307 | v = Packed; |
||
12308 | return *this; |
||
12309 | } |
||
12310 | |||
12311 | /**************************************************************************** |
||
12312 | * |
||
12313 | * XMDEC4 operators |
||
12314 | * |
||
12315 | ****************************************************************************/ |
||
12316 | |||
12317 | //------------------------------------------------------------------------------ |
||
12318 | |||
12319 | XMFINLINE _XMDEC4::_XMDEC4 |
||
12320 | ( |
||
12321 | FLOAT _x, |
||
12322 | FLOAT _y, |
||
12323 | FLOAT _z, |
||
12324 | FLOAT _w |
||
12325 | ) |
||
12326 | { |
||
12327 | XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12328 | } |
||
12329 | |||
12330 | //------------------------------------------------------------------------------ |
||
12331 | |||
12332 | XMFINLINE _XMDEC4::_XMDEC4 |
||
12333 | ( |
||
12334 | CONST FLOAT* pArray |
||
12335 | ) |
||
12336 | { |
||
12337 | XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12338 | } |
||
12339 | |||
12340 | //------------------------------------------------------------------------------ |
||
12341 | |||
12342 | XMFINLINE _XMDEC4& _XMDEC4::operator= |
||
12343 | ( |
||
12344 | CONST _XMDEC4& Dec4 |
||
12345 | ) |
||
12346 | { |
||
12347 | v = Dec4.v; |
||
12348 | return *this; |
||
12349 | } |
||
12350 | |||
12351 | //------------------------------------------------------------------------------ |
||
12352 | |||
12353 | XMFINLINE _XMDEC4& _XMDEC4::operator= |
||
12354 | ( |
||
12355 | CONST UINT Packed |
||
12356 | ) |
||
12357 | { |
||
12358 | v = Packed; |
||
12359 | return *this; |
||
12360 | } |
||
12361 | |||
12362 | /**************************************************************************** |
||
12363 | * |
||
12364 | * XMUDECN4 operators |
||
12365 | * |
||
12366 | ****************************************************************************/ |
||
12367 | |||
12368 | //------------------------------------------------------------------------------ |
||
12369 | |||
12370 | XMFINLINE _XMUDECN4::_XMUDECN4 |
||
12371 | ( |
||
12372 | FLOAT _x, |
||
12373 | FLOAT _y, |
||
12374 | FLOAT _z, |
||
12375 | FLOAT _w |
||
12376 | ) |
||
12377 | { |
||
12378 | XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12379 | } |
||
12380 | |||
12381 | //------------------------------------------------------------------------------ |
||
12382 | |||
12383 | XMFINLINE _XMUDECN4::_XMUDECN4 |
||
12384 | ( |
||
12385 | CONST FLOAT* pArray |
||
12386 | ) |
||
12387 | { |
||
12388 | XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12389 | } |
||
12390 | |||
12391 | //------------------------------------------------------------------------------ |
||
12392 | |||
12393 | XMFINLINE _XMUDECN4& _XMUDECN4::operator= |
||
12394 | ( |
||
12395 | CONST _XMUDECN4& UDecN4 |
||
12396 | ) |
||
12397 | { |
||
12398 | v = UDecN4.v; |
||
12399 | return *this; |
||
12400 | } |
||
12401 | |||
12402 | //------------------------------------------------------------------------------ |
||
12403 | |||
12404 | XMFINLINE _XMUDECN4& _XMUDECN4::operator= |
||
12405 | ( |
||
12406 | CONST UINT Packed |
||
12407 | ) |
||
12408 | { |
||
12409 | v = Packed; |
||
12410 | return *this; |
||
12411 | } |
||
12412 | |||
12413 | /**************************************************************************** |
||
12414 | * |
||
12415 | * XMUDEC4 operators |
||
12416 | * |
||
12417 | ****************************************************************************/ |
||
12418 | |||
12419 | //------------------------------------------------------------------------------ |
||
12420 | |||
12421 | XMFINLINE _XMUDEC4::_XMUDEC4 |
||
12422 | ( |
||
12423 | FLOAT _x, |
||
12424 | FLOAT _y, |
||
12425 | FLOAT _z, |
||
12426 | FLOAT _w |
||
12427 | ) |
||
12428 | { |
||
12429 | XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12430 | } |
||
12431 | |||
12432 | //------------------------------------------------------------------------------ |
||
12433 | |||
12434 | XMFINLINE _XMUDEC4::_XMUDEC4 |
||
12435 | ( |
||
12436 | CONST FLOAT* pArray |
||
12437 | ) |
||
12438 | { |
||
12439 | XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12440 | } |
||
12441 | |||
12442 | //------------------------------------------------------------------------------ |
||
12443 | |||
12444 | XMFINLINE _XMUDEC4& _XMUDEC4::operator= |
||
12445 | ( |
||
12446 | CONST _XMUDEC4& UDec4 |
||
12447 | ) |
||
12448 | { |
||
12449 | v = UDec4.v; |
||
12450 | return *this; |
||
12451 | } |
||
12452 | |||
12453 | //------------------------------------------------------------------------------ |
||
12454 | |||
12455 | XMFINLINE _XMUDEC4& _XMUDEC4::operator= |
||
12456 | ( |
||
12457 | CONST UINT Packed |
||
12458 | ) |
||
12459 | { |
||
12460 | v = Packed; |
||
12461 | return *this; |
||
12462 | } |
||
12463 | |||
12464 | /**************************************************************************** |
||
12465 | * |
||
12466 | * XMXICON4 operators |
||
12467 | * |
||
12468 | ****************************************************************************/ |
||
12469 | |||
12470 | //------------------------------------------------------------------------------ |
||
12471 | |||
12472 | XMFINLINE _XMXICON4::_XMXICON4 |
||
12473 | ( |
||
12474 | FLOAT _x, |
||
12475 | FLOAT _y, |
||
12476 | FLOAT _z, |
||
12477 | FLOAT _w |
||
12478 | ) |
||
12479 | { |
||
12480 | XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12481 | } |
||
12482 | |||
12483 | //------------------------------------------------------------------------------ |
||
12484 | |||
12485 | XMFINLINE _XMXICON4::_XMXICON4 |
||
12486 | ( |
||
12487 | CONST FLOAT* pArray |
||
12488 | ) |
||
12489 | { |
||
12490 | XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12491 | } |
||
12492 | |||
12493 | //------------------------------------------------------------------------------ |
||
12494 | |||
12495 | XMFINLINE _XMXICON4& _XMXICON4::operator= |
||
12496 | ( |
||
12497 | CONST _XMXICON4& XIcoN4 |
||
12498 | ) |
||
12499 | { |
||
12500 | v = XIcoN4.v; |
||
12501 | return *this; |
||
12502 | } |
||
12503 | |||
12504 | //------------------------------------------------------------------------------ |
||
12505 | |||
12506 | XMFINLINE _XMXICON4& _XMXICON4::operator= |
||
12507 | ( |
||
12508 | CONST UINT64 Packed |
||
12509 | ) |
||
12510 | { |
||
12511 | v = Packed; |
||
12512 | return *this; |
||
12513 | } |
||
12514 | |||
12515 | /**************************************************************************** |
||
12516 | * |
||
12517 | * XMXICO4 operators |
||
12518 | * |
||
12519 | ****************************************************************************/ |
||
12520 | |||
12521 | //------------------------------------------------------------------------------ |
||
12522 | |||
12523 | XMFINLINE _XMXICO4::_XMXICO4 |
||
12524 | ( |
||
12525 | FLOAT _x, |
||
12526 | FLOAT _y, |
||
12527 | FLOAT _z, |
||
12528 | FLOAT _w |
||
12529 | ) |
||
12530 | { |
||
12531 | XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12532 | } |
||
12533 | |||
12534 | //------------------------------------------------------------------------------ |
||
12535 | |||
12536 | XMFINLINE _XMXICO4::_XMXICO4 |
||
12537 | ( |
||
12538 | CONST FLOAT* pArray |
||
12539 | ) |
||
12540 | { |
||
12541 | XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12542 | } |
||
12543 | |||
12544 | //------------------------------------------------------------------------------ |
||
12545 | |||
12546 | XMFINLINE _XMXICO4& _XMXICO4::operator= |
||
12547 | ( |
||
12548 | CONST _XMXICO4& XIco4 |
||
12549 | ) |
||
12550 | { |
||
12551 | v = XIco4.v; |
||
12552 | return *this; |
||
12553 | } |
||
12554 | |||
12555 | //------------------------------------------------------------------------------ |
||
12556 | |||
12557 | XMFINLINE _XMXICO4& _XMXICO4::operator= |
||
12558 | ( |
||
12559 | CONST UINT64 Packed |
||
12560 | ) |
||
12561 | { |
||
12562 | v = Packed; |
||
12563 | return *this; |
||
12564 | } |
||
12565 | |||
12566 | /**************************************************************************** |
||
12567 | * |
||
12568 | * XMICON4 operators |
||
12569 | * |
||
12570 | ****************************************************************************/ |
||
12571 | |||
12572 | //------------------------------------------------------------------------------ |
||
12573 | |||
12574 | XMFINLINE _XMICON4::_XMICON4 |
||
12575 | ( |
||
12576 | FLOAT _x, |
||
12577 | FLOAT _y, |
||
12578 | FLOAT _z, |
||
12579 | FLOAT _w |
||
12580 | ) |
||
12581 | { |
||
12582 | XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12583 | } |
||
12584 | |||
12585 | //------------------------------------------------------------------------------ |
||
12586 | |||
12587 | XMFINLINE _XMICON4::_XMICON4 |
||
12588 | ( |
||
12589 | CONST FLOAT* pArray |
||
12590 | ) |
||
12591 | { |
||
12592 | XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12593 | } |
||
12594 | |||
12595 | //------------------------------------------------------------------------------ |
||
12596 | |||
12597 | XMFINLINE _XMICON4& _XMICON4::operator= |
||
12598 | ( |
||
12599 | CONST _XMICON4& IcoN4 |
||
12600 | ) |
||
12601 | { |
||
12602 | v = IcoN4.v; |
||
12603 | return *this; |
||
12604 | } |
||
12605 | |||
12606 | //------------------------------------------------------------------------------ |
||
12607 | |||
12608 | XMFINLINE _XMICON4& _XMICON4::operator= |
||
12609 | ( |
||
12610 | CONST UINT64 Packed |
||
12611 | ) |
||
12612 | { |
||
12613 | v = Packed; |
||
12614 | return *this; |
||
12615 | } |
||
12616 | |||
12617 | /**************************************************************************** |
||
12618 | * |
||
12619 | * XMICO4 operators |
||
12620 | * |
||
12621 | ****************************************************************************/ |
||
12622 | |||
12623 | //------------------------------------------------------------------------------ |
||
12624 | |||
12625 | XMFINLINE _XMICO4::_XMICO4 |
||
12626 | ( |
||
12627 | FLOAT _x, |
||
12628 | FLOAT _y, |
||
12629 | FLOAT _z, |
||
12630 | FLOAT _w |
||
12631 | ) |
||
12632 | { |
||
12633 | XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12634 | } |
||
12635 | |||
12636 | //------------------------------------------------------------------------------ |
||
12637 | |||
12638 | XMFINLINE _XMICO4::_XMICO4 |
||
12639 | ( |
||
12640 | CONST FLOAT* pArray |
||
12641 | ) |
||
12642 | { |
||
12643 | XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12644 | } |
||
12645 | |||
12646 | //------------------------------------------------------------------------------ |
||
12647 | |||
12648 | XMFINLINE _XMICO4& _XMICO4::operator= |
||
12649 | ( |
||
12650 | CONST _XMICO4& Ico4 |
||
12651 | ) |
||
12652 | { |
||
12653 | v = Ico4.v; |
||
12654 | return *this; |
||
12655 | } |
||
12656 | |||
12657 | //------------------------------------------------------------------------------ |
||
12658 | |||
12659 | XMFINLINE _XMICO4& _XMICO4::operator= |
||
12660 | ( |
||
12661 | CONST UINT64 Packed |
||
12662 | ) |
||
12663 | { |
||
12664 | v = Packed; |
||
12665 | return *this; |
||
12666 | } |
||
12667 | |||
12668 | /**************************************************************************** |
||
12669 | * |
||
12670 | * XMUICON4 operators |
||
12671 | * |
||
12672 | ****************************************************************************/ |
||
12673 | |||
12674 | //------------------------------------------------------------------------------ |
||
12675 | |||
12676 | XMFINLINE _XMUICON4::_XMUICON4 |
||
12677 | ( |
||
12678 | FLOAT _x, |
||
12679 | FLOAT _y, |
||
12680 | FLOAT _z, |
||
12681 | FLOAT _w |
||
12682 | ) |
||
12683 | { |
||
12684 | XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12685 | } |
||
12686 | |||
12687 | //------------------------------------------------------------------------------ |
||
12688 | |||
12689 | XMFINLINE _XMUICON4::_XMUICON4 |
||
12690 | ( |
||
12691 | CONST FLOAT* pArray |
||
12692 | ) |
||
12693 | { |
||
12694 | XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12695 | } |
||
12696 | |||
12697 | //------------------------------------------------------------------------------ |
||
12698 | |||
12699 | XMFINLINE _XMUICON4& _XMUICON4::operator= |
||
12700 | ( |
||
12701 | CONST _XMUICON4& UIcoN4 |
||
12702 | ) |
||
12703 | { |
||
12704 | v = UIcoN4.v; |
||
12705 | return *this; |
||
12706 | } |
||
12707 | |||
12708 | //------------------------------------------------------------------------------ |
||
12709 | |||
12710 | XMFINLINE _XMUICON4& _XMUICON4::operator= |
||
12711 | ( |
||
12712 | CONST UINT64 Packed |
||
12713 | ) |
||
12714 | { |
||
12715 | v = Packed; |
||
12716 | return *this; |
||
12717 | } |
||
12718 | |||
12719 | /**************************************************************************** |
||
12720 | * |
||
12721 | * XMUICO4 operators |
||
12722 | * |
||
12723 | ****************************************************************************/ |
||
12724 | |||
12725 | //------------------------------------------------------------------------------ |
||
12726 | |||
12727 | XMFINLINE _XMUICO4::_XMUICO4 |
||
12728 | ( |
||
12729 | FLOAT _x, |
||
12730 | FLOAT _y, |
||
12731 | FLOAT _z, |
||
12732 | FLOAT _w |
||
12733 | ) |
||
12734 | { |
||
12735 | XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12736 | } |
||
12737 | |||
12738 | //------------------------------------------------------------------------------ |
||
12739 | |||
12740 | XMFINLINE _XMUICO4::_XMUICO4 |
||
12741 | ( |
||
12742 | CONST FLOAT* pArray |
||
12743 | ) |
||
12744 | { |
||
12745 | XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12746 | } |
||
12747 | |||
12748 | //------------------------------------------------------------------------------ |
||
12749 | |||
12750 | XMFINLINE _XMUICO4& _XMUICO4::operator= |
||
12751 | ( |
||
12752 | CONST _XMUICO4& UIco4 |
||
12753 | ) |
||
12754 | { |
||
12755 | v = UIco4.v; |
||
12756 | return *this; |
||
12757 | } |
||
12758 | |||
12759 | //------------------------------------------------------------------------------ |
||
12760 | |||
12761 | XMFINLINE _XMUICO4& _XMUICO4::operator= |
||
12762 | ( |
||
12763 | CONST UINT64 Packed |
||
12764 | ) |
||
12765 | { |
||
12766 | v = Packed; |
||
12767 | return *this; |
||
12768 | } |
||
12769 | |||
12770 | /**************************************************************************** |
||
12771 | * |
||
12772 | * XMCOLOR4 operators |
||
12773 | * |
||
12774 | ****************************************************************************/ |
||
12775 | |||
12776 | //------------------------------------------------------------------------------ |
||
12777 | |||
12778 | XMFINLINE _XMCOLOR::_XMCOLOR |
||
12779 | ( |
||
12780 | FLOAT _x, |
||
12781 | FLOAT _y, |
||
12782 | FLOAT _z, |
||
12783 | FLOAT _w |
||
12784 | ) |
||
12785 | { |
||
12786 | XMStoreColor(this, XMVectorSet(_x, _y, _z, _w)); |
||
12787 | } |
||
12788 | |||
12789 | //------------------------------------------------------------------------------ |
||
12790 | |||
12791 | XMFINLINE _XMCOLOR::_XMCOLOR |
||
12792 | ( |
||
12793 | CONST FLOAT* pArray |
||
12794 | ) |
||
12795 | { |
||
12796 | XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12797 | } |
||
12798 | |||
12799 | //------------------------------------------------------------------------------ |
||
12800 | |||
12801 | XMFINLINE _XMCOLOR& _XMCOLOR::operator= |
||
12802 | ( |
||
12803 | CONST _XMCOLOR& Color |
||
12804 | ) |
||
12805 | { |
||
12806 | c = Color.c; |
||
12807 | return *this; |
||
12808 | } |
||
12809 | |||
12810 | //------------------------------------------------------------------------------ |
||
12811 | |||
12812 | XMFINLINE _XMCOLOR& _XMCOLOR::operator= |
||
12813 | ( |
||
12814 | CONST UINT Color |
||
12815 | ) |
||
12816 | { |
||
12817 | c = Color; |
||
12818 | return *this; |
||
12819 | } |
||
12820 | |||
12821 | /**************************************************************************** |
||
12822 | * |
||
12823 | * XMBYTEN4 operators |
||
12824 | * |
||
12825 | ****************************************************************************/ |
||
12826 | |||
12827 | //------------------------------------------------------------------------------ |
||
12828 | |||
12829 | XMFINLINE _XMBYTEN4::_XMBYTEN4 |
||
12830 | ( |
||
12831 | CONST CHAR* pArray |
||
12832 | ) |
||
12833 | { |
||
12834 | x = pArray[0]; |
||
12835 | y = pArray[1]; |
||
12836 | z = pArray[2]; |
||
12837 | w = pArray[3]; |
||
12838 | } |
||
12839 | |||
12840 | //------------------------------------------------------------------------------ |
||
12841 | |||
12842 | XMFINLINE _XMBYTEN4::_XMBYTEN4 |
||
12843 | ( |
||
12844 | FLOAT _x, |
||
12845 | FLOAT _y, |
||
12846 | FLOAT _z, |
||
12847 | FLOAT _w |
||
12848 | ) |
||
12849 | { |
||
12850 | XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12851 | } |
||
12852 | |||
12853 | //------------------------------------------------------------------------------ |
||
12854 | |||
12855 | XMFINLINE _XMBYTEN4::_XMBYTEN4 |
||
12856 | ( |
||
12857 | CONST FLOAT* pArray |
||
12858 | ) |
||
12859 | { |
||
12860 | XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12861 | } |
||
12862 | |||
12863 | //------------------------------------------------------------------------------ |
||
12864 | |||
12865 | XMFINLINE _XMBYTEN4& _XMBYTEN4::operator= |
||
12866 | ( |
||
12867 | CONST _XMBYTEN4& ByteN4 |
||
12868 | ) |
||
12869 | { |
||
12870 | x = ByteN4.x; |
||
12871 | y = ByteN4.y; |
||
12872 | z = ByteN4.z; |
||
12873 | w = ByteN4.w; |
||
12874 | return *this; |
||
12875 | } |
||
12876 | |||
12877 | /**************************************************************************** |
||
12878 | * |
||
12879 | * XMBYTE4 operators |
||
12880 | * |
||
12881 | ****************************************************************************/ |
||
12882 | |||
12883 | //------------------------------------------------------------------------------ |
||
12884 | |||
12885 | XMFINLINE _XMBYTE4::_XMBYTE4 |
||
12886 | ( |
||
12887 | CONST CHAR* pArray |
||
12888 | ) |
||
12889 | { |
||
12890 | x = pArray[0]; |
||
12891 | y = pArray[1]; |
||
12892 | z = pArray[2]; |
||
12893 | w = pArray[3]; |
||
12894 | } |
||
12895 | |||
12896 | //------------------------------------------------------------------------------ |
||
12897 | |||
12898 | XMFINLINE _XMBYTE4::_XMBYTE4 |
||
12899 | ( |
||
12900 | FLOAT _x, |
||
12901 | FLOAT _y, |
||
12902 | FLOAT _z, |
||
12903 | FLOAT _w |
||
12904 | ) |
||
12905 | { |
||
12906 | XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12907 | } |
||
12908 | |||
12909 | //------------------------------------------------------------------------------ |
||
12910 | |||
12911 | XMFINLINE _XMBYTE4::_XMBYTE4 |
||
12912 | ( |
||
12913 | CONST FLOAT* pArray |
||
12914 | ) |
||
12915 | { |
||
12916 | XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12917 | } |
||
12918 | |||
12919 | //------------------------------------------------------------------------------ |
||
12920 | |||
12921 | XMFINLINE _XMBYTE4& _XMBYTE4::operator= |
||
12922 | ( |
||
12923 | CONST _XMBYTE4& Byte4 |
||
12924 | ) |
||
12925 | { |
||
12926 | x = Byte4.x; |
||
12927 | y = Byte4.y; |
||
12928 | z = Byte4.z; |
||
12929 | w = Byte4.w; |
||
12930 | return *this; |
||
12931 | } |
||
12932 | |||
12933 | /**************************************************************************** |
||
12934 | * |
||
12935 | * XMUBYTEN4 operators |
||
12936 | * |
||
12937 | ****************************************************************************/ |
||
12938 | |||
12939 | //------------------------------------------------------------------------------ |
||
12940 | |||
12941 | XMFINLINE _XMUBYTEN4::_XMUBYTEN4 |
||
12942 | ( |
||
12943 | CONST BYTE* pArray |
||
12944 | ) |
||
12945 | { |
||
12946 | x = pArray[0]; |
||
12947 | y = pArray[1]; |
||
12948 | z = pArray[2]; |
||
12949 | w = pArray[3]; |
||
12950 | } |
||
12951 | |||
12952 | //------------------------------------------------------------------------------ |
||
12953 | |||
12954 | XMFINLINE _XMUBYTEN4::_XMUBYTEN4 |
||
12955 | ( |
||
12956 | FLOAT _x, |
||
12957 | FLOAT _y, |
||
12958 | FLOAT _z, |
||
12959 | FLOAT _w |
||
12960 | ) |
||
12961 | { |
||
12962 | XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w)); |
||
12963 | } |
||
12964 | |||
12965 | //------------------------------------------------------------------------------ |
||
12966 | |||
12967 | XMFINLINE _XMUBYTEN4::_XMUBYTEN4 |
||
12968 | ( |
||
12969 | CONST FLOAT* pArray |
||
12970 | ) |
||
12971 | { |
||
12972 | XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
12973 | } |
||
12974 | |||
12975 | //------------------------------------------------------------------------------ |
||
12976 | |||
12977 | XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator= |
||
12978 | ( |
||
12979 | CONST _XMUBYTEN4& UByteN4 |
||
12980 | ) |
||
12981 | { |
||
12982 | x = UByteN4.x; |
||
12983 | y = UByteN4.y; |
||
12984 | z = UByteN4.z; |
||
12985 | w = UByteN4.w; |
||
12986 | return *this; |
||
12987 | } |
||
12988 | |||
12989 | /**************************************************************************** |
||
12990 | * |
||
12991 | * XMUBYTE4 operators |
||
12992 | * |
||
12993 | ****************************************************************************/ |
||
12994 | |||
12995 | //------------------------------------------------------------------------------ |
||
12996 | |||
12997 | XMFINLINE _XMUBYTE4::_XMUBYTE4 |
||
12998 | ( |
||
12999 | CONST BYTE* pArray |
||
13000 | ) |
||
13001 | { |
||
13002 | x = pArray[0]; |
||
13003 | y = pArray[1]; |
||
13004 | z = pArray[2]; |
||
13005 | w = pArray[3]; |
||
13006 | } |
||
13007 | |||
13008 | //------------------------------------------------------------------------------ |
||
13009 | |||
13010 | XMFINLINE _XMUBYTE4::_XMUBYTE4 |
||
13011 | ( |
||
13012 | FLOAT _x, |
||
13013 | FLOAT _y, |
||
13014 | FLOAT _z, |
||
13015 | FLOAT _w |
||
13016 | ) |
||
13017 | { |
||
13018 | XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w)); |
||
13019 | } |
||
13020 | |||
13021 | //------------------------------------------------------------------------------ |
||
13022 | |||
13023 | XMFINLINE _XMUBYTE4::_XMUBYTE4 |
||
13024 | ( |
||
13025 | CONST FLOAT* pArray |
||
13026 | ) |
||
13027 | { |
||
13028 | XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
13029 | } |
||
13030 | |||
13031 | //------------------------------------------------------------------------------ |
||
13032 | |||
13033 | XMFINLINE _XMUBYTE4& _XMUBYTE4::operator= |
||
13034 | ( |
||
13035 | CONST _XMUBYTE4& UByte4 |
||
13036 | ) |
||
13037 | { |
||
13038 | x = UByte4.x; |
||
13039 | y = UByte4.y; |
||
13040 | z = UByte4.z; |
||
13041 | w = UByte4.w; |
||
13042 | return *this; |
||
13043 | } |
||
13044 | |||
13045 | /**************************************************************************** |
||
13046 | * |
||
13047 | * XMUNIBBLE4 operators |
||
13048 | * |
||
13049 | ****************************************************************************/ |
||
13050 | |||
13051 | //------------------------------------------------------------------------------ |
||
13052 | |||
13053 | XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 |
||
13054 | ( |
||
13055 | CONST CHAR *pArray |
||
13056 | ) |
||
13057 | { |
||
13058 | x = pArray[0]; |
||
13059 | y = pArray[1]; |
||
13060 | z = pArray[2]; |
||
13061 | w = pArray[3]; |
||
13062 | } |
||
13063 | |||
13064 | //------------------------------------------------------------------------------ |
||
13065 | |||
13066 | XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 |
||
13067 | ( |
||
13068 | FLOAT _x, |
||
13069 | FLOAT _y, |
||
13070 | FLOAT _z, |
||
13071 | FLOAT _w |
||
13072 | ) |
||
13073 | { |
||
13074 | XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w )); |
||
13075 | } |
||
13076 | |||
13077 | //------------------------------------------------------------------------------ |
||
13078 | |||
13079 | XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4 |
||
13080 | ( |
||
13081 | CONST FLOAT *pArray |
||
13082 | ) |
||
13083 | { |
||
13084 | XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray)); |
||
13085 | } |
||
13086 | |||
13087 | //------------------------------------------------------------------------------ |
||
13088 | |||
13089 | XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator= |
||
13090 | ( |
||
13091 | CONST _XMUNIBBLE4& UNibble4 |
||
13092 | ) |
||
13093 | { |
||
13094 | v = UNibble4.v; |
||
13095 | return *this; |
||
13096 | } |
||
13097 | |||
13098 | //------------------------------------------------------------------------------ |
||
13099 | |||
13100 | XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator= |
||
13101 | ( |
||
13102 | CONST USHORT Packed |
||
13103 | ) |
||
13104 | { |
||
13105 | v = Packed; |
||
13106 | return *this; |
||
13107 | } |
||
13108 | |||
13109 | /**************************************************************************** |
||
13110 | * |
||
13111 | * XMU555 operators |
||
13112 | * |
||
13113 | ****************************************************************************/ |
||
13114 | |||
13115 | //------------------------------------------------------------------------------ |
||
13116 | |||
13117 | XMFINLINE _XMU555::_XMU555 |
||
13118 | ( |
||
13119 | CONST CHAR *pArray, |
||
13120 | BOOL _w |
||
13121 | ) |
||
13122 | { |
||
13123 | x = pArray[0]; |
||
13124 | y = pArray[1]; |
||
13125 | z = pArray[2]; |
||
13126 | w = _w; |
||
13127 | } |
||
13128 | |||
13129 | //------------------------------------------------------------------------------ |
||
13130 | |||
13131 | XMFINLINE _XMU555::_XMU555 |
||
13132 | ( |
||
13133 | FLOAT _x, |
||
13134 | FLOAT _y, |
||
13135 | FLOAT _z, |
||
13136 | BOOL _w |
||
13137 | ) |
||
13138 | { |
||
13139 | XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) )); |
||
13140 | } |
||
13141 | |||
13142 | //------------------------------------------------------------------------------ |
||
13143 | |||
13144 | XMFINLINE _XMU555::_XMU555 |
||
13145 | ( |
||
13146 | CONST FLOAT *pArray, |
||
13147 | BOOL _w |
||
13148 | ) |
||
13149 | { |
||
13150 | XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray); |
||
13151 | XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) )); |
||
13152 | } |
||
13153 | |||
13154 | //------------------------------------------------------------------------------ |
||
13155 | |||
13156 | XMFINLINE _XMU555& _XMU555::operator= |
||
13157 | ( |
||
13158 | CONST _XMU555& U555 |
||
13159 | ) |
||
13160 | { |
||
13161 | v = U555.v; |
||
13162 | return *this; |
||
13163 | } |
||
13164 | |||
13165 | //------------------------------------------------------------------------------ |
||
13166 | |||
13167 | XMFINLINE _XMU555& _XMU555::operator= |
||
13168 | ( |
||
13169 | CONST USHORT Packed |
||
13170 | ) |
||
13171 | { |
||
13172 | v = Packed; |
||
13173 | return *this; |
||
13174 | } |
||
13175 | |||
13176 | #endif // __cplusplus |
||
13177 | |||
13178 | #if defined(_XM_NO_INTRINSICS_) |
||
13179 | #undef XMISNAN |
||
13180 | #undef XMISINF |
||
13181 | #endif |
||
13182 | |||
13183 | #endif // __XNAMATHVECTOR_INL__ |
||
13184 |