/*++

Copyright (c) Microsoft Corporation. All rights reserved.

Module Name:

    xnamathconvert.inl

Abstract:

    XNA math library for Windows and Xbox 360: Conversion, loading, and storing functions.

--*/

#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif

#ifndef __XNAMATHCONVERT_INL__
#define __XNAMATHCONVERT_INL__

#define XM_PACK_FACTOR              (FLOAT)(1 << 22)
#define XM_UNPACK_FACTOR_UNSIGNED   (FLOAT)(1 << 23)
#define XM_UNPACK_FACTOR_SIGNED     XM_PACK_FACTOR

#define XM_UNPACK_UNSIGNEDN_OFFSET(BitsX, BitsY, BitsZ, BitsW) \
    {-XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsX)) - 1), \
     -XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsY)) - 1), \
     -XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsZ)) - 1), \
     -XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsW)) - 1)}

#define XM_UNPACK_UNSIGNEDN_SCALE(BitsX, BitsY, BitsZ, BitsW) \
    {XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsX)) - 1), \
     XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsY)) - 1), \
     XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsZ)) - 1), \
     XM_UNPACK_FACTOR_UNSIGNED / (FLOAT)((1 << (BitsW)) - 1)}

#define XM_UNPACK_SIGNEDN_SCALE(BitsX, BitsY, BitsZ, BitsW) \
    {-XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsX) - 1)) - 1), \
     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsY) - 1)) - 1), \
     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsZ) - 1)) - 1), \
     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsW) - 1)) - 1)}

//#define XM_UNPACK_SIGNEDN_OFFSET(BitsX, BitsY, BitsZ, BitsW) \
//    {-XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsX) - 1)) - 1) * 3.0f, \
//     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsY) - 1)) - 1) * 3.0f, \
//     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsZ) - 1)) - 1) * 3.0f, \
//     -XM_UNPACK_FACTOR_SIGNED / (FLOAT)((1 << ((BitsW) - 1)) - 1) * 3.0f}

#define XM_PACK_UNSIGNEDN_SCALE(BitsX, BitsY, BitsZ, BitsW) \
    {-(FLOAT)((1 << (BitsX)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << (BitsY)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << (BitsZ)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << (BitsW)) - 1) / XM_PACK_FACTOR}

#define XM_PACK_SIGNEDN_SCALE(BitsX, BitsY, BitsZ, BitsW) \
    {-(FLOAT)((1 << ((BitsX) - 1)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << ((BitsY) - 1)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << ((BitsZ) - 1)) - 1) / XM_PACK_FACTOR, \
     -(FLOAT)((1 << ((BitsW) - 1)) - 1) / XM_PACK_FACTOR}

#define XM_PACK_OFFSET      XMVectorSplatConstant(3, 0)
//#define XM_UNPACK_OFFSET    XM_PACK_OFFSET

/****************************************************************************
 *
 * Data conversion
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE FLOAT XMConvertHalfToFloat
(
    HALF Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    UINT Mantissa;
    UINT Exponent;
    UINT Result;

    Mantissa = (UINT)(Value & 0x03FF);

    if ((Value & 0x7C00) != 0)  // The value is normalized
    {
        Exponent = (UINT)((Value >> 10) & 0x1F);
    }
    else if (Mantissa != 0)     // The value is denormalized
    {
        // Normalize the value in the resulting float
        Exponent = 1;

        do
        {
            Exponent--;
            Mantissa <<= 1;
        } while ((Mantissa & 0x0400) == 0);

        Mantissa &= 0x03FF;
    }
    else                        // The value is zero
    {
        Exponent = (UINT)-112;
    }

    Result = ((Value & 0x8000) << 16) | // Sign
             ((Exponent + 112) << 23) | // Exponent
             (Mantissa << 13);          // Mantissa

    return *(FLOAT*)&Result;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
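
// Illustrative examples (not part of the original source): with the HALF
// layout above (1 sign bit, 5 exponent bits biased by 15, 10 mantissa bits),
// 0x3C00 converts to 1.0f, 0xC000 to -2.0f, and the denormal 0x0001 to 2^-24:
//
//     FLOAT f1 = XMConvertHalfToFloat((HALF)0x3C00);  // 1.0f
//     FLOAT f2 = XMConvertHalfToFloat((HALF)0xC000);  // -2.0f
//     FLOAT f3 = XMConvertHalfToFloat((HALF)0x0001);  // 5.9604645e-8f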

//------------------------------------------------------------------------------

XMINLINE FLOAT* XMConvertHalfToFloatStream
(
    FLOAT*      pOutputStream,
    UINT        OutputStride,
    CONST HALF* pInputStream,
    UINT        InputStride,
    UINT        HalfCount
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    UINT  i;
    BYTE* pHalf = (BYTE*)pInputStream;
    BYTE* pFloat = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < HalfCount; i++)
    {
        *(FLOAT*)pFloat = XMConvertHalfToFloat(*(HALF*)pHalf);
        pHalf += InputStride;
        pFloat += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
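
// Illustrative usage (not in the original source): both strides are in bytes,
// so converting just the x components of a packed XMHALF2 array into a
// contiguous float array might look like this, assuming XMHALF2 aHalf2[64]
// and FLOAT aFloat[64]:
//
//     XMConvertHalfToFloatStream(aFloat, sizeof(FLOAT),
//                                &aHalf2[0].x, sizeof(XMHALF2), 64);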

//------------------------------------------------------------------------------

XMFINLINE HALF XMConvertFloatToHalf
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
    UINT Result;

    UINT IValue = ((UINT *)(&Value))[0];
    UINT Sign = (IValue & 0x80000000U) >> 16U;
    IValue = IValue & 0x7FFFFFFFU;      // Hack off the sign

    if (IValue > 0x47FFEFFFU)
    {
        // The number is too large to be represented as a half. Saturate to 0x7FFF.
        Result = 0x7FFFU;
    }
    else
    {
        if (IValue < 0x38800000U)
        {
            // The number is too small to be represented as a normalized half.
            // Convert it to a denormalized value.
            UINT Shift = 113U - (IValue >> 23U);
            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized half.
            IValue += 0xC8000000U;
        }

        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU;
    }
    return (HALF)(Result | Sign);

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
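
// Illustrative examples (not in the original source): the +0x0FFF plus the
// low-bit term above implements round-to-nearest-even on the 13 dropped
// mantissa bits, so 1.0f packs to 0x3C00 and -2.0f packs to 0xC000:
//
//     HALF h1 = XMConvertFloatToHalf(1.0f);   // 0x3C00
//     HALF h2 = XMConvertFloatToHalf(-2.0f);  // 0xC000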

//------------------------------------------------------------------------------

XMINLINE HALF* XMConvertFloatToHalfStream
(
    HALF*        pOutputStream,
    UINT         OutputStride,
    CONST FLOAT* pInputStream,
    UINT         InputStride,
    UINT         FloatCount
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    UINT  i;
    BYTE* pFloat = (BYTE*)pInputStream;
    BYTE* pHalf = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < FloatCount; i++)
    {
        *(HALF*)pHalf = XMConvertFloatToHalf(*(FLOAT*)pFloat);
        pFloat += InputStride;
        pHalf += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
// For VMX128, these routines are all defines in the main header

#pragma warning(push)
#pragma warning(disable:4701) // Prevent warnings about 'Result' potentially being used without having been initialized

XMINLINE XMVECTOR XMConvertVectorIntToFloat
(
    FXMVECTOR VInt,
    UINT      DivExponent
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ElementIndex;
    FLOAT fScale;
    XMVECTOR Result;
    XMASSERT(DivExponent < 32);
    fScale = 1.0f / (FLOAT)(1U << DivExponent);
    ElementIndex = 0;
    do {
        INT iTemp = (INT)VInt.vector4_u32[ElementIndex];
        Result.vector4_f32[ElementIndex] = ((FLOAT)iTemp) * fScale;
    } while (++ElementIndex < 4);
    return Result;
#else // _XM_SSE_INTRINSICS_
    XMASSERT(DivExponent < 32);
    // Convert to floats
    XMVECTOR vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&VInt)[0]);
    // Convert DivExponent into 1.0f/(1<<DivExponent)
    UINT uScale = 0x3F800000U - (DivExponent << 23);
    // Splat the scalar value
    __m128i vScale = _mm_set1_epi32(uScale);
    vResult = _mm_mul_ps(vResult, reinterpret_cast<const __m128 *>(&vScale)[0]);
    return vResult;
#endif
}
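
// Illustrative example (not in the original source): DivExponent selects a
// fixed-point interpretation of the integer lanes. With DivExponent == 16,
// each lane is read as 16.16 fixed point, so 0x00018000 (98304) becomes
// 98304 / 65536 = 1.5f:
//
//     XMVECTOR vFixed = XMVectorSetInt(0x00018000, 0, 0, 0);
//     XMVECTOR vFloat = XMConvertVectorIntToFloat(vFixed, 16); // x == 1.5f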

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMConvertVectorFloatToInt
(
    FXMVECTOR VFloat,
    UINT      MulExponent
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ElementIndex;
    XMVECTOR Result;
    FLOAT fScale;
    XMASSERT(MulExponent < 32);
    // Get the scalar factor.
    fScale = (FLOAT)(1U << MulExponent);
    ElementIndex = 0;
    do {
        INT iResult;
        FLOAT fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
        if (fTemp <= -(65536.0f*32768.0f)) {
            iResult = (-0x7FFFFFFF) - 1;
        } else if (fTemp > (65536.0f*32768.0f)-128.0f) {
            iResult = 0x7FFFFFFF;
        } else {
            iResult = (INT)fTemp;
        }
        Result.vector4_u32[ElementIndex] = (UINT)iResult;
    } while (++ElementIndex < 4);
    return Result;
#else // _XM_SSE_INTRINSICS_
    XMASSERT(MulExponent < 32);
    static const XMVECTORF32 MaxInt = {65536.0f*32768.0f-128.0f, 65536.0f*32768.0f-128.0f, 65536.0f*32768.0f-128.0f, 65536.0f*32768.0f-128.0f};
    XMVECTOR vResult = _mm_set_ps1((FLOAT)(1U << MulExponent));
    vResult = _mm_mul_ps(vResult, VFloat);
    // In case of positive overflow, detect it
    XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, MaxInt);
    // Float to int conversion
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // If there was positive overflow, set to 0x7FFFFFFF
    vResult = _mm_and_ps(vOverflow, g_XMAbsMask);
    vOverflow = _mm_andnot_ps(vOverflow, reinterpret_cast<const __m128 *>(&vResulti)[0]);
    vOverflow = _mm_or_ps(vOverflow, vResult);
    return vOverflow;
#endif
}
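
// Illustrative example (not in the original source): the conversion truncates
// toward zero and saturates at the INT range, so with MulExponent == 0,
// 3.7f maps to 3 and 4.0e9f saturates to 0x7FFFFFFF:
//
//     XMVECTOR vF = XMVectorSet(3.7f, -3.7f, 4.0e9f, 0.0f);
//     XMVECTOR vI = XMConvertVectorFloatToInt(vF, 0); // 3, -3, 0x7FFFFFFF, 0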

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMConvertVectorUIntToFloat
(
    FXMVECTOR VUInt,
    UINT      DivExponent
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ElementIndex;
    FLOAT fScale;
    XMVECTOR Result;
    XMASSERT(DivExponent < 32);
    fScale = 1.0f / (FLOAT)(1U << DivExponent);
    ElementIndex = 0;
    do {
        Result.vector4_f32[ElementIndex] = (FLOAT)VUInt.vector4_u32[ElementIndex] * fScale;
    } while (++ElementIndex < 4);
    return Result;
#else // _XM_SSE_INTRINSICS_
    XMASSERT(DivExponent < 32);
    static const XMVECTORF32 FixUnsigned = {32768.0f*65536.0f, 32768.0f*65536.0f, 32768.0f*65536.0f, 32768.0f*65536.0f};
    // For the values that are higher than 0x7FFFFFFF, a fixup is needed
    // Determine which ones need the fix.
    XMVECTOR vMask = _mm_and_ps(VUInt, g_XMNegativeZero);
    // Force all values positive
    XMVECTOR vResult = _mm_xor_ps(VUInt, vMask);
    // Convert to floats
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Convert 0x80000000 -> 0xFFFFFFFF
    __m128i iMask = _mm_srai_epi32(reinterpret_cast<const __m128i *>(&vMask)[0], 31);
    // For only the ones that are too big, add the fixup
    vMask = _mm_and_ps(reinterpret_cast<const __m128 *>(&iMask)[0], FixUnsigned);
    vResult = _mm_add_ps(vResult, vMask);
    // Convert DivExponent into 1.0f/(1<<DivExponent)
    UINT uScale = 0x3F800000U - (DivExponent << 23);
    // Splat
    iMask = _mm_set1_epi32(uScale);
    vResult = _mm_mul_ps(vResult, reinterpret_cast<const __m128 *>(&iMask)[0]);
    return vResult;
#endif
}

//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMConvertVectorFloatToUInt
(
    FXMVECTOR VFloat,
    UINT      MulExponent
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ElementIndex;
    XMVECTOR Result;
    FLOAT fScale;
    XMASSERT(MulExponent < 32);
    // Get the scalar factor.
    fScale = (FLOAT)(1U << MulExponent);
    ElementIndex = 0;
    do {
        UINT uResult;
        FLOAT fTemp = VFloat.vector4_f32[ElementIndex] * fScale;
        if (fTemp <= 0.0f) {
            uResult = 0;
        } else if (fTemp >= (65536.0f*65536.0f)) {
            uResult = 0xFFFFFFFFU;
        } else {
            uResult = (UINT)fTemp;
        }
        Result.vector4_u32[ElementIndex] = uResult;
    } while (++ElementIndex < 4);
    return Result;
#else // _XM_SSE_INTRINSICS_
    XMASSERT(MulExponent < 32);
    static const XMVECTORF32 MaxUInt = {65536.0f*65536.0f-256.0f, 65536.0f*65536.0f-256.0f, 65536.0f*65536.0f-256.0f, 65536.0f*65536.0f-256.0f};
    static const XMVECTORF32 UnsignedFix = {32768.0f*65536.0f, 32768.0f*65536.0f, 32768.0f*65536.0f, 32768.0f*65536.0f};
    XMVECTOR vResult = _mm_set_ps1(static_cast<float>(1U << MulExponent));
    vResult = _mm_mul_ps(vResult, VFloat);
    // Clamp to >=0
    vResult = _mm_max_ps(vResult, g_XMZero);
    // Any numbers that are too big, set to 0xFFFFFFFFU
    XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, MaxUInt);
    XMVECTOR vValue = UnsignedFix;
    // Too large for a signed integer?
    XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue);
    // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
    vValue = _mm_and_ps(vValue, vMask);
    // Perform fixup only on numbers too large (Keeps low bit precision)
    vResult = _mm_sub_ps(vResult, vValue);
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Convert from signed to unsigned only if greater than 0x80000000
    vMask = _mm_and_ps(vMask, g_XMNegativeZero);
    vResult = _mm_xor_ps(reinterpret_cast<const __m128 *>(&vResulti)[0], vMask);
    // On those that are too large, set to 0xFFFFFFFF
    vResult = _mm_or_ps(vResult, vOverflow);
    return vResult;
#endif
}

#pragma warning(pop)

#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_

/****************************************************************************
 *
 * Vector and matrix load operations
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt(CONST UINT* pSource)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 3) == 0);

    V.vector4_u32[0] = *pSource;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 3) == 0);
    __m128i V = _mm_set_epi32( 0, 0, 0, *pSource );
    return reinterpret_cast<__m128 *>(&V)[0];
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat(CONST FLOAT* pSource)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 3) == 0);

    V.vector4_f32[0] = *pSource;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 3) == 0);

    return _mm_load_ss( pSource );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt2
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];

    return V;
#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pSource);
    __m128i V = _mm_set_epi32( 0, 0, *(pSource+1), *pSource );
    return reinterpret_cast<__m128 *>(&V)[0];

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt2A
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];

    return V;

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pSource);
    __m128i V = _mm_loadl_epi64( (const __m128i*)pSource );
    return reinterpret_cast<__m128 *>(&V)[0];

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat2
(
    CONST XMFLOAT2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMASSERT(pSource);
    ((UINT *)(&V.vector4_f32[0]))[0] = ((const UINT *)(&pSource->x))[0];
    ((UINT *)(&V.vector4_f32[1]))[0] = ((const UINT *)(&pSource->y))[0];
    V.vector4_f32[2] = V.vector4_f32[3] = 0.0f;
    return V;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
#ifdef _XM_X86_
    __m128 x = _mm_load_ss( &pSource->x );
    __m128 y = _mm_load_ss( &pSource->y );
    return _mm_unpacklo_ps( x, y );
#else // _XM_X64_
    // This reads 2 floats past the end of the structure; the extra data is ignored.
    return _mm_loadu_ps( &pSource->x );
#endif
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat2A
(
    CONST XMFLOAT2A* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_f32[0] = pSource->x;
    V.vector4_f32[1] = pSource->y;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
#ifdef _XM_X86_
    __m128 x = _mm_load_ss( &pSource->x );
    __m128 y = _mm_load_ss( &pSource->y );
    return _mm_unpacklo_ps( x, y );
#else // _XM_X64_
    // This reads 2 floats past the end of the structure; the extra data is ignored.
    return _mm_load_ps( &pSource->x );
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadHalf2
(
    CONST XMHALF2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMASSERT(pSource);
    {
    XMVECTOR vResult = {
        XMConvertHalfToFloat(pSource->x),
        XMConvertHalfToFloat(pSource->y),
        0.0f,
        0.0f
    };
    return vResult;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMVECTOR vResult = {
        XMConvertHalfToFloat(pSource->x),
        XMConvertHalfToFloat(pSource->y),
        0.0f,
        0.0f
    };
    return vResult;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadShortN2
(
    CONST XMSHORTN2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    {
    XMVECTOR vResult = {
        (FLOAT)pSource->x * (1.0f/32767.0f),
        (FLOAT)pSource->y * (1.0f/32767.0f),
        0.0f,
        0.0f
    };
    return vResult;
    }

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000, z&0, w&0
    vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16);
    // x needs to be sign extended
    vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x - 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16);
    // Normalize to the range -1.0f to 1.0f
    return _mm_mul_ps(vTemp, g_XMNormalizeX16Y16);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
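
// Illustrative example (not in the original source, assuming the
// XMSHORTN2(SHORT, SHORT) constructor that xnamath.h provides in C++ builds):
// XMSHORTN2 components are signed-normalized, mapping -32767..32767 onto
// -1.0f..1.0f (-32768 is disallowed by the asserts above):
//
//     XMSHORTN2 Packed( (SHORT)32767, (SHORT)-16384 );
//     XMVECTOR  v = XMLoadShortN2(&Packed);  // x == 1.0f, y ~= -0.50002f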

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadShort2
(
    CONST XMSHORT2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000, z&0, w&0
    vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16);
    // x needs to be sign extended
    vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x - 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16);
    // Y is 65536 times too large
    return _mm_mul_ps(vTemp, g_XMFixupY16);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUShortN2
(
    CONST XMUSHORTN2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x / 65535.0f;
    V.vector4_f32[1] = (FLOAT)pSource->y / 65535.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixupY16 = {1.0f/65535.0f, 1.0f/(65535.0f*65536.0f), 0.0f, 0.0f};
    static const XMVECTORF32 FixaddY16 = {0, 32768.0f*65536.0f, 0, 0};
    XMASSERT(pSource);
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000, z&0, w&0
    vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16);
    // y needs to be sign flipped
    vTemp = _mm_xor_ps(vTemp, g_XMFlipY);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // y + 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp, FixaddY16);
    // Y is 65536 times too large
    vTemp = _mm_mul_ps(vTemp, FixupY16);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUShort2
(
    CONST XMUSHORT2* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixaddY16 = {0, 32768.0f, 0, 0};
    XMASSERT(pSource);
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000, z&0, w&0
    vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16);
    // y needs to be sign flipped
    vTemp = _mm_xor_ps(vTemp, g_XMFlipY);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Y is 65536 times too large
    vTemp = _mm_mul_ps(vTemp, g_XMFixupY16);
    // y + 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp, FixaddY16);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt3
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];
    V.vector4_u32[2] = pSource[2];

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    __m128i V = _mm_set_epi32( 0, *(pSource+2), *(pSource+1), *pSource );
    return reinterpret_cast<__m128 *>(&V)[0];
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt3A
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];
    V.vector4_u32[2] = pSource[2];

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);

    // Reads an extra integer that is 'undefined'

    __m128i V = _mm_load_si128( (const __m128i*)pSource );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat3
(
    CONST XMFLOAT3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMASSERT(pSource);
    ((UINT *)(&V.vector4_f32[0]))[0] = ((const UINT *)(&pSource->x))[0];
    ((UINT *)(&V.vector4_f32[1]))[0] = ((const UINT *)(&pSource->y))[0];
    ((UINT *)(&V.vector4_f32[2]))[0] = ((const UINT *)(&pSource->z))[0];
    V.vector4_f32[3] = 0.0f;
    return V;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // This reads 1 float past the end of the structure; the extra data is ignored.
    return _mm_loadu_ps( &pSource->x );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat3A
(
    CONST XMFLOAT3A* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_f32[0] = pSource->x;
    V.vector4_f32[1] = pSource->y;
    V.vector4_f32[2] = pSource->z;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);

    // This reads 1 float past the end of the structure; the extra data is ignored.

    return _mm_load_ps( &pSource->x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUHenDN3
(
    CONST XMUHENDN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x7FF;
    V.vector4_f32[0] = (FLOAT)Element / 2047.0f;
    Element = (pSource->v >> 11) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)Element / 2047.0f;
    Element = (pSource->v >> 22) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)Element / 1023.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 UHenDN3Mul = {1.0f/2047.0f, 1.0f/(2047.0f*2048.0f), 1.0f/(1023.0f*2048.0f*2048.0f), 0};
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskHenD3);
    // Flip the top bit of z so the signed int conversion is correct
    vResult = _mm_xor_ps(vResult, g_XMFlipZ);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Undo the bit flip on z by adding 2^31
    vResult = _mm_add_ps(vResult, g_XMAddUHenD3);
    // Normalize x, y, and z to 0.0f-1.0f
    vResult = _mm_mul_ps(vResult, UHenDN3Mul);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
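
// Illustrative example (not in the original source): XMUHENDN3 packs x in
// bits 0-10, y in bits 11-21, and z in bits 22-31, each unsigned-normalized.
// So v == 0x000007FF decodes to (1.0f, 0.0f, 0.0f), and v == 0xFFC00000
// decodes to (0.0f, 0.0f, 1.0f):
//
//     XMUHENDN3 Packed;
//     Packed.v = 0x000007FF;
//     XMVECTOR vDecoded = XMLoadUHenDN3(&Packed);  // (1.0f, 0.0f, 0.0f, ?)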

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUHenD3
(
    CONST XMUHEND3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x7FF;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 11) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 22) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)Element;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskHenD3);
    // Flip the top bit of z so the signed int conversion is correct
    vResult = _mm_xor_ps(vResult, g_XMFlipZ);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Undo the bit flip on z by adding 2^31
    vResult = _mm_add_ps(vResult, g_XMAddUHenD3);
    // Scale y and z back down to their unshifted ranges
    vResult = _mm_mul_ps(vResult, g_XMMulHenD3);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadHenDN3
(
    CONST XMHENDN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtendXY[] = {0x00000000, 0xFFFFF800};
    static CONST UINT SignExtendZ[] = {0x00000000, 0xFFFFFC00};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 11) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 22) & 0x3FF) != 0x200);

    Element = pSource->v & 0x7FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtendXY[Element >> 10]) / 1023.0f;
    Element = (pSource->v >> 11) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtendXY[Element >> 10]) / 1023.0f;
    Element = (pSource->v >> 22) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtendZ[Element >> 9]) / 511.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 HenDN3Mul = {1.0f/1023.0f, 1.0f/(1023.0f*2048.0f), 1.0f/(511.0f*2048.0f*2048.0f), 0};
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 11) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 22) & 0x3FF) != 0x200);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskHenD3);
    // Convert x, y, and z to unsigned
    vResult = _mm_xor_ps(vResult, g_XMXorHenD3);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Convert x, y, and z back to signed
    vResult = _mm_add_ps(vResult, g_XMAddHenD3);
    // Normalize x, y, and z to -1.0f-1.0f
    vResult = _mm_mul_ps(vResult, HenDN3Mul);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadHenD3
(
    CONST XMHEND3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtendXY[] = {0x00000000, 0xFFFFF800};
    static CONST UINT SignExtendZ[] = {0x00000000, 0xFFFFFC00};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 11) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 22) & 0x3FF) != 0x200);

    Element = pSource->v & 0x7FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtendXY[Element >> 10]);
    Element = (pSource->v >> 11) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtendXY[Element >> 10]);
    Element = (pSource->v >> 22) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtendZ[Element >> 9]);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 11) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 22) & 0x3FF) != 0x200);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskHenD3);
    // Convert x, y, and z to unsigned
    vResult = _mm_xor_ps(vResult, g_XMXorHenD3);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Convert x, y, and z back to signed
    vResult = _mm_add_ps(vResult, g_XMAddHenD3);
    // Normalize x and y to -1024-1023.0f and z to -512-511.0f
    vResult = _mm_mul_ps(vResult, g_XMMulHenD3);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUDHenN3
(
    CONST XMUDHENN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)Element / 1023.0f;
    Element = (pSource->v >> 10) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)Element / 2047.0f;
    Element = (pSource->v >> 21) & 0x7FF;
    V.vector4_f32[2] = (FLOAT)Element / 2047.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 UDHenN3Mul = {1.0f/1023.0f, 1.0f/(2047.0f*1024.0f), 1.0f/(2047.0f*1024.0f*2048.0f), 0};
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskDHen3);
    // Flip the top bit of z so the signed int conversion is correct
    vResult = _mm_xor_ps(vResult, g_XMFlipZ);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Undo the bit flip on z by adding 2^31
    vResult = _mm_add_ps(vResult, g_XMAddUHenD3);
    // Normalize x, y, and z to 0.0f-1.0f
    vResult = _mm_mul_ps(vResult, UDHenN3Mul);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUDHen3
(
    CONST XMUDHEN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 10) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 21) & 0x7FF;
    V.vector4_f32[2] = (FLOAT)Element;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskDHen3);
    // Flip the top bit of z so the signed int conversion is correct
    vResult = _mm_xor_ps(vResult, g_XMFlipZ);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Undo the bit flip on z by adding 2^31
    vResult = _mm_add_ps(vResult, g_XMAddUHenD3);
    // Normalize x to 0-1023.0f and y and z to 0-2047.0f
    vResult = _mm_mul_ps(vResult, g_XMMulDHen3);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadDHenN3
(
    CONST XMDHENN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtendX[] = {0x00000000, 0xFFFFFC00};
    static CONST UINT SignExtendYZ[] = {0x00000000, 0xFFFFF800};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 21) & 0x7FF) != 0x400);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtendX[Element >> 9]) / 511.0f;
    Element = (pSource->v >> 10) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtendYZ[Element >> 10]) / 1023.0f;
    Element = (pSource->v >> 21) & 0x7FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtendYZ[Element >> 10]) / 1023.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 DHenN3Mul = {1.0f/511.0f, 1.0f/(1023.0f*1024.0f), 1.0f/(1023.0f*1024.0f*2048.0f), 0};
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 21) & 0x7FF) != 0x400);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskDHen3);
    // Convert x, y, and z to unsigned
    vResult = _mm_xor_ps(vResult, g_XMXorDHen3);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Convert x, y, and z back to signed
    vResult = _mm_add_ps(vResult, g_XMAddDHen3);
    // Normalize x, y, and z to -1.0f-1.0f
    vResult = _mm_mul_ps(vResult, DHenN3Mul);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadDHen3
(
    CONST XMDHEN3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtendX[] = {0x00000000, 0xFFFFFC00};
    static CONST UINT SignExtendYZ[] = {0x00000000, 0xFFFFF800};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 21) & 0x7FF) != 0x400);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtendX[Element >> 9]);
    Element = (pSource->v >> 10) & 0x7FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtendYZ[Element >> 10]);
    Element = (pSource->v >> 21) & 0x7FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtendYZ[Element >> 10]);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x7FF) != 0x400);
    XMASSERT(((pSource->v >> 21) & 0x7FF) != 0x400);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, g_XMMaskDHen3);
    // Convert x, y, and z to unsigned
    vResult = _mm_xor_ps(vResult, g_XMXorDHen3);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Convert x, y, and z back to signed
    vResult = _mm_add_ps(vResult, g_XMAddDHen3);
    // Normalize x to -512-511.0f and y and z to -1024-1023.0f
    vResult = _mm_mul_ps(vResult, g_XMMulDHen3);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadU565
(
    CONST XMU565* pSource
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 U565And = {0x1F, 0x3F<<5, 0x1F<<11, 0};
    static const XMVECTORF32 U565Mul = {1.0f, 1.0f/32.0f, 1.0f/2048.f, 0};
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult, U565And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Shift x, y, and z down to their base ranges
    vResult = _mm_mul_ps(vResult, U565Mul);
    return vResult;
#else
    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x1F;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 5) & 0x3F;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 11) & 0x1F;
    V.vector4_f32[2] = (FLOAT)Element;

    return V;
#endif // !_XM_SSE_INTRINSICS_
}
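
// Illustrative example (not in the original source): XMU565 is the familiar
// 5:6:5 layout (x in bits 0-4, y in bits 5-10, z in bits 11-15), and the
// components come back unnormalized, so v == 0xFFFF decodes to
// (31.0f, 63.0f, 31.0f). Dividing by (31, 63, 31) would normalize it:
//
//     XMU565 Packed;
//     Packed.v = 0xFFFF;
//     XMVECTOR vDecoded = XMLoadU565(&Packed);  // (31.0f, 63.0f, 31.0f, ?)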

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat3PK
(
    CONST XMFLOAT3PK* pSource
)
{
    UINT Mantissa;
    UINT Exponent;
    UINT Result[3];

    XMASSERT(pSource);

    // X Channel (6-bit mantissa)
    Mantissa = pSource->xm;

    if ( pSource->xe == 0x1f ) // INF or NAN
    {
        Result[0] = 0x7f800000 | (pSource->xm << 17);
    }
    else
    {
        if ( pSource->xe != 0 ) // The value is normalized
        {
            Exponent = pSource->xe;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x40) == 0);

            Mantissa &= 0x3F;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
    }

    // Y Channel (6-bit mantissa)
    Mantissa = pSource->ym;

    if ( pSource->ye == 0x1f ) // INF or NAN
    {
        Result[1] = 0x7f800000 | (pSource->ym << 17);
    }
    else
    {
        if ( pSource->ye != 0 ) // The value is normalized
        {
            Exponent = pSource->ye;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x40) == 0);

            Mantissa &= 0x3F;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17);
    }

    // Z Channel (5-bit mantissa)
    Mantissa = pSource->zm;

    if ( pSource->ze == 0x1f ) // INF or NAN
    {
        Result[2] = 0x7f800000 | (pSource->zm << 17);
    }
    else
    {
        if ( pSource->ze != 0 ) // The value is normalized
        {
            Exponent = pSource->ze;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x20) == 0);

            Mantissa &= 0x1F;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18);
    }

    return XMLoadFloat3( (XMFLOAT3*)&Result );
}
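
// Illustrative example (not in the original source): XMFLOAT3PK is the packed
// 11:11:10 float format (x and y carry a 6-bit mantissa, z a 5-bit mantissa;
// all three use the half-style exponent bias of 15). A channel with exponent
// 15 and mantissa 0 therefore decodes to 1.0f:
//
//     XMFLOAT3PK Packed;
//     Packed.xm = 0; Packed.xe = 15;      // x = 1.0f
//     Packed.ym = 0; Packed.ye = 0;       // y = 0.0f
//     Packed.zm = 0; Packed.ze = 0;       // z = 0.0f
//     XMVECTOR vDecoded = XMLoadFloat3PK(&Packed);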

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat3SE
(
    CONST XMFLOAT3SE* pSource
)
{
    UINT Mantissa;
    UINT Exponent, ExpBits;
    UINT Result[3];

    XMASSERT(pSource);

    if ( pSource->e == 0x1f ) // INF or NAN
    {
        Result[0] = 0x7f800000 | (pSource->xm << 14);
        Result[1] = 0x7f800000 | (pSource->ym << 14);
        Result[2] = 0x7f800000 | (pSource->zm << 14);
    }
    else if ( pSource->e != 0 ) // The values are all normalized
    {
        Exponent = pSource->e;

        ExpBits = (Exponent + 112) << 23;

        Mantissa = pSource->xm;
        Result[0] = ExpBits | (Mantissa << 14);

        Mantissa = pSource->ym;
        Result[1] = ExpBits | (Mantissa << 14);

        Mantissa = pSource->zm;
        Result[2] = ExpBits | (Mantissa << 14);
    }
    else
    {
        // X Channel
        Mantissa = pSource->xm;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 14);

        // Y Channel
        Mantissa = pSource->ym;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14);

        // Z Channel
        Mantissa = pSource->zm;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (UINT)-112;
        }

        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14);
    }

    return XMLoadFloat3( (XMFLOAT3*)&Result );
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt4
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];
    V.vector4_u32[2] = pSource[2];
    V.vector4_u32[3] = pSource[3];

    return V;

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pSource);
    __m128i V = _mm_loadu_si128( (const __m128i*)pSource );
    return reinterpret_cast<__m128 *>(&V)[0];

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1550 | |||
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadInt4A
(
    CONST UINT* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_u32[0] = pSource[0];
    V.vector4_u32[1] = pSource[1];
    V.vector4_u32[2] = pSource[2];
    V.vector4_u32[3] = pSource[3];

    return V;

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    __m128i V = _mm_load_si128( (const __m128i*)pSource );
    return reinterpret_cast<__m128 *>(&V)[0];

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

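// Usage sketch (illustrative): XMLoadInt4 accepts any address, while
// XMLoadInt4A requires 16-byte alignment so the SSE path can use the aligned
// load instruction. The __declspec(align(16)) spelling below is MSVC-specific.
//
//     UINT unaligned[4] = { 1, 2, 3, 4 };
//     XMVECTOR a = XMLoadInt4( unaligned );
//
//     __declspec(align(16)) UINT aligned[4] = { 1, 2, 3, 4 };
//     XMVECTOR b = XMLoadInt4A( aligned );
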
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat4
(
    CONST XMFLOAT4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMASSERT(pSource);
    // Copy the components as raw UINT bits so NaN bit patterns survive intact
    ((UINT *)(&V.vector4_f32[0]))[0] = ((const UINT *)(&pSource->x))[0];
    ((UINT *)(&V.vector4_f32[1]))[0] = ((const UINT *)(&pSource->y))[0];
    ((UINT *)(&V.vector4_f32[2]))[0] = ((const UINT *)(&pSource->z))[0];
    ((UINT *)(&V.vector4_f32[3]))[0] = ((const UINT *)(&pSource->w))[0];
    return V;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    return _mm_loadu_ps( &pSource->x );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadFloat4A
(
    CONST XMFLOAT4A* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    V.vector4_f32[0] = pSource->x;
    V.vector4_f32[1] = pSource->y;
    V.vector4_f32[2] = pSource->z;
    V.vector4_f32[3] = pSource->w;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    return _mm_load_ps( &pSource->x );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadHalf4
(
    CONST XMHALF4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMASSERT(pSource);
    {
        XMVECTOR vResult = {
            XMConvertHalfToFloat(pSource->x),
            XMConvertHalfToFloat(pSource->y),
            XMConvertHalfToFloat(pSource->z),
            XMConvertHalfToFloat(pSource->w)
        };
        return vResult;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMVECTOR vResult = {
        XMConvertHalfToFloat(pSource->x),
        XMConvertHalfToFloat(pSource->y),
        XMConvertHalfToFloat(pSource->z),
        XMConvertHalfToFloat(pSource->w)
    };
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

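// Example (illustrative): IEEE 754 half bit patterns and the floats they load
// as. 0x3C00 is 1.0, 0xC000 is -2.0 and 0x7BFF is the largest finite half.
//
//     XMHALF4 h;
//     h.x = 0x3C00; h.y = 0xC000; h.z = 0x0000; h.w = 0x7BFF;
//     XMVECTOR v = XMLoadHalf4( &h ); // (1.0f, -2.0f, 0.0f, 65504.0f)
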
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadShortN4
(
    CONST XMSHORTN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    XMASSERT(pSource->z != -32768);
    XMASSERT(pSource->w != -32768);
    {
        XMVECTOR vResult = {
            (FLOAT)pSource->x * (1.0f/32767.0f),
            (FLOAT)pSource->y * (1.0f/32767.0f),
            (FLOAT)pSource->z * (1.0f/32767.0f),
            (FLOAT)pSource->w * (1.0f/32767.0f)
        };
        return vResult;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    XMASSERT(pSource->z != -32768);
    XMASSERT(pSource->w != -32768);
    // Splat the value in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Mask x&0xffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
    // x and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x and z - 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
    // Convert -32767..32767 to -1.0f..1.0f
    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16Z16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

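// Scalar reference (illustrative) for the flip-then-rebias trick used by the
// SSE path above, applied to one 16-bit field read as unsigned:
//
//     INT DecodeSigned16(UINT Bits16)   // Bits16 in [0, 0xFFFF]
//     {
//         Bits16 ^= 0x8000;             // flip the sign bit
//         return (INT)Bits16 - 0x8000;  // subtract the bias afterwards
//     }
//     // DecodeSigned16(0xFFFF) == -1, DecodeSigned16(0x7FFF) == 32767
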
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadShort4
(
    CONST XMSHORT4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    XMASSERT(pSource->z != -32768);
    XMASSERT(pSource->w != -32768);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;
    V.vector4_f32[2] = (FLOAT)pSource->z;
    V.vector4_f32[3] = (FLOAT)pSource->w;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT(pSource->x != -32768);
    XMASSERT(pSource->y != -32768);
    XMASSERT(pSource->z != -32768);
    XMASSERT(pSource->w != -32768);
    // Splat the value in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Mask x&0xffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
    // x and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x and z - 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
    // Fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUShortN4
(
    CONST XMUSHORTN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x / 65535.0f;
    V.vector4_f32[1] = (FLOAT)pSource->y / 65535.0f;
    V.vector4_f32[2] = (FLOAT)pSource->z / 65535.0f;
    V.vector4_f32[3] = (FLOAT)pSource->w / 65535.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixupY16W16 = {1.0f/65535.0f,1.0f/65535.0f,1.0f/(65535.0f*65536.0f),1.0f/(65535.0f*65536.0f)};
    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f*65536.0f,32768.0f*65536.0f};
    XMASSERT(pSource);
    // Splat the value in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Mask x&0xffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
    // y and w read as negative! Flip the bits to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // y and w + 0x8000 (scaled by 65536) to complete the conversion
    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
    // Normalize, and fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,FixupY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

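// Scalar reference (illustrative) for one unsigned normalized 16-bit field:
//
//     FLOAT DecodeUnorm16(UINT Bits16)
//     {
//         return (FLOAT)(Bits16 & 0xFFFF) / 65535.0f; // 0..65535 -> 0.0f..1.0f
//     }
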
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUShort4
(
    CONST XMUSHORT4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;
    V.vector4_f32[2] = (FLOAT)pSource->z;
    V.vector4_f32[3] = (FLOAT)pSource->w;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f,32768.0f};
    XMASSERT(pSource);
    // Splat the value in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Mask x&0xffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(reinterpret_cast<const __m128 *>(&vIntd)[0],g_XMMaskX16Y16Z16W16);
    // y and w read as negative! Flip the bits to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
    // y and w + 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadXIcoN4
(
    CONST XMXICON4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFF00000};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 20) & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 40) & 0xFFFFFull) != 0x80000ull);

    Element = (UINT)pSource->v & 0xFFFFF;
    V.vector4_f32[0] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    Element = (UINT)(pSource->v >> 20) & 0xFFFFF;
    V.vector4_f32[1] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    Element = (UINT)(pSource->v >> 40) & 0xFFFFF;
    V.vector4_f32[2] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 60) / 15.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadXIcoN4Mul = {1.0f/524287.0f,1.0f/(524287.0f*4096.0f),1.0f/524287.0f,1.0f/(15.0f*4096.0f*65536.0f)};
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 20) & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 40) & 0xFFFFFull) != 0x80000ull);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // x, y and z are stored unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorXIco4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Subtract the bias from x, y and z to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddXIco4);
    // Normalize, and fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadXIcoN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

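// The table-driven sign extension used by the no-intrinsics path, as a scalar
// sketch (illustrative) for one 20-bit field:
//
//     INT SignExtend20(UINT Element)    // Element in [0, 0xFFFFF]
//     {
//         static CONST UINT SignExtend[] = {0x00000000, 0xFFF00000};
//         return (INT)(Element | SignExtend[Element >> 19]);
//     }
//     // SignExtend20(0xFFFFF) == -1, SignExtend20(0x7FFFF) == 524287
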
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadXIco4
(
    CONST XMXICO4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFF00000};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 20) & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 40) & 0xFFFFFull) != 0x80000ull);

    Element = (UINT)pSource->v & 0xFFFFF;
    V.vector4_f32[0] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    Element = (UINT)(pSource->v >> 20) & 0xFFFFF;
    V.vector4_f32[1] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    Element = (UINT)(pSource->v >> 40) & 0xFFFFF;
    V.vector4_f32[2] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 60);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 20) & 0xFFFFFull) != 0x80000ull);
    XMASSERT(((pSource->v >> 40) & 0xFFFFFull) != 0x80000ull);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // x, y and z are stored unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorXIco4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Subtract the bias from x, y and z to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddXIco4);
    // Fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,g_XMMulIco4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUIcoN4
(
    CONST XMUICON4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)(pSource->v & 0xFFFFF) / 1048575.0f;
    V.vector4_f32[1] = (FLOAT)((pSource->v >> 20) & 0xFFFFF) / 1048575.0f;
    V.vector4_f32[2] = (FLOAT)((pSource->v >> 40) & 0xFFFFF) / 1048575.0f;
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 60) / 15.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadUIcoN4Mul = {1.0f/1048575.0f,1.0f/(1048575.0f*4096.0f),1.0f/1048575.0f,1.0f/(15.0f*4096.0f*65536.0f)};
    XMASSERT(pSource);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // y and w read as negative! Flip their sign bits to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipYW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // y and w + bias to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUIco4);
    // Normalize, and fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadUIcoN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUIco4
(
    CONST XMUICO4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)(pSource->v & 0xFFFFF);
    V.vector4_f32[1] = (FLOAT)((pSource->v >> 20) & 0xFFFFF);
    V.vector4_f32[2] = (FLOAT)((pSource->v >> 40) & 0xFFFFF);
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 60);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // y and w read as negative! Flip their sign bits to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipYW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // y and w + bias to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUIco4);
    // Fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,g_XMMulIco4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadIcoN4
(
    CONST XMICON4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFF00000};
    static CONST UINT SignExtendW[] = {0x00000000, 0xFFFFFFF0};

    XMASSERT(pSource);

    Element = (UINT)pSource->v & 0xFFFFF;
    V.vector4_f32[0] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    Element = (UINT)(pSource->v >> 20) & 0xFFFFF;
    V.vector4_f32[1] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    Element = (UINT)(pSource->v >> 40) & 0xFFFFF;
    V.vector4_f32[2] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]) / 524287.0f;
    Element = (UINT)(pSource->v >> 60);
    V.vector4_f32[3] = (FLOAT)(INT)(Element | SignExtendW[Element >> 3]) / 7.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadIcoN4Mul = {1.0f/524287.0f,1.0f/(524287.0f*4096.0f),1.0f/524287.0f,1.0f/(7.0f*4096.0f*65536.0f)};
    XMASSERT(pSource);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // All four components are stored unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorIco4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Subtract the bias from each component to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddIco4);
    // Normalize, and fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadIcoN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadIco4
(
    CONST XMICO4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFF00000};
    static CONST UINT SignExtendW[] = {0x00000000, 0xFFFFFFF0};

    XMASSERT(pSource);

    Element = (UINT)pSource->v & 0xFFFFF;
    V.vector4_f32[0] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    Element = (UINT)(pSource->v >> 20) & 0xFFFFF;
    V.vector4_f32[1] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    Element = (UINT)(pSource->v >> 40) & 0xFFFFF;
    V.vector4_f32[2] = (FLOAT)(INT)(Element | SignExtend[Element >> 19]);
    Element = (UINT)(pSource->v >> 60);
    V.vector4_f32[3] = (FLOAT)(INT)(Element | SignExtendW[Element >> 3]);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Grab the 64 bit structure
    __m128d vResultd = _mm_load_sd(reinterpret_cast<const double *>(&pSource->v));
    // By shifting down 8 bits, y and z are in separate 32 bit elements
    __m128i vResulti = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vResultd)[0],8/8);
    // vResultd has x and w, vResulti has y and z, merge into one as x,w,y,z
    XMVECTOR vTemp = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResultd)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(1,0,1,0));
    // Fix the entries to x,y,z,w
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,3,2,0));
    // Mask x,y,z and w
    vTemp = _mm_and_ps(vTemp,g_XMMaskIco4);
    // All four components are stored unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorIco4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Subtract the bias from each component to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddIco4);
    // Fix y and w because they are too large
    vTemp = _mm_mul_ps(vTemp,g_XMMulIco4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

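// Scalar reference (illustrative) for the flip-then-rebias decode used by the
// SSE paths of the Ico loaders, for one 20-bit signed field:
//
//     INT DecodeSigned20(UINT Field)    // Field in [0, 0xFFFFF], 0x80000 invalid
//     {
//         return (INT)(Field ^ 0x80000) - 0x80000;
//     }
//     // DecodeSigned20(0xFFFFF) == -1, DecodeSigned20(0x7FFFF) == 524287
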
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadXDecN4
(
    CONST XMXDECN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFFFFC00};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 30) / 3.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Splat the value in all four entries
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskA2B10G10R10);
    // x, y and z are signed and w is unsigned! Flip the sign bits so one signed convert works
    vTemp = _mm_xor_ps(vTemp,g_XMFlipA2B10G10R10);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Remove the bias from each field
    vTemp = _mm_add_ps(vTemp,g_XMFixAA2B10G10R10);
    // Normalize x, y and z to -1.0f..1.0f and w to 0.0f..1.0f
    return _mm_mul_ps(vTemp,g_XMNormalizeA2B10G10R10);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

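// Scalar reference (illustrative) for one signed normalized 10-bit field of
// the 10:10:10:2 formats above:
//
//     FLOAT DecodeSnorm10(UINT Field)   // Field in [0, 0x3FF], 0x200 invalid
//     {
//         static CONST UINT SignExtend[] = {0x00000000, 0xFFFFFC00};
//         return (FLOAT)(INT)(Field | SignExtend[Field >> 9]) / 511.0f;
//     }
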
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadXDec4
(
    CONST XMXDEC4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFFFFC00};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 30);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORI32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000};
    static const XMVECTORF32 XDec4Add = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,32768*65536.0f};
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // x, y and z are signed and w is unsigned! Flip the sign bits so one signed convert works
    vTemp = _mm_xor_ps(vTemp,XDec4Xor);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Remove the bias from each field
    vTemp = _mm_add_ps(vTemp,XDec4Add);
    // Scale y, z and w back down to their integer values
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUDecN4
(
    CONST XMUDECN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)Element / 1023.0f;
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)Element / 1023.0f;
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)Element / 1023.0f;
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 30) / 3.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 UDecN4Mul = {1.0f/1023.0f,1.0f/(1023.0f*1024.0f),1.0f/(1023.0f*1024.0f*1024.0f),1.0f/(3.0f*1024.0f*1024.0f*1024.0f)};
    XMASSERT(pSource);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // w reads as negative! Flip its sign bit to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x,y,z + 0, w + 0x80000000 to undo the flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize 0..1023 (w: 0..3) to 0.0f..1.0f
    vTemp = _mm_mul_ps(vTemp,UDecN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUDec4
(
    CONST XMUDEC4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)Element;
    V.vector4_f32[3] = (FLOAT)(pSource->v >> 30);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // w reads as negative! Flip its sign bit to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x,y,z + 0, w + 0x80000000 to undo the flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Scale y, z and w back down to their integer values
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadDecN4
(
    CONST XMDECN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFFFFC00};
    static CONST UINT SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 30) & 0x3) != 0x2);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]) / 511.0f;
    Element = pSource->v >> 30;
    V.vector4_f32[3] = (FLOAT)(SHORT)(Element | SignExtendW[Element >> 1]);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 30) & 0x3) != 0x2);
    static const XMVECTORF32 DecN4Mul = {1.0f/511.0f,1.0f/(511.0f*1024.0f),1.0f/(511.0f*1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // All four fields are signed! Flip the sign bits so the convert works
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Remove the bias from each field
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Normalize to -1.0f..1.0f
    vTemp = _mm_mul_ps(vTemp,DecN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadDec4
(
    CONST XMDEC4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;
    UINT Element;
    static CONST UINT SignExtend[] = {0x00000000, 0xFFFFFC00};
    static CONST UINT SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 30) & 0x3) != 0x2);

    Element = pSource->v & 0x3FF;
    V.vector4_f32[0] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    Element = (pSource->v >> 10) & 0x3FF;
    V.vector4_f32[1] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    Element = (pSource->v >> 20) & 0x3FF;
    V.vector4_f32[2] = (FLOAT)(SHORT)(Element | SignExtend[Element >> 9]);
    Element = pSource->v >> 30;
    V.vector4_f32[3] = (FLOAT)(SHORT)(Element | SignExtendW[Element >> 1]);

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMASSERT((pSource->v & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 10) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 20) & 0x3FF) != 0x200);
    XMASSERT(((pSource->v >> 30) & 0x3) != 0x2);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&0xC0000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // All four fields are signed! Flip the sign bits so the convert works
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // Remove the bias from each field
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Scale y, z and w back down to their integer values
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUByteN4
(
    CONST XMUBYTEN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x / 255.0f;
    V.vector4_f32[1] = (FLOAT)pSource->y / 255.0f;
    V.vector4_f32[2] = (FLOAT)pSource->z / 255.0f;
    V.vector4_f32[3] = (FLOAT)pSource->w / 255.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadUByteN4Mul = {1.0f/255.0f,1.0f/(255.0f*256.0f),1.0f/(255.0f*65536.0f),1.0f/(255.0f*65536.0f*256.0f)};
    XMASSERT(pSource);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w reads as negative! Flip its sign bit to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // w + 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize, and fix y, z and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadUByteN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

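// Usage sketch (illustrative): byte values 0..255 load as 0.0f..1.0f.
//
//     XMUBYTEN4 u;
//     u.x = 255; u.y = 128; u.z = 0; u.w = 64;
//     XMVECTOR v = XMLoadUByteN4( &u ); // ~(1.0f, 0.502f, 0.0f, 0.251f)
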
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUByte4
(
    CONST XMUBYTE4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;
    V.vector4_f32[2] = (FLOAT)pSource->z;
    V.vector4_f32[3] = (FLOAT)pSource->w;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadUByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    XMASSERT(pSource);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w reads as negative! Flip its sign bit to make the signed convert correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // w + 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Fix y, z and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadUByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadByteN4
(
    CONST XMBYTEN4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(pSource->x != -128);
    XMASSERT(pSource->y != -128);
    XMASSERT(pSource->z != -128);
    XMASSERT(pSource->w != -128);

    V.vector4_f32[0] = (FLOAT)pSource->x / 127.0f;
    V.vector4_f32[1] = (FLOAT)pSource->y / 127.0f;
    V.vector4_f32[2] = (FLOAT)pSource->z / 127.0f;
    V.vector4_f32[3] = (FLOAT)pSource->w / 127.0f;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadByteN4Mul = {1.0f/127.0f,1.0f/(127.0f*256.0f),1.0f/(127.0f*65536.0f),1.0f/(127.0f*65536.0f*256.0f)};
    XMASSERT(pSource);
    XMASSERT(pSource->x != -128);
    XMASSERT(pSource->y != -128);
    XMASSERT(pSource->z != -128);
    XMASSERT(pSource->w != -128);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // x, y and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x, y and z - 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Normalize, and fix y, z and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadByteN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadByte4
(
    CONST XMBYTE4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR V;

    XMASSERT(pSource);
    XMASSERT(pSource->x != -128);
    XMASSERT(pSource->y != -128);
    XMASSERT(pSource->z != -128);
    XMASSERT(pSource->w != -128);

    V.vector4_f32[0] = (FLOAT)pSource->x;
    V.vector4_f32[1] = (FLOAT)pSource->y;
    V.vector4_f32[2] = (FLOAT)pSource->z;
    V.vector4_f32[3] = (FLOAT)pSource->w;

    return V;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 LoadByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    XMASSERT(pSource);
    XMASSERT(pSource->x != -128);
    XMASSERT(pSource->y != -128);
    XMASSERT(pSource->z != -128);
    XMASSERT(pSource->w != -128);
    // Splat the value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // x, y and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vTemp)[0]);
    // x, y and z - 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Fix y, z and w because they are too large
    vTemp = _mm_mul_ps(vTemp,LoadByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadUNibble4
(
    CONST XMUNIBBLE4* pSource
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 UNibble4And = {0xF,0xF0,0xF00,0xF000};
    static const XMVECTORF32 UNibble4Mul = {1.0f,1.0f/16.f,1.0f/256.f,1.0f/4096.f};
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y, z and w
    vResult = _mm_and_ps(vResult,UNibble4And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Scale y, z and w back down to their integer values
    vResult = _mm_mul_ps(vResult,UNibble4Mul);
    return vResult;
#else
    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0xF;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 4) & 0xF;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 8) & 0xF;
    V.vector4_f32[2] = (FLOAT)Element;
    Element = (pSource->v >> 12) & 0xF;
    V.vector4_f32[3] = (FLOAT)Element;

    return V;
#endif // !_XM_SSE_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadU555
(
    CONST XMU555* pSource
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 U555And = {0x1F,0x1F<<5,0x1F<<10,0x8000};
    static const XMVECTORF32 U555Mul = {1.0f,1.0f/32.f,1.0f/1024.f,1.0f/32768.f};
    XMASSERT(pSource);
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y, z and w
    vResult = _mm_and_ps(vResult,U555And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(reinterpret_cast<const __m128i *>(&vResult)[0]);
    // Scale y, z and w back down to their integer values
    vResult = _mm_mul_ps(vResult,U555Mul);
    return vResult;
#else
    XMVECTOR V;
    UINT Element;

    XMASSERT(pSource);

    Element = pSource->v & 0x1F;
    V.vector4_f32[0] = (FLOAT)Element;
    Element = (pSource->v >> 5) & 0x1F;
    V.vector4_f32[1] = (FLOAT)Element;
    Element = (pSource->v >> 10) & 0x1F;
    V.vector4_f32[2] = (FLOAT)Element;
    Element = (pSource->v >> 15) & 0x1;
    V.vector4_f32[3] = (FLOAT)Element;

    return V;
#endif // !_XM_SSE_INTRINSICS_
}

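// Usage sketch (illustrative): XMLoadU555 returns the unpacked integer values,
// not normalized floats.
//
//     XMU555 p;
//     p.v = (USHORT)((1 << 15) | (31 << 10) | (0 << 5) | 15); // w=1 z=31 y=0 x=15
//     XMVECTOR v = XMLoadU555( &p ); // (15.0f, 0.0f, 31.0f, 1.0f)
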
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMLoadColor
(
    CONST XMCOLOR* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMASSERT(pSource);
    {
        // INT -> Float conversions are done in one instruction.
        // UINT -> Float calls a runtime function. Keep in INT
        INT iColor = (INT)(pSource->c);
        XMVECTOR vColor = {
            (FLOAT)((iColor >> 16) & 0xFF) * (1.0f/255.0f),
            (FLOAT)((iColor >> 8) & 0xFF) * (1.0f/255.0f),
            (FLOAT)(iColor & 0xFF) * (1.0f/255.0f),
            (FLOAT)((iColor >> 24) & 0xFF) * (1.0f/255.0f)
        };
        return vColor;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Splat the color in all four entries
    __m128i vInt = _mm_set1_epi32(pSource->c);
    // Mask R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
    vInt = _mm_and_si128(vInt,g_XMMaskA8R8G8B8);
    // A reads as negative! Flip its sign bit to make the signed convert correct
    vInt = _mm_xor_si128(vInt,g_XMFlipA8R8G8B8);
    // Convert to floating point numbers
    XMVECTOR vTemp = _mm_cvtepi32_ps(vInt);
    // RGB + 0, A + 0x80000000.f to undo the flip
    vTemp = _mm_add_ps(vTemp,g_XMFixAA8R8G8B8);
    // Convert 0-255 to 0.0f-1.0f
    return _mm_mul_ps(vTemp,g_XMNormalizeA8R8G8B8);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

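// Usage sketch (illustrative): XMCOLOR is packed as 0xAARRGGBB and loads as
// (R, G, B, A) in 0.0f..1.0f.
//
//     XMCOLOR c;
//     c.c = 0xFF8000FF; // A=0xFF, R=0x80, G=0x00, B=0xFF
//     XMVECTOR v = XMLoadColor( &c ); // ~(0.502f, 0.0f, 1.0f, 1.0f)
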
//------------------------------------------------------------------------------

XMFINLINE XMMATRIX XMLoadFloat3x3
(
    CONST XMFLOAT3X3* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;

    XMASSERT(pSource);

    M.r[0].vector4_f32[0] = pSource->m[0][0];
    M.r[0].vector4_f32[1] = pSource->m[0][1];
    M.r[0].vector4_f32[2] = pSource->m[0][2];
    M.r[0].vector4_f32[3] = 0.0f;

    M.r[1].vector4_f32[0] = pSource->m[1][0];
    M.r[1].vector4_f32[1] = pSource->m[1][1];
    M.r[1].vector4_f32[2] = pSource->m[1][2];
    M.r[1].vector4_f32[3] = 0.0f;

    M.r[2].vector4_f32[0] = pSource->m[2][0];
    M.r[2].vector4_f32[1] = pSource->m[2][1];
    M.r[2].vector4_f32[2] = pSource->m[2][2];
    M.r[2].vector4_f32[3] = 0.0f;

    M.r[3].vector4_f32[0] = 0.0f;
    M.r[3].vector4_f32[1] = 0.0f;
    M.r[3].vector4_f32[2] = 0.0f;
    M.r[3].vector4_f32[3] = 1.0f;

    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    XMVECTOR V1, V2, V3, Z, T1, T2, T3, T4, T5;

    Z = _mm_setzero_ps();

    XMASSERT(pSource);

    V1 = _mm_loadu_ps( &pSource->m[0][0] );   // m00,m01,m02,m10
    V2 = _mm_loadu_ps( &pSource->m[1][1] );   // m11,m12,m20,m21
    V3 = _mm_load_ss( &pSource->m[2][2] );    // m22,0,0,0

    T1 = _mm_unpackhi_ps( V1, Z );            // m02,0,m10,0
    T2 = _mm_unpacklo_ps( V2, Z );            // m11,0,m12,0
    T3 = _mm_shuffle_ps( V3, T2, _MM_SHUFFLE( 0, 1, 0, 0 ) );  // m22,m22,0,m11
    T4 = _mm_movehl_ps( T2, T3 );             // 0,m11,m12,0
    T5 = _mm_movehl_ps( Z, T1 );              // m10,0,0,0

    M.r[0] = _mm_movelh_ps( V1, T1 );         // m00,m01,m02,0
    M.r[1] = _mm_add_ps( T4, T5 );            // m10,m11,m12,0
    M.r[2] = _mm_shuffle_ps( V2, V3, _MM_SHUFFLE(1, 0, 3, 2) ); // m20,m21,m22,0
    M.r[3] = g_XMIdentityR3;

    return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

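// Usage sketch (illustrative): a 3x3 matrix expands to a 4x4 XMMATRIX with the
// fourth row and column taken from the identity. Assumes the 9-float
// XMFLOAT3X3 constructor available in C++ builds.
//
//     XMFLOAT3X3 m3( 1.0f, 0.0f, 0.0f,
//                    0.0f, 1.0f, 0.0f,
//                    0.0f, 0.0f, 1.0f );
//     XMMATRIX M = XMLoadFloat3x3( &m3 ); // M.r[3] == (0, 0, 0, 1)
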
2809 | //------------------------------------------------------------------------------ |
||
2810 | |||
2811 | XMFINLINE XMMATRIX XMLoadFloat4x3 |
||
2812 | ( |
||
2813 | CONST XMFLOAT4X3* pSource |
||
2814 | ) |
||
2815 | { |
||
2816 | #if defined(_XM_NO_INTRINSICS_) |
||
2817 | XMMATRIX M; |
||
2818 | XMASSERT(pSource); |
||
2819 | |||
2820 | ((UINT *)(&M.r[0].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[0][0]))[0]; |
||
2821 | ((UINT *)(&M.r[0].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[0][1]))[0]; |
||
2822 | ((UINT *)(&M.r[0].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[0][2]))[0]; |
||
2823 | M.r[0].vector4_f32[3] = 0.0f; |
||
2824 | |||
2825 | ((UINT *)(&M.r[1].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[1][0]))[0]; |
||
2826 | ((UINT *)(&M.r[1].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[1][1]))[0]; |
||
2827 | ((UINT *)(&M.r[1].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[1][2]))[0]; |
||
2828 | M.r[1].vector4_f32[3] = 0.0f; |
||
2829 | |||
2830 | ((UINT *)(&M.r[2].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[2][0]))[0]; |
||
2831 | ((UINT *)(&M.r[2].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[2][1]))[0]; |
||
2832 | ((UINT *)(&M.r[2].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[2][2]))[0]; |
||
2833 | M.r[2].vector4_f32[3] = 0.0f; |
||
2834 | |||
2835 | ((UINT *)(&M.r[3].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[3][0]))[0]; |
||
2836 | ((UINT *)(&M.r[3].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[3][1]))[0]; |
||
2837 | ((UINT *)(&M.r[3].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[3][2]))[0]; |
||
2838 | M.r[3].vector4_f32[3] = 1.0f; |
||
2839 | |||
2840 | return M; |
||
2841 | |||
2842 | #elif defined(_XM_SSE_INTRINSICS_) |
||
2843 | XMASSERT(pSource); |
||
2844 | // Use unaligned load instructions to |
||
2845 | // load the 12 floats |
||
2846 | // vTemp1 = x1,y1,z1,x2 |
||
2847 | XMVECTOR vTemp1 = _mm_loadu_ps(&pSource->m[0][0]); |
||
2848 | // vTemp2 = y2,z2,x3,y3 |
||
2849 | XMVECTOR vTemp2 = _mm_loadu_ps(&pSource->m[1][1]); |
||
2850 | // vTemp4 = z3,x4,y4,z4 |
||
2851 | XMVECTOR vTemp4 = _mm_loadu_ps(&pSource->m[2][2]); |
||
2852 | // vTemp3 = x3,y3,z3,z3 |
||
2853 | XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2,vTemp4,_MM_SHUFFLE(0,0,3,2)); |
||
2854 | // vTemp2 = y2,z2,x2,x2 |
||
2855 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0)); |
||
2856 | // vTemp2 = x2,y2,z2,z2 |
||
2857 | vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(1,1,0,2)); |
||
2858 | // vTemp1 = x1,y1,z1,0 |
||
2859 | vTemp1 = _mm_and_ps(vTemp1,g_XMMask3); |
||
2860 | // vTemp2 = x2,y2,z2,0 |
||
2861 | vTemp2 = _mm_and_ps(vTemp2,g_XMMask3); |
||
2862 | // vTemp3 = x3,y3,z3,0 |
||
2863 | vTemp3 = _mm_and_ps(vTemp3,g_XMMask3); |
||
2864 | // vTemp4i = x4,y4,z4,0 |
||
2865 | __m128i vTemp4i = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vTemp4)[0],32/8); |
||
2866 | // vTemp4i = x4,y4,z4,1.0f |
||
2867 | vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3); |
||
2868 | XMMATRIX M(vTemp1, |
||
2869 | vTemp2, |
||
2870 | vTemp3, |
||
2871 | reinterpret_cast<const __m128 *>(&vTemp4i)[0]); |
||
2872 | return M; |
||
2873 | #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) |
||
2874 | #endif // _XM_VMX128_INTRINSICS_ |
||
2875 | } |
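
/*
    The byte-shift trick above in isolation (a minimal sketch, independent of
    XNA Math): _mm_srli_si128 shifts the whole 128-bit register right by a
    byte count and zero-fills from the top, so (z3,x4,y4,z4) becomes
    (x4,y4,z4,0), ready to be OR'ed with the 0,0,0,1.0f identity row:

        #include <emmintrin.h>
        #include <cstdio>

        int main()
        {
            __m128 v = _mm_set_ps( 4.0f, 3.0f, 2.0f, 1.0f );  // lanes: 1,2,3,4
            __m128i vi = _mm_srli_si128( _mm_castps_si128( v ), 4 );
            float out[4];
            _mm_storeu_ps( out, _mm_castsi128_ps( vi ) );
            printf( "%g %g %g %g\n", out[0], out[1], out[2], out[3] ); // 2 3 4 0
            return 0;
        }
*/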

//------------------------------------------------------------------------------

XMFINLINE XMMATRIX XMLoadFloat4x3A
(
    CONST XMFLOAT4X3A* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    M.r[0].vector4_f32[0] = pSource->m[0][0];
    M.r[0].vector4_f32[1] = pSource->m[0][1];
    M.r[0].vector4_f32[2] = pSource->m[0][2];
    M.r[0].vector4_f32[3] = 0.0f;

    M.r[1].vector4_f32[0] = pSource->m[1][0];
    M.r[1].vector4_f32[1] = pSource->m[1][1];
    M.r[1].vector4_f32[2] = pSource->m[1][2];
    M.r[1].vector4_f32[3] = 0.0f;

    M.r[2].vector4_f32[0] = pSource->m[2][0];
    M.r[2].vector4_f32[1] = pSource->m[2][1];
    M.r[2].vector4_f32[2] = pSource->m[2][2];
    M.r[2].vector4_f32[3] = 0.0f;

    M.r[3].vector4_f32[0] = pSource->m[3][0];
    M.r[3].vector4_f32[1] = pSource->m[3][1];
    M.r[3].vector4_f32[2] = pSource->m[3][2];
    M.r[3].vector4_f32[3] = 1.0f;

    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    // Use aligned load instructions to
    // load the 12 floats
    // vTemp1 = x1,y1,z1,x2
    XMVECTOR vTemp1 = _mm_load_ps(&pSource->m[0][0]);
    // vTemp2 = y2,z2,x3,y3
    XMVECTOR vTemp2 = _mm_load_ps(&pSource->m[1][1]);
    // vTemp4 = z3,x4,y4,z4
    XMVECTOR vTemp4 = _mm_load_ps(&pSource->m[2][2]);
    // vTemp3 = x3,y3,z3,z3
    XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2,vTemp4,_MM_SHUFFLE(0,0,3,2));
    // vTemp2 = y2,z2,x2,x2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0));
    // vTemp2 = x2,y2,z2,z2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(1,1,0,2));
    // vTemp1 = x1,y1,z1,0
    vTemp1 = _mm_and_ps(vTemp1,g_XMMask3);
    // vTemp2 = x2,y2,z2,0
    vTemp2 = _mm_and_ps(vTemp2,g_XMMask3);
    // vTemp3 = x3,y3,z3,0
    vTemp3 = _mm_and_ps(vTemp3,g_XMMask3);
    // vTemp4i = x4,y4,z4,0
    __m128i vTemp4i = _mm_srli_si128(reinterpret_cast<const __m128i *>(&vTemp4)[0],32/8);
    // vTemp4i = x4,y4,z4,1.0f
    vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3);
    XMMATRIX M(vTemp1,
               vTemp2,
               vTemp3,
               reinterpret_cast<const __m128 *>(&vTemp4i)[0]);
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMMATRIX XMLoadFloat4x4
(
    CONST XMFLOAT4X4* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX M;
    XMASSERT(pSource);

    ((UINT *)(&M.r[0].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[0][0]))[0];
    ((UINT *)(&M.r[0].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[0][1]))[0];
    ((UINT *)(&M.r[0].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[0][2]))[0];
    ((UINT *)(&M.r[0].vector4_f32[3]))[0] = ((const UINT *)(&pSource->m[0][3]))[0];

    ((UINT *)(&M.r[1].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[1][0]))[0];
    ((UINT *)(&M.r[1].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[1][1]))[0];
    ((UINT *)(&M.r[1].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[1][2]))[0];
    ((UINT *)(&M.r[1].vector4_f32[3]))[0] = ((const UINT *)(&pSource->m[1][3]))[0];

    ((UINT *)(&M.r[2].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[2][0]))[0];
    ((UINT *)(&M.r[2].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[2][1]))[0];
    ((UINT *)(&M.r[2].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[2][2]))[0];
    ((UINT *)(&M.r[2].vector4_f32[3]))[0] = ((const UINT *)(&pSource->m[2][3]))[0];

    ((UINT *)(&M.r[3].vector4_f32[0]))[0] = ((const UINT *)(&pSource->m[3][0]))[0];
    ((UINT *)(&M.r[3].vector4_f32[1]))[0] = ((const UINT *)(&pSource->m[3][1]))[0];
    ((UINT *)(&M.r[3].vector4_f32[2]))[0] = ((const UINT *)(&pSource->m[3][2]))[0];
    ((UINT *)(&M.r[3].vector4_f32[3]))[0] = ((const UINT *)(&pSource->m[3][3]))[0];

    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSource);
    XMMATRIX M;

    M.r[0] = _mm_loadu_ps( &pSource->_11 );
    M.r[1] = _mm_loadu_ps( &pSource->_21 );
    M.r[2] = _mm_loadu_ps( &pSource->_31 );
    M.r[3] = _mm_loadu_ps( &pSource->_41 );

    return M;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMMATRIX XMLoadFloat4x4A
(
    CONST XMFLOAT4X4A* pSource
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;

    XMASSERT(pSource);
    XMASSERT(((UINT_PTR)pSource & 0xF) == 0);

    M.r[0].vector4_f32[0] = pSource->m[0][0];
    M.r[0].vector4_f32[1] = pSource->m[0][1];
    M.r[0].vector4_f32[2] = pSource->m[0][2];
    M.r[0].vector4_f32[3] = pSource->m[0][3];

    M.r[1].vector4_f32[0] = pSource->m[1][0];
    M.r[1].vector4_f32[1] = pSource->m[1][1];
    M.r[1].vector4_f32[2] = pSource->m[1][2];
    M.r[1].vector4_f32[3] = pSource->m[1][3];

    M.r[2].vector4_f32[0] = pSource->m[2][0];
    M.r[2].vector4_f32[1] = pSource->m[2][1];
    M.r[2].vector4_f32[2] = pSource->m[2][2];
    M.r[2].vector4_f32[3] = pSource->m[2][3];

    M.r[3].vector4_f32[0] = pSource->m[3][0];
    M.r[3].vector4_f32[1] = pSource->m[3][1];
    M.r[3].vector4_f32[2] = pSource->m[3][2];
    M.r[3].vector4_f32[3] = pSource->m[3][3];

    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;

    XMASSERT(pSource);

    M.r[0] = _mm_load_ps( &pSource->_11 );
    M.r[1] = _mm_load_ps( &pSource->_21 );
    M.r[2] = _mm_load_ps( &pSource->_31 );
    M.r[3] = _mm_load_ps( &pSource->_41 );

    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

/****************************************************************************
*
* Vector and matrix store operations
*
****************************************************************************/

XMFINLINE VOID XMStoreInt
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    *pDestination = XMVectorGetIntX( V );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat
(
    FLOAT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    *pDestination = XMVectorGetX( V );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt2
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);
    pDestination[0] = XMVectorGetIntX( V );
    pDestination[1] = XMVectorGetIntY( V );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt2A
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    // The aligned variant requires a 16-byte-aligned destination on both paths
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    _mm_storel_epi64( (__m128i*)pDestination, reinterpret_cast<const __m128i *>(&V)[0] );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat2
(
    XMFLOAT2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    pDestination->x = V.vector4_f32[0];
    pDestination->y = V.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    XMVECTOR T = _mm_shuffle_ps( V, V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
    _mm_store_ss( &pDestination->x, V );
    _mm_store_ss( &pDestination->y, T );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
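
/*
    Why two _mm_store_ss calls: XMFLOAT2 is only 8 bytes, so a full 16-byte
    vector store would write past the destination. The same pattern in plain
    SSE (a minimal sketch, illustrative only):

        #include <xmmintrin.h>
        #include <cstdio>

        int main()
        {
            float dst[2] = { 0.0f, 0.0f };
            __m128 v = _mm_set_ps( 40.0f, 30.0f, 20.0f, 10.0f );
            __m128 t = _mm_shuffle_ps( v, v, _MM_SHUFFLE( 1, 1, 1, 1 ) );
            _mm_store_ss( &dst[0], v );   // writes lane x only
            _mm_store_ss( &dst[1], t );   // writes lane y only
            printf( "%g %g\n", dst[0], dst[1] );  // 10 20
            return 0;
        }
*/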

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat2A
(
    XMFLOAT2A* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination->x = V.vector4_f32[0];
    pDestination->y = V.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    // The aligned variant requires a 16-byte-aligned destination on both paths
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    XMVECTOR T = _mm_shuffle_ps( V, V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
    _mm_store_ss( &pDestination->x, V );
    _mm_store_ss( &pDestination->y, T );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreHalf2
(
    XMHALF2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination->x = XMConvertFloatToHalf(V.vector4_f32[0]);
    pDestination->y = XMConvertFloatToHalf(V.vector4_f32[1]);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
    pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreShortN2
(
    XMSHORTN2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->x = (SHORT)N.vector4_f32[0];
    pDestination->y = (SHORT)N.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
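
/*
    The scale of 32767 (not 32768) pairs with _mm_packs_epi32, which narrows
    32-bit ints to 16-bit with signed saturation. A minimal sketch
    (illustrative, not part of the library):

        #include <emmintrin.h>
        #include <cstdio>

        int main()
        {
            __m128i v = _mm_set_epi32( 0, 0, -40000, 32767 );
            __m128i p = _mm_packs_epi32( v, v );  // saturates to [-32768,32767]
            short out[8];
            _mm_storeu_si128( (__m128i*)out, p );
            printf( "%d %d\n", out[0], out[1] );  // 32767 -32768
            return 0;
        }
*/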

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreShort2
(
    XMSHORT2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static CONST XMVECTOR Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    pDestination->x = (SHORT)N.vector4_f32[0];
    pDestination->y = (SHORT)N.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static CONST XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),reinterpret_cast<const __m128 *>(&vInt)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUShortN2
(
    XMUSHORTN2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
    N = XMVectorTruncate(N);

    // Cast through USHORT: values up to 65535 do not fit in a signed SHORT
    pDestination->x = (USHORT)N.vector4_f32[0];
    pDestination->y = (USHORT)N.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
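
/*
    _mm_packs_epi32 cannot be used here because it saturates with signed
    rules (65535 would clamp to 32767), so each 16-bit result is pulled out
    with _mm_extract_epi16 instead. A minimal sketch (illustrative only):

        #include <emmintrin.h>
        #include <cstdio>

        int main()
        {
            __m128i v = _mm_set_epi32( 0, 0, 1, 65535 );
            // word 0 is the low half of the first 32-bit lane
            unsigned short x = (unsigned short)_mm_extract_epi16( v, 0 );
            unsigned short y = (unsigned short)_mm_extract_epi16( v, 2 );
            printf( "%u %u\n", x, y );  // 65535 1
            return 0;
        }
*/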

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUShort2
(
    XMUSHORT2* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    // Cast through USHORT: values up to 65535 do not fit in a signed SHORT
    pDestination->x = (USHORT)N.vector4_f32[0];
    pDestination->y = (USHORT)N.vector4_f32[1];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt3
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];
    pDestination[2] = V.vector4_u32[2];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);
    pDestination[0] = XMVectorGetIntX( V );
    pDestination[1] = XMVectorGetIntY( V );
    pDestination[2] = XMVectorGetIntZ( V );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt3A
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];
    pDestination[2] = V.vector4_u32[2];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    // The aligned variant requires a 16-byte-aligned destination on both paths
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);
    pDestination[0] = XMVectorGetIntX( V );
    pDestination[1] = XMVectorGetIntY( V );
    pDestination[2] = XMVectorGetIntZ( V );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3
(
    XMFLOAT3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    pDestination->x = V.vector4_f32[0];
    pDestination->y = V.vector4_f32[1];
    pDestination->z = V.vector4_f32[2];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 3) == 0);

    XMVECTOR T1 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR T2 = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss( &pDestination->x, V );
    _mm_store_ss( &pDestination->y, T1 );
    _mm_store_ss( &pDestination->z, T2 );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3A
(
    XMFLOAT3A* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination->x = V.vector4_f32[0];
    pDestination->y = V.vector4_f32[1];
    pDestination->z = V.vector4_f32[2];

#elif defined(_XM_SSE_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    XMVECTOR T1 = _mm_shuffle_ps( V, V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
    XMVECTOR T2 = _mm_unpackhi_ps( V, V );
    _mm_store_ss( &pDestination->x, V );
    _mm_store_ss( &pDestination->y, T1 );
    _mm_store_ss( &pDestination->z, T2 );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUHenDN3
(
    XMUHENDN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {2047.0f, 2047.0f, 1023.0f, 0.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = (((UINT)N.vector4_f32[2] & 0x3FF) << 22) |
                      (((UINT)N.vector4_f32[1] & 0x7FF) << 11) |
                      (((UINT)N.vector4_f32[0] & 0x7FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleUHenDN3 = {2047.0f, 2047.0f*2048.0f,1023.0f*(2048.0f*2048.0f)/2.0f,1.0f};
    static const XMVECTORI32 MaskUHenDN3 = {0x7FF,0x7FF<<11,0x3FF<<(22-1),0};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUHenDN3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUHenDN3);
    // Do a horizontal or of 3 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(0,3,2,1));
    // i = x|y
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti2,_MM_SHUFFLE(0,3,2,1));
    // Add Z to itself to perform a single bit left shift
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
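
/*
    The SSE path above packs x:y:z into an 11:11:10 bit layout with shuffles
    and ORs; the equivalent scalar logic, written as a hypothetical
    stand-alone helper (a sketch, not part of XNA Math):

        #include <cstdio>

        unsigned Clamp01ToBits( float f, unsigned maxval )
        {
            if ( f < 0.0f ) f = 0.0f;
            if ( f > 1.0f ) f = 1.0f;
            return (unsigned)( f * (float)maxval );  // truncate, like cvtt
        }

        unsigned PackUHenDN3( float x, float y, float z )
        {
            return  Clamp01ToBits( x, 2047 )
                 | ( Clamp01ToBits( y, 2047 ) << 11 )
                 | ( Clamp01ToBits( z, 1023 ) << 22 );
        }

        int main()
        {
            // z = 0x3FF << 22, y = 0, x = 0x7FF
            printf( "0x%08X\n", PackUHenDN3( 1.0f, 0.0f, 1.0f ) ); // 0xFFC007FF
            return 0;
        }
*/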

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUHenD3
(
    XMUHEND3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {2047.0f, 2047.0f, 1023.0f, 0.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);

    pDestination->v = (((UINT)N.vector4_f32[2] & 0x3FF) << 22) |
                      (((UINT)N.vector4_f32[1] & 0x7FF) << 11) |
                      (((UINT)N.vector4_f32[0] & 0x7FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MaxUHenD3 = { 2047.0f, 2047.0f, 1023.0f, 1.0f};
    static const XMVECTORF32 ScaleUHenD3 = {1.0f, 2048.0f,(2048.0f*2048.0f)/2.0f,1.0f};
    static const XMVECTORI32 MaskUHenD3 = {0x7FF,0x7FF<<11,0x3FF<<(22-1),0};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUHenD3);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUHenD3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUHenD3);
    // Do a horizontal or of 3 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(0,3,2,1));
    // i = x|y
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti2,_MM_SHUFFLE(0,3,2,1));
    // Add Z to itself to perform a single bit left shift
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreHenDN3
(
    XMHENDN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {1023.0f, 1023.0f, 511.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = (((INT)N.vector4_f32[2] & 0x3FF) << 22) |
                      (((INT)N.vector4_f32[1] & 0x7FF) << 11) |
                      (((INT)N.vector4_f32[0] & 0x7FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleHenDN3 = {1023.0f, 1023.0f*2048.0f,511.0f*(2048.0f*2048.0f),1.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleHenDN3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,g_XMMaskHenD3);
    // Do a horizontal or of all 4 entries
    vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
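
/*
    For the signed variants, masking with 0x7FF / 0x3FF is all that is needed
    after the float-to-int conversion: two's-complement keeps the low bits of
    a negative value intact, so the mask yields the correct field encoding.
    A minimal sketch (illustrative only):

        #include <cstdio>

        int main()
        {
            int v = -1023;                   // minimum of an 11-bit signed field
            unsigned field = (unsigned)v & 0x7FF;
            printf( "0x%03X\n", field );     // 0x401 == -1023 in 11-bit two's complement
            return 0;
        }
*/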

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreHenD3
(
    XMHEND3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-1023.0f, -1023.0f, -511.0f, -1.0f};
    static CONST XMVECTOR Max = {1023.0f, 1023.0f, 511.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);

    pDestination->v = (((INT)N.vector4_f32[2] & 0x3FF) << 22) |
                      (((INT)N.vector4_f32[1] & 0x7FF) << 11) |
                      (((INT)N.vector4_f32[0] & 0x7FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MinHenD3 = {-1023.0f,-1023.0f,-511.0f,-1.0f};
    static const XMVECTORF32 MaxHenD3 = { 1023.0f, 1023.0f, 511.0f, 1.0f};
    static const XMVECTORF32 ScaleHenD3 = {1.0f, 2048.0f,(2048.0f*2048.0f),1.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinHenD3);
    vResult = _mm_min_ps(vResult,MaxHenD3);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleHenD3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,g_XMMaskHenD3);
    // Do a horizontal or of all 4 entries
    vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUDHenN3
(
    XMUDHENN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {1023.0f, 2047.0f, 2047.0f, 0.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = (((UINT)N.vector4_f32[2] & 0x7FF) << 21) |
                      (((UINT)N.vector4_f32[1] & 0x7FF) << 10) |
                      (((UINT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleUDHenN3 = {1023.0f,2047.0f*1024.0f,2047.0f*(1024.0f*2048.0f)/2.0f,1.0f};
    static const XMVECTORI32 MaskUDHenN3 = {0x3FF,0x7FF<<10,0x7FF<<(21-1),0};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDHenN3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDHenN3);
    // Do a horizontal or of 3 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(0,3,2,1));
    // i = x|y
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti2,_MM_SHUFFLE(0,3,2,1));
    // Add Z to itself to perform a single bit left shift
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUDHen3
(
    XMUDHEN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {1023.0f, 2047.0f, 2047.0f, 0.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);

    pDestination->v = (((UINT)N.vector4_f32[2] & 0x7FF) << 21) |
                      (((UINT)N.vector4_f32[1] & 0x7FF) << 10) |
                      (((UINT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MaxUDHen3 = { 1023.0f, 2047.0f, 2047.0f, 1.0f};
    static const XMVECTORF32 ScaleUDHen3 = {1.0f, 1024.0f,(1024.0f*2048.0f)/2.0f,1.0f};
    static const XMVECTORI32 MaskUDHen3 = {0x3FF,0x7FF<<10,0x7FF<<(21-1),0};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUDHen3);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDHen3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDHen3);
    // Do a horizontal or of 3 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(0,3,2,1));
    // i = x|y
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti2,_MM_SHUFFLE(0,3,2,1));
    // Add Z to itself to perform a single bit left shift
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreDHenN3
(
    XMDHENN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {511.0f, 1023.0f, 1023.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = (((INT)N.vector4_f32[2] & 0x7FF) << 21) |
                      (((INT)N.vector4_f32[1] & 0x7FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleDHenN3 = {511.0f, 1023.0f*1024.0f,1023.0f*(1024.0f*2048.0f),1.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDHenN3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,g_XMMaskDHen3);
    // Do a horizontal or of all 4 entries
    vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreDHen3
(
    XMDHEN3* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-511.0f, -1023.0f, -1023.0f, -1.0f};
    static CONST XMVECTOR Max = {511.0f, 1023.0f, 1023.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);

    pDestination->v = (((INT)N.vector4_f32[2] & 0x7FF) << 21) |
                      (((INT)N.vector4_f32[1] & 0x7FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MinDHen3 = {-511.0f,-1023.0f,-1023.0f,-1.0f};
    static const XMVECTORF32 MaxDHen3 = { 511.0f, 1023.0f, 1023.0f, 1.0f};
    static const XMVECTORF32 ScaleDHen3 = {1.0f, 1024.0f,(1024.0f*2048.0f),1.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinDHen3);
    vResult = _mm_min_ps(vResult,MaxDHen3);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDHen3);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,g_XMMaskDHen3);
    // Do a horizontal or of all 4 entries
    vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreU565
(
    XMU565* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // No SSE operations will write to 16-bit values, so we have to extract them manually
    USHORT x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    USHORT y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
    USHORT z = static_cast<USHORT>(_mm_extract_epi16(vInt,4));
    pDestination->v = ((z & 0x1F) << 11) |
                      ((y & 0x3F) << 5) |
                      ((x & 0x1F));
#else
    XMVECTOR N;
    static CONST XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max.v);
    N = XMVectorRound(N);

    pDestination->v = (((USHORT)N.vector4_f32[2] & 0x1F) << 11) |
                      (((USHORT)N.vector4_f32[1] & 0x3F) << 5) |
                      (((USHORT)N.vector4_f32[0] & 0x1F));
#endif // !_XM_SSE_INTRINSICS_
}
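
/*
    The 5:6:5 layout above in scalar form (a sketch, not part of the
    library): z occupies the top 5 bits, y the middle 6, x the low 5 --
    the classic 16-bit R5G6B5 arrangement:

        #include <cstdio>

        int main()
        {
            unsigned short x = 31, y = 63, z = 0;   // field maxima for x and y
            unsigned short v = (unsigned short)( ((z & 0x1F) << 11)
                                               | ((y & 0x3F) << 5)
                                               |  (x & 0x1F) );
            printf( "0x%04X\n", v );                // 0x07FF
            return 0;
        }
*/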

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3PK
(
    XMFLOAT3PK* pDestination,
    FXMVECTOR V
)
{
    UINT I, Sign, j;
    UINT IValue[3];
    UINT Result[3];

    XMASSERT(pDestination);

    XMStoreFloat3( (XMFLOAT3*)&IValue, V );

    // X & Y Channels (5-bit exponent, 6-bit mantissa)
    for(j=0; j < 2; ++j)
    {
        Sign = IValue[j] & 0x80000000;
        I = IValue[j] & 0x7FFFFFFF;

        if ((I & 0x7F800000) == 0x7F800000)
        {
            // INF or NAN
            Result[j] = 0x7c0;
            if (( I & 0x7FFFFF ) != 0)
            {
                Result[j] = 0x7c0 | (((I>>17)|(I>>11)|(I>>6)|(I))&0x3f);
            }
            else if ( Sign )
            {
                // -INF is clamped to 0 since 3PK is positive only
                Result[j] = 0;
            }
        }
        else if ( Sign )
        {
            // 3PK is positive only, so clamp to zero
            Result[j] = 0;
        }
        else if (I > 0x477E0000U)
        {
            // The number is too large to be represented as a float11, set to max
            Result[j] = 0x7BF;
        }
        else
        {
            if (I < 0x38800000U)
            {
                // The number is too small to be represented as a normalized float11
                // Convert it to a denormalized value.
                UINT Shift = 113U - (I >> 23U);
                I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
            }
            else
            {
                // Rebias the exponent to represent the value as a normalized float11
                I += 0xC8000000U;
            }

            Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U)&0x7ffU;
        }
    }

    // Z Channel (5-bit exponent, 5-bit mantissa)
    Sign = IValue[2] & 0x80000000;
    I = IValue[2] & 0x7FFFFFFF;

    if ((I & 0x7F800000) == 0x7F800000)
    {
        // INF or NAN
        Result[2] = 0x3e0;
        if ( I & 0x7FFFFF )
        {
            Result[2] = 0x3e0 | (((I>>18)|(I>>13)|(I>>3)|(I))&0x1f);
        }
        else if ( Sign )
        {
            // -INF is clamped to 0 since 3PK is positive only
            Result[2] = 0;
        }
    }
    else if ( Sign )
    {
        // 3PK is positive only, so clamp to zero
        Result[2] = 0;
    }
    else if (I > 0x477C0000U)
    {
        // The number is too large to be represented as a float10, set to max
        Result[2] = 0x3df;
    }
    else
    {
        if (I < 0x38800000U)
        {
            // The number is too small to be represented as a normalized float10
            // Convert it to a denormalized value.
            UINT Shift = 113U - (I >> 23U);
            I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized float10
            I += 0xC8000000U;
        }

        Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U)&0x3ffU;
    }

    // Pack Result into memory
    pDestination->v = (Result[0] & 0x7ff)
                      | ( (Result[1] & 0x7ff) << 11 )
                      | ( (Result[2] & 0x3ff) << 22 );
}
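
/*
    One channel of the conversion above as a stand-alone helper (a sketch,
    not part of XNA Math), valid for finite, non-negative, in-range input;
    the INF/NaN, sign, and overflow cases stay in XMStoreFloat3PK itself.
    The rebias by 0xC8000000 moves the exponent from bias 127 to bias 15,
    and the final add implements round-to-nearest-even on the 17 discarded
    mantissa bits:

        #include <cstdio>
        #include <cstring>

        unsigned FloatToFloat11( float f )
        {
            unsigned I;
            memcpy( &I, &f, sizeof(I) );
            I &= 0x7FFFFFFF;
            if (I < 0x38800000U)
            {
                // too small for a normalized float11: denormalize
                unsigned Shift = 113U - (I >> 23U);
                I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
            }
            else
            {
                I += 0xC8000000U;   // rebias exponent 127 -> 15
            }
            return ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U) & 0x7FFU;
        }

        int main()
        {
            printf( "0x%03X\n", FloatToFloat11( 1.0f ) );  // 0x3C0
            return 0;
        }
*/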


//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3SE
(
    XMFLOAT3SE* pDestination,
    FXMVECTOR V
)
{
    UINT I, Sign, j, T;
    UINT IValue[3];
    UINT Frac[3];
    UINT Exp[3];

    XMASSERT(pDestination);

    XMStoreFloat3( (XMFLOAT3*)&IValue, V );

    // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
    for(j=0; j < 3; ++j)
    {
        Sign = IValue[j] & 0x80000000;
        I = IValue[j] & 0x7FFFFFFF;

        if ((I & 0x7F800000) == 0x7F800000)
        {
            // INF or NAN (initialize the fraction so +INF encodes as exponent-only)
            Exp[j] = 0x1f;
            Frac[j] = 0;
            if (( I & 0x7FFFFF ) != 0)
            {
                Frac[j] = ((I>>14)|(I>>5)|(I))&0x1ff;
            }
            else if ( Sign )
            {
                // -INF is clamped to 0 since 3SE is positive only
                Exp[j] = Frac[j] = 0;
            }
        }
        else if ( Sign )
        {
            // 3SE is positive only, so clamp to zero
            Exp[j] = Frac[j] = 0;
        }
        else if (I > 0x477FC000U)
        {
            // The number is too large, set to max
            Exp[j] = 0x1e;
            Frac[j] = 0x1ff;
        }
        else
        {
            if (I < 0x38800000U)
            {
                // The number is too small to be represented as a normalized
                // 9-bit-mantissa value. Convert it to a denormalized value.
                UINT Shift = 113U - (I >> 23U);
                I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
            }
            else
            {
                // Rebias the exponent to represent the value as a normalized
                // 9-bit-mantissa value
                I += 0xC8000000U;
            }

            T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU;

            Exp[j] = (T & 0x3E00) >> 9;
            Frac[j] = T & 0x1ff;
        }
    }

    // Adjust to a shared exponent
    T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) );

    Frac[0] = Frac[0] >> (T - Exp[0]);
    Frac[1] = Frac[1] >> (T - Exp[1]);
    Frac[2] = Frac[2] >> (T - Exp[2]);

    // Store packed into memory
    pDestination->xm = Frac[0];
    pDestination->ym = Frac[1];
    pDestination->zm = Frac[2];
    pDestination->e = T;
}
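
/*
    The shared-exponent step in isolation (a sketch, illustrative only):
    every channel keeps a 9-bit mantissa, and channels with a smaller
    exponent shift their mantissa right so all three can share the largest
    exponent, losing low bits in the process:

        #include <cstdio>

        int main()
        {
            unsigned Exp[3]  = { 15, 14, 12 };          // Exp[0] is largest here
            unsigned Frac[3] = { 0x100, 0x100, 0x100 }; // 9-bit mantissas
            unsigned T = Exp[0];                        // shared exponent
            for ( int j = 0; j < 3; ++j )
                Frac[j] >>= ( T - Exp[j] );
            printf( "%X %X %X e=%u\n", Frac[0], Frac[1], Frac[2], T );
            // prints: 100 80 20 e=15
            return 0;
        }
*/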

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt4
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];
    pDestination[2] = V.vector4_u32[2];
    pDestination[3] = V.vector4_u32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);

    _mm_storeu_si128( (__m128i*)pDestination, reinterpret_cast<const __m128i *>(&V)[0] );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreInt4A
(
    UINT* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination[0] = V.vector4_u32[0];
    pDestination[1] = V.vector4_u32[1];
    pDestination[2] = V.vector4_u32[2];
    pDestination[3] = V.vector4_u32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // _mm_store_si128 requires a 16-byte-aligned destination
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    _mm_store_si128( (__m128i*)pDestination, reinterpret_cast<const __m128i *>(&V)[0] );

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
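
/*
    Aligned vs. unaligned stores: XMStoreInt4A may compile to movdqa, which
    faults if the destination is not 16-byte aligned; XMStoreInt4 uses the
    unaligned form. A minimal sketch of the distinction in plain SSE2
    (illustrative only; __declspec(align) is MSVC-specific):

        #include <emmintrin.h>
        #include <cstdio>

        int main()
        {
            __declspec(align(16)) unsigned aligned[4];
            unsigned buffer[5];
            __m128i v = _mm_set1_epi32( 7 );
            _mm_store_si128( (__m128i*)aligned, v );      // requires alignment
            _mm_storeu_si128( (__m128i*)&buffer[1], v );  // any address is fine
            printf( "%u %u\n", aligned[0], buffer[1] );   // 7 7
            return 0;
        }
*/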
||
4167 | |||
4168 | //------------------------------------------------------------------------------ |
||
4169 | |||
4170 | XMFINLINE VOID XMStoreInt4NC |
||
4171 | ( |
||
4172 | UINT* pDestination, |
||
4173 | FXMVECTOR V |
||
4174 | ) |
||
4175 | { |
||
4176 | #if defined(_XM_NO_INTRINSICS_) |
||
4177 | |||
4178 | XMASSERT(pDestination); |
||
4179 | |||
4180 | pDestination[0] = V.vector4_u32[0]; |
||
4181 | pDestination[1] = V.vector4_u32[1]; |
||
4182 | pDestination[2] = V.vector4_u32[2]; |
||
4183 | pDestination[3] = V.vector4_u32[3]; |
||
4184 | |||
4185 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4186 | XMASSERT(pDestination); |
||
4187 | |||
4188 | _mm_storeu_si128( (__m128i*)pDestination, reinterpret_cast<const __m128i *>(&V)[0] ); |
||
4189 | |||
4190 | #else // _XM_VMX128_INTRINSICS_ |
||
4191 | #endif // _XM_VMX128_INTRINSICS_ |
||
4192 | } |
||
4193 | |||
4194 | //------------------------------------------------------------------------------ |
||
4195 | |||
4196 | XMFINLINE VOID XMStoreFloat4 |
||
4197 | ( |
||
4198 | XMFLOAT4* pDestination, |
||
4199 | FXMVECTOR V |
||
4200 | ) |
||
4201 | { |
||
4202 | #if defined(_XM_NO_INTRINSICS_) |
||
4203 | |||
4204 | XMASSERT(pDestination); |
||
4205 | |||
4206 | pDestination->x = V.vector4_f32[0]; |
||
4207 | pDestination->y = V.vector4_f32[1]; |
||
4208 | pDestination->z = V.vector4_f32[2]; |
||
4209 | pDestination->w = V.vector4_f32[3]; |
||
4210 | |||
4211 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4212 | XMASSERT(pDestination); |
||
4213 | |||
4214 | _mm_storeu_ps( &pDestination->x, V ); |
||
4215 | |||
4216 | #else // _XM_VMX128_INTRINSICS_ |
||
4217 | #endif // _XM_VMX128_INTRINSICS_ |
||
4218 | } |
||
4219 | |||
4220 | //------------------------------------------------------------------------------ |
||
4221 | |||
4222 | XMFINLINE VOID XMStoreFloat4A |
||
4223 | ( |
||
4224 | XMFLOAT4A* pDestination, |
||
4225 | FXMVECTOR V |
||
4226 | ) |
||
4227 | { |
||
4228 | #if defined(_XM_NO_INTRINSICS_) |
||
4229 | |||
4230 | XMASSERT(pDestination); |
||
4231 | XMASSERT(((UINT_PTR)pDestination & 0xF) == 0); |
||
4232 | |||
4233 | pDestination->x = V.vector4_f32[0]; |
||
4234 | pDestination->y = V.vector4_f32[1]; |
||
4235 | pDestination->z = V.vector4_f32[2]; |
||
4236 | pDestination->w = V.vector4_f32[3]; |
||
4237 | |||
4238 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4239 | XMASSERT(pDestination); |
||
4240 | XMASSERT(((UINT_PTR)pDestination & 0xF) == 0); |
||
4241 | |||
4242 | _mm_store_ps( &pDestination->x, V ); |
||
4243 | #else // _XM_VMX128_INTRINSICS_ |
||
4244 | #endif // _XM_VMX128_INTRINSICS_ |
||
4245 | } |
||
4246 | |||
4247 | //------------------------------------------------------------------------------ |
||
4248 | |||
4249 | XMFINLINE VOID XMStoreFloat4NC |
||
4250 | ( |
||
4251 | XMFLOAT4* pDestination, |
||
4252 | FXMVECTOR V |
||
4253 | ) |
||
4254 | { |
||
4255 | #if defined(_XM_NO_INTRINSICS_) |
||
4256 | |||
4257 | XMASSERT(pDestination); |
||
4258 | |||
4259 | pDestination->x = V.vector4_f32[0]; |
||
4260 | pDestination->y = V.vector4_f32[1]; |
||
4261 | pDestination->z = V.vector4_f32[2]; |
||
4262 | pDestination->w = V.vector4_f32[3]; |
||
4263 | |||
4264 | #elif defined(_XM_SSE_INTRINSICS_) |
||
4265 | XMASSERT(pDestination); |
||
4266 | |||
4267 | _mm_storeu_ps( &pDestination->x, V ); |
||
4268 | |||
4269 | #else // _XM_VMX128_INTRINSICS_ |
||
4270 | #endif // _XM_VMX128_INTRINSICS_ |
||
4271 | } |

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreHalf4
(
    XMHALF4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination->x = XMConvertFloatToHalf(V.vector4_f32[0]);
    pDestination->y = XMConvertFloatToHalf(V.vector4_f32[1]);
    pDestination->z = XMConvertFloatToHalf(V.vector4_f32[2]);
    pDestination->w = XMConvertFloatToHalf(V.vector4_f32[3]);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
    pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V));
    pDestination->z = XMConvertFloatToHalf(XMVectorGetZ(V));
    pDestination->w = XMConvertFloatToHalf(XMVectorGetW(V));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
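
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: packing through
// XMHALF4 quantizes each lane to 16-bit floating-point, so the round trip is
// lossy beyond half precision. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripHalf4
(
    FXMVECTOR V
)
{
    XMHALF4 Temp;
    XMStoreHalf4(&Temp, V);      // four float-to-half conversions
    return XMLoadHalf4(&Temp);   // expand back to 32-bit floats
}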

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreShortN4
(
    XMSHORTN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->x = (SHORT)N.vector4_f32[0];
    pDestination->y = (SHORT)N.vector4_f32[1];
    pDestination->z = (SHORT)N.vector4_f32[2];
    pDestination->w = (SHORT)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
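
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMStoreShortN4 maps
// [-1,1] onto [-32767,32767], so 0.5f stores as 16384 and reloads as
// 16384/32767, roughly 0.50002. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleQuantizeShortN4
(
    FXMVECTOR V
)
{
    XMSHORTN4 Packed;
    XMStoreShortN4(&Packed, V);     // clamp, scale by 32767, round
    return XMLoadShortN4(&Packed);  // rescale back into [-1,1]
}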

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreShort4
(
    XMSHORT4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static CONST XMVECTOR Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    pDestination->x = (SHORT)N.vector4_f32[0];
    pDestination->y = (SHORT)N.vector4_f32[1];
    pDestination->z = (SHORT)N.vector4_f32[2];
    pDestination->w = (SHORT)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static CONST XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),reinterpret_cast<const __m128d *>(&vInt)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUShortN4
(
    XMUSHORTN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
    N = XMVectorTruncate(N);

    pDestination->x = (USHORT)N.vector4_f32[0];
    pDestination->y = (USHORT)N.vector4_f32[1];
    pDestination->z = (USHORT)N.vector4_f32[2];
    pDestination->w = (USHORT)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<USHORT>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<USHORT>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
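
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: the unsigned
// normalized formats cover [0,1], so a saturated input of 1.0f packs as 65535
// in every lane. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleQuantizeUShortN4
(
    FXMVECTOR V
)
{
    XMUSHORTN4 Packed;
    XMStoreUShortN4(&Packed, V);     // clamp to [0,1], scale by 65535
    return XMLoadUShortN4(&Packed);
}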

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUShort4
(
    XMUSHORT4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    pDestination->x = (USHORT)N.vector4_f32[0];
    pDestination->y = (USHORT)N.vector4_f32[1];
    pDestination->z = (USHORT)N.vector4_f32[2];
    pDestination->w = (USHORT)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<USHORT>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<USHORT>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreXIcoN4
(
    XMXICON4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    static CONST XMVECTORF32 Scale = {524287.0f, 524287.0f, 524287.0f, 15.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->v = ((UINT64)N.vector4_f32[3] << 60) |
                      (((INT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((INT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((INT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 MinXIcoN4 = {-1.0f, 0.0f,-1.0f,-1.0f};
    static const XMVECTORF32 ScaleXIcoN4 = {524287.0f,15.0f*4096.0f*65536.0f*0.5f,524287.0f*4096.0f,524287.0f};
    static const XMVECTORI32 MaskXIcoN4 = {0xFFFFF,0xF<<((60-32)-1),0xFFFFF000,0xFFFFF};

    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,MinXIcoN4);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleXIcoN4);
    // Convert to integer (w is unsigned)
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off unused bits
    vResulti = _mm_and_si128(vResulti,MaskXIcoN4);
    // Isolate Y
    __m128i vResulti2 = _mm_and_si128(vResulti,g_XMMaskY);
    // Double Y (Really W) to fixup for unsigned conversion
    vResulti = _mm_add_epi32(vResulti,vResulti2);
    // Shift y and z to straddle the 32-bit boundary
    vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
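
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMXICON4 packs three
// signed 20-bit fields and an unsigned 4-bit w into a single 64-bit word as
// w<<60 | z<<40 | y<<20 | x. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripXIcoN4
(
    FXMVECTOR V
)
{
    XMXICON4 Packed;
    XMStoreXIcoN4(&Packed, V);      // x,y,z clamp to [-1,1], w to [0,1]
    return XMLoadXIcoN4(&Packed);
}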

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreXIco4
(
    XMXICO4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Min = {-524287.0f, -524287.0f, -524287.0f, 0.0f};
    static CONST XMVECTORF32 Max = {524287.0f, 524287.0f, 524287.0f, 15.0f};

    XMASSERT(pDestination);
    N = XMVectorClamp(V, Min.v, Max.v);
    pDestination->v = ((UINT64)N.vector4_f32[3] << 60) |
                      (((INT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((INT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((INT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 MinXIco4 = {-524287.0f, 0.0f,-524287.0f,-524287.0f};
    static const XMVECTORF32 MaxXIco4 = { 524287.0f,15.0f, 524287.0f, 524287.0f};
    static const XMVECTORF32 ScaleXIco4 = {1.0f,4096.0f*65536.0f*0.5f,4096.0f,1.0f};
    static const XMVECTORI32 MaskXIco4 = {0xFFFFF,0xF<<((60-1)-32),0xFFFFF000,0xFFFFF};
    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,MinXIco4);
    vResult = _mm_min_ps(vResult,MaxXIco4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleXIco4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskXIco4);
    // Isolate Y
    __m128i vResulti2 = _mm_and_si128(vResulti,g_XMMaskY);
    // Double Y (Really W) to fixup for unsigned conversion
    vResulti = _mm_add_epi32(vResulti,vResulti2);
    // Shift y and z to straddle the 32-bit boundary
    vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUIcoN4
(
    XMUICON4* pDestination,
    FXMVECTOR V
)
{
#define XM_URange ((FLOAT)(1 << 20))
#define XM_URangeDiv2 ((FLOAT)(1 << 19))
#define XM_UMaxXYZ ((FLOAT)((1 << 20) - 1))
#define XM_UMaxW ((FLOAT)((1 << 4) - 1))
#define XM_ScaleXYZ (-(FLOAT)((1 << 20) - 1) / XM_PACK_FACTOR)
#define XM_ScaleW (-(FLOAT)((1 << 4) - 1) / XM_PACK_FACTOR)
#define XM_Scale (-1.0f / XM_PACK_FACTOR)
#define XM_Offset (3.0f)

#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {1048575.0f, 1048575.0f, 1048575.0f, 15.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);

    pDestination->v = ((UINT64)N.vector4_f32[3] << 60) |
                      (((UINT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((UINT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((UINT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 ScaleUIcoN4 = {1048575.0f,15.0f*4096.0f*65536.0f,1048575.0f*4096.0f,1048575.0f};
    static const XMVECTORI32 MaskUIcoN4 = {0xFFFFF,0xF<<(60-32),0xFFFFF000,0xFFFFF};
    static const XMVECTORF32 AddUIcoN4 = {0.0f,-32768.0f*65536.0f,-32768.0f*65536.0f,0.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUIcoN4);
    // Adjust for unsigned entries
    vResult = _mm_add_ps(vResult,AddUIcoN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Fix the signs on the unsigned entries
    vResulti = _mm_xor_si128(vResulti,g_XMFlipYZ);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUIcoN4);
    // Shift y and z to straddle the 32-bit boundary
    __m128i vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_

#undef XM_URange
#undef XM_URangeDiv2
#undef XM_UMaxXYZ
#undef XM_UMaxW
#undef XM_ScaleXYZ
#undef XM_ScaleW
#undef XM_Scale
#undef XM_Offset
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUIco4
(
    XMUICO4* pDestination,
    FXMVECTOR V
)
{
#define XM_Scale (-1.0f / XM_PACK_FACTOR)
#define XM_URange ((FLOAT)(1 << 20))
#define XM_URangeDiv2 ((FLOAT)(1 << 19))

#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {1048575.0f, 1048575.0f, 1048575.0f, 15.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    pDestination->v = ((UINT64)N.vector4_f32[3] << 60) |
                      (((UINT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((UINT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((UINT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 MaxUIco4 = { 1048575.0f, 15.0f, 1048575.0f, 1048575.0f};
    static const XMVECTORF32 ScaleUIco4 = {1.0f,4096.0f*65536.0f,4096.0f,1.0f};
    static const XMVECTORI32 MaskUIco4 = {0xFFFFF,0xF<<(60-32),0xFFFFF000,0xFFFFF};
    static const XMVECTORF32 AddUIco4 = {0.0f,-32768.0f*65536.0f,-32768.0f*65536.0f,0.0f};
    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUIco4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUIco4);
    vResult = _mm_add_ps(vResult,AddUIco4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    vResulti = _mm_xor_si128(vResulti,g_XMFlipYZ);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUIco4);
    // Shift y and z to straddle the 32-bit boundary
    __m128i vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_

#undef XM_Scale
#undef XM_URange
#undef XM_URangeDiv2
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreIcoN4
(
    XMICON4* pDestination,
    FXMVECTOR V
)
{
#define XM_Scale (-1.0f / XM_PACK_FACTOR)
#define XM_URange ((FLOAT)(1 << 4))
#define XM_Offset (3.0f)
#define XM_UMaxXYZ ((FLOAT)((1 << (20 - 1)) - 1))
#define XM_UMaxW ((FLOAT)((1 << (4 - 1)) - 1))

#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {524287.0f, 524287.0f, 524287.0f, 7.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMNegativeZero.v);
    N = XMVectorRound(N);

    pDestination->v = ((UINT64)N.vector4_f32[3] << 60) |
                      (((UINT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((UINT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((UINT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 ScaleIcoN4 = {524287.0f,7.0f*4096.0f*65536.0f,524287.0f*4096.0f,524287.0f};
    static const XMVECTORI32 MaskIcoN4 = {0xFFFFF,0xF<<(60-32),0xFFFFF000,0xFFFFF};
    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleIcoN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskIcoN4);
    // Shift y and z to straddle the 32-bit boundary
    __m128i vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_

#undef XM_Scale
#undef XM_URange
#undef XM_Offset
#undef XM_UMaxXYZ
#undef XM_UMaxW
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreIco4
(
    XMICO4* pDestination,
    FXMVECTOR V
)
{
#define XM_Scale (-1.0f / XM_PACK_FACTOR)
#define XM_URange ((FLOAT)(1 << 4))
#define XM_Offset (3.0f)

#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-524287.0f, -524287.0f, -524287.0f, -7.0f};
    static CONST XMVECTOR Max = {524287.0f, 524287.0f, 524287.0f, 7.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    pDestination->v = ((INT64)N.vector4_f32[3] << 60) |
                      (((INT64)N.vector4_f32[2] & 0xFFFFF) << 40) |
                      (((INT64)N.vector4_f32[1] & 0xFFFFF) << 20) |
                      (((INT64)N.vector4_f32[0] & 0xFFFFF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // Note: Masks are x,w,y and z
    static const XMVECTORF32 MinIco4 = {-524287.0f,-7.0f,-524287.0f,-524287.0f};
    static const XMVECTORF32 MaxIco4 = { 524287.0f, 7.0f, 524287.0f, 524287.0f};
    static const XMVECTORF32 ScaleIco4 = {1.0f,4096.0f*65536.0f,4096.0f,1.0f};
    static const XMVECTORI32 MaskIco4 = {0xFFFFF,0xF<<(60-32),0xFFFFF000,0xFFFFF};
    // Clamp to bounds
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,1,3,0));
    vResult = _mm_max_ps(vResult,MinIco4);
    vResult = _mm_min_ps(vResult,MaxIco4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleIco4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskIco4);
    // Shift y and z to straddle the 32-bit boundary
    __m128i vResulti2 = _mm_srli_si128(vResulti,(64+12)/8);
    // Shift it into place
    vResulti2 = _mm_slli_si128(vResulti2,20/8);
    // i = x|y<<20|z<<40|w<<60
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->v),reinterpret_cast<const __m128d *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_

#undef XM_Scale
#undef XM_URange
#undef XM_Offset
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreXDecN4
(
    XMXDECN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    static CONST XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 3.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->v = ((UINT)N.vector4_f32[3] << 30) |
                      (((INT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((INT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    static const XMVECTORF32 Scale = {511.0f, 511.0f*1024.0f, 511.0f*1048576.0f,3.0f*536870912.0f};
    static const XMVECTORI32 ScaleMask = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<29};
    XMASSERT(pDestination);
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int (W is unsigned)
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,ScaleMask);
    // To fix W, add itself to shift it up to <<30 instead of <<29
    __m128i vResultw = _mm_and_si128(vResulti,g_XMMaskW);
    vResulti = _mm_add_epi32(vResulti,vResultw);
    // Do a horizontal or of all 4 entries
    vResult = _mm_shuffle_ps(reinterpret_cast<const __m128 *>(&vResulti)[0],reinterpret_cast<const __m128 *>(&vResulti)[0],_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,reinterpret_cast<const __m128i *>(&vResult)[0]);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
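
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMXDECN4 is the
// signed 10:10:10:2 vertex format often used for packed normals; x,y,z are
// signed-normalized while the 2-bit w is unsigned. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripXDecN4
(
    FXMVECTOR V
)
{
    XMXDECN4 Packed;
    XMStoreXDecN4(&Packed, V);      // x,y,z clamp to [-1,1], w to [0,1]
    return XMLoadXDecN4(&Packed);
}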

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreXDec4
(
    XMXDEC4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-511.0f, -511.0f, -511.0f, 0.0f};
    static CONST XMVECTOR Max = {511.0f, 511.0f, 511.0f, 3.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);

    pDestination->v = ((UINT)N.vector4_f32[3] << 30) |
                      (((INT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((INT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MinXDec4 = {-511.0f,-511.0f,-511.0f, 0.0f};
    static const XMVECTORF32 MaxXDec4 = { 511.0f, 511.0f, 511.0f, 3.0f};
    static const XMVECTORF32 ScaleXDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
    static const XMVECTORI32 MaskXDec4 = {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinXDec4);
    vResult = _mm_min_ps(vResult,MaxXDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleXDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskXDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUDecN4
(
    XMUDECN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {1023.0f, 1023.0f, 1023.0f, 3.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = ((UINT)N.vector4_f32[3] << 30) |
                      (((UINT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((UINT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((UINT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleUDecN4 = {1023.0f,1023.0f*1024.0f*0.5f,1023.0f*1024.0f*1024.0f,3.0f*1024.0f*1024.0f*1024.0f*0.5f};
    static const XMVECTORI32 MaskUDecN4 = {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDecN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDecN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a left shift by one bit on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
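
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMUDECN4 is the
// unsigned 10:10:10:2 layout (the same bit arrangement used by formats such
// as D3DFMT_A2B10G10R10); x,y,z quantize to 10 bits each and w to 2 bits.
// Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripUDecN4
(
    FXMVECTOR V
)
{
    XMUDECN4 Packed;
    XMStoreUDecN4(&Packed, V);      // all lanes clamp to [0,1]
    return XMLoadUDecN4(&Packed);
}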

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUDec4
(
    XMUDEC4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {1023.0f, 1023.0f, 1023.0f, 3.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);

    pDestination->v = ((UINT)N.vector4_f32[3] << 30) |
                      (((UINT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((UINT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((UINT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MaxUDec4 = { 1023.0f, 1023.0f, 1023.0f, 3.0f};
    static const XMVECTORF32 ScaleUDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
    static const XMVECTORI32 MaskUDec4 = {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a left shift by one bit on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreDecN4
(
    XMDECN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    pDestination->v = ((INT)N.vector4_f32[3] << 30) |
                      (((INT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((INT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleDecN4 = {511.0f,511.0f*1024.0f,511.0f*1024.0f*1024.0f,1.0f*1024.0f*1024.0f*1024.0f};
    static const XMVECTORI32 MaskDecN4 = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDecN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskDecN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreDec4
(
    XMDEC4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-511.0f, -511.0f, -511.0f, -1.0f};
    static CONST XMVECTOR Max = {511.0f, 511.0f, 511.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);

    pDestination->v = ((INT)N.vector4_f32[3] << 30) |
                      (((INT)N.vector4_f32[2] & 0x3FF) << 20) |
                      (((INT)N.vector4_f32[1] & 0x3FF) << 10) |
                      (((INT)N.vector4_f32[0] & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MinDec4 = {-511.0f,-511.0f,-511.0f,-1.0f};
    static const XMVECTORF32 MaxDec4 = { 511.0f, 511.0f, 511.0f, 1.0f};
    static const XMVECTORF32 ScaleDec4 = {1.0f,1024.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f};
    static const XMVECTORI32 MaskDec4 = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinDec4);
    vResult = _mm_min_ps(vResult,MaxDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUByteN4
(
    XMUBYTEN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};

    XMASSERT(pDestination);

    N = XMVectorSaturate(V);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->x = (BYTE)N.vector4_f32[0];
    pDestination->y = (BYTE)N.vector4_f32[1];
    pDestination->z = (BYTE)N.vector4_f32[2];
    pDestination->w = (BYTE)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleUByteN4 = {255.0f,255.0f*256.0f*0.5f,255.0f*256.0f*256.0f,255.0f*256.0f*256.0f*256.0f*0.5f};
    static const XMVECTORI32 MaskUByteN4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUByteN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUByteN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift to fix y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
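
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMUBYTEN4 stores four
// unsigned-normalized bytes in x,y,z,w order, unlike the BGRA ordering used by
// XMCOLOR further below. Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripUByteN4
(
    FXMVECTOR V
)
{
    XMUBYTEN4 Packed;
    XMStoreUByteN4(&Packed, V);     // saturate to [0,1], scale by 255
    return XMLoadUByteN4(&Packed);
}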

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUByte4
(
    XMUBYTE4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Max = {255.0f, 255.0f, 255.0f, 255.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    pDestination->x = (BYTE)N.vector4_f32[0];
    pDestination->y = (BYTE)N.vector4_f32[1];
    pDestination->z = (BYTE)N.vector4_f32[2];
    pDestination->w = (BYTE)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MaxUByte4 = { 255.0f, 255.0f, 255.0f, 255.0f};
    static const XMVECTORF32 ScaleUByte4 = {1.0f,256.0f*0.5f,256.0f*256.0f,256.0f*256.0f*256.0f*0.5f};
    static const XMVECTORI32 MaskUByte4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUByte4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUByte4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUByte4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift to fix y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreByteN4
(
    XMBYTEN4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f};

    XMASSERT(pDestination);

    // Clamp to [-1,1] to match the SSE path below
    N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->x = (CHAR)N.vector4_f32[0];
    pDestination->y = (CHAR)N.vector4_f32[1];
    pDestination->z = (CHAR)N.vector4_f32[2];
    pDestination->w = (CHAR)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 ScaleByteN4 = {127.0f,127.0f*256.0f,127.0f*256.0f*256.0f,127.0f*256.0f*256.0f*256.0f};
    static const XMVECTORI32 MaskByteN4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleByteN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskByteN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreByte4
(
    XMBYTE4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTOR Min = {-127.0f, -127.0f, -127.0f, -127.0f};
    static CONST XMVECTOR Max = {127.0f, 127.0f, 127.0f, 127.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    pDestination->x = (CHAR)N.vector4_f32[0];
    pDestination->y = (CHAR)N.vector4_f32[1];
    pDestination->z = (CHAR)N.vector4_f32[2];
    pDestination->w = (CHAR)N.vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static const XMVECTORF32 MinByte4 = {-127.0f,-127.0f,-127.0f,-127.0f};
    static const XMVECTORF32 MaxByte4 = { 127.0f, 127.0f, 127.0f, 127.0f};
    static const XMVECTORF32 ScaleByte4 = {1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*256.0f};
    static const XMVECTORI32 MaskByte4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinByte4);
    vResult = _mm_min_ps(vResult,MaxByte4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleByte4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskByte4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move y|w to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),reinterpret_cast<const __m128 *>(&vResulti)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreUNibble4
(
    XMUNIBBLE4* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // No SSE operations will write to 16-bit values, so we have to extract them manually
    USHORT x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    USHORT y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
    USHORT z = static_cast<USHORT>(_mm_extract_epi16(vInt,4));
    USHORT w = static_cast<USHORT>(_mm_extract_epi16(vInt,6));
    pDestination->v = ((w & 0xF) << 12) |
                      ((z & 0xF) << 8) |
                      ((y & 0xF) << 4) |
                      ((x & 0xF));
#else
    XMVECTOR N;
    static CONST XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max.v);
    N = XMVectorRound(N);

    pDestination->v = (((USHORT)N.vector4_f32[3] & 0xF) << 12) |
                      (((USHORT)N.vector4_f32[2] & 0xF) << 8) |
                      (((USHORT)N.vector4_f32[1] & 0xF) << 4) |
                      (((USHORT)N.vector4_f32[0] & 0xF));
#endif // !_XM_SSE_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreU555(
    XMU555* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // No SSE operations will write to 16-bit values, so we have to extract them manually
    USHORT x = static_cast<USHORT>(_mm_extract_epi16(vInt,0));
    USHORT y = static_cast<USHORT>(_mm_extract_epi16(vInt,2));
    USHORT z = static_cast<USHORT>(_mm_extract_epi16(vInt,4));
    USHORT w = static_cast<USHORT>(_mm_extract_epi16(vInt,6));
    pDestination->v = ((w) ? 0x8000 : 0) |
                      ((z & 0x1F) << 10) |
                      ((y & 0x1F) << 5) |
                      ((x & 0x1F));
#else
    XMVECTOR N;
    static CONST XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};

    XMASSERT(pDestination);

    N = XMVectorClamp(V, XMVectorZero(), Max.v);
    N = XMVectorRound(N);

    pDestination->v = ((N.vector4_f32[3] > 0.f) ? 0x8000 : 0) |
                      (((USHORT)N.vector4_f32[2] & 0x1F) << 10) |
                      (((USHORT)N.vector4_f32[1] & 0x1F) << 5) |
                      (((USHORT)N.vector4_f32[0] & 0x1F));
#endif // !_XM_SSE_INTRINSICS_
}
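
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMU555 is the 1:5:5:5
// layout with a single alpha bit in bit 15; any nonzero w sets that bit.
// Hypothetical helper name.

XMFINLINE XMVECTOR XMExampleRoundTripU555
(
    FXMVECTOR V
)
{
    XMU555 Packed;
    XMStoreU555(&Packed, V);        // x,y,z clamp to [0,31], w acts as a flag
    return XMLoadU555(&Packed);
}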

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreColor
(
    XMCOLOR* pDestination,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR N;
    static CONST XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};

    XMASSERT(pDestination);

    N = XMVectorSaturate(V);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    pDestination->c = ((UINT)N.vector4_f32[3] << 24) |
                      ((UINT)N.vector4_f32[0] << 16) |
                      ((UINT)N.vector4_f32[1] << 8) |
                      ((UINT)N.vector4_f32[2]);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    static CONST XMVECTORF32 Scale = {255.0f,255.0f,255.0f,255.0f};
    // Set <0 to 0
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    // Set >1 to 1
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Convert to 0-255
    vResult = _mm_mul_ps(vResult,Scale);
    // Shuffle RGBA to ARGB (B,G,R,A lane order so the packed bytes come out B,G,R,A)
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    // Convert to int
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Mash to shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Mash to bytes
    vInt = _mm_packs_epi16(vInt,vInt);
    // Store the color
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->c),reinterpret_cast<__m128 *>(&vInt)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
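
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: XMStoreColor packs a
// saturated RGBA vector into the 32-bit D3DCOLOR-style layout with alpha in
// the top byte. Hypothetical helper name.

XMFINLINE UINT XMExamplePackColor
(
    FXMVECTOR V
)
{
    XMCOLOR Color;
    XMStoreColor(&Color, V);        // saturate, scale to 0-255, pack as ARGB
    return Color.c;                 // opaque red (1,0,0,1) packs as 0xFFFF0000
}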

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3x3
(
    XMFLOAT3X3* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)

    XMStoreFloat3x3NC(pDestination, M);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat3x3NC
(
    XMFLOAT3X3* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination->m[0][0] = M.r[0].vector4_f32[0];
    pDestination->m[0][1] = M.r[0].vector4_f32[1];
    pDestination->m[0][2] = M.r[0].vector4_f32[2];

    pDestination->m[1][0] = M.r[1].vector4_f32[0];
    pDestination->m[1][1] = M.r[1].vector4_f32[1];
    pDestination->m[1][2] = M.r[1].vector4_f32[2];

    pDestination->m[2][0] = M.r[2].vector4_f32[0];
    pDestination->m[2][1] = M.r[2].vector4_f32[1];
    pDestination->m[2][2] = M.r[2].vector4_f32[2];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    XMVECTOR vTemp1 = M.r[0];
    XMVECTOR vTemp2 = M.r[1];
    XMVECTOR vTemp3 = M.r[2];
    XMVECTOR vWork = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(0,0,2,2));
    vTemp1 = _mm_shuffle_ps(vTemp1,vWork,_MM_SHUFFLE(2,0,1,0));
    _mm_storeu_ps(&pDestination->m[0][0],vTemp1);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
    _mm_storeu_ps(&pDestination->m[1][1],vTemp2);
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
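
//------------------------------------------------------------------------------
// Illustrative sketch, not part of the original library: the SSE path above
// writes the nine floats with two overlapping unaligned vector stores plus one
// scalar store instead of nine scalar moves. A round trip through XMFLOAT3X3
// reloads only the upper 3x3 block. Hypothetical helper name.

XMFINLINE XMMATRIX XMExampleRoundTripFloat3x3
(
    CXMMATRIX M
)
{
    XMFLOAT3X3 Temp;
    XMStoreFloat3x3(&Temp, M);      // stores only the upper 3x3 of M
    return XMLoadFloat3x3(&Temp);   // fourth row and column come back as identity
}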

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x3
(
    XMFLOAT4X3* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)

    XMStoreFloat4x3NC(pDestination, M);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x3A
(
    XMFLOAT4X3A* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination->m[0][0] = M.r[0].vector4_f32[0];
    pDestination->m[0][1] = M.r[0].vector4_f32[1];
    pDestination->m[0][2] = M.r[0].vector4_f32[2];

    pDestination->m[1][0] = M.r[1].vector4_f32[0];
    pDestination->m[1][1] = M.r[1].vector4_f32[1];
    pDestination->m[1][2] = M.r[1].vector4_f32[2];

    pDestination->m[2][0] = M.r[2].vector4_f32[0];
    pDestination->m[2][1] = M.r[2].vector4_f32[1];
    pDestination->m[2][2] = M.r[2].vector4_f32[2];

    pDestination->m[3][0] = M.r[3].vector4_f32[0];
    pDestination->m[3][1] = M.r[3].vector4_f32[1];
    pDestination->m[3][2] = M.r[3].vector4_f32[2];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);
    // x1,y1,z1,w1
    XMVECTOR vTemp1 = M.r[0];
    // x2,y2,z2,w2
    XMVECTOR vTemp2 = M.r[1];
    // x3,y3,z3,w3
    XMVECTOR vTemp3 = M.r[2];
    // x4,y4,z4,w4
    XMVECTOR vTemp4 = M.r[3];
    // z1,z1,x2,y2
    XMVECTOR vTemp = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(1,0,2,2));
    // y2,z2,x3,y3 (Final)
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
    // x1,y1,z1,x2 (Final)
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp,_MM_SHUFFLE(2,0,1,0));
    // z3,z3,x4,x4
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(0,0,2,2));
    // z3,x4,y4,z4 (Final)
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(2,1,2,0));
    // Store in 3 operations
    _mm_store_ps(&pDestination->m[0][0],vTemp1);
    _mm_store_ps(&pDestination->m[1][1],vTemp2);
    _mm_store_ps(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
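
// Usage sketch (illustrative example added by the editor; the variable name
// is hypothetical). The A variant requires a 16-byte aligned destination,
// which the XMFLOAT4X3A type provides by declaration, so all three
// _mm_store_ps calls above land on aligned addresses (offsets 0, 16, and 32
// bytes from the base):
//
//     XMFLOAT4X3A alignedBone;
//     XMStoreFloat4x3A(&alignedBone, XMMatrixIdentity());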

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x3NC
(
    XMFLOAT4X3* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination->m[0][0] = M.r[0].vector4_f32[0];
    pDestination->m[0][1] = M.r[0].vector4_f32[1];
    pDestination->m[0][2] = M.r[0].vector4_f32[2];

    pDestination->m[1][0] = M.r[1].vector4_f32[0];
    pDestination->m[1][1] = M.r[1].vector4_f32[1];
    pDestination->m[1][2] = M.r[1].vector4_f32[2];

    pDestination->m[2][0] = M.r[2].vector4_f32[0];
    pDestination->m[2][1] = M.r[2].vector4_f32[1];
    pDestination->m[2][2] = M.r[2].vector4_f32[2];

    pDestination->m[3][0] = M.r[3].vector4_f32[0];
    pDestination->m[3][1] = M.r[3].vector4_f32[1];
    pDestination->m[3][2] = M.r[3].vector4_f32[2];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    // x1,y1,z1,w1
    XMVECTOR vTemp1 = M.r[0];
    // x2,y2,z2,w2
    XMVECTOR vTemp2 = M.r[1];
    // x3,y3,z3,w3
    XMVECTOR vTemp3 = M.r[2];
    // x4,y4,z4,w4
    XMVECTOR vTemp4 = M.r[3];
    // y2,z2,x3,y3 (Final)
    XMVECTOR vTemp2x = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
    // x2,x2,z1,z1
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(2,2,0,0));
    // x1,y1,z1,x2 (Final)
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(0,2,1,0));
    // z3,z3,x4,x4
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(0,0,2,2));
    // z3,x4,y4,z4 (Final)
    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(2,1,2,0));
    // Store in 3 unaligned operations
    _mm_storeu_ps(&pDestination->m[0][0],vTemp1);
    _mm_storeu_ps(&pDestination->m[1][1],vTemp2x);
    _mm_storeu_ps(&pDestination->m[2][2],vTemp3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
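
// Editorial note (inferred from the code above rather than stated in the
// original): the NC variant never assumes a 16-byte aligned destination --
// the scalar path writes element by element and the SSE path uses
// _mm_storeu_ps -- which is why XMStoreFloat4x3 simply forwards here on the
// SSE and no-misaligned-access builds.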

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x4
(
    XMFLOAT4X4* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)

    XMStoreFloat4x4NC(pDestination, M);

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);

    _mm_storeu_ps( &pDestination->_11, M.r[0] );
    _mm_storeu_ps( &pDestination->_21, M.r[1] );
    _mm_storeu_ps( &pDestination->_31, M.r[2] );
    _mm_storeu_ps( &pDestination->_41, M.r[3] );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
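
// Usage sketch (illustrative example added by the editor; the variable name
// is hypothetical). A 4x4 destination has the same row layout as XMMATRIX,
// so no shuffling is needed and the SSE path is four plain unaligned row
// stores:
//
//     XMFLOAT4X4 stored;
//     XMStoreFloat4x4(&stored, XMMatrixIdentity());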

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x4A
(
    XMFLOAT4X4A* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);
    XMASSERT(((UINT_PTR)pDestination & 0xF) == 0);

    pDestination->m[0][0] = M.r[0].vector4_f32[0];
    pDestination->m[0][1] = M.r[0].vector4_f32[1];
    pDestination->m[0][2] = M.r[0].vector4_f32[2];
    pDestination->m[0][3] = M.r[0].vector4_f32[3];

    pDestination->m[1][0] = M.r[1].vector4_f32[0];
    pDestination->m[1][1] = M.r[1].vector4_f32[1];
    pDestination->m[1][2] = M.r[1].vector4_f32[2];
    pDestination->m[1][3] = M.r[1].vector4_f32[3];

    pDestination->m[2][0] = M.r[2].vector4_f32[0];
    pDestination->m[2][1] = M.r[2].vector4_f32[1];
    pDestination->m[2][2] = M.r[2].vector4_f32[2];
    pDestination->m[2][3] = M.r[2].vector4_f32[3];

    pDestination->m[3][0] = M.r[3].vector4_f32[0];
    pDestination->m[3][1] = M.r[3].vector4_f32[1];
    pDestination->m[3][2] = M.r[3].vector4_f32[2];
    pDestination->m[3][3] = M.r[3].vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);

    _mm_store_ps( &pDestination->_11, M.r[0] );
    _mm_store_ps( &pDestination->_21, M.r[1] );
    _mm_store_ps( &pDestination->_31, M.r[2] );
    _mm_store_ps( &pDestination->_41, M.r[3] );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
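
// Editorial note (an observation about the code above): the SSE path asserts
// only a non-NULL pointer, while the scalar path also XMASSERTs 16-byte
// alignment. The SSE path still enforces the contract at run time, because
// _mm_store_ps itself faults on a destination that is not 16-byte aligned.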

//------------------------------------------------------------------------------

XMFINLINE VOID XMStoreFloat4x4NC
(
    XMFLOAT4X4* pDestination,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMASSERT(pDestination);

    pDestination->m[0][0] = M.r[0].vector4_f32[0];
    pDestination->m[0][1] = M.r[0].vector4_f32[1];
    pDestination->m[0][2] = M.r[0].vector4_f32[2];
    pDestination->m[0][3] = M.r[0].vector4_f32[3];

    pDestination->m[1][0] = M.r[1].vector4_f32[0];
    pDestination->m[1][1] = M.r[1].vector4_f32[1];
    pDestination->m[1][2] = M.r[1].vector4_f32[2];
    pDestination->m[1][3] = M.r[1].vector4_f32[3];

    pDestination->m[2][0] = M.r[2].vector4_f32[0];
    pDestination->m[2][1] = M.r[2].vector4_f32[1];
    pDestination->m[2][2] = M.r[2].vector4_f32[2];
    pDestination->m[2][3] = M.r[2].vector4_f32[3];

    pDestination->m[3][0] = M.r[3].vector4_f32[0];
    pDestination->m[3][1] = M.r[3].vector4_f32[1];
    pDestination->m[3][2] = M.r[3].vector4_f32[2];
    pDestination->m[3][3] = M.r[3].vector4_f32[3];

#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pDestination);
    _mm_storeu_ps(&pDestination->m[0][0],M.r[0]);
    _mm_storeu_ps(&pDestination->m[1][0],M.r[1]);
    _mm_storeu_ps(&pDestination->m[2][0],M.r[2]);
    _mm_storeu_ps(&pDestination->m[3][0],M.r[3]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
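
// Usage sketch (illustrative example added by the editor; the struct and
// variable names are hypothetical). Because NC makes no alignment
// assumption, it is safe for destinations embedded in packed structures:
//
//     #pragma pack(push, 1)
//     struct PackedRecord { BYTE tag; XMFLOAT4X4 m; };  // m is misaligned
//     #pragma pack(pop)
//
//     PackedRecord rec;
//     XMStoreFloat4x4NC(&rec.m, XMMatrixIdentity());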

#endif // __XNAMATHCONVERT_INL__