/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.

   In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA
   is a good match for most SIMD operations.  However, the horizontal
   add/sub operations require the data pairs to be permuted into separate
   registers with vertical even/odd alignment for the operation.
   And the addsub operations require the sign of only the even-numbered
   elements to be flipped (xored with -0.0).
   For larger blocks of code using these intrinsic implementations,
   the compiler should be able to schedule instructions to avoid
   additional latency.

   In the specific case of the monitor and mwait instructions, there is
   no direct equivalent in the PowerISA at this time.  So those
   intrinsics are not implemented.  */
#error \
    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
#endif

#ifndef PMMINTRIN_H_
#define PMMINTRIN_H_

#if defined(__powerpc64__) && \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

/* We need definitions from the SSE2 and SSE header files.  */
#include <emmintrin.h>

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_addsub_ps(__m128 __X, __m128 __Y) {
  const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0};
  __v4sf __even_neg_Y = vec_xor(__Y, __even_n0);
  return (__m128)vec_add(__X, __even_neg_Y);
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_addsub_pd(__m128d __X, __m128d __Y) {
  const __v2df __even_n0 = {-0.0, 0.0};
  __v2df __even_neg_Y = vec_xor(__Y, __even_n0);
  return (__m128d)vec_add(__X, __even_neg_Y);
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_ps(__m128 __X, __m128 __Y) {
  __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
                                     0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
                                     0x18, 0x19, 0x1A, 0x1B};
  __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
                                     0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
                                     0x1C, 0x1D, 0x1E, 0x1F};
  return (__m128)vec_add(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
                         vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_ps(__m128 __X, __m128 __Y) {
  __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09,
                                     0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13,
                                     0x18, 0x19, 0x1A, 0x1B};
  __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D,
                                     0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17,
                                     0x1C, 0x1D, 0x1E, 0x1F};
  return (__m128)vec_sub(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2),
                         vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1));
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pd(__m128d __X, __m128d __Y) {
  return (__m128d)vec_add(vec_mergeh((__v2df)__X, (__v2df)__Y),
                          vec_mergel((__v2df)__X, (__v2df)__Y));
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pd(__m128d __X, __m128d __Y) {
  return (__m128d)vec_sub(vec_mergeh((__v2df)__X, (__v2df)__Y),
                          vec_mergel((__v2df)__X, (__v2df)__Y));
}

#ifdef _ARCH_PWR8
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movehdup_ps(__m128 __X) {
  return (__m128)vec_mergeo((__v4su)__X, (__v4su)__X);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_moveldup_ps(__m128 __X) {
  return (__m128)vec_mergee((__v4su)__X, (__v4su)__X);
}
#endif

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_loaddup_pd(double const *__P) {
  return (__m128d)vec_splats(*__P);
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movedup_pd(__m128d __X) {
  return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_lddqu_si128(__m128i const *__P) {
  return (__m128i)(vec_vsx_ld(0, (signed int const *)__P));
}

/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_mwait.  */

#else
#include_next <pmmintrin.h>
#endif /* defined(__powerpc64__) && \
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* PMMINTRIN_H_ */