Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | /*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== |
2 | * |
||
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | * See https://llvm.org/LICENSE.txt for license information. |
||
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | * |
||
7 | *===-----------------------------------------------------------------------=== |
||
8 | */ |
||
9 | #ifndef __IMMINTRIN_H |
||
10 | #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." |
||
11 | #endif |
||
12 | |||
13 | #ifndef __AVX512ERINTRIN_H |
||
14 | #define __AVX512ERINTRIN_H |
||
15 | |||
16 | /* exp2a23 */ |
||
17 | #define _mm512_exp2a23_round_pd(A, R) \ |
||
18 | ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ |
||
19 | (__v8df)_mm512_setzero_pd(), \ |
||
20 | (__mmask8)-1, (int)(R))) |
||
21 | |||
22 | #define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ |
||
23 | ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ |
||
24 | (__v8df)(__m512d)(S), (__mmask8)(M), \ |
||
25 | (int)(R))) |
||
26 | |||
27 | #define _mm512_maskz_exp2a23_round_pd(M, A, R) \ |
||
28 | ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ |
||
29 | (__v8df)_mm512_setzero_pd(), \ |
||
30 | (__mmask8)(M), (int)(R))) |
||
31 | |||
32 | #define _mm512_exp2a23_pd(A) \ |
||
33 | _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) |
||
34 | |||
35 | #define _mm512_mask_exp2a23_pd(S, M, A) \ |
||
36 | _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
37 | |||
38 | #define _mm512_maskz_exp2a23_pd(M, A) \ |
||
39 | _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
40 | |||
41 | #define _mm512_exp2a23_round_ps(A, R) \ |
||
42 | ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ |
||
43 | (__v16sf)_mm512_setzero_ps(), \ |
||
44 | (__mmask16)-1, (int)(R))) |
||
45 | |||
46 | #define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ |
||
47 | ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ |
||
48 | (__v16sf)(__m512)(S), (__mmask16)(M), \ |
||
49 | (int)(R))) |
||
50 | |||
51 | #define _mm512_maskz_exp2a23_round_ps(M, A, R) \ |
||
52 | ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ |
||
53 | (__v16sf)_mm512_setzero_ps(), \ |
||
54 | (__mmask16)(M), (int)(R))) |
||
55 | |||
56 | #define _mm512_exp2a23_ps(A) \ |
||
57 | _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) |
||
58 | |||
59 | #define _mm512_mask_exp2a23_ps(S, M, A) \ |
||
60 | _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
61 | |||
62 | #define _mm512_maskz_exp2a23_ps(M, A) \ |
||
63 | _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
64 | |||
65 | /* rsqrt28 */ |
||
66 | #define _mm512_rsqrt28_round_pd(A, R) \ |
||
67 | ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ |
||
68 | (__v8df)_mm512_setzero_pd(), \ |
||
69 | (__mmask8)-1, (int)(R))) |
||
70 | |||
71 | #define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ |
||
72 | ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ |
||
73 | (__v8df)(__m512d)(S), (__mmask8)(M), \ |
||
74 | (int)(R))) |
||
75 | |||
76 | #define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ |
||
77 | ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ |
||
78 | (__v8df)_mm512_setzero_pd(), \ |
||
79 | (__mmask8)(M), (int)(R))) |
||
80 | |||
81 | #define _mm512_rsqrt28_pd(A) \ |
||
82 | _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) |
||
83 | |||
84 | #define _mm512_mask_rsqrt28_pd(S, M, A) \ |
||
85 | _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
86 | |||
87 | #define _mm512_maskz_rsqrt28_pd(M, A) \ |
||
88 | _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
89 | |||
90 | #define _mm512_rsqrt28_round_ps(A, R) \ |
||
91 | ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ |
||
92 | (__v16sf)_mm512_setzero_ps(), \ |
||
93 | (__mmask16)-1, (int)(R))) |
||
94 | |||
95 | #define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ |
||
96 | ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ |
||
97 | (__v16sf)(__m512)(S), (__mmask16)(M), \ |
||
98 | (int)(R))) |
||
99 | |||
100 | #define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ |
||
101 | ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ |
||
102 | (__v16sf)_mm512_setzero_ps(), \ |
||
103 | (__mmask16)(M), (int)(R))) |
||
104 | |||
105 | #define _mm512_rsqrt28_ps(A) \ |
||
106 | _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) |
||
107 | |||
108 | #define _mm512_mask_rsqrt28_ps(S, M, A) \ |
||
109 | _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
110 | |||
111 | #define _mm512_maskz_rsqrt28_ps(M, A) \ |
||
112 | _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
113 | |||
114 | #define _mm_rsqrt28_round_ss(A, B, R) \ |
||
115 | ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ |
||
116 | (__v4sf)(__m128)(B), \ |
||
117 | (__v4sf)_mm_setzero_ps(), \ |
||
118 | (__mmask8)-1, (int)(R))) |
||
119 | |||
120 | #define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ |
||
121 | ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ |
||
122 | (__v4sf)(__m128)(B), \ |
||
123 | (__v4sf)(__m128)(S), \ |
||
124 | (__mmask8)(M), (int)(R))) |
||
125 | |||
126 | #define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ |
||
127 | ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ |
||
128 | (__v4sf)(__m128)(B), \ |
||
129 | (__v4sf)_mm_setzero_ps(), \ |
||
130 | (__mmask8)(M), (int)(R))) |
||
131 | |||
132 | #define _mm_rsqrt28_ss(A, B) \ |
||
133 | _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) |
||
134 | |||
135 | #define _mm_mask_rsqrt28_ss(S, M, A, B) \ |
||
136 | _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
137 | |||
138 | #define _mm_maskz_rsqrt28_ss(M, A, B) \ |
||
139 | _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
140 | |||
141 | #define _mm_rsqrt28_round_sd(A, B, R) \ |
||
142 | ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ |
||
143 | (__v2df)(__m128d)(B), \ |
||
144 | (__v2df)_mm_setzero_pd(), \ |
||
145 | (__mmask8)-1, (int)(R))) |
||
146 | |||
147 | #define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ |
||
148 | ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ |
||
149 | (__v2df)(__m128d)(B), \ |
||
150 | (__v2df)(__m128d)(S), \ |
||
151 | (__mmask8)(M), (int)(R))) |
||
152 | |||
153 | #define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ |
||
154 | ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ |
||
155 | (__v2df)(__m128d)(B), \ |
||
156 | (__v2df)_mm_setzero_pd(), \ |
||
157 | (__mmask8)(M), (int)(R))) |
||
158 | |||
159 | #define _mm_rsqrt28_sd(A, B) \ |
||
160 | _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) |
||
161 | |||
162 | #define _mm_mask_rsqrt28_sd(S, M, A, B) \ |
||
163 | _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
164 | |||
165 | #define _mm_maskz_rsqrt28_sd(M, A, B) \ |
||
166 | _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
167 | |||
168 | /* rcp28 */ |
||
169 | #define _mm512_rcp28_round_pd(A, R) \ |
||
170 | ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ |
||
171 | (__v8df)_mm512_setzero_pd(), \ |
||
172 | (__mmask8)-1, (int)(R))) |
||
173 | |||
174 | #define _mm512_mask_rcp28_round_pd(S, M, A, R) \ |
||
175 | ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ |
||
176 | (__v8df)(__m512d)(S), (__mmask8)(M), \ |
||
177 | (int)(R))) |
||
178 | |||
179 | #define _mm512_maskz_rcp28_round_pd(M, A, R) \ |
||
180 | ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ |
||
181 | (__v8df)_mm512_setzero_pd(), \ |
||
182 | (__mmask8)(M), (int)(R))) |
||
183 | |||
184 | #define _mm512_rcp28_pd(A) \ |
||
185 | _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) |
||
186 | |||
187 | #define _mm512_mask_rcp28_pd(S, M, A) \ |
||
188 | _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
189 | |||
190 | #define _mm512_maskz_rcp28_pd(M, A) \ |
||
191 | _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
192 | |||
193 | #define _mm512_rcp28_round_ps(A, R) \ |
||
194 | ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ |
||
195 | (__v16sf)_mm512_setzero_ps(), \ |
||
196 | (__mmask16)-1, (int)(R))) |
||
197 | |||
198 | #define _mm512_mask_rcp28_round_ps(S, M, A, R) \ |
||
199 | ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ |
||
200 | (__v16sf)(__m512)(S), (__mmask16)(M), \ |
||
201 | (int)(R))) |
||
202 | |||
203 | #define _mm512_maskz_rcp28_round_ps(M, A, R) \ |
||
204 | ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ |
||
205 | (__v16sf)_mm512_setzero_ps(), \ |
||
206 | (__mmask16)(M), (int)(R))) |
||
207 | |||
208 | #define _mm512_rcp28_ps(A) \ |
||
209 | _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) |
||
210 | |||
211 | #define _mm512_mask_rcp28_ps(S, M, A) \ |
||
212 | _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) |
||
213 | |||
214 | #define _mm512_maskz_rcp28_ps(M, A) \ |
||
215 | _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) |
||
216 | |||
217 | #define _mm_rcp28_round_ss(A, B, R) \ |
||
218 | ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ |
||
219 | (__v4sf)(__m128)(B), \ |
||
220 | (__v4sf)_mm_setzero_ps(), \ |
||
221 | (__mmask8)-1, (int)(R))) |
||
222 | |||
223 | #define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ |
||
224 | ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ |
||
225 | (__v4sf)(__m128)(B), \ |
||
226 | (__v4sf)(__m128)(S), \ |
||
227 | (__mmask8)(M), (int)(R))) |
||
228 | |||
229 | #define _mm_maskz_rcp28_round_ss(M, A, B, R) \ |
||
230 | ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ |
||
231 | (__v4sf)(__m128)(B), \ |
||
232 | (__v4sf)_mm_setzero_ps(), \ |
||
233 | (__mmask8)(M), (int)(R))) |
||
234 | |||
235 | #define _mm_rcp28_ss(A, B) \ |
||
236 | _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) |
||
237 | |||
238 | #define _mm_mask_rcp28_ss(S, M, A, B) \ |
||
239 | _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
240 | |||
241 | #define _mm_maskz_rcp28_ss(M, A, B) \ |
||
242 | _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
243 | |||
244 | #define _mm_rcp28_round_sd(A, B, R) \ |
||
245 | ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ |
||
246 | (__v2df)(__m128d)(B), \ |
||
247 | (__v2df)_mm_setzero_pd(), \ |
||
248 | (__mmask8)-1, (int)(R))) |
||
249 | |||
250 | #define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ |
||
251 | ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ |
||
252 | (__v2df)(__m128d)(B), \ |
||
253 | (__v2df)(__m128d)(S), \ |
||
254 | (__mmask8)(M), (int)(R))) |
||
255 | |||
256 | #define _mm_maskz_rcp28_round_sd(M, A, B, R) \ |
||
257 | ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ |
||
258 | (__v2df)(__m128d)(B), \ |
||
259 | (__v2df)_mm_setzero_pd(), \ |
||
260 | (__mmask8)(M), (int)(R))) |
||
261 | |||
262 | #define _mm_rcp28_sd(A, B) \ |
||
263 | _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) |
||
264 | |||
265 | #define _mm_mask_rcp28_sd(S, M, A, B) \ |
||
266 | _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
267 | |||
268 | #define _mm_maskz_rcp28_sd(M, A, B) \ |
||
269 | _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) |
||
270 | |||
271 | #endif /* __AVX512ERINTRIN_H */ |