Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file defines all of the ARM-specific intrinsics. |
||
10 | // |
||
11 | //===----------------------------------------------------------------------===// |
||
12 | |||
13 | |||
14 | //===----------------------------------------------------------------------===// |
||
15 | // Miscellaneous (this header read "TLS", but no TLS intrinsic is defined in this section) |
||
16 | |||
17 | let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". |
||
18 | |||
19 | // A space-consuming intrinsic primarily for testing ARMConstantIslands. The |
||
20 | // first argument is the number of bytes this "instruction" takes up and must be an immediate (ImmArg<ArgIndex<0>>); the second |
||
21 | // argument and the i32 return value are essentially chains, used to force ordering during ISel. Declared with plain Intrinsic rather than DefaultAttrsIntrinsic. |
||
22 | def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>; |
||
23 | |||
24 | // 16-bit multiplications |
||
25 | def int_arm_smulbb : ClangBuiltin<"__builtin_arm_smulbb">, |
||
26 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
27 | [IntrNoMem]>; |
||
28 | def int_arm_smulbt : ClangBuiltin<"__builtin_arm_smulbt">, |
||
29 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
30 | [IntrNoMem]>; |
||
31 | def int_arm_smultb : ClangBuiltin<"__builtin_arm_smultb">, |
||
32 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
33 | [IntrNoMem]>; |
||
34 | def int_arm_smultt : ClangBuiltin<"__builtin_arm_smultt">, |
||
35 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
36 | [IntrNoMem]>; |
||
37 | def int_arm_smulwb : ClangBuiltin<"__builtin_arm_smulwb">, |
||
38 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
39 | [IntrNoMem]>; |
||
40 | def int_arm_smulwt : ClangBuiltin<"__builtin_arm_smulwt">, |
||
41 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
42 | [IntrNoMem]>; |
||
43 | |||
44 | //===----------------------------------------------------------------------===// |
||
45 | // Saturating Arithmetic |
||
46 | |||
47 | def int_arm_qadd : ClangBuiltin<"__builtin_arm_qadd">, |
||
48 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
49 | [Commutative, IntrNoMem]>; |
||
50 | def int_arm_qsub : ClangBuiltin<"__builtin_arm_qsub">, |
||
51 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
52 | [IntrNoMem]>; |
||
53 | def int_arm_ssat : ClangBuiltin<"__builtin_arm_ssat">, |
||
54 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
55 | [IntrNoMem]>; |
||
56 | def int_arm_usat : ClangBuiltin<"__builtin_arm_usat">, |
||
57 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
58 | [IntrNoMem]>; |
||
59 | |||
60 | // Accumulating multiplications |
||
61 | def int_arm_smlabb : ClangBuiltin<"__builtin_arm_smlabb">, |
||
62 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
63 | llvm_i32_ty], |
||
64 | [IntrNoMem]>; |
||
65 | def int_arm_smlabt : ClangBuiltin<"__builtin_arm_smlabt">, |
||
66 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
67 | llvm_i32_ty], |
||
68 | [IntrNoMem]>; |
||
69 | def int_arm_smlatb : ClangBuiltin<"__builtin_arm_smlatb">, |
||
70 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
71 | llvm_i32_ty], |
||
72 | [IntrNoMem]>; |
||
73 | def int_arm_smlatt : ClangBuiltin<"__builtin_arm_smlatt">, |
||
74 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
75 | llvm_i32_ty], |
||
76 | [IntrNoMem]>; |
||
77 | def int_arm_smlawb : ClangBuiltin<"__builtin_arm_smlawb">, |
||
78 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
79 | llvm_i32_ty], |
||
80 | [IntrNoMem]>; |
||
81 | def int_arm_smlawt : ClangBuiltin<"__builtin_arm_smlawt">, |
||
82 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
83 | llvm_i32_ty], |
||
84 | [IntrNoMem]>; |
||
85 | |||
86 | // Parallel 16-bit saturation |
||
87 | def int_arm_ssat16 : ClangBuiltin<"__builtin_arm_ssat16">, |
||
88 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
89 | [IntrNoMem]>; |
||
90 | def int_arm_usat16 : ClangBuiltin<"__builtin_arm_usat16">, |
||
91 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
92 | [IntrNoMem]>; |
||
93 | |||
94 | // Packing and unpacking |
||
95 | def int_arm_sxtab16 : ClangBuiltin<"__builtin_arm_sxtab16">, |
||
96 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
97 | [IntrNoMem]>; |
||
98 | def int_arm_sxtb16 : ClangBuiltin<"__builtin_arm_sxtb16">, |
||
99 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; |
||
100 | def int_arm_uxtab16 : ClangBuiltin<"__builtin_arm_uxtab16">, |
||
101 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
102 | [IntrNoMem]>; |
||
103 | def int_arm_uxtb16 : ClangBuiltin<"__builtin_arm_uxtb16">, |
||
104 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; |
||
105 | |||
106 | // Parallel selection, reads the GE flags. Declared IntrReadMem rather than IntrNoMem (presumably so the flag read stays ordered after the GE-writing intrinsics -- confirm). |
||
107 | def int_arm_sel : ClangBuiltin<"__builtin_arm_sel">, |
||
108 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
109 | [IntrReadMem]>; |
||
110 | |||
111 | // Parallel 8-bit addition and subtraction |
||
112 | def int_arm_qadd8 : ClangBuiltin<"__builtin_arm_qadd8">, |
||
113 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
114 | [IntrNoMem]>; |
||
115 | def int_arm_qsub8 : ClangBuiltin<"__builtin_arm_qsub8">, |
||
116 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
117 | [IntrNoMem]>; |
||
118 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 8-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
119 | def int_arm_sadd8 : ClangBuiltin<"__builtin_arm_sadd8">, |
||
120 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
121 | def int_arm_shadd8 : ClangBuiltin<"__builtin_arm_shadd8">, |
||
122 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
123 | [IntrNoMem]>; |
||
124 | def int_arm_shsub8 : ClangBuiltin<"__builtin_arm_shsub8">, |
||
125 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
126 | [IntrNoMem]>; |
||
127 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 8-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
128 | def int_arm_ssub8 : ClangBuiltin<"__builtin_arm_ssub8">, |
||
129 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
130 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
131 | def int_arm_uadd8 : ClangBuiltin<"__builtin_arm_uadd8">, |
||
132 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
133 | def int_arm_uhadd8 : ClangBuiltin<"__builtin_arm_uhadd8">, |
||
134 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
135 | [IntrNoMem]>; |
||
136 | def int_arm_uhsub8 : ClangBuiltin<"__builtin_arm_uhsub8">, |
||
137 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
138 | [IntrNoMem]>; |
||
139 | def int_arm_uqadd8 : ClangBuiltin<"__builtin_arm_uqadd8">, |
||
140 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
141 | [IntrNoMem]>; |
||
142 | def int_arm_uqsub8 : ClangBuiltin<"__builtin_arm_uqsub8">, |
||
143 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
144 | [IntrNoMem]>; |
||
145 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 8-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
146 | def int_arm_usub8 : ClangBuiltin<"__builtin_arm_usub8">, |
||
147 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
148 | |||
149 | // Sum of 8-bit absolute differences |
||
150 | def int_arm_usad8 : ClangBuiltin<"__builtin_arm_usad8">, |
||
151 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
152 | [IntrNoMem]>; |
||
153 | def int_arm_usada8 : ClangBuiltin<"__builtin_arm_usada8">, |
||
154 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
155 | llvm_i32_ty], |
||
156 | [IntrNoMem]>; |
||
157 | |||
158 | // Parallel 16-bit addition and subtraction |
||
159 | def int_arm_qadd16 : ClangBuiltin<"__builtin_arm_qadd16">, |
||
160 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
161 | [IntrNoMem]>; |
||
162 | def int_arm_qasx : ClangBuiltin<"__builtin_arm_qasx">, |
||
163 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
164 | [IntrNoMem]>; |
||
165 | def int_arm_qsax : ClangBuiltin<"__builtin_arm_qsax">, |
||
166 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
167 | [IntrNoMem]>; |
||
168 | def int_arm_qsub16 : ClangBuiltin<"__builtin_arm_qsub16">, |
||
169 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
170 | [IntrNoMem]>; |
||
171 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 16-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
172 | def int_arm_sadd16 : ClangBuiltin<"__builtin_arm_sadd16">, |
||
173 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
174 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
175 | def int_arm_sasx : ClangBuiltin<"__builtin_arm_sasx">, |
||
176 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
177 | def int_arm_shadd16 : ClangBuiltin<"__builtin_arm_shadd16">, |
||
178 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
179 | [IntrNoMem]>; |
||
180 | def int_arm_shasx : ClangBuiltin<"__builtin_arm_shasx">, |
||
181 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
182 | [IntrNoMem]>; |
||
183 | def int_arm_shsax : ClangBuiltin<"__builtin_arm_shsax">, |
||
184 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
185 | [IntrNoMem]>; |
||
186 | def int_arm_shsub16 : ClangBuiltin<"__builtin_arm_shsub16">, |
||
187 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
188 | [IntrNoMem]>; |
||
189 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 16-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
190 | def int_arm_ssax : ClangBuiltin<"__builtin_arm_ssax">, |
||
191 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
192 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
193 | def int_arm_ssub16 : ClangBuiltin<"__builtin_arm_ssub16">, |
||
194 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
195 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
196 | def int_arm_uadd16 : ClangBuiltin<"__builtin_arm_uadd16">, |
||
197 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
198 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
199 | def int_arm_uasx : ClangBuiltin<"__builtin_arm_uasx">, |
||
200 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
201 | def int_arm_uhadd16 : ClangBuiltin<"__builtin_arm_uhadd16">, |
||
202 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
203 | [IntrNoMem]>; |
||
204 | def int_arm_uhasx : ClangBuiltin<"__builtin_arm_uhasx">, |
||
205 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
206 | [IntrNoMem]>; |
||
207 | def int_arm_uhsax : ClangBuiltin<"__builtin_arm_uhsax">, |
||
208 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
209 | [IntrNoMem]>; |
||
210 | def int_arm_uhsub16 : ClangBuiltin<"__builtin_arm_uhsub16">, |
||
211 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
212 | [IntrNoMem]>; |
||
213 | def int_arm_uqadd16 : ClangBuiltin<"__builtin_arm_uqadd16">, |
||
214 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
215 | [IntrNoMem]>; |
||
216 | def int_arm_uqasx : ClangBuiltin<"__builtin_arm_uqasx">, |
||
217 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
218 | [IntrNoMem]>; |
||
219 | def int_arm_uqsax : ClangBuiltin<"__builtin_arm_uqsax">, |
||
220 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
221 | [IntrNoMem]>; |
||
222 | def int_arm_uqsub16 : ClangBuiltin<"__builtin_arm_uqsub16">, |
||
223 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
224 | [IntrNoMem]>; |
||
225 | // Writes to the GE bits. Note the empty property list ([]) where the other parallel 16-bit add/sub intrinsics in this section use [IntrNoMem]. |
||
226 | def int_arm_usax : ClangBuiltin<"__builtin_arm_usax">, |
||
227 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
228 | // Writes to the GE bits. Same treatment: empty property list instead of [IntrNoMem]. |
||
229 | def int_arm_usub16 : ClangBuiltin<"__builtin_arm_usub16">, |
||
230 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; |
||
231 | |||
232 | // Parallel 16-bit multiplication |
||
233 | def int_arm_smlad : ClangBuiltin<"__builtin_arm_smlad">, |
||
234 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
235 | llvm_i32_ty], |
||
236 | [IntrNoMem]>; |
||
237 | def int_arm_smladx : ClangBuiltin<"__builtin_arm_smladx">, |
||
238 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
239 | llvm_i32_ty], |
||
240 | [IntrNoMem]>; |
||
241 | def int_arm_smlald : ClangBuiltin<"__builtin_arm_smlald">, |
||
242 | DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, |
||
243 | llvm_i64_ty], |
||
244 | [IntrNoMem]>; |
||
245 | def int_arm_smlaldx : ClangBuiltin<"__builtin_arm_smlaldx">, |
||
246 | DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, |
||
247 | llvm_i64_ty], |
||
248 | [IntrNoMem]>; |
||
249 | def int_arm_smlsd : ClangBuiltin<"__builtin_arm_smlsd">, |
||
250 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
251 | llvm_i32_ty], |
||
252 | [IntrNoMem]>; |
||
253 | def int_arm_smlsdx : ClangBuiltin<"__builtin_arm_smlsdx">, |
||
254 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
255 | llvm_i32_ty], |
||
256 | [IntrNoMem]>; |
||
257 | def int_arm_smlsld : ClangBuiltin<"__builtin_arm_smlsld">, |
||
258 | DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, |
||
259 | llvm_i64_ty], |
||
260 | [IntrNoMem]>; |
||
261 | def int_arm_smlsldx : ClangBuiltin<"__builtin_arm_smlsldx">, |
||
262 | DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, |
||
263 | llvm_i64_ty], |
||
264 | [IntrNoMem]>; |
||
265 | def int_arm_smuad : ClangBuiltin<"__builtin_arm_smuad">, |
||
266 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
267 | [IntrNoMem]>; |
||
268 | def int_arm_smuadx : ClangBuiltin<"__builtin_arm_smuadx">, |
||
269 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
270 | [IntrNoMem]>; |
||
271 | def int_arm_smusd : ClangBuiltin<"__builtin_arm_smusd">, |
||
272 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
273 | [IntrNoMem]>; |
||
274 | def int_arm_smusdx : ClangBuiltin<"__builtin_arm_smusdx">, |
||
275 | DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], |
||
276 | [IntrNoMem]>; |
||
277 | |||
278 | |||
279 | //===----------------------------------------------------------------------===// |
||
280 | // Load, Store and Clear exclusive. ldrex/ldaex and strex/stlex are overloaded on the pointer type (llvm_anyptr_ty) and carry one i32 of data; the *exd forms take a plain llvm_ptr_ty and carry a pair of i32 values. Every store form also returns an i32 (presumably the exclusive-store status -- confirm). clrex takes no operands and returns nothing. |
||
281 | |||
282 | // TODO: Add applicable default attributes. (All definitions in this section use plain Intrinsic, not DefaultAttrsIntrinsic, and list no properties.) |
||
283 | def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>; |
||
284 | def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>; |
||
285 | |||
286 | def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>; |
||
287 | def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>; |
||
288 | |||
289 | def int_arm_clrex : Intrinsic<[]>; |
||
290 | |||
291 | def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, |
||
292 | llvm_ptr_ty]>; |
||
293 | def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>; |
||
294 | |||
295 | def int_arm_stlexd : Intrinsic<[llvm_i32_ty], |
||
296 | [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; |
||
297 | def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>; |
||
298 | |||
299 | //===----------------------------------------------------------------------===// |
||
300 | // Data barrier instructions. dmb, dsb and isb each take a single i32 operand, return nothing, and are mapped both to a Clang builtin (__builtin_arm_*) and to an MSVC-style builtin (__dmb/__dsb/__isb). |
||
301 | |||
302 | // TODO: Add applicable default attributes. (These use plain Intrinsic and list no properties.) |
||
303 | def int_arm_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, |
||
304 | Intrinsic<[], [llvm_i32_ty]>; |
||
305 | def int_arm_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, |
||
306 | Intrinsic<[], [llvm_i32_ty]>; |
||
307 | def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, |
||
308 | Intrinsic<[], [llvm_i32_ty]>; |
||
309 | |||
310 | //===----------------------------------------------------------------------===// |
||
311 | // VFP. get_fpscr returns an i32 and set_fpscr takes one; both carry an empty property list (no IntrNoMem). vcvtr/vcvtru are overloaded on their floating-point operand (llvm_anyfloat_ty), return llvm_float_ty, are IntrNoMem and have no ClangBuiltin mapping. |
||
312 | |||
313 | def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">, |
||
314 | DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>; |
||
315 | def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">, |
||
316 | DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>; |
||
317 | def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty], |
||
318 | [llvm_anyfloat_ty], [IntrNoMem]>; |
||
319 | def int_arm_vcvtru : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_anyfloat_ty], |
||
320 | [IntrNoMem]>; |
||
321 | |||
322 | //===----------------------------------------------------------------------===// |
||
323 | // Coprocessor |
||
324 | |||
325 | // TODO: Add applicable default attributes. The ldc*/stc* intrinsics below all share one shape: no result, operands (i32, i32, ptr), with the two i32 operands required to be immediates (ImmArg on indices 0 and 1). |
||
326 | def int_arm_ldc : ClangBuiltin<"__builtin_arm_ldc">, |
||
327 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
328 | def int_arm_ldcl : ClangBuiltin<"__builtin_arm_ldcl">, |
||
329 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
330 | def int_arm_ldc2 : ClangBuiltin<"__builtin_arm_ldc2">, |
||
331 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
332 | def int_arm_ldc2l : ClangBuiltin<"__builtin_arm_ldc2l">, |
||
333 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
334 | |||
335 | def int_arm_stc : ClangBuiltin<"__builtin_arm_stc">, |
||
336 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
337 | def int_arm_stcl : ClangBuiltin<"__builtin_arm_stcl">, |
||
338 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
339 | def int_arm_stc2 : ClangBuiltin<"__builtin_arm_stc2">, |
||
340 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
341 | def int_arm_stc2l : ClangBuiltin<"__builtin_arm_stc2l">, |
||
342 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; |
||
343 | |||
344 | // Move to coprocessor. Six i32 operands, no result. Operands 0, 1, 3, 4 and 5 are ImmArgs; operand 2 is the only non-immediate one (presumably the register value being transferred -- confirm). |
||
345 | def int_arm_mcr : ClangBuiltin<"__builtin_arm_mcr">, |
||
346 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
347 | llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; |
||
348 | def int_arm_mcr2 : ClangBuiltin<"__builtin_arm_mcr2">, |
||
349 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
350 | llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; |
||
351 | |||
352 | // Move from coprocessor. Five i32 operands, all ImmArgs, producing a single i32 result. Mapped to a Clang builtin and to the MSVC-style builtins _MoveFromCoprocessor / _MoveFromCoprocessor2. |
||
353 | def int_arm_mrc : ClangBuiltin<"__builtin_arm_mrc">, |
||
354 | MSBuiltin<"_MoveFromCoprocessor">, |
||
355 | Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
356 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; |
||
357 | def int_arm_mrc2 : ClangBuiltin<"__builtin_arm_mrc2">, |
||
358 | MSBuiltin<"_MoveFromCoprocessor2">, |
||
359 | Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
360 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; |
||
361 | |||
362 | // Coprocessor data processing. Six i32 operands, every one of them an ImmArg, and no result. |
||
363 | def int_arm_cdp : ClangBuiltin<"__builtin_arm_cdp">, |
||
364 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
365 | llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; |
||
366 | def int_arm_cdp2 : ClangBuiltin<"__builtin_arm_cdp2">, |
||
367 | Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
368 | llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; |
||
369 | |||
370 | // Move from two registers to coprocessor (mcrr/mcrr2: five i32 operands, no result; operands 0, 1 and 4 are ImmArgs, operands 2 and 3 are not), followed by mrrc/mrrc2 (three i32 operands, all ImmArgs, two i32 results; presumably the reverse transfer -- confirm). None of the four has a ClangBuiltin mapping. |
||
371 | def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
372 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>; |
||
373 | def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
374 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>; |
||
375 | |||
376 | def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, |
||
377 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; |
||
378 | def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, |
||
379 | llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; |
||
380 | |||
381 | //===----------------------------------------------------------------------===// |
||
382 | // CRC32. Six variants (crc32b/h/w and crc32cb/ch/cw), all with the signature (i32, i32) -> i32 and marked IntrNoMem. None has a ClangBuiltin mapping. |
||
383 | |||
384 | def int_arm_crc32b : DefaultAttrsIntrinsic< |
||
385 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
386 | def int_arm_crc32cb : DefaultAttrsIntrinsic< |
||
387 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
388 | def int_arm_crc32h : DefaultAttrsIntrinsic< |
||
389 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
390 | def int_arm_crc32ch : DefaultAttrsIntrinsic< |
||
391 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
392 | def int_arm_crc32w : DefaultAttrsIntrinsic< |
||
393 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
394 | def int_arm_crc32cw : DefaultAttrsIntrinsic< |
||
395 | [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; |
||
396 | |||
397 | //===----------------------------------------------------------------------===// |
||
398 | // CMSE. Four variants (tt, ttt, tta, ttat); each takes one pointer operand and returns an i32, and each is mapped to the matching __builtin_arm_cmse_* Clang builtin. |
||
399 | |||
400 | // TODO: Add applicable default attributes. (These use plain Intrinsic with only IntrNoMem listed.) |
||
401 | def int_arm_cmse_tt : ClangBuiltin<"__builtin_arm_cmse_TT">, |
||
402 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; |
||
403 | def int_arm_cmse_ttt : ClangBuiltin<"__builtin_arm_cmse_TTT">, |
||
404 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; |
||
405 | def int_arm_cmse_tta : ClangBuiltin<"__builtin_arm_cmse_TTA">, |
||
406 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; |
||
407 | def int_arm_cmse_ttat : ClangBuiltin<"__builtin_arm_cmse_TTAT">, |
||
408 | Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; |
||
409 | |||
410 | //===----------------------------------------------------------------------===// |
||
411 | // HINT. int_arm_hint and int_arm_dbg each take a single i32 operand and return nothing; neither has a ClangBuiltin mapping. |
||
412 | |||
413 | // TODO: Add applicable default attributes. (Both use plain Intrinsic and list no properties.) |
||
414 | def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>; |
||
415 | def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>; |
||
416 | |||
417 | //===----------------------------------------------------------------------===// |
||
418 | // UND (reserved undefined sequence). Takes a single i32 operand and returns nothing. |
||
419 | |||
420 | // TODO: Add applicable default attributes. (Uses plain Intrinsic and lists no properties.) |
||
421 | def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>; |
||
422 | |||
423 | //===----------------------------------------------------------------------===// |
||
424 | // Advanced SIMD (NEON) |
||
425 | |||
426 | // The following classes do not correspond directly to GCC builtins. |
||
427 | class Neon_1Arg_Intrinsic |
||
428 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; |
||
429 | class Neon_1Arg_Narrow_Intrinsic |
||
430 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>], |
||
431 | [IntrNoMem]>; |
||
432 | class Neon_2Arg_Intrinsic |
||
433 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
434 | [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; |
||
435 | class Neon_2Arg_Narrow_Intrinsic |
||
436 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
437 | [LLVMExtendedType<0>, LLVMExtendedType<0>], |
||
438 | [IntrNoMem]>; |
||
439 | class Neon_2Arg_Long_Intrinsic |
||
440 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
441 | [LLVMTruncatedType<0>, LLVMTruncatedType<0>], |
||
442 | [IntrNoMem]>; |
||
443 | class Neon_3Arg_Intrinsic |
||
444 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
445 | [LLVMMatchType<0>, LLVMMatchType<0>, |
||
446 | LLVMMatchType<0>], |
||
447 | [IntrNoMem]>; |
||
448 | class Neon_3Arg_Long_Intrinsic |
||
449 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
450 | [LLVMMatchType<0>, LLVMTruncatedType<0>, |
||
451 | LLVMTruncatedType<0>], |
||
452 | [IntrNoMem]>; |
||
453 | |||
454 | class Neon_1FloatArg_Intrinsic |
||
455 | : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; |
||
456 | |||
457 | class Neon_CvtFxToFP_Intrinsic |
||
458 | : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], |
||
459 | [IntrNoMem]>; |
||
460 | class Neon_CvtFPToFx_Intrinsic |
||
461 | : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], |
||
462 | [IntrNoMem]>; |
||
463 | class Neon_CvtFPtoInt_1Arg_Intrinsic |
||
464 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], |
||
465 | [IntrNoMem]>; |
||
466 | |||
467 | class Neon_Compare_Intrinsic |
||
468 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
469 | [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; |
||
470 | |||
471 | // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors. |
||
472 | // Besides the table, VTBL has one other v8i8 argument and VTBX has two. |
||
473 | // Overall, the classes range from 2 to 6 v8i8 arguments: Neon_Tbl<N>Arg_Intrinsic takes N v8i8 operands, returns one v8i8 and is IntrNoMem. |
||
474 | class Neon_Tbl2Arg_Intrinsic |
||
475 | : DefaultAttrsIntrinsic<[llvm_v8i8_ty], |
||
476 | [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; |
||
477 | class Neon_Tbl3Arg_Intrinsic |
||
478 | : DefaultAttrsIntrinsic<[llvm_v8i8_ty], |
||
479 | [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], |
||
480 | [IntrNoMem]>; |
||
481 | class Neon_Tbl4Arg_Intrinsic |
||
482 | : DefaultAttrsIntrinsic<[llvm_v8i8_ty], |
||
483 | [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, |
||
484 | llvm_v8i8_ty], |
||
485 | [IntrNoMem]>; |
||
486 | class Neon_Tbl5Arg_Intrinsic |
||
487 | : DefaultAttrsIntrinsic<[llvm_v8i8_ty], |
||
488 | [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, |
||
489 | llvm_v8i8_ty, llvm_v8i8_ty], |
||
490 | [IntrNoMem]>; |
||
491 | class Neon_Tbl6Arg_Intrinsic |
||
492 | : DefaultAttrsIntrinsic<[llvm_v8i8_ty], |
||
493 | [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, |
||
494 | llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], |
||
495 | [IntrNoMem]>; |
||
496 | |||
497 | // Arithmetic ops. Every definition inside the let-block that follows has IntrProperties overridden to [IntrNoMem, Commutative]; definitions after its closing brace keep the properties supplied by their Neon_* class. |
||
498 | |||
499 | let IntrProperties = [IntrNoMem, Commutative] in { |
||
500 | |||
501 | // Vector Add. |
||
502 | def int_arm_neon_vhadds : Neon_2Arg_Intrinsic; |
||
503 | def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic; |
||
504 | def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic; |
||
505 | def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic; |
||
506 | def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic; |
||
507 | |||
508 | // Vector Multiply. |
||
509 | def int_arm_neon_vmulp : Neon_2Arg_Intrinsic; |
||
510 | def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic; |
||
511 | def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic; |
||
512 | def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic; |
||
513 | def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic; |
||
514 | def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic; |
||
515 | def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic; |
||
516 | |||
517 | // Vector Maximum. |
||
518 | def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic; |
||
519 | def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic; |
||
520 | def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic; |
||
521 | |||
522 | // Vector Minimum. |
||
523 | def int_arm_neon_vmins : Neon_2Arg_Intrinsic; |
||
524 | def int_arm_neon_vminu : Neon_2Arg_Intrinsic; |
||
525 | def int_arm_neon_vminnm : Neon_2Arg_Intrinsic; |
||
526 | |||
527 | // Vector Reciprocal Step. |
||
528 | def int_arm_neon_vrecps : Neon_2Arg_Intrinsic; |
||
529 | |||
530 | // Vector Reciprocal Square Root Step. |
||
531 | def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic; |
||
532 | } |
||
533 | |||
534 | // Vector Subtract. |
||
535 | def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic; |
||
536 | def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic; |
||
537 | def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; |
||
538 | |||
539 | // Vector Absolute Compare. |
||
540 | def int_arm_neon_vacge : Neon_Compare_Intrinsic; |
||
541 | def int_arm_neon_vacgt : Neon_Compare_Intrinsic; |
||
542 | |||
543 | // Vector Absolute Differences. |
||
544 | def int_arm_neon_vabds : Neon_2Arg_Intrinsic; |
||
545 | def int_arm_neon_vabdu : Neon_2Arg_Intrinsic; |
||
546 | |||
547 | // Vector Pairwise Add. |
||
548 | def int_arm_neon_vpadd : Neon_2Arg_Intrinsic; |
||
549 | |||
550 | // Vector Pairwise Add Long. |
||
551 | // Note: This is different from the other "long" NEON intrinsics because |
||
552 | // the result vector has half as many elements as the source vector. |
||
553 | // The source and destination vector types must be specified separately, which is why both the result and the operand are declared as independent llvm_anyvector_ty overloads. |
||
554 | def int_arm_neon_vpaddls : DefaultAttrsIntrinsic< |
||
555 | [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; |
||
556 | def int_arm_neon_vpaddlu : DefaultAttrsIntrinsic< |
||
557 | [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; |
||
558 | |||
559 | // Vector Pairwise Add and Accumulate Long. |
||
560 | // Note: This is similar to vpaddl but the destination vector also appears |
||
561 | // as the first argument (LLVMMatchType<0> ties it to the result type); the second argument is a separately overloaded vector type. |
||
562 | def int_arm_neon_vpadals : DefaultAttrsIntrinsic< |
||
563 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; |
||
564 | def int_arm_neon_vpadalu : DefaultAttrsIntrinsic< |
||
565 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; |
||
566 | |||
567 | // Vector Pairwise Maximum and Minimum. |
||
568 | def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic; |
||
569 | def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic; |
||
570 | def int_arm_neon_vpmins : Neon_2Arg_Intrinsic; |
||
571 | def int_arm_neon_vpminu : Neon_2Arg_Intrinsic; |
||
572 | |||
573 | // Vector Shifts: |
||
574 | // |
||
575 | // The various saturating and rounding vector shift operations need to be |
||
576 | // represented by intrinsics in LLVM, and even the basic VSHL variable shift |
||
577 | // operation cannot be safely translated to LLVM's shift operators. VSHL can |
||
578 | // be used for both left and right shifts, or even combinations of the two, |
||
579 | // depending on the signs of the shift amounts. It also has well-defined |
||
580 | // behavior for shift amounts that LLVM leaves undefined. Only basic shifts |
||
581 | // by constants can be represented with LLVM's shift operators. |
||
582 | // |
||
583 | // The shift counts for these intrinsics are always vectors, even for constant |
||
584 | // shifts, where the constant is replicated. For consistency with VSHL (and |
||
585 | // other variable shift instructions), left shifts have positive shift counts |
||
586 | // and right shifts have negative shift counts. This convention is also used |
||
587 | // for constant right shift intrinsics, and to help preserve sanity, the |
||
588 | // intrinsic names use "shift" instead of either "shl" or "shr". Where |
||
589 | // applicable, signed and unsigned versions of the intrinsics are |
||
590 | // distinguished with "s" and "u" suffixes. A few NEON shift instructions, |
||
591 | // such as VQSHLU, take signed operands but produce unsigned results; these |
||
592 | // use a "su" suffix. |
||
593 | |||
594 | // Vector Shift. |
||
595 | def int_arm_neon_vshifts : Neon_2Arg_Intrinsic; |
||
596 | def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic; |
||
597 | |||
598 | // Vector Rounding Shift. |
||
599 | def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic; |
||
600 | def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic; |
||
601 | def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic; |
||
602 | |||
603 | // Vector Saturating Shift. |
||
604 | def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic; |
||
605 | def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic; |
||
606 | def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic; |
||
607 | def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic; |
||
608 | def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic; |
||
609 | def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic; |
||
610 | |||
611 | // Vector Saturating Rounding Shift. |
||
612 | def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic; |
||
613 | def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic; |
||
614 | def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic; |
||
615 | def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic; |
||
616 | def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic; |
||
617 | |||
618 | // Vector Shift and Insert. |
||
619 | def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic; |
||
620 | |||
621 | // Vector Absolute Value and Saturating Absolute Value. |
||
622 | def int_arm_neon_vabs : Neon_1Arg_Intrinsic; |
||
623 | def int_arm_neon_vqabs : Neon_1Arg_Intrinsic; |
||
624 | |||
625 | // Vector Saturating Negate. |
||
626 | def int_arm_neon_vqneg : Neon_1Arg_Intrinsic; |
||
627 | |||
628 | // Vector Count Leading Sign/Zero Bits. |
||
629 | def int_arm_neon_vcls : Neon_1Arg_Intrinsic; |
||
630 | |||
631 | // Vector Reciprocal Estimate. |
||
632 | def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic; |
||
633 | |||
634 | // Vector Reciprocal Square Root Estimate. |
||
635 | def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic; |
||
636 | |||
637 | // Vector Conversions Between Floating-point and Integer |
||
638 | def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
639 | def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
640 | def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
641 | def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
642 | def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
643 | def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
644 | def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
645 | def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic; |
||
646 | |||
647 | // Vector Conversions Between Floating-point and Fixed-point. |
||
648 | def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic; |
||
649 | def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic; |
||
650 | def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic; |
||
651 | def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic; |
||
652 | |||
653 | // Vector Conversions Between Half-Precision and Single-Precision. |
||
654 | def int_arm_neon_vcvtfp2hf |
||
655 | : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; |
||
656 | def int_arm_neon_vcvthf2fp |
||
657 | : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; |
||
658 | |||
659 | // Narrowing Saturating Vector Moves. |
||
660 | def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic; |
||
661 | def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic; |
||
662 | def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic; |
||
663 | |||
664 | // Vector Table Lookup. |
||
665 | // The first 1-4 arguments are the table. |
||
666 | def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic; |
||
667 | def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic; |
||
668 | def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic; |
||
669 | def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic; |
||
670 | |||
671 | // Vector Table Extension. |
||
672 | // Some elements of the destination vector may not be updated, so the original |
||
673 | // value of that vector is passed as the first argument. The next 1-4 |
||
674 | // arguments after that are the table. |
||
675 | def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic; |
||
676 | def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic; |
||
677 | def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic; |
||
678 | def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic; |
||
679 | |||
680 | // Vector and Scalar Rounding. |
||
681 | def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic; |
||
682 | def int_arm_neon_vrintx : Neon_1Arg_Intrinsic; |
||
683 | def int_arm_neon_vrinta : Neon_1Arg_Intrinsic; |
||
684 | def int_arm_neon_vrintz : Neon_1Arg_Intrinsic; |
||
685 | def int_arm_neon_vrintm : Neon_1Arg_Intrinsic; |
||
686 | def int_arm_neon_vrintp : Neon_1Arg_Intrinsic; |
||
687 | |||
688 | // De-interleaving vector loads from N-element structures. |
||
689 | // Source operands are the address and alignment. |
||
690 | def int_arm_neon_vld1 : DefaultAttrsIntrinsic< |
||
691 | [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_i32_ty], |
||
692 | [IntrReadMem, IntrArgMemOnly]>; |
||
693 | def int_arm_neon_vld2 : DefaultAttrsIntrinsic< |
||
694 | [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty], |
||
695 | [IntrReadMem, IntrArgMemOnly]>; |
||
696 | def int_arm_neon_vld3 : DefaultAttrsIntrinsic< |
||
697 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], |
||
698 | [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; |
||
699 | def int_arm_neon_vld4 : DefaultAttrsIntrinsic< |
||
700 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
701 | [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; |
||
702 | |||
703 | def int_arm_neon_vld1x2 : DefaultAttrsIntrinsic< |
||
704 | [llvm_anyvector_ty, LLVMMatchType<0>], |
||
705 | [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; |
||
706 | def int_arm_neon_vld1x3 : DefaultAttrsIntrinsic< |
||
707 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], |
||
708 | [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; |
||
709 | def int_arm_neon_vld1x4 : DefaultAttrsIntrinsic< |
||
710 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
711 | [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; |
||
712 | |||
713 | // Vector load N-element structure to one lane. |
||
714 | // Source operands are: the address, the N input vectors (since only one |
||
715 | // lane is assigned), the lane number, and the alignment. |
||
716 | def int_arm_neon_vld2lane : DefaultAttrsIntrinsic< |
||
717 | [llvm_anyvector_ty, LLVMMatchType<0>], |
||
718 | [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, |
||
719 | llvm_i32_ty], |
||
720 | [IntrReadMem, IntrArgMemOnly]>; |
||
721 | def int_arm_neon_vld3lane : DefaultAttrsIntrinsic< |
||
722 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], |
||
723 | [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, |
||
724 | llvm_i32_ty, llvm_i32_ty], |
||
725 | [IntrReadMem, IntrArgMemOnly]>; |
||
726 | def int_arm_neon_vld4lane : DefaultAttrsIntrinsic< |
||
727 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
728 | [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, |
||
729 | LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], |
||
730 | [IntrReadMem, IntrArgMemOnly]>; |
||
731 | |||
732 | // Vector load N-element structure to all lanes. |
||
733 | // Source operands are the address and alignment. |
||
734 | def int_arm_neon_vld2dup : DefaultAttrsIntrinsic< |
||
735 | [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty], |
||
736 | [IntrReadMem, IntrArgMemOnly]>; |
||
737 | def int_arm_neon_vld3dup : DefaultAttrsIntrinsic< |
||
738 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], |
||
739 | [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; |
||
740 | def int_arm_neon_vld4dup : DefaultAttrsIntrinsic< |
||
741 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
742 | [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; |
||
743 | |||
744 | // Interleaving vector stores from N-element structures. |
||
745 | // Source operands are: the address, the N vectors, and the alignment. |
||
746 | def int_arm_neon_vst1 : DefaultAttrsIntrinsic< |
||
747 | [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>; |
||
748 | def int_arm_neon_vst2 : DefaultAttrsIntrinsic< |
||
749 | [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], |
||
750 | [IntrArgMemOnly]>; |
||
751 | def int_arm_neon_vst3 : DefaultAttrsIntrinsic< |
||
752 | [], |
||
753 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, |
||
754 | llvm_i32_ty], |
||
755 | [IntrArgMemOnly]>; |
||
756 | def int_arm_neon_vst4 : DefaultAttrsIntrinsic< |
||
757 | [], |
||
758 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, |
||
759 | LLVMMatchType<1>, llvm_i32_ty], |
||
760 | [IntrArgMemOnly]>; |
||
761 | |||
762 | def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic< |
||
763 | [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>], |
||
764 | [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; |
||
765 | def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic< |
||
766 | [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], |
||
767 | [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; |
||
768 | def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic< |
||
769 | [], |
||
770 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, |
||
771 | LLVMMatchType<1>], |
||
772 | [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; |
||
773 | |||
774 | // Vector store N-element structure from one lane. |
||
775 | // Source operands are: the address, the N vectors, the lane number, and |
||
776 | // the alignment. |
||
777 | def int_arm_neon_vst2lane : DefaultAttrsIntrinsic< |
||
778 | [], |
||
779 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, |
||
780 | llvm_i32_ty], |
||
781 | [IntrArgMemOnly]>; |
||
782 | def int_arm_neon_vst3lane : DefaultAttrsIntrinsic< |
||
783 | [], |
||
784 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, |
||
785 | llvm_i32_ty, llvm_i32_ty], |
||
786 | [IntrArgMemOnly]>; |
||
787 | def int_arm_neon_vst4lane : DefaultAttrsIntrinsic< |
||
788 | [], |
||
789 | [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, |
||
790 | LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty], |
||
791 | [IntrArgMemOnly]>; |
||
792 | |||
793 | // Vector bitwise select. |
||
794 | def int_arm_neon_vbsl : DefaultAttrsIntrinsic< |
||
795 | [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
796 | [IntrNoMem]>; |
||
797 | |||
798 | |||
799 | // Crypto instructions |
||
// Memory-free intrinsic signature: one v16i8 operand, v16i8 result.
800 | class AES_1Arg_Intrinsic : DefaultAttrsIntrinsic< |
||
801 | [llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; |
||
// Memory-free intrinsic signature: two v16i8 operands, v16i8 result.
802 | class AES_2Arg_Intrinsic : DefaultAttrsIntrinsic< |
||
803 | [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; |
||
804 | |||
// Scalar form: one i32 operand, i32 result (unlike the vector forms below).
805 | class SHA_1Arg_Intrinsic : DefaultAttrsIntrinsic< |
||
806 | [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; |
||
// Two v4i32 operands, v4i32 result.
807 | class SHA_2Arg_Intrinsic : DefaultAttrsIntrinsic< |
||
808 | [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; |
||
// Three operands with a scalar i32 in the middle position: (v4i32, i32, v4i32),
// v4i32 result.
809 | class SHA_3Arg_i32_Intrinsic : DefaultAttrsIntrinsic< |
||
810 | [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; |
||
// Three v4i32 operands, v4i32 result.
811 | class SHA_3Arg_v4i32_Intrinsic : DefaultAttrsIntrinsic< |
||
812 | [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>; |
||
813 | |||
814 | def int_arm_neon_aesd : AES_2Arg_Intrinsic; |
||
815 | def int_arm_neon_aese : AES_2Arg_Intrinsic; |
||
816 | def int_arm_neon_aesimc : AES_1Arg_Intrinsic; |
||
817 | def int_arm_neon_aesmc : AES_1Arg_Intrinsic; |
||
818 | def int_arm_neon_sha1h : SHA_1Arg_Intrinsic; |
||
819 | def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic; |
||
820 | def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic; |
||
821 | def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic; |
||
822 | def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic; |
||
823 | def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic; |
||
824 | def int_arm_neon_sha1su0: SHA_3Arg_v4i32_Intrinsic; |
||
825 | def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic; |
||
826 | def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic; |
||
827 | def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic; |
||
828 | |||
829 | def int_arm_neon_vqrdmlah : Neon_3Arg_Intrinsic; |
||
830 | def int_arm_neon_vqrdmlsh : Neon_3Arg_Intrinsic; |
||
831 | |||
832 | // Armv8.2-A dot product instructions |
||
// Signature overloaded on two vector types. Overload 0 is the result type and
// also the type of the first operand; overload 1 is the type shared by the
// second and third operands. The intrinsic does not access memory.
// NOTE(review): the first operand is presumably the accumulator - confirm.
833 | class Neon_Dot_Intrinsic |
||
834 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
835 | [LLVMMatchType<0>, llvm_anyvector_ty, |
||
836 | LLVMMatchType<1>], |
||
837 | [IntrNoMem]>; |
||
838 | def int_arm_neon_udot : Neon_Dot_Intrinsic; |
||
839 | def int_arm_neon_sdot : Neon_Dot_Intrinsic; |
||
840 | |||
841 | // v8.6-A Matrix Multiply Intrinsics |
||
// Signature overloaded on two vector types. Overload 0 is the result type and
// also the type of the first operand; overload 1 is the type shared by the
// second and third operands. The intrinsic does not access memory.
// This is the same type signature as Neon_Dot_Intrinsic.
842 | class Neon_MatMul_Intrinsic |
||
843 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
844 | [LLVMMatchType<0>, llvm_anyvector_ty, |
||
845 | LLVMMatchType<1>], |
||
846 | [IntrNoMem]>; |
||
847 | def int_arm_neon_ummla : Neon_MatMul_Intrinsic; |
||
848 | def int_arm_neon_smmla : Neon_MatMul_Intrinsic; |
||
849 | def int_arm_neon_usmmla : Neon_MatMul_Intrinsic; |
||
850 | def int_arm_neon_usdot : Neon_Dot_Intrinsic; |
||
851 | |||
852 | // v8.6-A Bfloat Intrinsics |
||
853 | def int_arm_neon_vcvtfp2bf |
||
854 | : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>; |
||
855 | def int_arm_neon_vcvtbfp2bf |
||
856 | : DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>; |
||
857 | |||
858 | def int_arm_neon_bfdot : Neon_Dot_Intrinsic; |
||
859 | def int_arm_neon_bfmmla |
||
860 | : DefaultAttrsIntrinsic<[llvm_v4f32_ty], |
||
861 | [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], |
||
862 | [IntrNoMem]>; |
||
863 | |||
864 | class Neon_BF16FML_Intrinsic |
||
865 | : DefaultAttrsIntrinsic<[llvm_v4f32_ty], |
||
866 | [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], |
||
867 | [IntrNoMem]>; |
||
868 | def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic; |
||
869 | def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic; |
||
870 | |||
871 | def int_arm_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], |
||
872 | [IntrNoMem]>; |
||
873 | def int_arm_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], |
||
874 | [IntrNoMem]>; |
||
875 | |||
876 | def int_arm_mve_vctp8 : DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_i32_ty], |
||
877 | [IntrNoMem]>; |
||
878 | def int_arm_mve_vctp16 : DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_i32_ty], |
||
879 | [IntrNoMem]>; |
||
880 | def int_arm_mve_vctp32 : DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_i32_ty], |
||
881 | [IntrNoMem]>; |
||
882 | def int_arm_mve_vctp64 : DefaultAttrsIntrinsic<[llvm_v2i1_ty], [llvm_i32_ty], |
||
883 | [IntrNoMem]>; |
||
884 | |||
885 | // v8.3-A Floating-point complex add |
||
886 | def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic; |
||
887 | def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic; |
||
888 | |||
889 | // GNU eabi mcount |
||
890 | // TODO: Add applicable default attributes. |
||
891 | def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>; |
||
892 | |||
893 | def int_arm_mve_pred_i2v : DefaultAttrsIntrinsic< |
||
894 | [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>; |
||
895 | def int_arm_mve_pred_v2i : DefaultAttrsIntrinsic< |
||
896 | [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>; |
||
897 | def int_arm_mve_vreinterpretq : DefaultAttrsIntrinsic< |
||
898 | [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; |
||
899 | |||
900 | def int_arm_mve_min_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
901 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
902 | llvm_anyvector_ty, LLVMMatchType<0>], |
||
903 | [IntrNoMem]>; |
||
904 | def int_arm_mve_max_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
905 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
906 | llvm_anyvector_ty, LLVMMatchType<0>], |
||
907 | [IntrNoMem]>; |
||
908 | def int_arm_mve_abd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
909 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
910 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
911 | def int_arm_mve_add_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
912 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
913 | [IntrNoMem]>; |
||
914 | def int_arm_mve_and_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
915 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
916 | [IntrNoMem]>; |
||
917 | def int_arm_mve_bic_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
918 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
919 | [IntrNoMem]>; |
||
920 | def int_arm_mve_eor_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
921 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
922 | [IntrNoMem]>; |
||
923 | def int_arm_mve_orn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
924 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
925 | [IntrNoMem]>; |
||
926 | def int_arm_mve_orr_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
927 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
928 | [IntrNoMem]>; |
||
929 | def int_arm_mve_sub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
930 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
931 | [IntrNoMem]>; |
||
932 | def int_arm_mve_mul_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
933 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
934 | [IntrNoMem]>; |
||
935 | def int_arm_mve_mulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
936 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
937 | llvm_anyvector_ty, LLVMMatchType<0>], |
||
938 | [IntrNoMem]>; |
||
939 | def int_arm_mve_qdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
940 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
941 | [IntrNoMem]>; |
||
942 | def int_arm_mve_rmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
943 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
944 | llvm_anyvector_ty, LLVMMatchType<0>], |
||
945 | [IntrNoMem]>; |
||
946 | def int_arm_mve_qrdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
947 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], |
||
948 | [IntrNoMem]>; |
||
949 | def int_arm_mve_mull_int_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
950 | [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */, |
||
951 | llvm_i32_ty /* top */, llvm_anyvector_ty, LLVMMatchType<0>], |
||
952 | [IntrNoMem]>; |
||
953 | def int_arm_mve_mull_poly_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
954 | [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty, |
||
955 | LLVMMatchType<0>], |
||
956 | [IntrNoMem]>; |
||
957 | def int_arm_mve_qadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
958 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
959 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
960 | def int_arm_mve_hadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
961 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
962 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
963 | def int_arm_mve_rhadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
964 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
965 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
966 | def int_arm_mve_qsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
967 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
968 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
969 | def int_arm_mve_hsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
970 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */, |
||
971 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
972 | def int_arm_mve_vmina_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
973 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], |
||
974 | [IntrNoMem]>; |
||
975 | def int_arm_mve_vmaxa_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
976 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], |
||
977 | [IntrNoMem]>; |
||
978 | def int_arm_mve_vminnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
979 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], |
||
980 | [IntrNoMem]>; |
||
981 | def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
982 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], |
||
983 | [IntrNoMem]>; |
||
984 | |||
// Defines a pair of intrinsics from a single signature:
//   * the record named by the defm itself, taking `params`;
//   * a `_predicated` sibling taking `params` followed by one extra operand
//     of type `pred` (an overloaded vector type unless overridden).
// `rets`, `props` (default: [IntrNoMem]) and `sdprops` (default: none) are
// passed unchanged to both records.
985 | multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params, |
||
986 | LLVMType pred = llvm_anyvector_ty, |
||
987 | list<IntrinsicProperty> props = [IntrNoMem], |
||
988 | list<SDNodeProperty> sdprops = []> { |
||
989 | def "": DefaultAttrsIntrinsic<rets, params, props, "", sdprops>; |
||
990 | def _predicated: DefaultAttrsIntrinsic<rets, params # [pred], props, "", |
||
991 | sdprops>; |
||
992 | } |
||
// Defines a pair of intrinsics from a single signature:
//   * the record named by the defm itself, taking `params`;
//   * a `_predicated` sibling taking `params` followed by two extra operands:
//     one of type `pred`, then one with the type of the first result.
// NOTE(review): the trailing operand is presumably the value used for
// inactive lanes - confirm against the users of these intrinsics.
993 | multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params, |
||
994 | LLVMType pred = llvm_anyvector_ty, |
||
995 | list<IntrinsicProperty> props = [IntrNoMem]> { |
||
996 | def "": DefaultAttrsIntrinsic<rets, params, props>; |
||
// If the first result is the overloaded llvm_anyvector_ty, the trailing
// operand refers back to it with LLVMMatchType<0> instead of repeating
// llvm_anyvector_ty; otherwise the concrete result type is reused directly.
997 | def _predicated: DefaultAttrsIntrinsic<rets, params # [pred, |
||
998 | !if(!eq(rets[0], llvm_anyvector_ty), |
||
999 | LLVMMatchType<0>, rets[0])], props>; |
||
1000 | } |
||
1001 | |||
// Defines four MVEPredicated pairs, with suffixes v, av, nmv and nmav. Each
// suffix yields an unpredicated record and a `_predicated` sibling; for
// example, `defm int_arm_mve_min` yields int_arm_mve_minv and
// int_arm_mve_minv_predicated.
1002 | multiclass MVE_minmaxv { |
||
// v: i32 result; operands are an i32, an overloaded vector and an i32 flag
// that the inline comment marks as "unsigned".
1003 | defm v: MVEPredicated<[llvm_i32_ty], |
||
1004 | [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; |
||
// av: same as v but without the trailing flag operand.
1005 | defm av: MVEPredicated<[llvm_i32_ty], |
||
1006 | [llvm_i32_ty, llvm_anyvector_ty]>; |
||
// nmv / nmav: overloaded floating-point result; the first operand has the
// result's type and the second is an overloaded vector.
1007 | defm nmv: MVEPredicated<[llvm_anyfloat_ty], |
||
1008 | [LLVMMatchType<0>, llvm_anyvector_ty]>; |
||
1009 | defm nmav: MVEPredicated<[llvm_anyfloat_ty], |
||
1010 | [LLVMMatchType<0>, llvm_anyvector_ty]>; |
||
1011 | } |
||
1012 | defm int_arm_mve_min: MVE_minmaxv; |
||
1013 | defm int_arm_mve_max: MVE_minmaxv; |
||
1014 | |||
1015 | defm int_arm_mve_addv: MVEPredicated<[llvm_i32_ty], |
||
1016 | [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; |
||
1017 | defm int_arm_mve_addlv: MVEPredicated<[llvm_i64_ty], |
||
1018 | [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>; |
||
1019 | |||
1020 | // Intrinsic with a predicated and a non-predicated case. The predicated case |
||
1021 | // has two additional parameters: inactive (the value for inactive lanes, can |
||
1022 | // be undef) and predicate. |
||
1023 | multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags, |
||
1024 | list<LLVMType> params, LLVMType inactive, |
||
1025 | LLVMType predicate, |
||
1026 | list<IntrinsicProperty> props = [IntrNoMem]> { |
||
// Unpredicated operand order: flags..., params...
1027 | def "": DefaultAttrsIntrinsic<rets, flags # params, props>; |
||
// Predicated operand order: flags..., inactive, params..., predicate.
// The `inactive` operand is inserted between the flags and the regular
// parameters, not appended at the end; only `predicate` comes last.
1028 | def _predicated: DefaultAttrsIntrinsic< |
||
1029 | rets, flags # [inactive] # params # [predicate], props>; |
||
1030 | } |
||
1031 | |||
1032 | defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty], |
||
1033 | [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>; |
||
1034 | defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [], |
||
1035 | [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>; |
||
1036 | |||
1037 | defm int_arm_mve_vldr_gather_base: MVEPredicated< |
||
1038 | [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], |
||
1039 | llvm_anyvector_ty, [IntrReadMem], [SDNPMemOperand]>; |
||
1040 | defm int_arm_mve_vldr_gather_base_wb: MVEPredicated< |
||
1041 | [llvm_anyvector_ty, llvm_anyvector_ty], |
||
1042 | [LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem], |
||
1043 | [SDNPMemOperand]>; |
||
1044 | defm int_arm_mve_vstr_scatter_base: MVEPredicated< |
||
1045 | [], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty], |
||
1046 | llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>; |
||
1047 | defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated< |
||
1048 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], |
||
1049 | llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>; |
||
1050 | |||
1051 | // gather_offset takes three i32 parameters. The first is the size of |
||
1052 | // memory element loaded, in bits. The second is a left bit shift to |
||
1053 | // apply to each offset in the vector parameter (must be either 0, or |
||
1054 | // correspond to the element size of the destination vector type). The |
||
1055 | // last is 1 to indicate zero extension (if the load is widening), or |
||
1056 | // 0 for sign extension. |
||
1057 | // |
||
1058 | // scatter_offset has the first two of those parameters, but since it |
||
1059 | // narrows rather than widening, it doesn't have the last one. |
||
1060 | defm int_arm_mve_vldr_gather_offset: MVEPredicated< |
||
1061 | [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty, |
||
1062 | llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem], |
||
1063 | [SDNPMemOperand]>; |
||
1064 | defm int_arm_mve_vstr_scatter_offset: MVEPredicated< |
||
1065 | [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty, |
||
1066 | llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem], |
||
1067 | [SDNPMemOperand]>; |
||
1068 | |||
1069 | def int_arm_mve_shl_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1070 | [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], |
||
1071 | [IntrNoMem]>; |
||
1072 | def int_arm_mve_shr_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1073 | [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag |
||
1074 | llvm_anyvector_ty, LLVMMatchType<0>], |
||
1075 | [IntrNoMem]>; |
||
1076 | |||
1077 | defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty], |
||
1078 | [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>; |
||
1079 | defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty], |
||
1080 | [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>; |
||
1081 | defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty], |
||
1082 | [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>; |
||
1083 | defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty], |
||
1084 | [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/, |
||
1085 | llvm_i32_ty /*top-half*/]>; |
||
1086 | |||
1087 | defm int_arm_mve_vsli: MVEPredicated< |
||
1088 | [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>; |
||
1089 | defm int_arm_mve_vsri: MVEPredicated< |
||
1090 | [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>; |
||
1091 | |||
1092 | defm int_arm_mve_vshrn: MVEPredicated< |
||
1093 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, |
||
1094 | llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, |
||
1095 | llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/, |
||
1096 | llvm_i32_ty /*top-half*/]>; |
||
1097 | |||
1098 | defm int_arm_mve_vshl_scalar: MVEPredicated< |
||
1099 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, |
||
1100 | llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>; |
||
1101 | defm int_arm_mve_vshl_vector: MVEPredicatedM< |
||
1102 | [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/, |
||
1103 | llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>; |
||
1104 | |||
1105 | // MVE scalar shifts. |
||
// Signature for one scalar shift intrinsic. The result types are `value`;
// the operands are the same `value` types, then one i32, then any types
// listed in `saturate` (none by default). The intrinsic does not access memory.
// NOTE(review): the i32 after `value` is presumably the shift count - confirm.
1106 | class ARM_MVE_qrshift_single<list<LLVMType> value, |
||
1107 | list<LLVMType> saturate = []> : |
||
1108 | DefaultAttrsIntrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>; |
||
// Defines two records per instantiation: one with no suffix whose value is a
// single i32, and one with an `l` suffix whose value is a pair of i32s.
// `saturate` is forwarded only to the `l` record.
// NOTE(review): the i32 pair presumably carries the two halves of the 64-bit
// value mentioned below - confirm which half comes first.
1109 | multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> { |
||
1110 | // Most of these shifts come in 32- and 64-bit versions. But only |
||
1111 | // the 64-bit ones have the extra saturation argument (if any). |
||
1112 | def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>; |
||
1113 | def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>; |
||
1114 | } |
||
1115 | defm int_arm_mve_urshr: ARM_MVE_qrshift; |
||
1116 | defm int_arm_mve_uqshl: ARM_MVE_qrshift; |
||
1117 | defm int_arm_mve_srshr: ARM_MVE_qrshift; |
||
1118 | defm int_arm_mve_sqshl: ARM_MVE_qrshift; |
||
1119 | defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>; |
||
1120 | defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>; |
||
1121 | // LSLL and ASRL only have 64-bit versions, not 32. |
||
1122 | def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; |
||
1123 | def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; |
||
1124 | |||
1125 | def int_arm_mve_vabd: DefaultAttrsIntrinsic< |
||
1126 | [llvm_anyvector_ty], |
||
1127 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1128 | [IntrNoMem]>; |
||
1129 | def int_arm_mve_vadc: DefaultAttrsIntrinsic< |
||
1130 | [llvm_anyvector_ty, llvm_i32_ty], |
||
1131 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; |
||
1132 | def int_arm_mve_vsbc: DefaultAttrsIntrinsic< |
||
1133 | [llvm_anyvector_ty, llvm_i32_ty], |
||
1134 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; |
||
1135 | def int_arm_mve_vadc_predicated: DefaultAttrsIntrinsic< |
||
1136 | [llvm_anyvector_ty, llvm_i32_ty], |
||
1137 | [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, |
||
1138 | llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; |
||
1139 | def int_arm_mve_vsbc_predicated: DefaultAttrsIntrinsic< |
||
1140 | [llvm_anyvector_ty, llvm_i32_ty], |
||
1141 | [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, |
||
1142 | llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; |
||
1143 | def int_arm_mve_vshlc: DefaultAttrsIntrinsic< |
||
1144 | [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty], |
||
1145 | [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */, |
||
1146 | llvm_i32_ty /* shift count */], [IntrNoMem]>; |
||
1147 | def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic< |
||
1148 | [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty], |
||
1149 | [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */, |
||
1150 | llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>; |
||
1151 | def int_arm_mve_vmulh: DefaultAttrsIntrinsic< |
||
1152 | [llvm_anyvector_ty], |
||
1153 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1154 | [IntrNoMem]>; |
||
1155 | def int_arm_mve_vqdmulh: DefaultAttrsIntrinsic< |
||
1156 | [llvm_anyvector_ty], |
||
1157 | [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; |
||
1158 | def int_arm_mve_vhadd: DefaultAttrsIntrinsic< |
||
1159 | [llvm_anyvector_ty], |
||
1160 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1161 | [IntrNoMem]>; |
||
1162 | def int_arm_mve_vrhadd: DefaultAttrsIntrinsic< |
||
1163 | [llvm_anyvector_ty], |
||
1164 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1165 | [IntrNoMem]>; |
||
1166 | def int_arm_mve_vhsub: DefaultAttrsIntrinsic< |
||
1167 | [llvm_anyvector_ty], |
||
1168 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1169 | [IntrNoMem]>; |
||
1170 | def int_arm_mve_vrmulh: DefaultAttrsIntrinsic< |
||
1171 | [llvm_anyvector_ty], |
||
1172 | [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */], |
||
1173 | [IntrNoMem]>; |
||
1174 | def int_arm_mve_vqrdmulh: DefaultAttrsIntrinsic< |
||
1175 | [llvm_anyvector_ty], |
||
1176 | [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; |
||
1177 | def int_arm_mve_vmull: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes */ |
||
1178 | [llvm_anyvector_ty] /* result: overloaded vector type 0 */, |
||
1179 | [llvm_anyvector_ty /* first input: overloaded vector type 1, chosen independently of the result type */, LLVMMatchType<1> /* second input: same type as the first input */, llvm_i32_ty /* unsigned */, |
||
1180 | llvm_i32_ty /* top */], [IntrNoMem]>; |
||
1181 | def int_arm_mve_vmull_poly: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes */ |
||
1182 | [llvm_anyvector_ty] /* result: overloaded vector type 0 */, |
||
1183 | [llvm_anyvector_ty /* first input: overloaded vector type 1 */, LLVMMatchType<1> /* second input: same type as the first input */, llvm_i32_ty /* NOTE(review): unlabeled here; presumably the 'top' selector as in int_arm_mve_vmull - confirm */], [IntrNoMem]>; |
||
1184 | |||
1185 | // The first two parameters are compile-time constants: |
||
1186 | // * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq) |
||
1187 | // instruction. Note: the flag is inverted to match the corresponding |
||
1188 | // bit in the instruction encoding |
||
1189 | // * Rotation angle: 0 means 90 deg, 1 means 270 deg |
||
1190 | defm int_arm_mve_vcaddq : MVEMXPredicated< |
||
1191 | [llvm_anyvector_ty], |
||
1192 | [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>], |
||
1193 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1194 | |||
1195 | // The first operand of the following two intrinsics is the rotation angle |
||
1196 | // (must be a compile-time constant): |
||
1197 | // 0 - 0 deg |
||
1198 | // 1 - 90 deg |
||
1199 | // 2 - 180 deg |
||
1200 | // 3 - 270 deg |
||
1201 | defm int_arm_mve_vcmulq : MVEMXPredicated< |
||
1202 | [llvm_anyvector_ty], |
||
1203 | [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>], |
||
1204 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1205 | |||
1206 | defm int_arm_mve_vcmlaq : MVEPredicated< |
||
1207 | [llvm_anyvector_ty], |
||
1208 | [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], |
||
1209 | llvm_anyvector_ty>; |
||
1210 | |||
1211 | def int_arm_mve_vld2q: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes */ |
||
1212 | [llvm_anyvector_ty, LLVMMatchType<0>] /* two result vectors sharing one overloaded type */, [llvm_anyptr_ty] /* single argument: a pointer of any pointer type */, |
||
1213 | [IntrReadMem, IntrArgMemOnly] /* reads memory, and only through its pointer argument */>; |
||
1214 | def int_arm_mve_vld4q: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes */ |
||
1215 | [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>] /* four result vectors sharing one overloaded type */, |
||
1216 | [llvm_anyptr_ty] /* single argument: a pointer of any pointer type */, [IntrReadMem, IntrArgMemOnly] /* reads memory, and only through its pointer argument */>; |
||
1217 | |||
1218 | def int_arm_mve_vst2q: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes, name, SelectionDAG properties */ |
||
1219 | [] /* no results */, [llvm_anyptr_ty /* overloaded type 0: pointer */, llvm_anyvector_ty /* overloaded type 1: first vector */, LLVMMatchType<1> /* second vector, same type as the first */, llvm_i32_ty /* NOTE(review): unlabeled i32; presumably selects which part of the two-vector store to perform - confirm */], |
||
1220 | [IntrWriteMem, IntrArgMemOnly] /* writes memory, and only through its pointer argument */, "", [SDNPMemOperand]>; |
||
1221 | def int_arm_mve_vst4q: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes, name, SelectionDAG properties */ |
||
1222 | [] /* no results */, |
||
1223 | [llvm_anyptr_ty /* overloaded type 0: pointer */, llvm_anyvector_ty /* overloaded type 1: first vector */, LLVMMatchType<1>, LLVMMatchType<1>, |
||
1224 | LLVMMatchType<1> /* four vectors in total, all of the same type */, llvm_i32_ty /* NOTE(review): unlabeled i32; presumably selects which part of the four-vector store to perform - confirm */], |
||
1225 | [IntrWriteMem, IntrArgMemOnly] /* writes memory, and only through its pointer argument */, "", [SDNPMemOperand]>; |
||
1226 | |||
1227 | // MVE vector absolute difference and accumulate across vector |
||
1228 | // The first operand is an 'unsigned' flag. The remaining operands are: |
||
1229 | // * accumulator |
||
1230 | // * first vector operand |
||
1231 | // * second vector operand |
||
1232 | // * mask (only in predicated versions) |
||
1233 | defm int_arm_mve_vabav: MVEPredicated< |
||
1234 | [llvm_i32_ty], |
||
1235 | [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>; |
||
1236 | |||
1237 | // The following 3 intrinsics are MVE vector reductions with two vector |
||
1238 | // operands. |
||
1239 | // The first 3 operands are boolean flags (must be compile-time constants): |
||
1240 | // * unsigned - the instruction operates on vectors of unsigned values and |
||
1241 | // unsigned scalars |
||
1242 | // * subtract - the instruction performs subtraction after multiplication of |
||
1243 | // lane pairs (e.g., vmlsdav vs vmladav) |
||
1244 | // * exchange - the instruction exchanges successive even and odd lanes of |
||
1245 | // the first operands before multiplication of lane pairs |
||
1246 | // (e.g., vmladavx vs vmladav) |
||
1247 | // The remaining operands are: |
||
1248 | // * accumulator |
||
1249 | // * first vector operand |
||
1250 | // * second vector operand |
||
1251 | // * mask (only in predicated versions) |
||
1252 | |||
1253 | // Version with 32-bit result, vml{a,s}dav[a][x] |
||
1254 | defm int_arm_mve_vmldava: MVEPredicated< |
||
1255 | [llvm_i32_ty], |
||
1256 | [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
1257 | llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], |
||
1258 | llvm_anyvector_ty>; |
||
1259 | |||
1260 | // Version with 64-bit result, vml{a,s}ldav[a][x] |
||
1261 | defm int_arm_mve_vmlldava: MVEPredicated< |
||
1262 | [llvm_i32_ty, llvm_i32_ty], |
||
1263 | [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
1264 | llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], |
||
1265 | llvm_anyvector_ty>; |
||
1266 | |||
1267 | // Version with 72-bit rounded result, vrml{a,s}ldavh[a][x] |
||
1268 | defm int_arm_mve_vrmlldavha: MVEPredicated< |
||
1269 | [llvm_i32_ty, llvm_i32_ty], |
||
1270 | [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, |
||
1271 | llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], |
||
1272 | llvm_anyvector_ty>; |
||
1273 | |||
1274 | defm int_arm_mve_vidup: MVEMXPredicated< |
||
1275 | [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], |
||
1276 | [llvm_i32_ty /* base */, llvm_i32_ty /* step */], |
||
1277 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1278 | defm int_arm_mve_vddup: MVEMXPredicated< |
||
1279 | [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], |
||
1280 | [llvm_i32_ty /* base */, llvm_i32_ty /* step */], |
||
1281 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1282 | defm int_arm_mve_viwdup: MVEMXPredicated< |
||
1283 | [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], |
||
1284 | [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */], |
||
1285 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1286 | defm int_arm_mve_vdwdup: MVEMXPredicated< |
||
1287 | [llvm_anyvector_ty /* output */, llvm_i32_ty /* written-back base */], [], |
||
1288 | [llvm_i32_ty /* base */, llvm_i32_ty /* limit */, llvm_i32_ty /* step */], |
||
1289 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1290 | |||
1291 | // Flags: |
||
1292 | // * unsigned |
||
1293 | defm int_arm_mve_vcvt_fix: MVEMXPredicated< |
||
1294 | [llvm_anyvector_ty /* output */], [llvm_i32_ty], |
||
1295 | [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */], |
||
1296 | LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1297 | |||
1298 | def int_arm_mve_vcvt_fp_int_predicated: DefaultAttrsIntrinsic< |
||
1299 | [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, |
||
1300 | llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */], |
||
1301 | [IntrNoMem]>; |
||
1302 | |||
1303 | foreach suffix = ["a","n","p","m"] in { /* one defm per suffix, with name prefixes int_arm_mve_vcvta, int_arm_mve_vcvtn, int_arm_mve_vcvtp and int_arm_mve_vcvtm */ |
||
1304 | defm "int_arm_mve_vcvt"#suffix: MVEMXPredicated< /* MVEMXPredicated is defined outside this section; the records it emits are not visible here */ |
||
1305 | [llvm_anyvector_ty /* output */], [llvm_i32_ty /* unsigned */], |
||
1306 | [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1307 | } |
||
1308 | |||
1309 | def int_arm_mve_vrintn: DefaultAttrsIntrinsic< |
||
1310 | [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; |
||
1311 | def int_arm_mve_vcls: DefaultAttrsIntrinsic< |
||
1312 | [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; |
||
1313 | |||
1314 | defm int_arm_mve_vbrsr: MVEMXPredicated< |
||
1315 | [llvm_anyvector_ty], [], |
||
1316 | [LLVMMatchType<0>, llvm_i32_ty], LLVMMatchType<0>, llvm_anyvector_ty>; |
||
1317 | |||
1318 | def int_arm_mve_vqdmull: DefaultAttrsIntrinsic< /* template args below: results, arguments, attributes */ |
||
1319 | [llvm_anyvector_ty] /* result: overloaded vector type 0 */, |
||
1320 | [llvm_anyvector_ty /* first input: overloaded vector type 1, chosen independently of the result type */, LLVMMatchType<1> /* second input: same type as the first input */, llvm_i32_ty /* NOTE(review): unlabeled i32 flag; meaning not stated in this section - confirm */], |
||
1321 | [IntrNoMem]>; |
||
1322 | def int_arm_mve_vqdmull_predicated: DefaultAttrsIntrinsic< /* same leading operands as int_arm_mve_vqdmull, plus two trailing operands */ |
||
1323 | [llvm_anyvector_ty] /* result: overloaded vector type 0 */, |
||
1324 | [llvm_anyvector_ty /* first input: overloaded vector type 1 */, LLVMMatchType<1> /* second input: same type as the first input */, llvm_i32_ty, llvm_anyvector_ty /* overloaded vector type 2; presumably the predicate mask - confirm */, |
||
1325 | LLVMMatchType<0> /* same type as the result; presumably the 'inactive' value, as labeled in int_arm_mve_vcvt_fp_int_predicated - confirm */], |
||
1326 | [IntrNoMem]>; |
||
1327 | |||
1328 | class MVESimpleUnaryPredicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty] /* result: overloaded vector type 0 */, /* shared signature for the *_predicated unary intrinsics defined right after this class */ |
||
1329 | [LLVMMatchType<0> /* same type as the result */, llvm_anyvector_ty /* overloaded vector type 1 */, LLVMMatchType<0> /* same type as the result */] /* NOTE(review): presumably (input, predicate, inactive), mirroring the labeled operand order of int_arm_mve_vcvt_fp_int_predicated - confirm */, [IntrNoMem]>; |
||
1330 | |||
1331 | def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated; |
||
1332 | def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated; |
||
1333 | def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated; |
||
1334 | def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated; |
||
1335 | def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated; |
||
1336 | def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated; |
||
1337 | def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated; |
||
1338 | def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated; |
||
1339 | def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated; |
||
1340 | def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated; |
||
1341 | def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated; |
||
1342 | def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated; |
||
1343 | def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated; |
||
1344 | |||
1345 | def int_arm_mve_vrev_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1346 | [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */, |
||
1347 | llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; |
||
1348 | |||
1349 | def int_arm_mve_vmovl_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1350 | [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */, |
||
1351 | llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>; |
||
1352 | def int_arm_mve_vmovn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1353 | [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */, |
||
1354 | llvm_anyvector_ty /* predicate */], [IntrNoMem]>; |
||
1355 | |||
1356 | def int_arm_mve_vqmovn: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1357 | [LLVMMatchType<0>, llvm_anyvector_ty, |
||
1358 | llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */, |
||
1359 | llvm_i32_ty /* top half */], [IntrNoMem]>; |
||
1360 | def int_arm_mve_vqmovn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1361 | [LLVMMatchType<0>, llvm_anyvector_ty, |
||
1362 | llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */, |
||
1363 | llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>; |
||
1364 | |||
1365 | def int_arm_mve_fma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1366 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, |
||
1367 | LLVMMatchType<0> /* addend */, llvm_anyvector_ty /* pred */], [IntrNoMem]>; |
||
1368 | def int_arm_mve_vmla_n_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1369 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, |
||
1370 | llvm_i32_ty /* mult op #2 (scalar) */, llvm_anyvector_ty /* pred */], |
||
1371 | [IntrNoMem]>; |
||
1372 | def int_arm_mve_vmlas_n_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty], |
||
1373 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, |
||
1374 | llvm_i32_ty /* addend (scalar) */, llvm_anyvector_ty /* pred */], |
||
1375 | [IntrNoMem]>; |
||
1376 | |||
1377 | defm int_arm_mve_vqdmlah: MVEPredicated<[llvm_anyvector_ty], |
||
1378 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, |
||
1379 | llvm_i32_ty /* mult op #2 (scalar) */]>; |
||
1380 | defm int_arm_mve_vqrdmlah: MVEPredicated<[llvm_anyvector_ty], |
||
1381 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */, |
||
1382 | llvm_i32_ty /* mult op #2 (scalar) */]>; |
||
1383 | defm int_arm_mve_vqdmlash: MVEPredicated<[llvm_anyvector_ty], |
||
1384 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, |
||
1385 | llvm_i32_ty /* addend (scalar) */]>; |
||
1386 | defm int_arm_mve_vqrdmlash: MVEPredicated<[llvm_anyvector_ty], |
||
1387 | [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */, |
||
1388 | llvm_i32_ty /* addend (scalar) */]>; |
||
1389 | |||
1390 | defm int_arm_mve_vqdmlad: MVEPredicated<[llvm_anyvector_ty], |
||
1391 | [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, |
||
1392 | llvm_i32_ty /* exchange */, llvm_i32_ty /* round */, |
||
1393 | llvm_i32_ty /* subtract */]>; |
||
1394 | |||
1395 | // CDE (Custom Datapath Extension) |
||
1396 | |||
1397 | multiclass CDEGPRIntrinsics<list<LLVMType> args> { /* emits four i32-based intrinsics per defm; 'args' are the variant-specific operand types spliced in just before the trailing imm */ |
||
1398 | def "" : DefaultAttrsIntrinsic< /* empty suffix: the record is named by the defm prefix alone */ |
||
1399 | [llvm_i32_ty], |
||
1400 | !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), |
||
1401 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; /* coproc (index 0) and imm (last argument, index size(args)+1) must be immediates */ |
||
1402 | def a : DefaultAttrsIntrinsic< /* suffix 'a': takes one extra i32 'acc' operand after coproc */ |
||
1403 | [llvm_i32_ty], |
||
1404 | !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */], args, |
||
1405 | [llvm_i32_ty /* imm */]), |
||
1406 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; /* the acc operand shifts imm to index size(args)+2 */ |
||
1407 | |||
1408 | def d: DefaultAttrsIntrinsic< /* suffix 'd': same arguments as the unsuffixed form, but returns two i32 results (lo, hi) */ |
||
1409 | [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], |
||
1410 | !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), |
||
1411 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; /* imm is the last argument, index size(args)+1 */ |
||
1412 | def da: DefaultAttrsIntrinsic< /* suffix 'da': two i32 results and a two-part accumulator (acc_lo, acc_hi) after coproc */ |
||
1413 | [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], |
||
1414 | !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc_lo */, |
||
1415 | llvm_i32_ty /* acc_hi */], args, [llvm_i32_ty /* imm */]), |
||
1416 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 3)>>]>; /* the two accumulator operands shift imm to index size(args)+3 */ |
||
1417 | } |
||
1418 | |||
1419 | defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>; /* no operands between the leading operands and imm */ |
||
1420 | defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>; /* one extra i32 operand */ |
||
1421 | defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>; /* two extra i32 operands */ |
||
1422 | |||
1423 | multiclass CDEVCXIntrinsics<list<LLVMType> args> { /* emits two intrinsics per defm whose result is an overloaded floating-point type; 'args' are spliced in just before the trailing imm */ |
||
1424 | def "" : DefaultAttrsIntrinsic< /* empty suffix: the record is named by the defm prefix alone */ |
||
1425 | [llvm_anyfloat_ty], |
||
1426 | !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), |
||
1427 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; /* coproc (index 0) and imm (last argument, index size(args)+1) must be immediates */ |
||
1428 | def a : DefaultAttrsIntrinsic< /* suffix 'a': adds an accumulator operand of the result type after coproc */ |
||
1429 | [llvm_anyfloat_ty], |
||
1430 | !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], |
||
1431 | args, [llvm_i32_ty /* imm */]), |
||
1432 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; /* the acc operand shifts imm to index size(args)+2 */ |
||
1433 | } |
||
1434 | |||
1435 | defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>; /* no operands between the leading operands and imm */ |
||
1436 | defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>; /* one extra operand of the result type */ |
||
1437 | defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>; /* two extra operands of the result type */ |
||
1438 | |||
1439 | multiclass CDEVCXVecIntrinsics<list<LLVMType> args> { /* emits four vector intrinsics per defm: plain, accumulating, and a predicated form of each; 'args' are spliced in just before imm */ |
||
1440 | def "" : DefaultAttrsIntrinsic< /* empty suffix: the record is named by the defm prefix alone; result type is fixed to v16i8 */ |
||
1441 | [llvm_v16i8_ty], |
||
1442 | !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), |
||
1443 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>; /* coproc (index 0) and imm (last argument, index size(args)+1) must be immediates */ |
||
1444 | def a : DefaultAttrsIntrinsic< /* suffix 'a': adds a v16i8 accumulator operand after coproc */ |
||
1445 | [llvm_v16i8_ty], |
||
1446 | !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */], |
||
1447 | args, [llvm_i32_ty /* imm */]), |
||
1448 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; /* the acc operand shifts imm to index size(args)+2 */ |
||
1449 | |||
1450 | def _predicated : DefaultAttrsIntrinsic< /* predicated plain form: result is an overloaded vector type rather than the fixed v16i8 */ |
||
1451 | [llvm_anyvector_ty], |
||
1452 | !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */], |
||
1453 | args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), |
||
1454 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; /* imm sits at index size(args)+2, after coproc and inactive; the mask follows it as the last argument */ |
||
1455 | def a_predicated : DefaultAttrsIntrinsic< /* predicated accumulating form: same layout as _predicated, with 'acc' in place of 'inactive' */ |
||
1456 | [llvm_anyvector_ty], |
||
1457 | !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], |
||
1458 | args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), |
||
1459 | [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>; /* imm sits at index size(args)+2; the mask follows it as the last argument */ |
||
1460 | } |
||
1461 | |||
1462 | defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>; /* no operands between the leading operands and imm */ |
||
1463 | defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>; /* one extra v16i8 operand */ |
||
1464 | defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty, llvm_v16i8_ty]>; /* two extra v16i8 operands */ |
||
1465 | |||
1466 | } // end TargetPrefix |