Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines all of the ARM-specific intrinsics.
10
//
11
//===----------------------------------------------------------------------===//
12
 
13
 
14
//===----------------------------------------------------------------------===//
15
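// TLS (legacy heading: no TLS intrinsic is defined in this section; it holds
// int_arm_space and the 16-bit multiplication intrinsics)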
16
 
17
let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
18
 
19
// A space-consuming intrinsic primarily for testing ARMConstantIslands. The
20
// first argument is the number of bytes this "instruction" takes up, the second
21
// and return value are essentially chains, used to force ordering during ISel.
22
// Declared as i32 (i32, i32) on the plain Intrinsic class; ImmArg<ArgIndex<0>>
// marks the byte-count operand as an immediate.
def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
23
 
24
// 16-bit multiplications
// All six definitions share the shape i32 (i32, i32), carry IntrNoMem, and are
// tagged with the ClangBuiltin name __builtin_arm_<mnemonic>.
25
def int_arm_smulbb : ClangBuiltin<"__builtin_arm_smulbb">,
26
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
27
                          [IntrNoMem]>;
28
def int_arm_smulbt : ClangBuiltin<"__builtin_arm_smulbt">,
29
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
30
                          [IntrNoMem]>;
31
def int_arm_smultb : ClangBuiltin<"__builtin_arm_smultb">,
32
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
33
                          [IntrNoMem]>;
34
def int_arm_smultt : ClangBuiltin<"__builtin_arm_smultt">,
35
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
36
                          [IntrNoMem]>;
37
def int_arm_smulwb : ClangBuiltin<"__builtin_arm_smulwb">,
38
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
39
                          [IntrNoMem]>;
40
def int_arm_smulwt : ClangBuiltin<"__builtin_arm_smulwt">,
41
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
42
                          [IntrNoMem]>;
43
 
44
//===----------------------------------------------------------------------===//
45
// Saturating Arithmetic
46
 
47
// qadd, qsub, ssat and usat all have the shape i32 (i32, i32) and carry
// IntrNoMem; qadd alone is additionally marked Commutative.
def int_arm_qadd : ClangBuiltin<"__builtin_arm_qadd">,
48
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
49
                          [Commutative, IntrNoMem]>;
50
def int_arm_qsub : ClangBuiltin<"__builtin_arm_qsub">,
51
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
52
                          [IntrNoMem]>;
53
def int_arm_ssat : ClangBuiltin<"__builtin_arm_ssat">,
54
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
55
                          [IntrNoMem]>;
56
def int_arm_usat : ClangBuiltin<"__builtin_arm_usat">,
57
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
58
                          [IntrNoMem]>;
59
 
60
// Accumulating multiplications
// Each definition takes three i32 operands, returns i32 and carries IntrNoMem.
61
def int_arm_smlabb : ClangBuiltin<"__builtin_arm_smlabb">,
62
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
63
                           llvm_i32_ty],
64
                          [IntrNoMem]>;
65
def int_arm_smlabt : ClangBuiltin<"__builtin_arm_smlabt">,
66
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
67
                           llvm_i32_ty],
68
                          [IntrNoMem]>;
69
def int_arm_smlatb : ClangBuiltin<"__builtin_arm_smlatb">,
70
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
71
                           llvm_i32_ty],
72
                          [IntrNoMem]>;
73
def int_arm_smlatt : ClangBuiltin<"__builtin_arm_smlatt">,
74
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
75
                           llvm_i32_ty],
76
                          [IntrNoMem]>;
77
def int_arm_smlawb : ClangBuiltin<"__builtin_arm_smlawb">,
78
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
79
                           llvm_i32_ty],
80
                          [IntrNoMem]>;
81
def int_arm_smlawt : ClangBuiltin<"__builtin_arm_smlawt">,
82
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
83
                           llvm_i32_ty],
84
                          [IntrNoMem]>;
85
 
86
// Parallel 16-bit saturation
// Both definitions have the shape i32 (i32, i32) and carry IntrNoMem.
87
def int_arm_ssat16 : ClangBuiltin<"__builtin_arm_ssat16">,
88
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
89
                          [IntrNoMem]>;
90
def int_arm_usat16 : ClangBuiltin<"__builtin_arm_usat16">,
91
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
92
                          [IntrNoMem]>;
93
 
94
// Packing and unpacking
// sxtab16/uxtab16 take two i32 operands; sxtb16/uxtb16 take a single i32.
// All four return i32 and carry IntrNoMem.
95
def int_arm_sxtab16 : ClangBuiltin<"__builtin_arm_sxtab16">,
96
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
97
                          [IntrNoMem]>;
98
def int_arm_sxtb16 : ClangBuiltin<"__builtin_arm_sxtb16">,
99
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
100
def int_arm_uxtab16 : ClangBuiltin<"__builtin_arm_uxtab16">,
101
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
102
                          [IntrNoMem]>;
103
def int_arm_uxtb16 : ClangBuiltin<"__builtin_arm_uxtb16">,
104
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
105
 
106
// Parallel selection, reads the GE flags.
// Shape is i32 (i32, i32). Declared IntrReadMem rather than IntrNoMem.
// NOTE(review): presumably IntrReadMem is how the GE-flag read is modelled so
// the call stays ordered after the GE-writing intrinsics - confirm.
107
def int_arm_sel : ClangBuiltin<"__builtin_arm_sel">,
108
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
109
                          [IntrReadMem]>;
110
 
111
// Parallel 8-bit addition and subtraction
// Every definition has the shape i32 (i32, i32). The ones annotated
// "Writes to the GE bits." (sadd8, ssub8, uadd8, usub8) are declared with an
// empty property list; all others carry IntrNoMem.
112
def int_arm_qadd8  : ClangBuiltin<"__builtin_arm_qadd8">,
113
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
114
                          [IntrNoMem]>;
115
def int_arm_qsub8  : ClangBuiltin<"__builtin_arm_qsub8">,
116
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
117
                          [IntrNoMem]>;
118
// Writes to the GE bits.
119
def int_arm_sadd8  : ClangBuiltin<"__builtin_arm_sadd8">,
120
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
121
def int_arm_shadd8  : ClangBuiltin<"__builtin_arm_shadd8">,
122
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
123
                          [IntrNoMem]>;
124
def int_arm_shsub8  : ClangBuiltin<"__builtin_arm_shsub8">,
125
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
126
                          [IntrNoMem]>;
127
// Writes to the GE bits.
128
def int_arm_ssub8  : ClangBuiltin<"__builtin_arm_ssub8">,
129
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
130
// Writes to the GE bits.
131
def int_arm_uadd8  : ClangBuiltin<"__builtin_arm_uadd8">,
132
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
133
def int_arm_uhadd8  : ClangBuiltin<"__builtin_arm_uhadd8">,
134
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
135
                          [IntrNoMem]>;
136
def int_arm_uhsub8  : ClangBuiltin<"__builtin_arm_uhsub8">,
137
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
138
                          [IntrNoMem]>;
139
def int_arm_uqadd8  : ClangBuiltin<"__builtin_arm_uqadd8">,
140
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
141
                          [IntrNoMem]>;
142
def int_arm_uqsub8  : ClangBuiltin<"__builtin_arm_uqsub8">,
143
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
144
                          [IntrNoMem]>;
145
// Writes to the GE bits.
146
def int_arm_usub8  : ClangBuiltin<"__builtin_arm_usub8">,
147
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
148
 
149
// Sum of 8-bit absolute differences
// usad8 is i32 (i32, i32); usada8 takes a third i32 operand. Both carry
// IntrNoMem.
150
def int_arm_usad8  : ClangBuiltin<"__builtin_arm_usad8">,
151
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
152
                          [IntrNoMem]>;
153
def int_arm_usada8  : ClangBuiltin<"__builtin_arm_usada8">,
154
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
155
                           llvm_i32_ty],
156
                          [IntrNoMem]>;
157
 
158
// Parallel 16-bit addition and subtraction
// Every definition has the shape i32 (i32, i32). The ones annotated
// "Writes to the GE bits." (sadd16, sasx, ssax, ssub16, uadd16, uasx, usax,
// usub16) are declared with an empty property list; all others carry
// IntrNoMem.
159
def int_arm_qadd16  : ClangBuiltin<"__builtin_arm_qadd16">,
160
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
161
                          [IntrNoMem]>;
162
def int_arm_qasx  : ClangBuiltin<"__builtin_arm_qasx">,
163
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
164
                          [IntrNoMem]>;
165
def int_arm_qsax  : ClangBuiltin<"__builtin_arm_qsax">,
166
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
167
                          [IntrNoMem]>;
168
def int_arm_qsub16  : ClangBuiltin<"__builtin_arm_qsub16">,
169
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
170
                          [IntrNoMem]>;
171
// Writes to the GE bits.
172
def int_arm_sadd16  : ClangBuiltin<"__builtin_arm_sadd16">,
173
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
174
// Writes to the GE bits.
175
def int_arm_sasx  : ClangBuiltin<"__builtin_arm_sasx">,
176
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
177
def int_arm_shadd16  : ClangBuiltin<"__builtin_arm_shadd16">,
178
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
179
                          [IntrNoMem]>;
180
def int_arm_shasx  : ClangBuiltin<"__builtin_arm_shasx">,
181
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
182
                          [IntrNoMem]>;
183
def int_arm_shsax  : ClangBuiltin<"__builtin_arm_shsax">,
184
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
185
                          [IntrNoMem]>;
186
def int_arm_shsub16  : ClangBuiltin<"__builtin_arm_shsub16">,
187
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
188
                          [IntrNoMem]>;
189
// Writes to the GE bits.
190
def int_arm_ssax  : ClangBuiltin<"__builtin_arm_ssax">,
191
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
192
// Writes to the GE bits.
193
def int_arm_ssub16  : ClangBuiltin<"__builtin_arm_ssub16">,
194
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
195
// Writes to the GE bits.
196
def int_arm_uadd16  : ClangBuiltin<"__builtin_arm_uadd16">,
197
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
198
// Writes to the GE bits.
199
def int_arm_uasx  : ClangBuiltin<"__builtin_arm_uasx">,
200
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
201
def int_arm_uhadd16  : ClangBuiltin<"__builtin_arm_uhadd16">,
202
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
203
                          [IntrNoMem]>;
204
def int_arm_uhasx  : ClangBuiltin<"__builtin_arm_uhasx">,
205
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
206
                          [IntrNoMem]>;
207
def int_arm_uhsax  : ClangBuiltin<"__builtin_arm_uhsax">,
208
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
209
                          [IntrNoMem]>;
210
def int_arm_uhsub16  : ClangBuiltin<"__builtin_arm_uhsub16">,
211
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
212
                          [IntrNoMem]>;
213
def int_arm_uqadd16  : ClangBuiltin<"__builtin_arm_uqadd16">,
214
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
215
                          [IntrNoMem]>;
216
def int_arm_uqasx  : ClangBuiltin<"__builtin_arm_uqasx">,
217
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
218
                          [IntrNoMem]>;
219
def int_arm_uqsax  : ClangBuiltin<"__builtin_arm_uqsax">,
220
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
221
                          [IntrNoMem]>;
222
def int_arm_uqsub16  : ClangBuiltin<"__builtin_arm_uqsub16">,
223
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
224
                          [IntrNoMem]>;
225
// Writes to the GE bits.
226
def int_arm_usax  : ClangBuiltin<"__builtin_arm_usax">,
227
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
228
// Writes to the GE bits.
229
def int_arm_usub16  : ClangBuiltin<"__builtin_arm_usub16">,
230
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
231
 
232
// Parallel 16-bit multiplication
// Three signature shapes appear below, all carrying IntrNoMem:
//   smlad/smladx/smlsd/smlsdx     : i32 (i32, i32, i32)
//   smlald/smlaldx/smlsld/smlsldx : i64 (i32, i32, i64)
//   smuad/smuadx/smusd/smusdx     : i32 (i32, i32)
233
def int_arm_smlad : ClangBuiltin<"__builtin_arm_smlad">,
234
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
235
                           llvm_i32_ty],
236
                          [IntrNoMem]>;
237
def int_arm_smladx : ClangBuiltin<"__builtin_arm_smladx">,
238
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
239
                           llvm_i32_ty],
240
                          [IntrNoMem]>;
241
def int_arm_smlald : ClangBuiltin<"__builtin_arm_smlald">,
242
    DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
243
                           llvm_i64_ty],
244
                          [IntrNoMem]>;
245
def int_arm_smlaldx : ClangBuiltin<"__builtin_arm_smlaldx">,
246
    DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
247
                           llvm_i64_ty],
248
                          [IntrNoMem]>;
249
def int_arm_smlsd : ClangBuiltin<"__builtin_arm_smlsd">,
250
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
251
                           llvm_i32_ty],
252
                          [IntrNoMem]>;
253
def int_arm_smlsdx : ClangBuiltin<"__builtin_arm_smlsdx">,
254
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
255
                           llvm_i32_ty],
256
                          [IntrNoMem]>;
257
def int_arm_smlsld : ClangBuiltin<"__builtin_arm_smlsld">,
258
    DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
259
                           llvm_i64_ty],
260
                          [IntrNoMem]>;
261
def int_arm_smlsldx : ClangBuiltin<"__builtin_arm_smlsldx">,
262
    DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
263
                           llvm_i64_ty],
264
                          [IntrNoMem]>;
265
def int_arm_smuad : ClangBuiltin<"__builtin_arm_smuad">,
266
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
267
                          [IntrNoMem]>;
268
def int_arm_smuadx : ClangBuiltin<"__builtin_arm_smuadx">,
269
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
270
                          [IntrNoMem]>;
271
def int_arm_smusd : ClangBuiltin<"__builtin_arm_smusd">,
272
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
273
                          [IntrNoMem]>;
274
def int_arm_smusdx : ClangBuiltin<"__builtin_arm_smusdx">,
275
    DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
276
                          [IntrNoMem]>;
277
 
278
 
279
//===----------------------------------------------------------------------===//
280
// Load, Store and Clear exclusive
281
 
282
// TODO: Add applicable default attributes.
// ldrex is i32 (overloaded pointer); strex is i32 (i32, overloaded pointer).
// Both use the plain Intrinsic class with no property list.
283
def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
284
def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
285
 
286
// ldaex is i32 (overloaded pointer); stlex is i32 (i32, overloaded pointer).
// Both use the plain Intrinsic class with no property list.
def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
287
def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;
288
 
289
// Takes no operands and produces no result; plain Intrinsic, no property list.
def int_arm_clrex : Intrinsic<[]>;
290
 
291
// strexd is i32 (i32, i32, ptr); ldrexd returns two i32 results from a single
// ptr operand. Unlike ldrex/strex, the pointer is the fixed llvm_ptr_ty, not an
// overloaded pointer type.
def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
292
    llvm_ptr_ty]>;
293
def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
294
 
295
// stlexd is i32 (i32, i32, ptr); ldaexd returns two i32 results from a single
// ptr operand. Both use the plain Intrinsic class with no property list.
def int_arm_stlexd : Intrinsic<[llvm_i32_ty],
296
                               [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>;
297
def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
298
 
299
//===----------------------------------------------------------------------===//
300
// Data barrier instructions
301
 
302
// TODO: Add applicable default attributes.
// dmb, dsb and isb each take one i32 operand and produce no result. Each is
// tagged with both a ClangBuiltin name and an MSBuiltin name, and uses the
// plain Intrinsic class with no property list.
303
def int_arm_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
304
                  Intrinsic<[], [llvm_i32_ty]>;
305
def int_arm_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
306
                  Intrinsic<[], [llvm_i32_ty]>;
307
def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
308
                  Intrinsic<[], [llvm_i32_ty]>;
309
 
310
//===----------------------------------------------------------------------===//
311
// VFP
312
 
313
// get_fpscr returns i32 and takes no operands; set_fpscr takes one i32 and
// returns nothing. Both have an empty property list (no IntrNoMem).
// vcvtr/vcvtru return float from an overloaded floating-point operand and
// carry IntrNoMem; they have no ClangBuiltin tag.
def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
314
                       DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
315
def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
316
                       DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
317
def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],
318
                                          [llvm_anyfloat_ty], [IntrNoMem]>;
319
def int_arm_vcvtru : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
320
                                           [IntrNoMem]>;
321
 
322
//===----------------------------------------------------------------------===//
323
// Coprocessor
324
 
325
// TODO: Add applicable default attributes.
// ldc, ldcl, ldc2 and ldc2l share one signature: no result, operands
// (i32, i32, ptr), with operands 0 and 1 marked ImmArg.
326
def int_arm_ldc : ClangBuiltin<"__builtin_arm_ldc">,
327
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
328
def int_arm_ldcl : ClangBuiltin<"__builtin_arm_ldcl">,
329
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
330
def int_arm_ldc2 : ClangBuiltin<"__builtin_arm_ldc2">,
331
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
332
def int_arm_ldc2l : ClangBuiltin<"__builtin_arm_ldc2l">,
333
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
334
 
335
// stc, stcl, stc2 and stc2l share one signature: no result, operands
// (i32, i32, ptr), with operands 0 and 1 marked ImmArg.
def int_arm_stc : ClangBuiltin<"__builtin_arm_stc">,
336
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
337
def int_arm_stcl : ClangBuiltin<"__builtin_arm_stcl">,
338
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
339
def int_arm_stc2 : ClangBuiltin<"__builtin_arm_stc2">,
340
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
341
def int_arm_stc2l : ClangBuiltin<"__builtin_arm_stc2l">,
342
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
343
 
344
// Move to coprocessor
// mcr and mcr2 take six i32 operands and produce no result. Operands 0, 1, 3,
// 4 and 5 are marked ImmArg; operand 2 is the only non-immediate operand.
345
def int_arm_mcr : ClangBuiltin<"__builtin_arm_mcr">,
346
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
347
                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
348
def int_arm_mcr2 : ClangBuiltin<"__builtin_arm_mcr2">,
349
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
350
                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
351
 
352
// Move from coprocessor
// mrc and mrc2 return i32 and take five i32 operands, all of which (0-4) are
// marked ImmArg. Each carries both a ClangBuiltin and an MSBuiltin name.
353
def int_arm_mrc : ClangBuiltin<"__builtin_arm_mrc">,
354
                  MSBuiltin<"_MoveFromCoprocessor">,
355
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
356
                             llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
357
def int_arm_mrc2 : ClangBuiltin<"__builtin_arm_mrc2">,
358
                   MSBuiltin<"_MoveFromCoprocessor2">,
359
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
360
                             llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
361
 
362
// Coprocessor data processing
// cdp and cdp2 take six i32 operands and produce no result; every operand
// (0-5) is marked ImmArg.
363
def int_arm_cdp : ClangBuiltin<"__builtin_arm_cdp">,
364
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
365
                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
366
def int_arm_cdp2 : ClangBuiltin<"__builtin_arm_cdp2">,
367
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
368
                  llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
369
 
370
// Move from two registers to coprocessor
// mcrr and mcrr2 take five i32 operands and produce no result. Operands 0, 1
// and 4 are marked ImmArg; operands 2 and 3 are the non-immediate ones.
// Neither definition carries a ClangBuiltin tag.
371
def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
372
                                  llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
373
def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
374
                                   llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
375
 
376
// mrrc and mrrc2 return two i32 results and take three i32 operands, all of
// which (0-2) are marked ImmArg. Neither carries a ClangBuiltin tag.
def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
377
                              llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
378
def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
379
                               llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
380
 
381
//===----------------------------------------------------------------------===//
382
// CRC32
383
 
384
// All six CRC32 definitions share the shape i32 (i32, i32) and carry
// IntrNoMem; none of them has a ClangBuiltin tag.
def int_arm_crc32b : DefaultAttrsIntrinsic<
385
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
386
def int_arm_crc32cb : DefaultAttrsIntrinsic<
387
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
388
def int_arm_crc32h  : DefaultAttrsIntrinsic<
389
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
390
def int_arm_crc32ch : DefaultAttrsIntrinsic<
391
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
392
def int_arm_crc32w  : DefaultAttrsIntrinsic<
393
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
394
def int_arm_crc32cw : DefaultAttrsIntrinsic<
395
    [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
396
 
397
//===----------------------------------------------------------------------===//
398
// CMSE
399
 
400
// TODO: Add applicable default attributes.
// The four definitions share the shape i32 (ptr) and carry IntrNoMem. The
// record names are lower-case (tt, ttt, tta, ttat) while the ClangBuiltin
// names use upper-case suffixes (TT, TTT, TTA, TTAT).
401
def int_arm_cmse_tt : ClangBuiltin<"__builtin_arm_cmse_TT">,
402
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
403
def int_arm_cmse_ttt : ClangBuiltin<"__builtin_arm_cmse_TTT">,
404
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
405
def int_arm_cmse_tta : ClangBuiltin<"__builtin_arm_cmse_TTA">,
406
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
407
def int_arm_cmse_ttat : ClangBuiltin<"__builtin_arm_cmse_TTAT">,
408
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
409
 
410
//===----------------------------------------------------------------------===//
411
// HINT
412
 
413
// TODO: Add applicable default attributes.
// hint and dbg each take one i32 operand and produce no result; plain
// Intrinsic class, no property list, no ClangBuiltin tag.
414
def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
415
def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>;
416
 
417
//===----------------------------------------------------------------------===//
418
// UND (reserved undefined sequence)
419
 
420
// TODO: Add applicable default attributes.
// Takes one i32 operand and produces no result; plain Intrinsic class with no
// property list.
421
def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>;
422
 
423
//===----------------------------------------------------------------------===//
424
// Advanced SIMD (NEON)
425
 
426
// The following classes do not correspond directly to GCC builtins.
// Every class in this group returns an overloaded vector (llvm_anyvector_ty,
// overloaded type 0), expresses its operands relative to that type, and
// carries IntrNoMem.
427
// One operand: LLVMMatchType<0>.
class Neon_1Arg_Intrinsic
428
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
429
// One operand: LLVMExtendedType<0>.
class Neon_1Arg_Narrow_Intrinsic
430
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>],
431
                          [IntrNoMem]>;
432
// Two operands, both LLVMMatchType<0>.
class Neon_2Arg_Intrinsic
433
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
434
                          [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
435
// Two operands, both LLVMExtendedType<0>.
class Neon_2Arg_Narrow_Intrinsic
436
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
437
                          [LLVMExtendedType<0>, LLVMExtendedType<0>],
438
                          [IntrNoMem]>;
439
// Two operands, both LLVMTruncatedType<0>.
class Neon_2Arg_Long_Intrinsic
440
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
441
                          [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
442
                          [IntrNoMem]>;
443
// Three operands, all LLVMMatchType<0>.
class Neon_3Arg_Intrinsic
444
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
445
                          [LLVMMatchType<0>, LLVMMatchType<0>,
446
                           LLVMMatchType<0>],
447
                          [IntrNoMem]>;
448
// Three operands: LLVMMatchType<0> followed by two LLVMTruncatedType<0>.
class Neon_3Arg_Long_Intrinsic
449
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
450
                          [LLVMMatchType<0>, LLVMTruncatedType<0>,
451
                           LLVMTruncatedType<0>],
452
                          [IntrNoMem]>;
453
 
454
// Result is an overloaded floating-point type (llvm_anyfloat_ty); the single
// operand is LLVMMatchType<0>. Carries IntrNoMem.
class Neon_1FloatArg_Intrinsic
455
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
456
 
457
// Conversion helper classes; all carry IntrNoMem.
// CvtFxToFP: overloaded float result from (overloaded int, i32).
class Neon_CvtFxToFP_Intrinsic
458
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
459
                          [IntrNoMem]>;
460
// CvtFPToFx: overloaded int result from (overloaded float, i32).
class Neon_CvtFPToFx_Intrinsic
461
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
462
                          [IntrNoMem]>;
463
// CvtFPtoInt_1Arg: result and operand are independently overloaded vectors.
class Neon_CvtFPtoInt_1Arg_Intrinsic
464
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
465
                          [IntrNoMem]>;
466
 
467
// Result is an overloaded vector (type 0). The first operand is a separately
// overloaded vector (type 1) and the second operand is LLVMMatchType<1>, i.e.
// tied to the first operand's type rather than the result's. Carries IntrNoMem.
class Neon_Compare_Intrinsic
468
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
469
                          [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>;
470
 
471
// The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
471
// Besides the table, VTBL has one other v8i8 argument and VTBX has two.
472
// Overall, the classes range from 2 to 6 v8i8 arguments.
473
// Each class below returns v8i8, takes the number of v8i8 operands given in
// its name (Tbl<N>Arg), and carries IntrNoMem.
class Neon_Tbl2Arg_Intrinsic
475
  : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
476
                          [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
477
class Neon_Tbl3Arg_Intrinsic
478
  : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
479
                          [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
480
                          [IntrNoMem]>;
481
class Neon_Tbl4Arg_Intrinsic
482
  : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
483
                          [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
484
                           llvm_v8i8_ty],
485
                          [IntrNoMem]>;
486
class Neon_Tbl5Arg_Intrinsic
487
  : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
488
                          [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
489
                           llvm_v8i8_ty, llvm_v8i8_ty],
490
                          [IntrNoMem]>;
491
class Neon_Tbl6Arg_Intrinsic
492
  : DefaultAttrsIntrinsic<[llvm_v8i8_ty],
493
                          [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
494
                           llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
495
                          [IntrNoMem]>;
496
 
497
// Arithmetic ops
498
 
499
let IntrProperties = [IntrNoMem, Commutative] in {
500
 
501
  // Vector Add.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`.
  // vraddhn uses the narrowing two-operand class; the rest use
  // Neon_2Arg_Intrinsic.
502
  def int_arm_neon_vhadds : Neon_2Arg_Intrinsic;
503
  def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
504
  def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
505
  def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
506
  def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;
507
 
508
  // Vector Multiply.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`.
  // vmulp, vqdmulh and vqrdmulh use Neon_2Arg_Intrinsic; vmulls, vmullu, vmullp
  // and vqdmull use Neon_2Arg_Long_Intrinsic.
509
  def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
510
  def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
511
  def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
512
  def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
513
  def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
514
  def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
515
  def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
516
 
517
  // Vector Maximum.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`;
  // all three use Neon_2Arg_Intrinsic.
518
  def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
519
  def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
520
  def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;
521
 
522
  // Vector Minimum.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`;
  // all three use Neon_2Arg_Intrinsic.
523
  def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
524
  def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
525
  def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;
526
 
527
  // Vector Reciprocal Step.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`.
528
  def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
529
 
530
  // Vector Reciprocal Square Root Step.
  // Defined inside `let IntrProperties = [IntrNoMem, Commutative] in { ... }`.
531
  def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
532
}
533
 
534
// Vector Subtract.
// Declared outside the Commutative `let` block, so these keep the class
// default property list ([IntrNoMem]). vrsubhn uses the narrowing class.
535
def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
536
def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
537
def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
538
 
539
// Vector Absolute Compare.
// Both use Neon_Compare_Intrinsic, whose result type is overloaded
// independently of its two (same-typed) operands.
540
def int_arm_neon_vacge : Neon_Compare_Intrinsic;
541
def int_arm_neon_vacgt : Neon_Compare_Intrinsic;
542
 
543
// Vector Absolute Differences.
// Both use Neon_2Arg_Intrinsic (two operands of the result type, IntrNoMem).
544
def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
545
def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
546
 
547
// Vector Pairwise Add.
// Uses Neon_2Arg_Intrinsic (two operands of the result type, IntrNoMem).
548
def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
549
 
550
// Vector Pairwise Add Long.
550
// Note: This is different than the other "long" NEON intrinsics because
551
// the result vector has half as many elements as the source vector.
552
// The source and destination vector types must be specified separately.
553
// Accordingly, result and operand are each declared llvm_anyvector_ty (two
// independent overloaded types); both definitions carry IntrNoMem.
def int_arm_neon_vpaddls : DefaultAttrsIntrinsic<
555
    [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
556
def int_arm_neon_vpaddlu : DefaultAttrsIntrinsic<
557
    [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
558
 
559
// Vector Pairwise Add and Accumulate Long.
559
// Note: This is similar to vpaddl but the destination vector also appears
560
// as the first argument.
561
// Operand 0 is LLVMMatchType<0> (the result type); operand 1 is a separately
// overloaded vector. Both definitions carry IntrNoMem.
def int_arm_neon_vpadals : DefaultAttrsIntrinsic<
563
    [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>;
564
def int_arm_neon_vpadalu : DefaultAttrsIntrinsic<
565
    [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>;
566
 
567
// Vector Pairwise Maximum and Minimum.
// All four use Neon_2Arg_Intrinsic. Unlike vmaxs/vmins and friends, they are
// declared outside the Commutative `let` block.
568
def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
569
def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
570
def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
571
def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
572
 
573
// Vector Shifts:
574
//
575
// The various saturating and rounding vector shift operations need to be
576
// represented by intrinsics in LLVM, and even the basic VSHL variable shift
577
// operation cannot be safely translated to LLVM's shift operators.  VSHL can
578
// be used for both left and right shifts, or even combinations of the two,
579
// depending on the signs of the shift amounts.  It also has well-defined
580
// behavior for shift amounts that LLVM leaves undefined.  Only basic shifts
581
// by constants can be represented with LLVM's shift operators.
582
//
583
// The shift counts for these intrinsics are always vectors, even for constant
584
// shifts, where the constant is replicated.  For consistency with VSHL (and
585
// other variable shift instructions), left shifts have positive shift counts
586
// and right shifts have negative shift counts.  This convention is also used
587
// for constant right shift intrinsics, and to help preserve sanity, the
588
// intrinsic names use "shift" instead of either "shl" or "shr".  Where
589
// applicable, signed and unsigned versions of the intrinsics are
590
// distinguished with "s" and "u" suffixes.  A few NEON shift instructions,
591
// such as VQSHLU, take signed operands but produce unsigned results; these
592
// use a "su" suffix.
593
 
594
// Vector Shift.
// Both use Neon_2Arg_Intrinsic, so the shift-count operand has the same
// vector type as the value operand and the result.
595
def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
596
def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;
597
 
598
// Vector Rounding Shift.
599
def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
600
def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic;
601
def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic;
602
 
603
// Vector Saturating Shift.
604
def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic;
605
def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic;
606
def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic;
607
def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic;
608
def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic;
609
def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic;
610
 
611
// Vector Saturating Rounding Shift.
612
def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic;
613
def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic;
614
def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic;
615
def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic;
616
def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic;
617
 
618
// Vector Shift and Insert.
619
def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
620
 
621
// Vector Absolute Value and Saturating Absolute Value.
622
def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
623
def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
624
 
625
// Vector Saturating Negate.
626
def int_arm_neon_vqneg : Neon_1Arg_Intrinsic;
627
 
628
// Vector Count Leading Sign/Zero Bits.
629
def int_arm_neon_vcls : Neon_1Arg_Intrinsic;
630
 
631
// Vector Reciprocal Estimate.
632
def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
633
 
634
// Vector Reciprocal Square Root Estimate.
635
def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
636
 
637
// Vector Conversions Between Floating-point and Integer
638
def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
639
def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
640
def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
641
def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
642
def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
643
def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
644
def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
645
def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;
646
 
647
// Vector Conversions Between Floating-point and Fixed-point.
648
def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
649
def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
650
def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
651
def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
652
 
653
// Vector Conversions Between Half-Precision and Single-Precision.
654
def int_arm_neon_vcvtfp2hf
655
    : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
656
def int_arm_neon_vcvthf2fp
657
    : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
658
 
659
// Narrowing Saturating Vector Moves.
660
def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
661
def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
662
def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
663
 
664
// Vector Table Lookup.
665
// The first 1-4 arguments are the table.
666
def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
667
def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
668
def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
669
def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;
670
 
671
// Vector Table Extension.
672
// Some elements of the destination vector may not be updated, so the original
673
// value of that vector is passed as the first argument.  The next 1-4
674
// arguments after that are the table.
675
def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
676
def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
677
def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
678
def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
679
 
680
// Vector and Scalar Rounding.
681
def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
682
def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
683
def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
684
def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
685
def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
686
def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
687
 
688
// De-interleaving vector loads from N-element structures.
689
// Source operands are the address and alignment.
690
def int_arm_neon_vld1 : DefaultAttrsIntrinsic<
691
    [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_i32_ty],
692
    [IntrReadMem, IntrArgMemOnly]>;
693
def int_arm_neon_vld2 : DefaultAttrsIntrinsic<
694
    [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty],
695
    [IntrReadMem, IntrArgMemOnly]>;
696
def int_arm_neon_vld3 : DefaultAttrsIntrinsic<
697
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
698
    [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
699
def int_arm_neon_vld4 : DefaultAttrsIntrinsic<
700
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
701
    [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
702
 
703
// Loads returning 2, 3 or 4 vectors of one overloaded vector type.
// Unlike vld2/vld3/vld4 these take a single operand -- a pointer whose
// pointee is the result vector type -- and no alignment operand.
// They only read memory, and only through that pointer argument.
// NOTE(review): presumably these model the multi-register forms of VLD1
// (no de-interleaving) -- confirm against the ARM backend.
def int_arm_neon_vld1x2 : DefaultAttrsIntrinsic<
704
    [llvm_anyvector_ty, LLVMMatchType<0>],
705
    [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
706
def int_arm_neon_vld1x3 : DefaultAttrsIntrinsic<
707
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
708
    [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
709
def int_arm_neon_vld1x4 : DefaultAttrsIntrinsic<
710
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
711
    [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
712
 
713
// Vector load N-element structure to one lane.
714
// Source operands are: the address, the N input vectors (since only one
715
// lane is assigned), the lane number, and the alignment.
716
def int_arm_neon_vld2lane : DefaultAttrsIntrinsic<
717
    [llvm_anyvector_ty, LLVMMatchType<0>],
718
    [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
719
     llvm_i32_ty],
720
    [IntrReadMem, IntrArgMemOnly]>;
721
def int_arm_neon_vld3lane : DefaultAttrsIntrinsic<
722
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
723
    [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
724
     llvm_i32_ty, llvm_i32_ty],
725
    [IntrReadMem, IntrArgMemOnly]>;
726
def int_arm_neon_vld4lane : DefaultAttrsIntrinsic<
727
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
728
    [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
729
     LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
730
    [IntrReadMem, IntrArgMemOnly]>;
731
 
732
// Vector load N-element structure to all lanes.
733
// Source operands are the address and alignment.
734
def int_arm_neon_vld2dup : DefaultAttrsIntrinsic<
735
    [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty, llvm_i32_ty],
736
    [IntrReadMem, IntrArgMemOnly]>;
737
def int_arm_neon_vld3dup : DefaultAttrsIntrinsic<
738
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
739
    [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
740
def int_arm_neon_vld4dup : DefaultAttrsIntrinsic<
741
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
742
    [llvm_anyptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
743
 
744
// Interleaving vector stores from N-element structures.
745
// Source operands are: the address, the N vectors, and the alignment.
746
def int_arm_neon_vst1 : DefaultAttrsIntrinsic<
747
    [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_i32_ty], [IntrArgMemOnly]>;
748
def int_arm_neon_vst2 : DefaultAttrsIntrinsic<
749
    [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
750
    [IntrArgMemOnly]>;
751
def int_arm_neon_vst3 : DefaultAttrsIntrinsic<
752
    [],
753
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
754
     llvm_i32_ty],
755
    [IntrArgMemOnly]>;
756
def int_arm_neon_vst4 : DefaultAttrsIntrinsic<
757
    [],
758
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
759
     LLVMMatchType<1>, llvm_i32_ty],
760
    [IntrArgMemOnly]>;
761
 
762
// Stores of 2, 3 or 4 vectors of one overloaded vector type.
// Operands are the address followed by the vectors; there is no alignment
// operand. Memory is only accessed through the address argument, which is
// also marked NoCapture.
// NOTE(review): presumably these model the multi-register forms of VST1
// (no interleaving) -- confirm against the ARM backend.
def int_arm_neon_vst1x2 : DefaultAttrsIntrinsic<
763
    [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>],
764
    [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
765
def int_arm_neon_vst1x3 : DefaultAttrsIntrinsic<
766
    [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
767
    [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
768
def int_arm_neon_vst1x4 : DefaultAttrsIntrinsic<
769
    [],
770
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
771
     LLVMMatchType<1>],
772
    [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
773
 
774
// Vector store N-element structure from one lane.
775
// Source operands are: the address, the N vectors, the lane number, and
776
// the alignment.
777
def int_arm_neon_vst2lane : DefaultAttrsIntrinsic<
778
    [],
779
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty,
780
     llvm_i32_ty],
781
    [IntrArgMemOnly]>;
782
def int_arm_neon_vst3lane : DefaultAttrsIntrinsic<
783
    [],
784
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
785
     llvm_i32_ty, llvm_i32_ty],
786
    [IntrArgMemOnly]>;
787
def int_arm_neon_vst4lane : DefaultAttrsIntrinsic<
788
    [],
789
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
790
     LLVMMatchType<1>, llvm_i32_ty, llvm_i32_ty],
791
    [IntrArgMemOnly]>;
792
 
793
// Vector bitwise select.
794
def int_arm_neon_vbsl : DefaultAttrsIntrinsic<
795
    [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
796
    [IntrNoMem]>;
797
 
798
 
799
// Crypto instructions
800
// Signature helpers for the AES intrinsics: the result and every operand
// are v16i8, and the intrinsics do not access memory (IntrNoMem).
//   AES_1Arg_Intrinsic: v16i8 (v16i8)
//   AES_2Arg_Intrinsic: v16i8 (v16i8, v16i8)
class AES_1Arg_Intrinsic : DefaultAttrsIntrinsic<
801
    [llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
802
class AES_2Arg_Intrinsic : DefaultAttrsIntrinsic<
803
    [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
804
 
805
// Signature helpers for the SHA intrinsics; none of them access memory.
//   SHA_1Arg_Intrinsic:       i32   (i32)
//   SHA_2Arg_Intrinsic:       v4i32 (v4i32, v4i32)
//   SHA_3Arg_i32_Intrinsic:   v4i32 (v4i32, i32, v4i32)  -- scalar in the middle
//   SHA_3Arg_v4i32_Intrinsic: v4i32 (v4i32, v4i32, v4i32)
class SHA_1Arg_Intrinsic : DefaultAttrsIntrinsic<
806
    [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
807
class SHA_2Arg_Intrinsic : DefaultAttrsIntrinsic<
808
    [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
809
class SHA_3Arg_i32_Intrinsic : DefaultAttrsIntrinsic<
810
    [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
811
class SHA_3Arg_v4i32_Intrinsic : DefaultAttrsIntrinsic<
812
    [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,llvm_v4i32_ty], [IntrNoMem]>;
813
 
814
// AES intrinsics (all-v16i8 signatures).
def int_arm_neon_aesd : AES_2Arg_Intrinsic;
815
def int_arm_neon_aese : AES_2Arg_Intrinsic;
816
def int_arm_neon_aesimc : AES_1Arg_Intrinsic;
817
def int_arm_neon_aesmc : AES_1Arg_Intrinsic;
818
// SHA intrinsics. sha1h is the only scalar one (i32 -> i32); the rest
// produce v4i32.
def int_arm_neon_sha1h : SHA_1Arg_Intrinsic;
819
def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic;
820
def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
821
// These three take an i32 as their second operand.
def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic;
822
def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic;
823
def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic;
824
// These four take three v4i32 operands.
def int_arm_neon_sha1su0: SHA_3Arg_v4i32_Intrinsic;
825
def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic;
826
def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic;
827
def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic;
828
 
829
// Three-operand intrinsics using the generic Neon_3Arg_Intrinsic signature
// (that class is defined outside this section of the file).
// NOTE(review): the names suggest Vector Saturating Rounding Doubling
// Multiply Accumulate / Subtract Returning High Half -- confirm.
def int_arm_neon_vqrdmlah : Neon_3Arg_Intrinsic;
830
def int_arm_neon_vqrdmlsh : Neon_3Arg_Intrinsic;
831
 
832
// Armv8.2-A dot product instructions
833
// Signature shared by the dot-product intrinsics. Two independently
// overloaded vector types are involved:
//   * type 0: the result and the first operand
//   * type 1: the second and third operands
// The intrinsics do not access memory.
// NOTE(review): the first operand is presumably the accumulator -- confirm.
class Neon_Dot_Intrinsic
834
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
835
                          [LLVMMatchType<0>, llvm_anyvector_ty,
836
                           LLVMMatchType<1>],
837
                          [IntrNoMem]>;
838
def int_arm_neon_udot : Neon_Dot_Intrinsic;
839
def int_arm_neon_sdot : Neon_Dot_Intrinsic;
840
 
841
// v8.6-A Matrix Multiply Intrinsics
842
// Signature shared by the matrix-multiply intrinsics. Two independently
// overloaded vector types are involved:
//   * type 0: the result and the first operand
//   * type 1: the second and third operands
// The intrinsics do not access memory. The shape is the same as
// Neon_Dot_Intrinsic.
// NOTE(review): the first operand is presumably the accumulator -- confirm.
class Neon_MatMul_Intrinsic
843
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
844
                          [LLVMMatchType<0>, llvm_anyvector_ty,
845
                           LLVMMatchType<1>],
846
                          [IntrNoMem]>;
847
def int_arm_neon_ummla  : Neon_MatMul_Intrinsic;
848
def int_arm_neon_smmla  : Neon_MatMul_Intrinsic;
849
def int_arm_neon_usmmla : Neon_MatMul_Intrinsic;
850
def int_arm_neon_usdot  : Neon_Dot_Intrinsic;
851
 
852
// v8.6-A Bfloat Intrinsics
853
// vcvtfp2bf: takes a v4f32 and returns an overloaded vector type.
// vcvtbfp2bf: scalar form, takes a float and returns a bfloat.
// Neither accesses memory.
def int_arm_neon_vcvtfp2bf
854
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;
855
def int_arm_neon_vcvtbfp2bf
856
    : DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>;
857
 
858
def int_arm_neon_bfdot : Neon_Dot_Intrinsic;
859
def int_arm_neon_bfmmla
860
    : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
861
                            [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
862
                            [IntrNoMem]>;
863
 
864
// Fixed (non-overloaded) signature: v4f32 (v4f32, v8bf16, v8bf16), with no
// memory access. This is the same signature int_arm_neon_bfmmla spells out
// inline.
// NOTE(review): the v4f32 operand is presumably the accumulator -- confirm.
class Neon_BF16FML_Intrinsic
865
    : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
866
                            [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
867
                            [IntrNoMem]>;
868
def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic;
869
def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic;
870
 
871
// Scalar intrinsics that return an i32 computed from a 32-bit (cls) or
// 64-bit (cls64) integer operand; they do not access memory.
// NOTE(review): presumably "count leading sign bits" -- confirm.
def int_arm_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty],
872
                                       [IntrNoMem]>;
873
def int_arm_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty],
874
                                         [IntrNoMem]>;
875
 
876
// MVE vctp intrinsics: each takes one i32 and returns a vector of i1 with
// 16, 8, 4 or 2 lanes for the 8-, 16-, 32- and 64-bit variants respectively.
// NOTE(review): presumably the i32 is the number of remaining elements and
// the result is a tail predicate -- confirm.
def int_arm_mve_vctp8  : DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_i32_ty],
877
                                               [IntrNoMem]>;
878
def int_arm_mve_vctp16 : DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_i32_ty],
879
                                               [IntrNoMem]>;
880
def int_arm_mve_vctp32 : DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_i32_ty],
881
                                               [IntrNoMem]>;
882
def int_arm_mve_vctp64 : DefaultAttrsIntrinsic<[llvm_v2i1_ty], [llvm_i32_ty],
883
                                               [IntrNoMem]>;
884
 
885
// v8.3-A Floating-point complex add
886
def int_arm_neon_vcadd_rot90  : Neon_2Arg_Intrinsic;
887
def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;
888
 
889
// GNU eabi mcount
890
// TODO: Add applicable default attributes.
891
def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>;
892
 
893
// Conversions that do not access memory:
//   pred_i2v:      i32 -> overloaded vector type
//   pred_v2i:      overloaded vector type -> i32
//   vreinterpretq: overloaded vector type -> another, independently
//                  overloaded vector type
// NOTE(review): pred_i2v / pred_v2i presumably convert between the integer
// and i1-vector views of an MVE predicate -- confirm.
def int_arm_mve_pred_i2v : DefaultAttrsIntrinsic<
894
  [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
895
def int_arm_mve_pred_v2i : DefaultAttrsIntrinsic<
896
  [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;
897
def int_arm_mve_vreinterpretq : DefaultAttrsIntrinsic<
898
  [llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
899
 
900
// Predicated min / max / absolute-difference. Operand layout:
//   * two vectors of the result type
//   * an i32 'unsigned' flag
//   * a separately overloaded vector
//   * one more vector of the result type
// NOTE(review): the last two operands are presumably the predicate and the
// value used for inactive lanes -- confirm.
def int_arm_mve_min_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
901
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
902
    llvm_anyvector_ty, LLVMMatchType<0>],
903
   [IntrNoMem]>;
904
def int_arm_mve_max_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
905
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
906
    llvm_anyvector_ty, LLVMMatchType<0>],
907
   [IntrNoMem]>;
908
def int_arm_mve_abd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
909
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
910
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
911
// Predicated element-wise arithmetic and bitwise operations. All eight share
// one signature: two vectors of the result type, a separately overloaded
// vector, and one more vector of the result type. There is no 'unsigned'
// flag on these.
// NOTE(review): the last two operands are presumably the predicate and the
// value used for inactive lanes -- confirm.
def int_arm_mve_add_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
912
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
913
   [IntrNoMem]>;
914
def int_arm_mve_and_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
915
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
916
   [IntrNoMem]>;
917
def int_arm_mve_bic_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
918
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
919
   [IntrNoMem]>;
920
def int_arm_mve_eor_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
921
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
922
   [IntrNoMem]>;
923
def int_arm_mve_orn_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
924
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
925
   [IntrNoMem]>;
926
def int_arm_mve_orr_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
927
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
928
   [IntrNoMem]>;
929
def int_arm_mve_sub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
930
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
931
   [IntrNoMem]>;
932
def int_arm_mve_mul_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
933
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
934
   [IntrNoMem]>;
935
// Predicated multiply-high variants. mulh and rmulh carry an i32 'unsigned'
// flag after the two vector operands; qdmulh and qrdmulh do not. In every
// case the trailing operands are a separately overloaded vector followed by
// one more vector of the result type.
// NOTE(review): the last two operands are presumably the predicate and the
// value used for inactive lanes -- confirm.
def int_arm_mve_mulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
936
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
937
    llvm_anyvector_ty, LLVMMatchType<0>],
938
   [IntrNoMem]>;
939
def int_arm_mve_qdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
940
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
941
   [IntrNoMem]>;
942
def int_arm_mve_rmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
943
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
944
    llvm_anyvector_ty, LLVMMatchType<0>],
945
   [IntrNoMem]>;
946
def int_arm_mve_qrdmulh_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
947
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
948
   [IntrNoMem]>;
949
// Predicated multiplies whose result vector type (overload 0) is independent
// of the input vector type (overload 1). The two inputs share overload 1.
// mull_int then takes 'unsigned' and 'top' i32 flags; mull_poly takes a
// single i32. Both end with a third overloaded vector followed by a vector
// of the result type.
// NOTE(review): the single i32 of mull_poly is presumably also a 'top'
// flag, and the last two operands are presumably the predicate and the
// value used for inactive lanes -- confirm.
def int_arm_mve_mull_int_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
950
   [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
951
    llvm_i32_ty /* top */, llvm_anyvector_ty, LLVMMatchType<0>],
952
   [IntrNoMem]>;
953
def int_arm_mve_mull_poly_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
954
   [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
955
    LLVMMatchType<0>],
956
   [IntrNoMem]>;
957
// Predicated qadd / hadd / rhadd / qsub / hsub. All five share one
// signature: two vectors of the result type, an i32 'unsigned' flag, a
// separately overloaded vector, and one more vector of the result type.
// NOTE(review): the last two operands are presumably the predicate and the
// value used for inactive lanes -- confirm.
def int_arm_mve_qadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
958
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
959
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
960
def int_arm_mve_hadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
961
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
962
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
963
def int_arm_mve_rhadd_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
964
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
965
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
966
def int_arm_mve_qsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
967
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
968
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
969
def int_arm_mve_hsub_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
970
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */,
971
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
972
// Predicated vmina / vmaxa / vminnma / vmaxnma. These take only three
// operands: two vectors of the result type and a separately overloaded
// vector. Unlike the other hand-written *_predicated intrinsics in this
// file, there is no additional trailing operand of the result type.
// NOTE(review): the third operand is presumably the predicate -- confirm.
def int_arm_mve_vmina_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
973
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
974
    [IntrNoMem]>;
975
def int_arm_mve_vmaxa_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
976
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
977
    [IntrNoMem]>;
978
def int_arm_mve_vminnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
979
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
980
    [IntrNoMem]>;
981
def int_arm_mve_vmaxnma_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
982
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
983
    [IntrNoMem]>;
984
 
985
// Defines a pair of intrinsics from one signature:
//   NAME            - result types `rets`, operand types `params`
//   NAME_predicated - the same, with one extra trailing operand of type `pred`
// Parameters:
//   rets    - result types
//   params  - operand types of the unpredicated form
//   pred    - type of the appended predicate operand (defaults to an
//             overloaded vector type)
//   props   - intrinsic properties applied to both defs (default IntrNoMem)
//   sdprops - SDNode properties applied to both defs (default none)
multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
986
                         LLVMType pred = llvm_anyvector_ty,
987
                         list<IntrinsicProperty> props = [IntrNoMem],
988
                         list<SDNodeProperty> sdprops = []> {
989
  def "": DefaultAttrsIntrinsic<rets, params, props, "", sdprops>;
990
  def _predicated: DefaultAttrsIntrinsic<rets, params # [pred], props, "",
991
                                         sdprops>;
992
}
993
// Like MVEPredicated, but the predicated form gets two extra trailing
// operands instead of one:
//   NAME            - result types `rets`, operand types `params`
//   NAME_predicated - `params`, then an operand of type `pred`, then an
//                     operand with the type of the first result
// The !if handles overloading: when the first result is llvm_anyvector_ty
// the extra operand must refer back to it with LLVMMatchType<0> (repeating
// llvm_anyvector_ty would introduce a new, independent overload); otherwise
// the concrete type rets[0] is reused directly.
// This multiclass takes no SDNode properties.
// NOTE(review): the final operand is presumably the value used for inactive
// lanes -- confirm.
multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
994
                          LLVMType pred = llvm_anyvector_ty,
995
                          list<IntrinsicProperty> props = [IntrNoMem]> {
996
  def "": DefaultAttrsIntrinsic<rets, params, props>;
997
  def _predicated: DefaultAttrsIntrinsic<rets, params # [pred,
998
      !if(!eq(rets[0], llvm_anyvector_ty),
999
          LLVMMatchType<0>, rets[0])], props>;
1000
}
1001
 
1002
// Defines four intrinsic families per instantiation, each with a plain and a
// _predicated form (via MVEPredicated), named NAME + suffix:
//   v    - i32 (i32, vector, i32 'unsigned' flag)
//   av   - i32 (i32, vector)
//   nmv  - overloaded float scalar (same float type, vector)
//   nmav - overloaded float scalar (same float type, vector)
// In every case the scalar operand has the same type as the result.
// NOTE(review): these are presumably across-vector min/max reductions with
// the scalar operand as the running value -- confirm.
multiclass MVE_minmaxv {
1002
  defm v: MVEPredicated<[llvm_i32_ty],
1003
     [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
1004
  defm av: MVEPredicated<[llvm_i32_ty],
1005
     [llvm_i32_ty, llvm_anyvector_ty]>;
1006
  defm nmv: MVEPredicated<[llvm_anyfloat_ty],
1007
     [LLVMMatchType<0>, llvm_anyvector_ty]>;
1008
  defm nmav: MVEPredicated<[llvm_anyfloat_ty],
1009
     [LLVMMatchType<0>, llvm_anyvector_ty]>;
1010
}
1012
defm int_arm_mve_min: MVE_minmaxv;
1013
defm int_arm_mve_max: MVE_minmaxv;
1014
 
1015
defm int_arm_mve_addv: MVEPredicated<[llvm_i32_ty],
1016
   [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
1017
defm int_arm_mve_addlv: MVEPredicated<[llvm_i64_ty],
1018
   [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
1019
 
1020
// Intrinsic with a predicated and a non-predicated case. The predicated case
1021
// has two additional parameters: inactive (the value for inactive lanes, can
1022
// be undef) and predicate.
1023
// Operand order produced by this multiclass:
//   NAME:            flags..., params...
//   NAME_predicated: flags..., inactive, params..., predicate
// Parameters:
//   rets      - result types
//   flags     - leading operand types, kept first in both forms
//   params    - the remaining operand types of the unpredicated form
//   inactive  - type of the operand inserted between flags and params
//   predicate - type of the operand appended last
//   props     - intrinsic properties for both defs (default IntrNoMem)
multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
1023
                           list<LLVMType> params, LLVMType inactive,
1024
                           LLVMType predicate,
1025
                           list<IntrinsicProperty> props = [IntrNoMem]> {
1026
  def "":          DefaultAttrsIntrinsic<rets, flags # params, props>;
1027
  def _predicated: DefaultAttrsIntrinsic<
1028
      rets, flags # [inactive] # params # [predicate], props>;
1029
}
1031
 
1032
defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
1033
   [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>;
1034
defm int_arm_mve_vcvt_widen: MVEMXPredicated<[llvm_v4f32_ty], [],
1035
   [llvm_v8f16_ty, llvm_i32_ty], llvm_v4f32_ty, llvm_v4i1_ty>;
1036
 
1037
defm int_arm_mve_vldr_gather_base: MVEPredicated<
1038
   [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
1039
   llvm_anyvector_ty, [IntrReadMem], [SDNPMemOperand]>;
1040
defm int_arm_mve_vldr_gather_base_wb: MVEPredicated<
1041
   [llvm_anyvector_ty, llvm_anyvector_ty],
1042
   [LLVMMatchType<1>, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem],
1043
   [SDNPMemOperand]>;
1044
defm int_arm_mve_vstr_scatter_base: MVEPredicated<
1045
   [], [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
1046
   llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>;
1047
defm int_arm_mve_vstr_scatter_base_wb: MVEPredicated<
1048
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
1049
   llvm_anyvector_ty, [IntrWriteMem], [SDNPMemOperand]>;
1050
 
1051
// gather_offset takes three i32 parameters. The first is the size of
1052
// memory element loaded, in bits. The second is a left bit shift to
1053
// apply to each offset in the vector parameter (must be either 0, or
1054
// correspond to the element size of the destination vector type). The
1055
// last is 1 to indicate zero extension (if the load is widening), or
1056
// 0 for sign extension.
1057
//
1058
// scatter_offset has the first two of those parameters, but since it
1059
// narrows rather than widening, it doesn't have the last one.
1060
defm int_arm_mve_vldr_gather_offset: MVEPredicated<
1061
   [llvm_anyvector_ty], [llvm_anyptr_ty, llvm_anyvector_ty,
1062
   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrReadMem],
1063
   [SDNPMemOperand]>;
1064
defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
1065
   [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
1066
   llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem],
1067
   [SDNPMemOperand]>;
1068
 
1069
def int_arm_mve_shl_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1070
   [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
1071
   [IntrNoMem]>;
1072
def int_arm_mve_shr_imm_predicated: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1073
   [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
1074
    llvm_anyvector_ty, LLVMMatchType<0>],
1075
   [IntrNoMem]>;
1076
 
1077
defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
1078
   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
1079
defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
1080
   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
1081
defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
1082
   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
1083
defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
1084
   [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
1085
                       llvm_i32_ty /*top-half*/]>;
1086
 
1087
defm int_arm_mve_vsli: MVEPredicated<
1088
   [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
1089
defm int_arm_mve_vsri: MVEPredicated<
1090
   [llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
1091
 
1092
defm int_arm_mve_vshrn: MVEPredicated<
1093
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty,
1094
    llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/,
1095
    llvm_i32_ty /*unsigned-out*/, llvm_i32_ty /*unsigned-in*/,
1096
    llvm_i32_ty /*top-half*/]>;
1097
 
1098
defm int_arm_mve_vshl_scalar: MVEPredicated<
1099
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/,
1100
    llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
1101
defm int_arm_mve_vshl_vector: MVEPredicatedM<
1102
   [llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty /*shiftcounts*/,
1103
    llvm_i32_ty /*saturate*/, llvm_i32_ty /*round*/, llvm_i32_ty /*unsigned*/]>;
1104
 
1105
// MVE scalar shifts.
1106
// One scalar-shift intrinsic. The result types are `value`; the operands are
// `value` again, followed by one i32, followed by the optional `saturate`
// operand types (none by default). No memory is accessed.
// NOTE(review): the i32 after `value` is presumably the shift amount --
// confirm.
class ARM_MVE_qrshift_single<list<LLVMType> value,
1106
                             list<LLVMType> saturate = []> :
1107
  DefaultAttrsIntrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
1109
// Defines two intrinsics per instantiation:
//   NAME  - the 32-bit form: the value is a single i32
//   NAMEl - the 64-bit form: the value is passed and returned as two i32s,
//           and the `saturate` operand types (if any) are appended
multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
1109
  // Most of these shifts come in 32- and 64-bit versions. But only
1110
  // the 64-bit ones have the extra saturation argument (if any).
1111
  def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
1112
  def l:  ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
1113
}
1115
defm int_arm_mve_urshr: ARM_MVE_qrshift;
1116
defm int_arm_mve_uqshl: ARM_MVE_qrshift;
1117
defm int_arm_mve_srshr: ARM_MVE_qrshift;
1118
defm int_arm_mve_sqshl: ARM_MVE_qrshift;
1119
defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
1120
defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
1121
// LSLL and ASRL only have 64-bit versions, not 32.
1122
def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
1123
def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
1124
 
1125
def int_arm_mve_vabd: DefaultAttrsIntrinsic<
1126
   [llvm_anyvector_ty],
1127
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1128
   [IntrNoMem]>;
1129
// vadc / vsbc and their predicated forms. Every variant returns a vector
// plus an i32.
//   unpredicated: (vector, vector, i32)
//   predicated:   (vector, vector, vector, i32, separately overloaded vector)
// All plain "vector" operands have the result vector type.
// NOTE(review): the i32 operand/result presumably carries the carry flags
// in and out; in the predicated forms one of the three result-typed vectors
// is presumably the inactive-lane value and the last operand the predicate.
// The operand order is not documented here -- confirm before relying on it.
def int_arm_mve_vadc: DefaultAttrsIntrinsic<
1130
   [llvm_anyvector_ty, llvm_i32_ty],
1131
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
1132
def int_arm_mve_vsbc: DefaultAttrsIntrinsic<
1133
   [llvm_anyvector_ty, llvm_i32_ty],
1134
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
1135
def int_arm_mve_vadc_predicated: DefaultAttrsIntrinsic<
1136
   [llvm_anyvector_ty, llvm_i32_ty],
1137
   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
1138
    llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
1139
def int_arm_mve_vsbc_predicated: DefaultAttrsIntrinsic<
1140
   [llvm_anyvector_ty, llvm_i32_ty],
1141
   [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
1142
    llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
1143
def int_arm_mve_vshlc: DefaultAttrsIntrinsic<
1144
   [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
1145
   [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
1146
    llvm_i32_ty /* shift count */], [IntrNoMem]>;
1147
def int_arm_mve_vshlc_predicated: DefaultAttrsIntrinsic<
1148
   [llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
1149
   [LLVMMatchType<0>, llvm_i32_ty /* bits shifted in */,
1150
    llvm_i32_ty /* shift count */, llvm_anyvector_ty], [IntrNoMem]>;
1151
def int_arm_mve_vmulh: DefaultAttrsIntrinsic<
1152
   [llvm_anyvector_ty],
1153
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1154
   [IntrNoMem]>;
1155
def int_arm_mve_vqdmulh: DefaultAttrsIntrinsic<
1156
   [llvm_anyvector_ty],
1157
   [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
1158
def int_arm_mve_vhadd: DefaultAttrsIntrinsic<
1159
   [llvm_anyvector_ty],
1160
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1161
   [IntrNoMem]>;
1162
def int_arm_mve_vrhadd: DefaultAttrsIntrinsic<
1163
   [llvm_anyvector_ty],
1164
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1165
   [IntrNoMem]>;
1166
def int_arm_mve_vhsub: DefaultAttrsIntrinsic<
1167
   [llvm_anyvector_ty],
1168
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1169
   [IntrNoMem]>;
1170
def int_arm_mve_vrmulh: DefaultAttrsIntrinsic<
1171
   [llvm_anyvector_ty],
1172
   [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty /* unsigned */],
1173
   [IntrNoMem]>;
1174
def int_arm_mve_vqrdmulh: DefaultAttrsIntrinsic<
1175
   [llvm_anyvector_ty],
1176
   [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
1177
def int_arm_mve_vmull: DefaultAttrsIntrinsic<
1178
   [llvm_anyvector_ty],
1179
   [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty /* unsigned */,
1180
    llvm_i32_ty /* top */], [IntrNoMem]>;
1181
def int_arm_mve_vmull_poly: DefaultAttrsIntrinsic<
1182
   [llvm_anyvector_ty],
1183
   [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem]>;
1184
 
1185
// The first two parameters are compile-time constants:
1186
// * Halving: 0 means  halving (vhcaddq), 1 means non-halving (vcaddq) 
1187
//            instruction. Note: the flag is inverted to match the corresponding
1188
//            bit in the instruction encoding
1189
// * Rotation angle: 0 means 90 deg, 1 means 270 deg
1190
defm int_arm_mve_vcaddq : MVEMXPredicated<
1191
  [llvm_anyvector_ty],
1192
  [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
1193
   LLVMMatchType<0>, llvm_anyvector_ty>;
1194
 
1195
// The first operand of the following two intrinsics is the rotation angle
1196
// (must be a compile-time constant):
1197
// 0 - 0 deg
1198
// 1 - 90 deg
1199
// 2 - 180 deg
1200
// 3 - 270 deg
1201
defm int_arm_mve_vcmulq : MVEMXPredicated<
1202
  [llvm_anyvector_ty],
1203
  [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
1204
   LLVMMatchType<0>, llvm_anyvector_ty>;
1205
 
1206
defm int_arm_mve_vcmlaq : MVEPredicated<
1207
  [llvm_anyvector_ty],
1208
  [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
1209
   llvm_anyvector_ty>;
1210
 
1211
// MVE structure loads: return 2 (vld2q) or 4 (vld4q) vectors of one
// overloaded type. The only operand is the address; there is no alignment
// operand. They only read memory, and only through the address argument.
def int_arm_mve_vld2q: DefaultAttrsIntrinsic<
1212
    [llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty],
1213
    [IntrReadMem, IntrArgMemOnly]>;
1214
def int_arm_mve_vld4q: DefaultAttrsIntrinsic<
1215
    [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
1216
    [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
1217
 
1218
// MVE structure stores: operands are the address, then 2 (vst2q) or 4
// (vst4q) vectors of one overloaded type, then a trailing i32. They only
// write memory, only through the address argument, and carry
// SDNPMemOperand.
// NOTE(review): the meaning of the trailing i32 is not documented here; it
// presumably selects which stage of the multi-instruction store to emit --
// confirm against the ARM backend.
def int_arm_mve_vst2q: DefaultAttrsIntrinsic<
1219
    [], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
1220
    [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
1221
def int_arm_mve_vst4q: DefaultAttrsIntrinsic<
1222
    [],
1223
    [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
1224
     LLVMMatchType<1>, llvm_i32_ty],
1225
    [IntrWriteMem, IntrArgMemOnly], "", [SDNPMemOperand]>;
1226
 
1227
// MVE vector absolute difference and accumulate across vector
1228
// The first operand is an 'unsigned' flag. The remaining operands are:
1229
// * accumulator
1230
// * first vector operand
1231
// * second vector operand
1232
// * mask (only in predicated versions)
1233
// Scalar i32 result. The two vector operands share one overloaded type.
defm int_arm_mve_vabav
    : MVEPredicated<[llvm_i32_ty],
                    [llvm_i32_ty /* unsigned */,
                     llvm_i32_ty /* accumulator */,
                     llvm_anyvector_ty /* first vector operand */,
                     LLVMMatchType<0> /* second vector operand */],
                    llvm_anyvector_ty>;
1236
 
1237
// The following 3 intrinsics are MVE vector reductions with two vector
1238
// operands.
1239
// The first 3 operands are boolean flags (must be compile-time constants):
1240
// * unsigned - the instruction operates on vectors of unsigned values and
1241
//              unsigned scalars
1242
// * subtract - the instruction performs subtraction after multiplication of
1243
//              lane pairs (e.g., vmlsdav vs vmladav)
1244
// * exchange - the instruction exchanges successive even and odd lanes of
1245
//              the first operands before multiplication of lane pairs
1246
//              (e.g., vmladavx vs vmladav)
1247
// The remaining operands are:
1248
// * accumulator
1249
// * first vector operand
1250
// * second vector operand
1251
// * mask (only in predicated versions)
1252
 
1253
// Version with 32-bit result, vml{a,s}dav[a][x]
1254
// Single i32 result; the accumulator is a single i32 operand.
defm int_arm_mve_vmldava
    : MVEPredicated<[llvm_i32_ty],
                    [llvm_i32_ty /* unsigned */,
                     llvm_i32_ty /* subtract */,
                     llvm_i32_ty /* exchange */,
                     llvm_i32_ty /* accumulator */,
                     llvm_anyvector_ty /* first vector operand */,
                     LLVMMatchType<0> /* second vector operand */],
                    llvm_anyvector_ty>;
1259
 
1260
// Version with 64-bit result, vml{a,s}ldav[a][x]
1261
// The result is returned as a pair of i32 values, and the accumulator is
// likewise passed as a pair of i32 operands.
// NOTE(review): which element of each pair is the low half is not stated
// here - confirm against the lowering code.
defm int_arm_mve_vmlldava
    : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                    [llvm_i32_ty /* unsigned */,
                     llvm_i32_ty /* subtract */,
                     llvm_i32_ty /* exchange */,
                     llvm_i32_ty /* accumulator (1st i32) */,
                     llvm_i32_ty /* accumulator (2nd i32) */,
                     llvm_anyvector_ty /* first vector operand */,
                     LLVMMatchType<0> /* second vector operand */],
                    llvm_anyvector_ty>;
1266
 
1267
// Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]
1268
// The result is returned as a pair of i32 values, and the accumulator is
// likewise passed as a pair of i32 operands.
// NOTE(review): which element of each pair is the low half is not stated
// here - confirm against the lowering code.
defm int_arm_mve_vrmlldavha
    : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                    [llvm_i32_ty /* unsigned */,
                     llvm_i32_ty /* subtract */,
                     llvm_i32_ty /* exchange */,
                     llvm_i32_ty /* accumulator (1st i32) */,
                     llvm_i32_ty /* accumulator (2nd i32) */,
                     llvm_anyvector_ty /* first vector operand */,
                     LLVMMatchType<0> /* second vector operand */],
                    llvm_anyvector_ty>;
1273
 
1274
// int_arm_mve_vidup and int_arm_mve_vddup share one signature: they return the
// output vector together with the written-back base, take no flag operands,
// and take a base and a step.
foreach dir = ["i", "d"] in
  defm "int_arm_mve_v" # dir # "dup"
      : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                         llvm_i32_ty /* written-back base */],
                        [],
                        [llvm_i32_ty /* base */,
                         llvm_i32_ty /* step */],
                        LLVMMatchType<0>,
                        llvm_anyvector_ty>;

// int_arm_mve_viwdup and int_arm_mve_vdwdup take an additional 'limit'
// operand between the base and the step.
foreach dir = ["i", "d"] in
  defm "int_arm_mve_v" # dir # "wdup"
      : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                         llvm_i32_ty /* written-back base */],
                        [],
                        [llvm_i32_ty /* base */,
                         llvm_i32_ty /* limit */,
                         llvm_i32_ty /* step */],
                        LLVMMatchType<0>,
                        llvm_anyvector_ty>;
1290
 
1291
// Flags:
1292
// * unsigned
1293
// One i32 flag operand ('unsigned'), then the input vector and an i32 scale.
// The output and input vector types are overloaded independently.
defm int_arm_mve_vcvt_fix
    : MVEMXPredicated<[llvm_anyvector_ty /* output */],
                      [llvm_i32_ty /* unsigned */],
                      [llvm_anyvector_ty /* input vector */,
                       llvm_i32_ty /* scale */],
                      LLVMMatchType<0>,
                      llvm_anyvector_ty>;
1297
 
1298
// Three independently overloaded vector types: the result (index 0), the
// first operand (index 1) and the predicate (index 2). The final operand,
// 'inactive', has the same type as the result.
def int_arm_mve_vcvt_fp_int_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_anyvector_ty,
                             llvm_i32_ty /* unsigned */,
                             llvm_anyvector_ty /* predicate */,
                             LLVMMatchType<0> /* inactive */],
                            [IntrNoMem]>;
1302
 
1303
// Instantiates int_arm_mve_vcvta, int_arm_mve_vcvtn, int_arm_mve_vcvtp and
// int_arm_mve_vcvtm, all with the same signature: one 'unsigned' flag operand
// and one input vector, producing an independently overloaded output vector.
foreach suffix = ["a", "n", "p", "m"] in
  defm "int_arm_mve_vcvt" # suffix
      : MVEMXPredicated<[llvm_anyvector_ty /* output */],
                        [llvm_i32_ty /* unsigned */],
                        [llvm_anyvector_ty /* input */],
                        LLVMMatchType<0>,
                        llvm_anyvector_ty>;
1308
 
1309
// Unpredicated unary operations: one vector operand whose type matches the
// overloaded result type; no memory access.
def int_arm_mve_vrintn
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>],
                            [IntrNoMem]>;

def int_arm_mve_vcls
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>],
                            [IntrNoMem]>;
1313
 
1314
// No flag operands. The operands are a vector of the result's overloaded type
// and an i32 scalar.
defm int_arm_mve_vbrsr
    : MVEMXPredicated<[llvm_anyvector_ty],
                      [],
                      [LLVMMatchType<0>, llvm_i32_ty],
                      LLVMMatchType<0>,
                      llvm_anyvector_ty>;
1317
 
1318
// The result vector type (overload index 0) and the source vector type
// (overload index 1) are overloaded independently; both source operands share
// the same type. An i32 operand follows the two sources.
def int_arm_mve_vqdmull
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_anyvector_ty,
                             LLVMMatchType<1>,
                             llvm_i32_ty],
                            [IntrNoMem]>;

// As above, with two extra operands: a further independently overloaded
// vector, and a final operand whose type matches the result.
// NOTE(review): by analogy with the other predicated intrinsics in this file
// these are presumably the predicate and the 'inactive' value - confirm.
def int_arm_mve_vqdmull_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_anyvector_ty,
                             LLVMMatchType<1>,
                             llvm_i32_ty,
                             llvm_anyvector_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;
1327
 
1328
// Shared signature for the predicated unary intrinsics. The result is an
// overloaded vector (index 0). The operands are a vector of that same type, a
// second independently overloaded vector (index 1), and another vector of the
// result type. No memory is accessed.
// NOTE(review): the three operands are presumably the source, the predicate
// and the 'inactive' value - confirm against the users of this class.
class MVESimpleUnaryPredicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>,
                             llvm_anyvector_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;
1330
 
1331
// One record per unary operation, each named int_arm_mve_<op>_predicated and
// each using the MVESimpleUnaryPredicated signature unchanged.
foreach op = ["mvn", "abs", "neg", "qabs", "qneg", "clz", "cls",
              "vrintz", "vrintm", "vrintp", "vrinta", "vrintx", "vrintn"] in
  def "int_arm_mve_" # op # "_predicated" : MVESimpleUnaryPredicated;
1344
 
1345
// The operands are a vector of the result type, the i32 size to reverse, an
// independently overloaded vector, and a final vector of the result type.
def int_arm_mve_vrev_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>,
                             llvm_i32_ty /* size to reverse */,
                             llvm_anyvector_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;
1348
 
1349
// vmovl: the source vector and the predicate are overloaded independently of
// the result; the final operand has the result's type.
def int_arm_mve_vmovl_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_anyvector_ty,
                             llvm_i32_ty /* unsigned */,
                             llvm_i32_ty /* top half */,
                             llvm_anyvector_ty /* predicate */,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;

// vmovn: the first operand has the result's type; the second vector operand
// and the predicate are overloaded independently.
def int_arm_mve_vmovn_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>,
                             llvm_anyvector_ty,
                             llvm_i32_ty /* top half */,
                             llvm_anyvector_ty /* predicate */],
                            [IntrNoMem]>;
1355
 
1356
// The first operand has the result's type; the second vector operand is
// overloaded independently. Three i32 flags follow.
def int_arm_mve_vqmovn
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>,
                             llvm_anyvector_ty,
                             llvm_i32_ty /* unsigned output */,
                             llvm_i32_ty /* unsigned input */,
                             llvm_i32_ty /* top half */],
                            [IntrNoMem]>;

// Predicated form: same operands plus a trailing, independently overloaded
// predicate vector.
def int_arm_mve_vqmovn_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>,
                             llvm_anyvector_ty,
                             llvm_i32_ty /* unsigned output */,
                             llvm_i32_ty /* unsigned input */,
                             llvm_i32_ty /* top half */,
                             llvm_anyvector_ty /* pred */],
                            [IntrNoMem]>;
1364
 
1365
// Predicated multiply-accumulate intrinsics. In each one the vector operands
// have the result's overloaded type and the last operand is an independently
// overloaded predicate vector. They differ only in which operand, if any, is
// an i32 scalar.

// All three arithmetic operands are vectors.
def int_arm_mve_fma_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0> /* mult op #1 */,
                             LLVMMatchType<0> /* mult op #2 */,
                             LLVMMatchType<0> /* addend */,
                             llvm_anyvector_ty /* pred */],
                            [IntrNoMem]>;

// The second multiplicand is a scalar.
def int_arm_mve_vmla_n_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0> /* mult op #1 */,
                             LLVMMatchType<0> /* addend */,
                             llvm_i32_ty /* mult op #2 (scalar) */,
                             llvm_anyvector_ty /* pred */],
                            [IntrNoMem]>;

// The addend is a scalar.
def int_arm_mve_vmlas_n_predicated
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0> /* mult op #1 */,
                             LLVMMatchType<0> /* mult op #2 */,
                             llvm_i32_ty /* addend (scalar) */,
                             llvm_anyvector_ty /* pred */],
                            [IntrNoMem]>;
1376
 
1377
// int_arm_mve_vqdmlah and int_arm_mve_vqrdmlah: the second multiplicand is an
// i32 scalar and the addend is a vector.
foreach op = ["vqdmlah", "vqrdmlah"] in
  defm "int_arm_mve_" # op
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0> /* mult op #1 */,
                       LLVMMatchType<0> /* addend */,
                       llvm_i32_ty /* mult op #2 (scalar) */]>;

// int_arm_mve_vqdmlash and int_arm_mve_vqrdmlash: both multiplicands are
// vectors and the addend is an i32 scalar.
foreach op = ["vqdmlash", "vqrdmlash"] in
  defm "int_arm_mve_" # op
      : MVEPredicated<[llvm_anyvector_ty],
                      [LLVMMatchType<0> /* mult op #1 */,
                       LLVMMatchType<0> /* mult op #2 */,
                       llvm_i32_ty /* addend (scalar) */]>;
1389
 
1390
// Three vector operands of the result's overloaded type, followed by three
// i32 flags.
defm int_arm_mve_vqdmlad
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>,
                     LLVMMatchType<0>,
                     LLVMMatchType<0>,
                     llvm_i32_ty /* exchange */,
                     llvm_i32_ty /* round */,
                     llvm_i32_ty /* subtract */]>;
1394
 
1395
// CDE (Custom Datapath Extension)
1396
 
1397
// Defines four intrinsic variants per instantiation, named by appending a
// suffix to the defm name:
//   (none) - one i32 result
//   a      - one i32 result, with an extra i32 accumulator operand
//   d      - two i32 results (lo, hi)
//   da     - two i32 results, with extra acc_lo/acc_hi accumulator operands
// 'args' lists the additional operand types, placed after the coprocessor
// (and accumulator, if any) operands and before the trailing 'imm' operand.
// In every variant operand 0 (coproc) and the trailing 'imm' are ImmArg; the
// constant passed to !add is the number of operands that precede 'args'.
multiclass CDEGPRIntrinsics<list<LLVMType> args> {
  def "" : DefaultAttrsIntrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

  def a : DefaultAttrsIntrinsic<
      [llvm_i32_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   llvm_i32_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

  def d : DefaultAttrsIntrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

  def da : DefaultAttrsIntrinsic<
      [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
      !listconcat([llvm_i32_ty /* coproc */,
                   llvm_i32_ty /* acc_lo */,
                   llvm_i32_ty /* acc_hi */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 3)>>]>;
}
1418
 
1419
// GPR-based CDE intrinsics with zero, one and two additional i32 operands.
defm int_arm_cde_cx1 : CDEGPRIntrinsics<[]>;
defm int_arm_cde_cx2 : CDEGPRIntrinsics<[llvm_i32_ty]>;
defm int_arm_cde_cx3 : CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
1422
 
1423
// Defines two intrinsic variants per instantiation, both returning an
// overloaded floating-point type:
//   (none) - operands: coproc, args..., imm
//   a      - operands: coproc, acc (same type as the result), args..., imm
// Operand 0 (coproc) and the trailing 'imm' are ImmArg; the constant passed
// to !add is the number of operands that precede 'args'.
multiclass CDEVCXIntrinsics<list<LLVMType> args> {
  def "" : DefaultAttrsIntrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

  def a : DefaultAttrsIntrinsic<
      [llvm_anyfloat_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
}
1434
 
1435
// Floating-point CDE intrinsics with zero, one and two additional operands,
// each of the same overloaded type as the result.
defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>,
                                          LLVMMatchType<0>]>;
1438
 
1439
// Defines four intrinsic variants per instantiation:
//   (none)        - v16i8 result; operands: coproc, args..., imm
//   a             - v16i8 result; operands: coproc, acc (v16i8), args..., imm
//   _predicated   - overloaded vector result; operands: coproc, inactive
//                   (result type), args..., imm, mask (overloaded vector)
//   a_predicated  - overloaded vector result; operands: coproc, acc (result
//                   type), args..., imm, mask (overloaded vector)
// Operand 0 (coproc) and the 'imm' operand are ImmArg in every variant. The
// constant passed to !add is the number of operands that precede 'args', so
// the computed index is that of 'imm' even when a mask follows it.
multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
  def "" : DefaultAttrsIntrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 1)>>]>;

  def a : DefaultAttrsIntrinsic<
      [llvm_v16i8_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   llvm_v16i8_ty /* acc */],
                  args,
                  [llvm_i32_ty /* imm */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

  def _predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   LLVMMatchType<0> /* inactive */],
                  args,
                  [llvm_i32_ty /* imm */,
                   llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

  def a_predicated : DefaultAttrsIntrinsic<
      [llvm_anyvector_ty],
      !listconcat([llvm_i32_ty /* coproc */,
                   LLVMMatchType<0> /* acc */],
                  args,
                  [llvm_i32_ty /* imm */,
                   llvm_anyvector_ty /* mask */]),
      [IntrNoMem,
       ImmArg<ArgIndex<0>>,
       ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
}
1461
 
1462
// Vector CDE intrinsics with zero, one and two additional v16i8 operands.
defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>;
defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty,
                                              llvm_v16i8_ty]>;
1465
 
1466
} // end TargetPrefix