Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database.  Users of
// this file must define the BUILTIN macro to make use of this information.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.
15
 
16
// Clients that define only BUILTIN still see every entry: TARGET_BUILTIN is
// mapped onto BUILTIN and the FEATURE argument is dropped.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Save any definitions the including file already has for the helper macros
// that are (re)defined below.
// NOTE(review): the matching pop_macro directives are not visible in this
// part of the file - confirm they exist at the end.
#pragma push_macro("SM_53")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
// Each SM_xx expands to a '|'-separated string naming sm_xx followed by every
// newer architecture defined here, e.g. SM_87 -> "sm_87|sm_89|sm_90".
#define SM_90 "sm_90"
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72

#pragma push_macro("SM_60")
// SM_60 also names sm_61 and sm_62, which have no macro of their own.
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
#define SM_53 "sm_53|" SM_60

#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
#pragma push_macro("PTX76")
#pragma push_macro("PTX77")
#pragma push_macro("PTX78")
// Same scheme for PTX versions: PTXnn names ptxnn and every newer version
// defined here, e.g. PTX76 -> "ptx76|ptx77|ptx78".
#define PTX78 "ptx78"
#define PTX77 "ptx77|" PTX78
#define PTX76 "ptx76|" PTX77
#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX61 "ptx61|" PTX63
#define PTX60 "ptx60|" PTX61
#define PTX42 "ptx42|" PTX60

#pragma push_macro("AND")
// AND(a, b) pastes two feature strings together as "(a),(b)".
// Presumably the feature checker reads ',' as "both required" and '|' as
// "any of" - confirm against the TARGET_BUILTIN feature-string parser.
#define AND(a, b) "(" a "),(" b ")"
75
 
76
// Special Registers
77
 
78
BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
79
BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
80
BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
81
BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")
82
 
83
BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
84
BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
85
BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
86
BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")
87
 
88
BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
89
BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
90
BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
91
BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")
92
 
93
BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
94
BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
95
BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
96
BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")
97
 
98
BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
99
BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
100
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")
101
 
102
BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
103
BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
104
BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")
105
 
106
BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
107
BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
108
BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
109
BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
110
BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
111
 
112
BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
113
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
114
 
115
BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
116
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
117
BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
118
BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
119
 
120
// MISC
121
 
122
BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
123
 
124
// Min Max
125
 
126
TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
127
TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
128
TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
129
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
130
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
131
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
132
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
133
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
134
               AND(SM_86, PTX72))
135
TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
136
TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
137
TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
138
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
139
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
140
               AND(SM_86, PTX72))
141
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
142
               AND(SM_86, PTX72))
143
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
144
               AND(SM_86, PTX72))
145
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
146
               AND(SM_86, PTX72))
147
TARGET_BUILTIN(__nvvm_fmin_bf16, "UsUsUs", "", AND(SM_80, PTX70))
148
TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
149
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
150
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "UsUsUs", "",
151
               AND(SM_86, PTX72))
152
TARGET_BUILTIN(__nvvm_fmin_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
153
TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
154
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
155
               AND(SM_86, PTX72))
156
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
157
               AND(SM_86, PTX72))
158
BUILTIN(__nvvm_fmin_f, "fff", "")
159
BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
160
TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
161
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
162
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
163
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
164
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
165
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
166
BUILTIN(__nvvm_fmin_d, "ddd", "")
167
 
168
TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
169
TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
170
TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
171
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
172
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
173
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
174
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
175
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
176
               AND(SM_86, PTX72))
177
TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
178
TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
179
TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
180
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
181
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
182
               AND(SM_86, PTX72))
183
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
184
               AND(SM_86, PTX72))
185
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
186
               AND(SM_86, PTX72))
187
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
188
               AND(SM_86, PTX72))
189
TARGET_BUILTIN(__nvvm_fmax_bf16, "UsUsUs", "", AND(SM_80, PTX70))
190
TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "UsUsUs", "", AND(SM_80, PTX70))
191
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "UsUsUs", "", AND(SM_86, PTX72))
192
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "UsUsUs", "",
193
               AND(SM_86, PTX72))
194
TARGET_BUILTIN(__nvvm_fmax_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
195
TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "ZUiZUiZUi", "", AND(SM_80, PTX70))
196
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
197
               AND(SM_86, PTX72))
198
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "ZUiZUiZUi", "",
199
               AND(SM_86, PTX72))
200
BUILTIN(__nvvm_fmax_f, "fff", "")
201
BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
202
TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
203
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
204
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
205
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
206
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
207
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
208
BUILTIN(__nvvm_fmax_d, "ddd", "")
209
 
210
// Multiplication
211
 
212
BUILTIN(__nvvm_mulhi_i, "iii", "")
213
BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
214
BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
215
BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")
216
 
217
BUILTIN(__nvvm_mul_rn_ftz_f,  "fff", "")
218
BUILTIN(__nvvm_mul_rn_f,  "fff", "")
219
BUILTIN(__nvvm_mul_rz_ftz_f,  "fff", "")
220
BUILTIN(__nvvm_mul_rz_f,  "fff", "")
221
BUILTIN(__nvvm_mul_rm_ftz_f,  "fff", "")
222
BUILTIN(__nvvm_mul_rm_f,  "fff", "")
223
BUILTIN(__nvvm_mul_rp_ftz_f,  "fff", "")
224
BUILTIN(__nvvm_mul_rp_f,  "fff", "")
225
 
226
BUILTIN(__nvvm_mul_rn_d,  "ddd", "")
227
BUILTIN(__nvvm_mul_rz_d,  "ddd", "")
228
BUILTIN(__nvvm_mul_rm_d,  "ddd", "")
229
BUILTIN(__nvvm_mul_rp_d,  "ddd", "")
230
 
231
BUILTIN(__nvvm_mul24_i,  "iii", "")
232
BUILTIN(__nvvm_mul24_ui,  "UiUiUi", "")
233
 
234
// Div
235
 
236
BUILTIN(__nvvm_div_approx_ftz_f,  "fff", "")
237
BUILTIN(__nvvm_div_approx_f,  "fff", "")
238
 
239
BUILTIN(__nvvm_div_rn_ftz_f,  "fff", "")
240
BUILTIN(__nvvm_div_rn_f,  "fff", "")
241
BUILTIN(__nvvm_div_rz_ftz_f,  "fff", "")
242
BUILTIN(__nvvm_div_rz_f,  "fff", "")
243
BUILTIN(__nvvm_div_rm_ftz_f,  "fff", "")
244
BUILTIN(__nvvm_div_rm_f,  "fff", "")
245
BUILTIN(__nvvm_div_rp_ftz_f,  "fff", "")
246
BUILTIN(__nvvm_div_rp_f,  "fff", "")
247
 
248
BUILTIN(__nvvm_div_rn_d,  "ddd", "")
249
BUILTIN(__nvvm_div_rz_d,  "ddd", "")
250
BUILTIN(__nvvm_div_rm_d,  "ddd", "")
251
BUILTIN(__nvvm_div_rp_d,  "ddd", "")
252
 
253
// Sad
254
 
255
BUILTIN(__nvvm_sad_i, "iiii", "")
256
BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")
257
 
258
// Floor, Ceil
259
 
260
BUILTIN(__nvvm_floor_ftz_f, "ff", "")
261
BUILTIN(__nvvm_floor_f, "ff", "")
262
BUILTIN(__nvvm_floor_d, "dd", "")
263
 
264
BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
265
BUILTIN(__nvvm_ceil_f, "ff", "")
266
BUILTIN(__nvvm_ceil_d, "dd", "")
267
 
268
// Abs
269
 
270
BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
271
BUILTIN(__nvvm_fabs_f, "ff", "")
272
BUILTIN(__nvvm_fabs_d, "dd", "")
273
 
274
// Round
275
 
276
BUILTIN(__nvvm_round_ftz_f, "ff", "")
277
BUILTIN(__nvvm_round_f, "ff", "")
278
BUILTIN(__nvvm_round_d, "dd", "")
279
 
280
// Trunc
281
 
282
BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
283
BUILTIN(__nvvm_trunc_f, "ff", "")
284
BUILTIN(__nvvm_trunc_d, "dd", "")
285
 
286
// Saturate
287
 
288
BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
289
BUILTIN(__nvvm_saturate_f, "ff", "")
290
BUILTIN(__nvvm_saturate_d, "dd", "")
291
 
292
// Exp2, Log2
293
 
294
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
295
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
296
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
297
TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
298
TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
299
 
300
BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
301
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
302
BUILTIN(__nvvm_lg2_approx_d, "dd", "")
303
 
304
// Sin, Cos
305
 
306
BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
307
BUILTIN(__nvvm_sin_approx_f, "ff", "")
308
 
309
BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
310
BUILTIN(__nvvm_cos_approx_f, "ff", "")
311
 
312
// Fma
313
 
314
TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
315
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
316
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
317
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
318
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
319
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
320
TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
321
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
322
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
323
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
324
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
325
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
326
TARGET_BUILTIN(__nvvm_fma_rn_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
327
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "UsUsUsUs", "", AND(SM_80, PTX70))
328
TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
329
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "ZUiZUiZUiZUi", "", AND(SM_80, PTX70))
330
BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
331
BUILTIN(__nvvm_fma_rn_f, "ffff", "")
332
BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
333
BUILTIN(__nvvm_fma_rz_f, "ffff", "")
334
BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
335
BUILTIN(__nvvm_fma_rm_f, "ffff", "")
336
BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
337
BUILTIN(__nvvm_fma_rp_f, "ffff", "")
338
BUILTIN(__nvvm_fma_rn_d, "dddd", "")
339
BUILTIN(__nvvm_fma_rz_d, "dddd", "")
340
BUILTIN(__nvvm_fma_rm_d, "dddd", "")
341
BUILTIN(__nvvm_fma_rp_d, "dddd", "")
342
 
343
// Rcp
344
 
345
BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
346
BUILTIN(__nvvm_rcp_rn_f, "ff", "")
347
BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
348
BUILTIN(__nvvm_rcp_rz_f, "ff", "")
349
BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
350
BUILTIN(__nvvm_rcp_rm_f, "ff", "")
351
BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
352
BUILTIN(__nvvm_rcp_rp_f, "ff", "")
353
 
354
BUILTIN(__nvvm_rcp_rn_d, "dd", "")
355
BUILTIN(__nvvm_rcp_rz_d, "dd", "")
356
BUILTIN(__nvvm_rcp_rm_d, "dd", "")
357
BUILTIN(__nvvm_rcp_rp_d, "dd", "")
358
 
359
BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
360
BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")
361
 
362
// Sqrt
363
 
364
BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
365
BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
366
BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
367
BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
368
BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
369
BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
370
BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
371
BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
372
BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
373
BUILTIN(__nvvm_sqrt_approx_f, "ff", "")
374
 
375
BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
376
BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
377
BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
378
BUILTIN(__nvvm_sqrt_rp_d, "dd", "")
379
 
380
// Rsqrt
381
 
382
BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
383
BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
384
BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")
385
 
386
// Add
387
 
388
BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
389
BUILTIN(__nvvm_add_rn_f, "fff", "")
390
BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
391
BUILTIN(__nvvm_add_rz_f, "fff", "")
392
BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
393
BUILTIN(__nvvm_add_rm_f, "fff", "")
394
BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
395
BUILTIN(__nvvm_add_rp_f, "fff", "")
396
 
397
BUILTIN(__nvvm_add_rn_d, "ddd", "")
398
BUILTIN(__nvvm_add_rz_d, "ddd", "")
399
BUILTIN(__nvvm_add_rm_d, "ddd", "")
400
BUILTIN(__nvvm_add_rp_d, "ddd", "")
401
 
402
// Convert
403
 
404
BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
405
BUILTIN(__nvvm_d2f_rn, "fd", "")
406
BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
407
BUILTIN(__nvvm_d2f_rz, "fd", "")
408
BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
409
BUILTIN(__nvvm_d2f_rm, "fd", "")
410
BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
411
BUILTIN(__nvvm_d2f_rp, "fd", "")
412
 
413
BUILTIN(__nvvm_d2i_rn, "id", "")
414
BUILTIN(__nvvm_d2i_rz, "id", "")
415
BUILTIN(__nvvm_d2i_rm, "id", "")
416
BUILTIN(__nvvm_d2i_rp, "id", "")
417
 
418
BUILTIN(__nvvm_d2ui_rn, "Uid", "")
419
BUILTIN(__nvvm_d2ui_rz, "Uid", "")
420
BUILTIN(__nvvm_d2ui_rm, "Uid", "")
421
BUILTIN(__nvvm_d2ui_rp, "Uid", "")
422
 
423
BUILTIN(__nvvm_i2d_rn, "di", "")
424
BUILTIN(__nvvm_i2d_rz, "di", "")
425
BUILTIN(__nvvm_i2d_rm, "di", "")
426
BUILTIN(__nvvm_i2d_rp, "di", "")
427
 
428
BUILTIN(__nvvm_ui2d_rn, "dUi", "")
429
BUILTIN(__nvvm_ui2d_rz, "dUi", "")
430
BUILTIN(__nvvm_ui2d_rm, "dUi", "")
431
BUILTIN(__nvvm_ui2d_rp, "dUi", "")
432
 
433
BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
434
BUILTIN(__nvvm_f2i_rn, "if", "")
435
BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
436
BUILTIN(__nvvm_f2i_rz, "if", "")
437
BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
438
BUILTIN(__nvvm_f2i_rm, "if", "")
439
BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
440
BUILTIN(__nvvm_f2i_rp, "if", "")
441
 
442
BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
443
BUILTIN(__nvvm_f2ui_rn, "Uif", "")
444
BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
445
BUILTIN(__nvvm_f2ui_rz, "Uif", "")
446
BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
447
BUILTIN(__nvvm_f2ui_rm, "Uif", "")
448
BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
449
BUILTIN(__nvvm_f2ui_rp, "Uif", "")
450
 
451
BUILTIN(__nvvm_i2f_rn, "fi", "")
452
BUILTIN(__nvvm_i2f_rz, "fi", "")
453
BUILTIN(__nvvm_i2f_rm, "fi", "")
454
BUILTIN(__nvvm_i2f_rp, "fi", "")
455
 
456
BUILTIN(__nvvm_ui2f_rn, "fUi", "")
457
BUILTIN(__nvvm_ui2f_rz, "fUi", "")
458
BUILTIN(__nvvm_ui2f_rm, "fUi", "")
459
BUILTIN(__nvvm_ui2f_rp, "fUi", "")
460
 
461
BUILTIN(__nvvm_lohi_i2d, "dii", "")
462
 
463
BUILTIN(__nvvm_d2i_lo, "id", "")
464
BUILTIN(__nvvm_d2i_hi, "id", "")
465
 
466
BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
467
BUILTIN(__nvvm_f2ll_rn, "LLif", "")
468
BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
469
BUILTIN(__nvvm_f2ll_rz, "LLif", "")
470
BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
471
BUILTIN(__nvvm_f2ll_rm, "LLif", "")
472
BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
473
BUILTIN(__nvvm_f2ll_rp, "LLif", "")
474
 
475
BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
476
BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
477
BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
478
BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
479
BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
480
BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
481
BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
482
BUILTIN(__nvvm_f2ull_rp, "ULLif", "")
483
 
484
BUILTIN(__nvvm_d2ll_rn, "LLid", "")
485
BUILTIN(__nvvm_d2ll_rz, "LLid", "")
486
BUILTIN(__nvvm_d2ll_rm, "LLid", "")
487
BUILTIN(__nvvm_d2ll_rp, "LLid", "")
488
 
489
BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
490
BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
491
BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
492
BUILTIN(__nvvm_d2ull_rp, "ULLid", "")
493
 
494
BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
495
BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
496
BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
497
BUILTIN(__nvvm_ll2f_rp, "fLLi", "")
498
 
499
BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
500
BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
501
BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
502
BUILTIN(__nvvm_ull2f_rp, "fULLi", "")
503
 
504
BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
505
BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
506
BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
507
BUILTIN(__nvvm_ll2d_rp, "dLLi", "")
508
 
509
BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
510
BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
511
BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
512
BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
513
 
514
BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
515
BUILTIN(__nvvm_f2h_rn, "Usf", "")
516
 
517
TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "ZUiff", "", AND(SM_80,PTX70))
518
TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "ZUiff", "", AND(SM_80,PTX70))
519
TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "ZUiff", "", AND(SM_80,PTX70))
520
TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "ZUiff", "", AND(SM_80,PTX70))
521
 
522
TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
523
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
524
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
525
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))
526
 
527
TARGET_BUILTIN(__nvvm_f2bf16_rn, "ZUsf", "", AND(SM_80,PTX70))
528
TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "ZUsf", "", AND(SM_80,PTX70))
529
TARGET_BUILTIN(__nvvm_f2bf16_rz, "ZUsf", "", AND(SM_80,PTX70))
530
TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "ZUsf", "", AND(SM_80,PTX70))
531
 
532
TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))
533
 
534
// Bitcast
535
 
536
BUILTIN(__nvvm_bitcast_f2i, "if", "")
537
BUILTIN(__nvvm_bitcast_i2f, "fi", "")
538
 
539
BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
540
BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
541
 
542
// FNS
543
TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
544
 
545
// Sync
546
 
547
BUILTIN(__syncthreads, "v", "")
548
BUILTIN(__nvvm_bar0_popc, "ii", "")
549
BUILTIN(__nvvm_bar0_and, "ii", "")
550
BUILTIN(__nvvm_bar0_or, "ii", "")
551
BUILTIN(__nvvm_bar_sync, "vi", "n")
552
TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
553
TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
554
TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)
555
 
556
// Shuffle
557
 
558
BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
559
BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
560
BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
561
BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
562
BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
563
BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
564
BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
565
BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")
566
 
567
TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
568
TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
569
TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
570
TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
571
TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
572
TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
573
TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
574
TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)
575
 
576
// Vote
577
BUILTIN(__nvvm_vote_all, "bb", "")
578
BUILTIN(__nvvm_vote_any, "bb", "")
579
BUILTIN(__nvvm_vote_uni, "bb", "")
580
BUILTIN(__nvvm_vote_ballot, "Uib", "")
581
 
582
TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
583
TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
584
TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
585
TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)
586
 
587
// Match
588
TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
589
TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
590
// These return a pair {value, predicate}, which requires custom lowering.
591
TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
592
TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))
593
 
594
// Redux
595
TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
596
TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
597
TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
598
TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
599
TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
600
TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
601
TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
602
TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))
603
 
604
// Membar
605
 
606
BUILTIN(__nvvm_membar_cta, "v", "")
607
BUILTIN(__nvvm_membar_gl, "v", "")
608
BUILTIN(__nvvm_membar_sys, "v", "")
609
 
610
// mbarrier
611
 
612
TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
613
TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))
614
 
615
TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
616
TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))
617
 
618
TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
619
TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
620
TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
621
TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
622
 
623
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
624
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
625
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
626
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))
627
 
628
TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
629
TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))
630
 
631
TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))
632
 
633
// Memcpy, Memset
634
 
635
BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
636
BUILTIN(__nvvm_memset, "vUc*Uczi","")
637
 
638
// Image
639
 
640
BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
641
BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
642
BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
643
BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")
644
 
645
BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
646
BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
647
BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
648
BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")
649
 
650
BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
651
BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
652
BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
653
BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
654
BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
655
BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
656
BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
657
BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")
658
 
659
// Atomic
660
//
661
// We need the atom intrinsics because
662
// - they are used in converging analysis
663
// - they are used in address space analysis and optimization
664
// So it does not hurt to expose them as builtins.
665
//
666
BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
667
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
668
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
669
BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
670
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
671
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
672
BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
673
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
674
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
675
BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
676
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
677
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
678
TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
679
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
680
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)
681
 
682
BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
683
BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
684
BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")
685
 
686
BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
687
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
688
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
689
BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
690
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
691
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
692
BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
693
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
694
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
695
 
696
BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
697
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
698
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
699
BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
700
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
701
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
702
BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
703
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
704
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
705
BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
706
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
707
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
708
BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
709
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
710
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
711
BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
712
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
713
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
714
 
715
BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
716
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
717
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
718
BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
719
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
720
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
721
BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
722
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
723
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
724
BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
725
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
726
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
727
BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
728
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
729
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
730
BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
731
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
732
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
733
 
734
BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n")
735
TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
736
TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60)
737
BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n")
738
TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
739
TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60)
740
 
741
BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n")
742
TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60)
743
TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60)
744
BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n")
745
TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60)
746
TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60)
747
BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n")
748
TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
749
TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60)
750
 
751
BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n")
752
TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60)
753
TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60)
754
BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n")
755
TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60)
756
TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60)
757
BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n")
758
TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
759
TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60)
760
 
761
BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n")
762
TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60)
763
TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60)
764
BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n")
765
TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60)
766
TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60)
767
BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n")
768
TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
769
TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60)
770
 
771
BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n")
772
TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60)
773
TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60)
774
BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n")
775
TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
776
TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60)
777
BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n")
778
TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
779
TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60)
780
 
781
// Compiler error and warning builtins.
// Both entries share the signature "vcC*4": in the Builtins.def encoding this
// is a void return and one pointer-to-const-char parameter qualified with
// address space 4. Neither entry has a feature requirement.
782
BUILTIN(__nvvm_compiler_error, "vcC*4", "n")
783
BUILTIN(__nvvm_compiler_warn, "vcC*4", "n")
784
 
785
// __ldg.  This is not implemented as a builtin by nvcc.
// Scalar char/short/int/long/long long forms. Each entry takes a pointer to
// const T and returns T (e.g. "iiC*"), with an empty attribute string.
786
BUILTIN(__nvvm_ldg_c, "ccC*", "")
787
BUILTIN(__nvvm_ldg_s, "ssC*", "")
788
BUILTIN(__nvvm_ldg_i, "iiC*", "")
789
BUILTIN(__nvvm_ldg_l, "LiLiC*", "")
790
BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "")
791
 
792
// Unsigned scalar forms of __nvvm_ldg (the "U" prefix in each signature):
// pointer to const T in, T out.
BUILTIN(__nvvm_ldg_uc, "UcUcC*", "")
793
BUILTIN(__nvvm_ldg_us, "UsUsC*", "")
794
BUILTIN(__nvvm_ldg_ui, "UiUiC*", "")
795
BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "")
796
BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "")
797
 
798
// float and double scalar forms of __nvvm_ldg: pointer to const T in, T out.
BUILTIN(__nvvm_ldg_f, "ffC*", "")
799
BUILTIN(__nvvm_ldg_d, "ddC*", "")
800
 
801
// Vector forms of __nvvm_ldg for char/short/int/long long elements. "E2"/"E4"
// in the Builtins.def encoding denote 2- and 4-element ext-vector types.
// long long is listed only as a 2-element vector in this group.
BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "")
802
BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "")
803
BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "")
804
BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "")
805
BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "")
806
BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "")
807
BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "")
808
 
809
// Vector forms of __nvvm_ldg for unsigned element types ("E2"/"E4" = 2- and
// 4-element ext-vector in the Builtins.def encoding).
// unsigned long long is listed only as a 2-element vector in this group.
BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "")
810
BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "")
811
BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "")
812
BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "")
813
BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "")
814
BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "")
815
BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "")
816
 
817
// Vector forms of __nvvm_ldg for float (2 and 4 elements) and double
// (2 elements only in this group).
BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
818
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
819
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
820
 
821
// Address space predicates.
// One entry per space named in the suffix (const, global, local, shared).
// All share the signature "bvC*" (bool return, pointer-to-const-void
// parameter) and the attribute string "nc" (nothrow + const in Builtins.def).
822
BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
823
BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
824
BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
825
BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
826
 
827
// Builtins to support WMMA instructions on sm_70
// m16n16k16 load (ld_a/ld_b/ld_c_*) and store (st_c_*) entries, each gated on
// AND(SM_70,PTX60).
// NOTE(review): AND is defined outside this excerpt; presumably it requires
// both feature sets -- confirm.
// The _f16 entries use int pointers and the _f32 entries use float pointers;
// every entry ends with an unsigned int and a constant-integer ("Ii")
// parameter.
828
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
829
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
830
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
831
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
832
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
833
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
834
 
835
// m32n8k16 load/store entries. Same signatures as the m16n16k16 load/store
// entries, but gated on AND(SM_70,PTX61) rather than PTX60.
TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
836
TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
837
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
838
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
839
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
840
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
841
 
842
// m8n32k16 load/store entries. Same signatures as the m16n16k16 load/store
// entries, but gated on AND(SM_70,PTX61) rather than PTX60.
TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
843
TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
844
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
845
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
846
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
847
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
848
 
849
// m16n16k16 mma entries, gated on AND(SM_70,PTX60).
// The two-part suffix tracks two pointer element types in the signature: the
// first part matches the leading non-const pointer and the second part the
// last const pointer (f32 -> float, f16 -> int). The two middle const
// pointers are always int, and every entry ends with two constant-integer
// ("Ii") parameters.
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
850
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
851
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
852
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
853
 
854
// m32n8k16 mma entries, gated on AND(SM_70,PTX61). Signatures match the
// m16n16k16 mma entries for the same suffix.
TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
855
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
856
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
857
TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
858
 
859
// m8n32k16 mma entries, gated on AND(SM_70,PTX61). Signatures match the
// m16n16k16 mma entries for the same suffix.
TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
860
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
861
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
862
TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
863
 
864
// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75
// Feature gating in this section:
//   - __imma_{m16n16k16,m32n8k16,m8n32k16}_* (s8/u8): AND(SM_72,PTX63)
//   - __imma_m8n8k32_* (s4/u4) and __bmma_m8n8k128_* (b1): AND(SM_75,PTX63)
//   - exception: __bmma_m8n8k128_mma_and_popc_b1 requires AND(SM_80,PTX71)
// All pointer operands are int. The __bmma mma entries take one trailing
// constant-integer ("Ii") parameter; the __imma mma entries take two.
865
TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
866
TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63))
867
TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
868
TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71))
869
TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63))
870
TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
871
TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
872
TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
873
TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
874
TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
875
TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
876
TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
877
TARGET_BUILTIN(__imma_m16n16k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
878
TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
879
TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
880
TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
881
TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
882
TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
883
TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
884
TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
885
TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
886
TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
887
TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
888
TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
889
TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
890
TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63))
891
TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63))
892
TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
893
TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63))
894
TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63))
895
TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
896
TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
897
TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
898
TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63))
899
TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63))
900
TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
901
TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63))
902
TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63))
903
 
904
// Builtins to support double and alternate float WMMA instructions on sm_80
// __dmma_m8n8k4_*: every pointer operand is double. All entries are gated on
// AND(SM_80,PTX70).
905
TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70))
906
TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70))
907
TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70))
908
TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70))
909
TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70))
910
 
911
// __mma_bf16_* for the m16n16k16, m8n32k16 and m32n8k16 shapes, gated on
// AND(SM_80,PTX70). Only ld_a, ld_b and mma_f32 are listed per shape; this
// group has no bf16-specific ld_c/st_c entries. The ld_a/ld_b entries use int
// pointers, and mma_f32 takes a float destination and a float const pointer
// as its last pointer operand.
TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
912
TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
913
TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
914
TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
915
TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
916
TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
917
TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
918
TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
919
TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
920
 
921
// m16n16k8 tf32 entries, gated on AND(SM_80,PTX70).
// Note the store entry is named __mma_m16n16k8_st_c_f32, without "tf32",
// unlike the other entries in this group.
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70))
922
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70))
923
TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70))
924
TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70))
925
TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70))
926
 
927
// Async Copy
// mbarrier-arrive entries, gated on AND(SM_80,PTX70). The _shared variants
// take their pointer in address space 3 ("*3"); the others take an
// unqualified pointer.
928
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70))
929
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70))
930
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", "", AND(SM_80,PTX70))
931
TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70))
932
 
933
// cp_async copy entries, gated on AND(SM_80,PTX70). All share "vv*3vC*1": a
// void pointer in address space 3 followed by a pointer-to-const-void in
// address space 1. The "ca" form is listed with suffixes 4, 8 and 16; the
// "cg" form only with 16.
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1", "", AND(SM_80,PTX70))
934
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1", "", AND(SM_80,PTX70))
935
TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
936
TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1", "", AND(SM_80,PTX70))
937
 
938
// cp_async group entries, gated on AND(SM_80,PTX70). commit_group and
// wait_all take no arguments; wait_group takes one constant-integer ("Ii")
// argument.
TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70))
939
TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70))
940
TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
941
 
942
 
943
// bf16, bf16x2 abs, neg
// Gated on AND(SM_80,PTX70). The scalar entries take and return unsigned
// short ("UsUs"); the x2 entries take and return "ZUi".
// NOTE(review): "ZUi" should be a 32-bit unsigned int per the Builtins.def
// encoding -- confirm.
944
TARGET_BUILTIN(__nvvm_abs_bf16, "UsUs", "", AND(SM_80,PTX70))
945
TARGET_BUILTIN(__nvvm_abs_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
946
TARGET_BUILTIN(__nvvm_neg_bf16, "UsUs", "", AND(SM_80,PTX70))
947
TARGET_BUILTIN(__nvvm_neg_bf16x2, "ZUiZUi", "", AND(SM_80,PTX70))
948
 
949
// End-of-file cleanup. First drop the BUILTIN / TARGET_BUILTIN definitions
// supplied by (or derived for) the includer.
#undef BUILTIN
950
#undef TARGET_BUILTIN
// Then restore the helper macros saved with #pragma push_macro near the top
// of the file. The pop order here differs from the push order; each macro
// name has its own save stack, so the order does not matter.
// NOTE(review): the push_macro lines for AND and PTX76..PTX78 are outside
// this excerpt -- confirm each pop below has a matching push.
951
#pragma pop_macro("AND")
952
#pragma pop_macro("SM_53")
953
#pragma pop_macro("SM_60")
954
#pragma pop_macro("SM_70")
955
#pragma pop_macro("SM_72")
956
#pragma pop_macro("SM_75")
957
#pragma pop_macro("SM_80")
958
#pragma pop_macro("SM_86")
959
#pragma pop_macro("SM_87")
960
#pragma pop_macro("SM_89")
961
#pragma pop_macro("SM_90")
962
#pragma pop_macro("PTX42")
963
#pragma pop_macro("PTX60")
964
#pragma pop_macro("PTX61")
965
#pragma pop_macro("PTX63")
966
#pragma pop_macro("PTX64")
967
#pragma pop_macro("PTX65")
968
#pragma pop_macro("PTX70")
969
#pragma pop_macro("PTX71")
970
#pragma pop_macro("PTX72")
971
#pragma pop_macro("PTX73")
972
#pragma pop_macro("PTX74")
973
#pragma pop_macro("PTX75")
974
#pragma pop_macro("PTX76")
975
#pragma pop_macro("PTX77")
976
#pragma pop_macro("PTX78")