Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | /// \file |
||
10 | /// AMDHSA kernel descriptor definitions. For more information, visit |
||
11 | /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor |
||
12 | // |
||
13 | //===----------------------------------------------------------------------===// |
||
14 | |||
15 | #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
||
16 | #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
||
17 | |||
18 | #include <cstddef> |
||
19 | #include <cstdint> |
||
20 | |||
// Fallback offsetof(TYPE, MEMBER): evaluates to the byte offset of MEMBER
// inside TYPE, converted to size_t. This definition only takes effect when no
// offsetof macro is already defined at this point in the translation unit.
#if !defined(offsetof)
#define offsetof(TYPE, MEMBER) ((size_t)(&((TYPE *)0)->MEMBER))
#endif // !defined(offsetof)
||
25 | |||
// Expands to three comma-separated enumerators that describe one bit field
// packed into an integer. Meant to be used inside an enum body.
//   NAME_SHIFT - the shift amount (SHIFT),
//   NAME_WIDTH - the field width in bits (WIDTH),
//   NAME       - a mask of WIDTH one-bits shifted left by SHIFT.
#if !defined(AMDHSA_BITS_ENUM_ENTRY)
#define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH)                             \
  NAME##_SHIFT = (SHIFT),                                                      \
  NAME##_WIDTH = (WIDTH),                                                      \
  NAME = (((1 << (WIDTH)) - 1) << (SHIFT))
#endif // !defined(AMDHSA_BITS_ENUM_ENTRY)
||
34 | |||
// Extracts a bit field from an integer.
//   SRC - expression holding the packed value; it is parenthesized in the
//         expansion so compound expressions such as `a | b` are masked as a
//         whole.
//   MSK - name of the field's mask enumerator. A matching MSK_SHIFT enumerator
//         must exist, because the shift amount is formed by token pasting
//         (MSK ## _SHIFT).
// Evaluates to (SRC & MSK) shifted right by MSK_SHIFT.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK##_SHIFT)
#endif // AMDHSA_BITS_GET
||
39 | |||
// Stores a value into a bit field of an integer lvalue.
//   DST - lvalue holding the packed value; it is named twice in the expansion,
//         so it should be free of side effects.
//   MSK - name of the field's mask enumerator. A matching MSK_SHIFT enumerator
//         must exist, because the shift amount is formed by token pasting
//         (MSK ## _SHIFT).
//   VAL - value to store; it is shifted left by MSK_SHIFT and then masked with
//         MSK, so bits that do not fit in the field are dropped.
// The expansion is wrapped in do { } while (0) so that it behaves as a single
// statement (e.g. as the body of an unbraced `if`), and every argument is
// parenthesized so compound expressions are handled as a whole.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    (DST) &= ~(MSK);                                                           \
    (DST) |= (((VAL) << MSK##_SHIFT) & (MSK));                                 \
  } while (0)
#endif // AMDHSA_BITS_SET
||
46 | |||
47 | namespace llvm { |
||
48 | namespace amdhsa { |
||
49 | |||
// Encodings of the floating point rounding modes. Must match hardware
// definition.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN      = 0x0,
  FLOAT_ROUND_MODE_PLUS_INFINITY  = 0x1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 0x2,
  FLOAT_ROUND_MODE_ZERO           = 0x3,
};
||
57 | |||
// Encodings of the floating point denorm modes. Must match hardware
// definition.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0x0,
  FLOAT_DENORM_MODE_FLUSH_DST     = 0x1,
  FLOAT_DENORM_MODE_FLUSH_SRC     = 0x2,
  FLOAT_DENORM_MODE_FLUSH_NONE    = 0x3,
};
||
65 | |||
// Encodings of the system VGPR workitem ID selection. Must match hardware
// definition.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X         = 0x0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y       = 0x1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     = 0x2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 0x3,
};
||
73 | |||
74 | // Compute program resource register 1. Must match hardware definition. |
||
75 | #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \ |
||
76 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH) |
||
77 | enum : int32_t { |
||
78 | COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), |
||
79 | COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), |
||
80 | COMPUTE_PGM_RSRC1(PRIORITY, 10, 2), |
||
81 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2), |
||
82 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2), |
||
83 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2), |
||
84 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2), |
||
85 | COMPUTE_PGM_RSRC1(PRIV, 20, 1), |
||
86 | COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1), |
||
87 | COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1), |
||
88 | COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1), |
||
89 | COMPUTE_PGM_RSRC1(BULKY, 24, 1), |
||
90 | COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1), |
||
91 | COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+ |
||
92 | COMPUTE_PGM_RSRC1(RESERVED0, 27, 2), |
||
93 | COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+ |
||
94 | COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+ |
||
95 | COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+ |
||
96 | }; |
||
97 | #undef COMPUTE_PGM_RSRC1 |
||
98 | |||
99 | // Compute program resource register 2. Must match hardware definition. |
||
100 | #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ |
||
101 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) |
||
102 | enum : int32_t { |
||
103 | COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), |
||
104 | COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), |
||
105 | COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1), |
||
106 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), |
||
107 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), |
||
108 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), |
||
109 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1), |
||
110 | COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2), |
||
111 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), |
||
112 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1), |
||
113 | COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9), |
||
114 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), |
||
115 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), |
||
116 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), |
||
117 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), |
||
118 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), |
||
119 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), |
||
120 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1), |
||
121 | COMPUTE_PGM_RSRC2(RESERVED0, 31, 1), |
||
122 | }; |
||
123 | #undef COMPUTE_PGM_RSRC2 |
||
124 | |||
125 | // Compute program resource register 3 for GFX90A+. Must match hardware |
||
126 | // definition. |
||
127 | #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \ |
||
128 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH) |
||
129 | enum : int32_t { |
||
130 | COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6), |
||
131 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10), |
||
132 | COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1), |
||
133 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15), |
||
134 | }; |
||
135 | #undef COMPUTE_PGM_RSRC3_GFX90A |
||
136 | |||
137 | // Compute program resource register 3 for GFX10+. Must match hardware |
||
138 | // definition. |
||
139 | #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ |
||
140 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) |
||
141 | enum : int32_t { |
||
142 | COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+ |
||
143 | COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+ |
||
144 | COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+ |
||
145 | COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+ |
||
146 | COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19), |
||
147 | COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+ |
||
148 | }; |
||
149 | #undef COMPUTE_PGM_RSRC3_GFX10_PLUS |
||
150 | |||
151 | // Kernel code properties. Must be kept backwards compatible. |
||
152 | #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \ |
||
153 | AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH) |
||
154 | enum : int32_t { |
||
155 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1), |
||
156 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1), |
||
157 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1), |
||
158 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1), |
||
159 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1), |
||
160 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), |
||
161 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), |
||
162 | KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), |
||
163 | KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ |
||
164 | KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1), |
||
165 | KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), |
||
166 | }; |
||
167 | #undef KERNEL_CODE_PROPERTY |
||
168 | |||
// Kernel descriptor. Must be kept backwards compatible.
//
// The object is 64 bytes in total. The comment on each field gives its byte
// offset from the start of the struct and its size in bytes; both are checked
// at compile time by the static_asserts that follow the *_OFFSET constants.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;     // offset  0,  4 bytes
  uint32_t private_segment_fixed_size;   // offset  4,  4 bytes
  uint32_t kernarg_size;                 // offset  8,  4 bytes
  uint8_t reserved0[4];                  // offset 12,  4 bytes
  int64_t kernel_code_entry_byte_offset; // offset 16,  8 bytes
  uint8_t reserved1[20];                 // offset 24, 20 bytes
  uint32_t compute_pgm_rsrc3;            // offset 44,  4 bytes; GFX10+ and
                                         // GFX90A+
  uint32_t compute_pgm_rsrc1;            // offset 48,  4 bytes
  uint32_t compute_pgm_rsrc2;            // offset 52,  4 bytes
  uint16_t kernel_code_properties;       // offset 56,  2 bytes
  uint8_t reserved2[6];                  // offset 58,  6 bytes
};
||
183 | |||
// Byte offsets of the kernel_descriptor_t fields, one constant per field in
// declaration order.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET      = 0,  // group_segment_fixed_size
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET    = 4,  // private_segment_fixed_size
  KERNARG_SIZE_OFFSET                  = 8,  // kernarg_size
  RESERVED0_OFFSET                     = 12, // reserved0
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, // kernel_code_entry_byte_offset
  RESERVED1_OFFSET                     = 24, // reserved1
  COMPUTE_PGM_RSRC3_OFFSET             = 44, // compute_pgm_rsrc3
  COMPUTE_PGM_RSRC1_OFFSET             = 48, // compute_pgm_rsrc1
  COMPUTE_PGM_RSRC2_OFFSET             = 52, // compute_pgm_rsrc2
  KERNEL_CODE_PROPERTIES_OFFSET        = 56, // kernel_code_properties
  RESERVED2_OFFSET                     = 58, // reserved2
};
||
197 | |||
198 | static_assert( |
||
199 | sizeof(kernel_descriptor_t) == 64, |
||
200 | "invalid size for kernel_descriptor_t"); |
||
201 | static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == |
||
202 | GROUP_SEGMENT_FIXED_SIZE_OFFSET, |
||
203 | "invalid offset for group_segment_fixed_size"); |
||
204 | static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == |
||
205 | PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, |
||
206 | "invalid offset for private_segment_fixed_size"); |
||
207 | static_assert(offsetof(kernel_descriptor_t, kernarg_size) == |
||
208 | KERNARG_SIZE_OFFSET, |
||
209 | "invalid offset for kernarg_size"); |
||
210 | static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, |
||
211 | "invalid offset for reserved0"); |
||
212 | static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == |
||
213 | KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, |
||
214 | "invalid offset for kernel_code_entry_byte_offset"); |
||
215 | static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, |
||
216 | "invalid offset for reserved1"); |
||
217 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == |
||
218 | COMPUTE_PGM_RSRC3_OFFSET, |
||
219 | "invalid offset for compute_pgm_rsrc3"); |
||
220 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == |
||
221 | COMPUTE_PGM_RSRC1_OFFSET, |
||
222 | "invalid offset for compute_pgm_rsrc1"); |
||
223 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == |
||
224 | COMPUTE_PGM_RSRC2_OFFSET, |
||
225 | "invalid offset for compute_pgm_rsrc2"); |
||
226 | static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == |
||
227 | KERNEL_CODE_PROPERTIES_OFFSET, |
||
228 | "invalid offset for kernel_code_properties"); |
||
229 | static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, |
||
230 | "invalid offset for reserved2"); |
||
231 | |||
232 | } // end namespace amdhsa |
||
233 | } // end namespace llvm |
||
234 | |||
235 | #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |