Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | /// \file |
||
| 10 | /// AMDHSA kernel descriptor definitions. For more information, visit |
||
| 11 | /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor |
||
| 12 | // |
||
| 13 | //===----------------------------------------------------------------------===// |
||
| 14 | |||
| 15 | #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
||
| 16 | #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |
||
| 17 | |||
| 18 | #include <cstddef> |
||
| 19 | #include <cstdint> |
||
| 20 | |||
| 21 | // Gets the byte offset of the specified member in the specified type. Fallback only: it takes effect solely when no offsetof macro is defined yet, and it derives the offset from the member's address inside a TYPE located at address 0. NOTE(review): <cstddef> is included earlier in this header and is expected to define offsetof already, so this definition is presumably never selected - confirm for all supported toolchains. |
||
| 22 | #ifndef offsetof |
||
| 23 | #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER) |
||
| 24 | #endif // offsetof |
||
| 25 | |||
| 26 | // Creates enumeration entries used for packing bits into integers. For a given NAME the macro emits three enumerators: NAME_SHIFT (bit shift amount), NAME_WIDTH (bit width), and NAME (bit mask: WIDTH one-bits moved left by SHIFT). |
||
| 27 | // It is meant to be used inside an enum body; the expansion has no trailing comma. The mask is computed from the int literal 1, so WIDTH has to be smaller than the bit width of int. |
||
| 28 | #ifndef AMDHSA_BITS_ENUM_ENTRY |
||
| 29 | #define AMDHSA_BITS_ENUM_ENTRY(NAME, SHIFT, WIDTH) \ |
||
| 30 | NAME ## _SHIFT = (SHIFT), \ |
||
| 31 | NAME ## _WIDTH = (WIDTH), \ |
||
| 32 | NAME = (((1 << (WIDTH)) - 1) << (SHIFT)) |
||
| 33 | #endif // AMDHSA_BITS_ENUM_ENTRY |
||
| 34 | |||
// Gets bits for specified bit mask from specified source.
//
// SRC may be any integer expression; it is parenthesized so that operators
// with lower precedence than '&' (for example '|') inside the argument are
// applied before masking. MSK must be the (optionally qualified) name of a
// mask enumerator that has a matching <MSK>_SHIFT enumerator, because the
// name is token-pasted with _SHIFT. The result is the masked field moved
// down so that its lowest bit is bit 0.
#ifndef AMDHSA_BITS_GET
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
||
| 39 | |||
// Sets bits for specified bit mask in specified destination.
//
// Clears the field selected by MSK in DST and then stores VAL into it; bits
// of VAL that do not fit into the field are dropped by the final mask.
// DST must be a modifiable integer lvalue and is evaluated twice. MSK must
// be the (optionally qualified) name of a mask enumerator that has a
// matching <MSK>_SHIFT enumerator, because the name is token-pasted with
// _SHIFT. VAL is parenthesized so that an argument such as 'a | b' is
// shifted as a whole.
//
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement and stays correct under an unbraced if/else.
#ifndef AMDHSA_BITS_SET
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    (DST) &= ~(MSK);                                                           \
    (DST) |= (((VAL) << MSK ## _SHIFT) & (MSK));                               \
  } while (0)
#endif // AMDHSA_BITS_SET
||
| 46 | |||
| 47 | namespace llvm { |
||
| 48 | namespace amdhsa { |
||
| 49 | |||
| 50 | // Floating point rounding modes. Must match hardware definition. The values span 0-3; presumably these are the encodings written into the 2-bit COMPUTE_PGM_RSRC1 FLOAT_ROUND_MODE_32 and FLOAT_ROUND_MODE_16_64 fields (confirm against the AMDGPU usage documentation). |
||
| 51 | enum : uint8_t { |
||
| 52 | FLOAT_ROUND_MODE_NEAR_EVEN = 0, |
||
| 53 | FLOAT_ROUND_MODE_PLUS_INFINITY = 1, |
||
| 54 | FLOAT_ROUND_MODE_MINUS_INFINITY = 2, |
||
| 55 | FLOAT_ROUND_MODE_ZERO = 3, |
||
| 56 | }; |
||
| 57 | |||
| 58 | // Floating point denorm modes. Must match hardware definition. The values span 0-3; presumably these are the encodings written into the 2-bit COMPUTE_PGM_RSRC1 FLOAT_DENORM_MODE_32 and FLOAT_DENORM_MODE_16_64 fields (confirm against the AMDGPU usage documentation). |
||
| 59 | enum : uint8_t { |
||
| 60 | FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0, |
||
| 61 | FLOAT_DENORM_MODE_FLUSH_DST = 1, |
||
| 62 | FLOAT_DENORM_MODE_FLUSH_SRC = 2, |
||
| 63 | FLOAT_DENORM_MODE_FLUSH_NONE = 3, |
||
| 64 | }; |
||
| 65 | |||
| 66 | // System VGPR workitem IDs. Must match hardware definition. The values span 0-3; presumably these are the encodings written into the 2-bit COMPUTE_PGM_RSRC2 ENABLE_VGPR_WORKITEM_ID field (confirm against the AMDGPU usage documentation). |
||
| 67 | enum : uint8_t { |
||
| 68 | SYSTEM_VGPR_WORKITEM_ID_X = 0, |
||
| 69 | SYSTEM_VGPR_WORKITEM_ID_X_Y = 1, |
||
| 70 | SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2, |
||
| 71 | SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3, |
||
| 72 | }; |
||
| 73 | |||
| 74 | // Compute program resource register 1. Must match hardware definition. Each COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) line is forwarded to AMDHSA_BITS_ENUM_ENTRY and yields the enumerators COMPUTE_PGM_RSRC1_<NAME>_SHIFT, COMPUTE_PGM_RSRC1_<NAME>_WIDTH and the mask COMPUTE_PGM_RSRC1_<NAME>. The listed fields cover bits 0-31 without gaps or overlaps. The local helper macro is removed again with #undef right after the enum. |
||
| 75 | #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \ |
||
| 76 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH) |
||
| 77 | enum : int32_t { |
||
| 78 | COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), |
||
| 79 | COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), |
||
| 80 | COMPUTE_PGM_RSRC1(PRIORITY, 10, 2), |
||
| 81 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2), |
||
| 82 | COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2), |
||
| 83 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2), |
||
| 84 | COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2), |
||
| 85 | COMPUTE_PGM_RSRC1(PRIV, 20, 1), |
||
| 86 | COMPUTE_PGM_RSRC1(ENABLE_DX10_CLAMP, 21, 1), |
||
| 87 | COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1), |
||
| 88 | COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1), |
||
| 89 | COMPUTE_PGM_RSRC1(BULKY, 24, 1), |
||
| 90 | COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1), |
||
| 91 | COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+ |
||
| 92 | COMPUTE_PGM_RSRC1(RESERVED0, 27, 2), |
||
| 93 | COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+ |
||
| 94 | COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+ |
||
| 95 | COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+ |
||
| 96 | }; |
||
| 97 | #undef COMPUTE_PGM_RSRC1 |
||
| 98 | |||
| 99 | // Compute program resource register 2. Must match hardware definition. Each COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) line is forwarded to AMDHSA_BITS_ENUM_ENTRY and yields the enumerators COMPUTE_PGM_RSRC2_<NAME>_SHIFT, COMPUTE_PGM_RSRC2_<NAME>_WIDTH and the mask COMPUTE_PGM_RSRC2_<NAME>. The listed fields cover bits 0-31 without gaps or overlaps. The local helper macro is removed again with #undef right after the enum. |
||
| 100 | #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ |
||
| 101 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) |
||
| 102 | enum : int32_t { |
||
| 103 | COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), |
||
| 104 | COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), |
||
| 105 | COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1), |
||
| 106 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), |
||
| 107 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), |
||
| 108 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), |
||
| 109 | COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1), |
||
| 110 | COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2), |
||
| 111 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), |
||
| 112 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1), |
||
| 113 | COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9), |
||
| 114 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), |
||
| 115 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), |
||
| 116 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), |
||
| 117 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), |
||
| 118 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), |
||
| 119 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), |
||
| 120 | COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1), |
||
| 121 | COMPUTE_PGM_RSRC2(RESERVED0, 31, 1), |
||
| 122 | }; |
||
| 123 | #undef COMPUTE_PGM_RSRC2 |
||
| 124 | |||
| 125 | // Compute program resource register 3 for GFX90A+. Must match hardware |
||
| 126 | // definition. Each COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) line is forwarded to AMDHSA_BITS_ENUM_ENTRY and yields the enumerators COMPUTE_PGM_RSRC3_GFX90A_<NAME>_SHIFT, COMPUTE_PGM_RSRC3_GFX90A_<NAME>_WIDTH and the mask COMPUTE_PGM_RSRC3_GFX90A_<NAME>. The listed fields cover bits 0-31 without gaps or overlaps. The local helper macro is removed again with #undef right after the enum. |
||
| 127 | #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \ |
||
| 128 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH) |
||
| 129 | enum : int32_t { |
||
| 130 | COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6), |
||
| 131 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10), |
||
| 132 | COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1), |
||
| 133 | COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15), |
||
| 134 | }; |
||
| 135 | #undef COMPUTE_PGM_RSRC3_GFX90A |
||
| 136 | |||
| 137 | // Compute program resource register 3 for GFX10+. Must match hardware |
||
| 138 | // definition. Each COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) line is forwarded to AMDHSA_BITS_ENUM_ENTRY and yields the enumerators COMPUTE_PGM_RSRC3_GFX10_PLUS_<NAME>_SHIFT, COMPUTE_PGM_RSRC3_GFX10_PLUS_<NAME>_WIDTH and the mask COMPUTE_PGM_RSRC3_GFX10_PLUS_<NAME>. The listed fields cover bits 0-31 without gaps or overlaps; the per-line notes name the first generation that defines each field. The local helper macro is removed again with #undef right after the enum. |
||
| 139 | #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ |
||
| 140 | AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) |
||
| 141 | enum : int32_t { |
||
| 142 | COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+ |
||
| 143 | COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+ |
||
| 144 | COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+ |
||
| 145 | COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+ |
||
| 146 | COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19), |
||
| 147 | COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+ |
||
| 148 | }; |
||
| 149 | #undef COMPUTE_PGM_RSRC3_GFX10_PLUS |
||
| 150 | |||
| 151 | // Kernel code properties. Must be kept backwards compatible. Each KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) line is forwarded to AMDHSA_BITS_ENUM_ENTRY and yields the enumerators KERNEL_CODE_PROPERTY_<NAME>_SHIFT, KERNEL_CODE_PROPERTY_<NAME>_WIDTH and the mask KERNEL_CODE_PROPERTY_<NAME>. The listed fields cover bits 0-15 without gaps or overlaps; presumably they describe the 16-bit kernel_code_properties member of kernel_descriptor_t (confirm against the AMDGPU usage documentation). The local helper macro is removed again with #undef right after the enum. |
||
| 152 | #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \ |
||
| 153 | AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH) |
||
| 154 | enum : int32_t { |
||
| 155 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1), |
||
| 156 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1), |
||
| 157 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1), |
||
| 158 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1), |
||
| 159 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1), |
||
| 160 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), |
||
| 161 | KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), |
||
| 162 | KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), |
||
| 163 | KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ |
||
| 164 | KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1), |
||
| 165 | KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), |
||
| 166 | }; |
||
| 167 | #undef KERNEL_CODE_PROPERTY |
||
| 168 | |||
| 169 | // Kernel descriptor. Must be kept backwards compatible. The layout is pinned at compile time: the *_OFFSET enum that follows the struct records the byte offset of every member, and the static_asserts after it check that sizeof(kernel_descriptor_t) is 64 and that each member's offsetof equals its *_OFFSET value, so any accidental layout change fails to compile. |
||
| 170 | struct kernel_descriptor_t { |
||
| 171 | uint32_t group_segment_fixed_size; |
||
| 172 | uint32_t private_segment_fixed_size; |
||
| 173 | uint32_t kernarg_size; |
||
| 174 | uint8_t reserved0[4]; |
||
| 175 | int64_t kernel_code_entry_byte_offset; |
||
| 176 | uint8_t reserved1[20]; |
||
| 177 | uint32_t compute_pgm_rsrc3; // GFX10+ and GFX90A+ |
||
| 178 | uint32_t compute_pgm_rsrc1; |
||
| 179 | uint32_t compute_pgm_rsrc2; |
||
| 180 | uint16_t kernel_code_properties; |
||
| 181 | uint8_t reserved2[6]; |
||
| 182 | }; |
||
| 183 | |||
| 184 | enum : uint32_t { |
||
| 185 | GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, |
||
| 186 | PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, |
||
| 187 | KERNARG_SIZE_OFFSET = 8, |
||
| 188 | RESERVED0_OFFSET = 12, |
||
| 189 | KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, |
||
| 190 | RESERVED1_OFFSET = 24, |
||
| 191 | COMPUTE_PGM_RSRC3_OFFSET = 44, |
||
| 192 | COMPUTE_PGM_RSRC1_OFFSET = 48, |
||
| 193 | COMPUTE_PGM_RSRC2_OFFSET = 52, |
||
| 194 | KERNEL_CODE_PROPERTIES_OFFSET = 56, |
||
| 195 | RESERVED2_OFFSET = 58, |
||
| 196 | }; |
||
| 197 | |||
| 198 | static_assert( |
||
| 199 | sizeof(kernel_descriptor_t) == 64, |
||
| 200 | "invalid size for kernel_descriptor_t"); |
||
| 201 | static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == |
||
| 202 | GROUP_SEGMENT_FIXED_SIZE_OFFSET, |
||
| 203 | "invalid offset for group_segment_fixed_size"); |
||
| 204 | static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == |
||
| 205 | PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, |
||
| 206 | "invalid offset for private_segment_fixed_size"); |
||
| 207 | static_assert(offsetof(kernel_descriptor_t, kernarg_size) == |
||
| 208 | KERNARG_SIZE_OFFSET, |
||
| 209 | "invalid offset for kernarg_size"); |
||
| 210 | static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, |
||
| 211 | "invalid offset for reserved0"); |
||
| 212 | static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == |
||
| 213 | KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, |
||
| 214 | "invalid offset for kernel_code_entry_byte_offset"); |
||
| 215 | static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, |
||
| 216 | "invalid offset for reserved1"); |
||
| 217 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == |
||
| 218 | COMPUTE_PGM_RSRC3_OFFSET, |
||
| 219 | "invalid offset for compute_pgm_rsrc3"); |
||
| 220 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == |
||
| 221 | COMPUTE_PGM_RSRC1_OFFSET, |
||
| 222 | "invalid offset for compute_pgm_rsrc1"); |
||
| 223 | static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == |
||
| 224 | COMPUTE_PGM_RSRC2_OFFSET, |
||
| 225 | "invalid offset for compute_pgm_rsrc2"); |
||
| 226 | static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == |
||
| 227 | KERNEL_CODE_PROPERTIES_OFFSET, |
||
| 228 | "invalid offset for kernel_code_properties"); |
||
| 229 | static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, |
||
| 230 | "invalid offset for reserved2"); |
||
| 231 | |||
| 232 | } // end namespace amdhsa |
||
| 233 | } // end namespace llvm |
||
| 234 | |||
| 235 | #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H |