Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | /// |
||
9 | /// \file |
||
10 | /// \brief Provides definitions for Target specific Grid Values |
||
11 | /// |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
||
15 | #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
||
16 | |||
17 | namespace llvm { |
||
18 | |||
19 | namespace omp { |
||
20 | |||
21 | /// \brief Defines various target-specific GPU grid values that must be |
||
22 | /// consistent between host RTL (plugin), device RTL, and clang. |
||
23 | /// We can change grid values for a "fat" binary so that different |
||
24 | /// passes get the correct values when generating code for a |
||
25 | /// multi-target binary. Both amdgcn and nvptx values are stored in |
||
26 | /// this file. In the future, should there be differences between GPUs |
||
27 | /// of the same architecture, then simply make a different array and |
||
28 | /// use the new array name. |
||
29 | /// |
||
30 | /// Example usage in clang: |
||
31 | /// const unsigned slot_size = |
||
32 | /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; |
||
33 | /// |
||
34 | /// Example usage in libomptarget/deviceRTLs: |
||
35 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
||
36 | /// #ifdef __AMDGPU__ |
||
37 | /// #define GRIDVAL AMDGPUGridValues |
||
38 | /// #else |
||
39 | /// #define GRIDVAL NVPTXGridValues |
||
40 | /// #endif |
||
41 | /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. |
||
42 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
||
43 | /// |
||
44 | /// Example usage in libomptarget hsa plugin: |
||
45 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
||
46 | /// #define GRIDVAL AMDGPUGridValues |
||
47 | /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. |
||
48 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
||
49 | /// |
||
50 | /// Example usage in libomptarget cuda plugin: |
||
51 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
||
52 | /// #define GRIDVAL NVPTXGridValues |
||
53 | /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. |
||
54 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
||
55 | /// |
||
56 | |||
57 | struct GV { |
||
58 | /// The size reserved for data in a shared memory slot. |
||
59 | unsigned GV_Slot_Size; |
||
60 | /// The default value of maximum number of threads in a worker warp. |
||
61 | unsigned GV_Warp_Size; |
||
62 | |||
63 | constexpr unsigned warpSlotSize() const { |
||
64 | return GV_Warp_Size * GV_Slot_Size; |
||
65 | } |
||
66 | |||
67 | /// the maximum number of teams. |
||
68 | unsigned GV_Max_Teams; |
||
69 | // The default number of teams in the absence of any other information. |
||
70 | unsigned GV_Default_Num_Teams; |
||
71 | |||
72 | // An alternative to the heavy data sharing infrastructure that uses global |
||
73 | // memory is one that uses device __shared__ memory. The amount of such space |
||
74 | // (in bytes) reserved by the OpenMP runtime is noted here. |
||
75 | unsigned GV_SimpleBufferSize; |
||
76 | // The absolute maximum team size for a working group |
||
77 | unsigned GV_Max_WG_Size; |
||
78 | // The default maximum team size for a working group |
||
79 | unsigned GV_Default_WG_Size; |
||
80 | |||
81 | constexpr unsigned maxWarpNumber() const { |
||
82 | return GV_Max_WG_Size / GV_Warp_Size; |
||
83 | } |
||
84 | }; |
||
85 | |||
86 | /// For AMDGPU GPUs |
||
87 | static constexpr GV AMDGPUGridValues64 = { |
||
88 | 256, // GV_Slot_Size |
||
89 | 64, // GV_Warp_Size |
||
90 | (1 << 16), // GV_Max_Teams |
||
91 | 440, // GV_Default_Num_Teams |
||
92 | 896, // GV_SimpleBufferSize |
||
93 | 1024, // GV_Max_WG_Size, |
||
94 | 256, // GV_Default_WG_Size |
||
95 | }; |
||
96 | |||
97 | static constexpr GV AMDGPUGridValues32 = { |
||
98 | 256, // GV_Slot_Size |
||
99 | 32, // GV_Warp_Size |
||
100 | (1 << 16), // GV_Max_Teams |
||
101 | 440, // GV_Default_Num_Teams |
||
102 | 896, // GV_SimpleBufferSize |
||
103 | 1024, // GV_Max_WG_Size, |
||
104 | 256, // GV_Default_WG_Size |
||
105 | }; |
||
106 | |||
107 | template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { |
||
108 | static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize"); |
||
109 | return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; |
||
110 | } |
||
111 | |||
112 | /// For Nvidia GPUs |
||
113 | static constexpr GV NVPTXGridValues = { |
||
114 | 256, // GV_Slot_Size |
||
115 | 32, // GV_Warp_Size |
||
116 | (1 << 16), // GV_Max_Teams |
||
117 | 3200, // GV_Default_Num_Teams |
||
118 | 896, // GV_SimpleBufferSize |
||
119 | 1024, // GV_Max_WG_Size |
||
120 | 128, // GV_Default_WG_Size |
||
121 | }; |
||
122 | |||
123 | } // namespace omp |
||
124 | } // namespace llvm |
||
125 | |||
126 | #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |