Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// \brief Provides definitions for Target specific Grid Values
  11. ///
  12. //===----------------------------------------------------------------------===//
  13.  
  14. #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  15. #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  16.  
  17. namespace llvm {
  18.  
  19. namespace omp {
  20.  
  21. /// \brief Defines various target-specific GPU grid values that must be
  22. ///        consistent between host RTL (plugin), device RTL, and clang.
  23. ///        We can change grid values for a "fat" binary so that different
  24. ///        passes get the correct values when generating code for a
  25. ///        multi-target binary. Both amdgcn and nvptx values are stored in
  26. ///        this file. In the future, should there be differences between GPUs
  27. ///        of the same architecture, then simply make a different array and
  28. ///        use the new array name.
  29. ///
  30. /// Example usage in clang:
  31. ///   const unsigned slot_size =
  32. ///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
  33. ///
  34. /// Example usage in libomptarget/deviceRTLs:
  35. ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  36. ///   #ifdef __AMDGPU__
  37. ///     #define GRIDVAL AMDGPUGridValues
  38. ///   #else
  39. ///     #define GRIDVAL NVPTXGridValues
  40. ///   #endif
  41. ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
  42. ///   llvm::omp::GRIDVAL().GV_Warp_Size
  43. ///
  44. /// Example usage in libomptarget hsa plugin:
  45. ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  46. ///   #define GRIDVAL AMDGPUGridValues
  47. ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
  48. ///   llvm::omp::GRIDVAL().GV_Warp_Size
  49. ///
  50. /// Example usage in libomptarget cuda plugin:
  51. ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
  52. ///    #define GRIDVAL NVPTXGridValues
  53. ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
  54. ///    llvm::omp::GRIDVAL().GV_Warp_Size
  55. ///
  56.  
  57. struct GV {
  58.   /// The size reserved for data in a shared memory slot.
  59.   unsigned GV_Slot_Size;
  60.   /// The default value of maximum number of threads in a worker warp.
  61.   unsigned GV_Warp_Size;
  62.  
  63.   constexpr unsigned warpSlotSize() const {
  64.     return GV_Warp_Size * GV_Slot_Size;
  65.   }
  66.  
  67.   /// the maximum number of teams.
  68.   unsigned GV_Max_Teams;
  69.   // The default number of teams in the absence of any other information.
  70.   unsigned GV_Default_Num_Teams;
  71.  
  72.   // An alternative to the heavy data sharing infrastructure that uses global
  73.   // memory is one that uses device __shared__ memory.  The amount of such space
  74.   // (in bytes) reserved by the OpenMP runtime is noted here.
  75.   unsigned GV_SimpleBufferSize;
  76.   // The absolute maximum team size for a working group
  77.   unsigned GV_Max_WG_Size;
  78.   // The default maximum team size for a working group
  79.   unsigned GV_Default_WG_Size;
  80.  
  81.   constexpr unsigned maxWarpNumber() const {
  82.     return GV_Max_WG_Size / GV_Warp_Size;
  83.   }
  84. };
  85.  
  86. /// For AMDGPU GPUs
  87. static constexpr GV AMDGPUGridValues64 = {
  88.     256,  // GV_Slot_Size
  89.     64,   // GV_Warp_Size
  90.     (1 << 16), // GV_Max_Teams
  91.     440,  // GV_Default_Num_Teams
  92.     896,  // GV_SimpleBufferSize
  93.     1024, // GV_Max_WG_Size,
  94.     256,  // GV_Default_WG_Size
  95. };
  96.  
  97. static constexpr GV AMDGPUGridValues32 = {
  98.     256,  // GV_Slot_Size
  99.     32,   // GV_Warp_Size
  100.     (1 << 16), // GV_Max_Teams
  101.     440,  // GV_Default_Num_Teams
  102.     896,  // GV_SimpleBufferSize
  103.     1024, // GV_Max_WG_Size,
  104.     256,  // GV_Default_WG_Size
  105. };
  106.  
  107. template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
  108.   static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
  109.   return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
  110. }
  111.  
  112. /// For Nvidia GPUs
  113. static constexpr GV NVPTXGridValues = {
  114.     256,  // GV_Slot_Size
  115.     32,   // GV_Warp_Size
  116.     (1 << 16), // GV_Max_Teams
  117.     3200, // GV_Default_Num_Teams
  118.     896,  // GV_SimpleBufferSize
  119.     1024, // GV_Max_WG_Size
  120.     128,  // GV_Default_WG_Size
  121. };
  122.  
  123. } // namespace omp
  124. } // namespace llvm
  125.  
  126. #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
  127.