Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __CUDA_BUILTIN_VARS_H
  11. #define __CUDA_BUILTIN_VARS_H
  12.  
  13. // Forward declares from vector_types.h.
  14. struct uint3;
  15. struct dim3;
  16.  
  17. // The file implements built-in CUDA variables using __declspec(property).
  18. // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
  19. // All read accesses of built-in variable fields get converted into calls to a
  20. // getter function which in turn calls the appropriate builtin to fetch the
  21. // value.
  22. //
  23. // Example:
  24. //    int x = threadIdx.x;
  25. // IR output:
  26. //  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
  27. // PTX output:
  28. //  mov.u32     %r2, %tid.x;
  29.  
  30. #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
  31.   __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
  32.   static inline __attribute__((always_inline))                                 \
  33.       __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
  34.     return INTRINSIC;                                                          \
  35.   }
  36.  
  37. #if __cplusplus >= 201103L
  38. #define __DELETE =delete
  39. #else
  40. #define __DELETE
  41. #endif
  42.  
  43. // Make sure nobody can create instances of the special variable types.  nvcc
  44. // also disallows taking address of special variables, so we disable address-of
  45. // operator as well.
  46. #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
  47.   __attribute__((device)) TypeName() __DELETE;                                 \
  48.   __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
  49.   __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
  50.   __attribute__((device)) TypeName *operator&() const __DELETE
  51.  
  52. struct __cuda_builtin_threadIdx_t {
  53.   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
  54.   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
  55.   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
  56.   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
  57.   // uint3).  This function is defined after we pull in vector_types.h.
  58.   __attribute__((device)) operator dim3() const;
  59.   __attribute__((device)) operator uint3() const;
  60.  
  61. private:
  62.   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
  63. };
  64.  
  65. struct __cuda_builtin_blockIdx_t {
  66.   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
  67.   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
  68.   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
  69.   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
  70.   // uint3).  This function is defined after we pull in vector_types.h.
  71.   __attribute__((device)) operator dim3() const;
  72.   __attribute__((device)) operator uint3() const;
  73.  
  74. private:
  75.   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
  76. };
  77.  
  78. struct __cuda_builtin_blockDim_t {
  79.   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
  80.   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
  81.   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
  82.   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
  83.   // dim3).  This function is defined after we pull in vector_types.h.
  84.   __attribute__((device)) operator dim3() const;
  85.   __attribute__((device)) operator uint3() const;
  86.  
  87. private:
  88.   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
  89. };
  90.  
  91. struct __cuda_builtin_gridDim_t {
  92.   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
  93.   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
  94.   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
  95.   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
  96.   // dim3).  This function is defined after we pull in vector_types.h.
  97.   __attribute__((device)) operator dim3() const;
  98.   __attribute__((device)) operator uint3() const;
  99.  
  100. private:
  101.   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
  102. };
  103.  
  104. #define __CUDA_BUILTIN_VAR                                                     \
  105.   extern const __attribute__((device)) __attribute__((weak))
  106. __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  107. __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
  108. __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
  109. __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
  110.  
  111. // warpSize should translate to read of %WARP_SZ but there's currently no
  112. // builtin to do so. According to PTX v4.2 docs 'to date, all target
  113. // architectures have a WARP_SZ value of 32'.
  114. __attribute__((device)) const int warpSize = 32;
  115.  
  116. #undef __CUDA_DEVICE_BUILTIN
  117. #undef __CUDA_BUILTIN_VAR
  118. #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
  119. #undef __DELETE
  120.  
  121. #endif /* __CUDA_BUILTIN_VARS_H */
  122.