Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #if !defined __IMMINTRIN_H
  11. #error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
  12. #endif
  13.  
  14. #ifndef __F16CINTRIN_H
  15. #define __F16CINTRIN_H
  16.  
  17. /* Define the default attributes for the functions in this file. */
  18. #define __DEFAULT_FN_ATTRS128 \
  19.   __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
  20. #define __DEFAULT_FN_ATTRS256 \
  21.   __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
  22.  
/* NOTE: Intel documents the 128-bit versions of these intrinsics as being in
 * emmintrin.h, but that is because icc can emulate them without f16c by using
 * a library call. Since we don't do that, let's leave them in f16cintrin.h.
 */
  27.  
  28. /// Converts a 16-bit half-precision float value into a 32-bit float
  29. ///    value.
  30. ///
  31. /// \headerfile <x86intrin.h>
  32. ///
  33. /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
  34. ///
  35. /// \param __a
  36. ///    A 16-bit half-precision float value.
  37. /// \returns The converted 32-bit float value.
  38. static __inline float __DEFAULT_FN_ATTRS128
  39. _cvtsh_ss(unsigned short __a)
  40. {
  41.   __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
  42.   __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
  43.   return __r[0];
  44. }
  45.  
  46. /// Converts a 32-bit single-precision float value to a 16-bit
  47. ///    half-precision float value.
  48. ///
  49. /// \headerfile <x86intrin.h>
  50. ///
  51. /// \code
  52. /// unsigned short _cvtss_sh(float a, const int imm);
  53. /// \endcode
  54. ///
  55. /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
  56. ///
  57. /// \param a
  58. ///    A 32-bit single-precision float value to be converted to a 16-bit
  59. ///    half-precision float value.
  60. /// \param imm
  61. ///    An immediate value controlling rounding using bits [2:0]: \n
  62. ///    000: Nearest \n
  63. ///    001: Down \n
  64. ///    010: Up \n
  65. ///    011: Truncate \n
  66. ///    1XX: Use MXCSR.RC for rounding
  67. /// \returns The converted 16-bit half-precision float value.
  68. #define _cvtss_sh(a, imm) __extension__ ({ \
  69.   (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
  70.                                                      (imm)))[0]); })
  71.  
  72. /// Converts a 128-bit vector containing 32-bit float values into a
  73. ///    128-bit vector containing 16-bit half-precision float values.
  74. ///
  75. /// \headerfile <x86intrin.h>
  76. ///
  77. /// \code
  78. /// __m128i _mm_cvtps_ph(__m128 a, const int imm);
  79. /// \endcode
  80. ///
  81. /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
  82. ///
  83. /// \param a
  84. ///    A 128-bit vector containing 32-bit float values.
  85. /// \param imm
  86. ///    An immediate value controlling rounding using bits [2:0]: \n
  87. ///    000: Nearest \n
  88. ///    001: Down \n
  89. ///    010: Up \n
  90. ///    011: Truncate \n
  91. ///    1XX: Use MXCSR.RC for rounding
  92. /// \returns A 128-bit vector containing converted 16-bit half-precision float
  93. ///    values. The lower 64 bits are used to store the converted 16-bit
  94. ///    half-precision floating-point values.
  95. #define _mm_cvtps_ph(a, imm) \
  96.   ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
  97.  
  98. /// Converts a 128-bit vector containing 16-bit half-precision float
  99. ///    values into a 128-bit vector containing 32-bit float values.
  100. ///
  101. /// \headerfile <x86intrin.h>
  102. ///
  103. /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
  104. ///
  105. /// \param __a
  106. ///    A 128-bit vector containing 16-bit half-precision float values. The lower
  107. ///    64 bits are used in the conversion.
  108. /// \returns A 128-bit vector of [4 x float] containing converted float values.
  109. static __inline __m128 __DEFAULT_FN_ATTRS128
  110. _mm_cvtph_ps(__m128i __a)
  111. {
  112.   return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
  113. }
  114.  
  115. /// Converts a 256-bit vector of [8 x float] into a 128-bit vector
  116. ///    containing 16-bit half-precision float values.
  117. ///
  118. /// \headerfile <x86intrin.h>
  119. ///
  120. /// \code
  121. /// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
  122. /// \endcode
  123. ///
  124. /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
  125. ///
  126. /// \param a
  127. ///    A 256-bit vector containing 32-bit single-precision float values to be
  128. ///    converted to 16-bit half-precision float values.
  129. /// \param imm
  130. ///    An immediate value controlling rounding using bits [2:0]: \n
  131. ///    000: Nearest \n
  132. ///    001: Down \n
  133. ///    010: Up \n
  134. ///    011: Truncate \n
  135. ///    1XX: Use MXCSR.RC for rounding
  136. /// \returns A 128-bit vector containing the converted 16-bit half-precision
  137. ///    float values.
  138. #define _mm256_cvtps_ph(a, imm) \
  139.  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
  140.  
  141. /// Converts a 128-bit vector containing 16-bit half-precision float
  142. ///    values into a 256-bit vector of [8 x float].
  143. ///
  144. /// \headerfile <x86intrin.h>
  145. ///
  146. /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
  147. ///
  148. /// \param __a
  149. ///    A 128-bit vector containing 16-bit half-precision float values to be
  150. ///    converted to 32-bit single-precision float values.
  151. /// \returns A vector of [8 x float] containing the converted 32-bit
  152. ///    single-precision float values.
  153. static __inline __m256 __DEFAULT_FN_ATTRS256
  154. _mm256_cvtph_ps(__m128i __a)
  155. {
  156.   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
  157. }
  158.  
  159. #undef __DEFAULT_FN_ATTRS128
  160. #undef __DEFAULT_FN_ATTRS256
  161.  
  162. #endif /* __F16CINTRIN_H */
  163.