/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>

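/* SSSE3 absolute-value intrinsics.  The 128-bit forms map directly to
   vec_abs; the 64-bit (MMX) forms splat the operand into both doublewords
   of a vector, take vec_abs, and return the low doubleword.  */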
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  return (__m128i)vec_abs((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  return (__m128i)vec_abs((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  return (__m128i)vec_abs((__v16qi)__A);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  __v8hi __B = (__v8hi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  __v4si __B = (__v4si)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  __v16qi __B = (__v16qi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

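/* _mm_alignr_epi8: concatenate __A:__B (32 bytes, __A in the high half),
   shift right by __count bytes, and return the low 16 bytes; for example,
   __count == 4 yields bytes 4..15 of __B followed by bytes 0..3 of __A.
   A compile-time-constant __count < 16 maps to vec_sld (with endian
   fixups); otherwise the shift is built from vec_slo/vec_sro.  */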
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}

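/* 64-bit variant of the byte-alignment shift: shift the 16-byte __A:__B
   concatenation right by __count bytes and return the low doubleword;
   counts of 16 or more yield zero.  */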
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}

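/* Horizontal add: each result element is the sum of an adjacent pair of
   input elements, with the pairs from __A filling the low half of the
   result and the pairs from __B the high half.  Implemented by permuting
   the even- and odd-positioned elements and adding the two vectors.  */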
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

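/* Horizontal add of adjacent 16-bit pairs with signed saturation, using
   vec_sum4s to form the 32-bit pair sums and vec_packs to saturate them
   back to 16 bits.  */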
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}

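/* Horizontal subtract: each result element is the first element of an
   adjacent pair minus the second, with pairs from __A in the low half of
   the result and pairs from __B in the high half; the _mm_hsubs_* forms
   subtract with signed saturation.  */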
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}

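/* _mm_shuffle_epi8: each result byte is __A[__B[i] & 0x0F], or zero when
   the control byte __B[i] has its high bit set.  vec_perm does the byte
   gather and vec_sel zeroes the lanes selected by the sign compare.  */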
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}

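/* _mm_sign_*: negate an element of __A when the corresponding element of
   __B is negative, zero it when __B is zero, and pass it through when __B
   is positive.  The two compares build a -1/0/+1 multiplier vector that
   is applied with vec_mul.  */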
#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  const __v8hi __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

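/* _mm_maddubs_*: multiply the unsigned bytes of __A by the corresponding
   signed bytes of __B and add adjacent 16-bit products with signed
   saturation.  The bytes are unpacked to halfwords (masking __A with
   0x00ff so it stays unsigned) before the multiply.  */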
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  return (__m128i)vec_adds(__E, __F);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  __D = vec_mul(__C, __D);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}

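/* _mm_mulhrs_*: multiply signed 16-bit elements to 32-bit products and
   scale with rounding, computing (((__A * __B) >> 14) + 1) >> 1 per
   element before packing the results back to 16 bits.  */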
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  return (__m128i)vec_pack(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  __C = vec_mul(__C, __D);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        *   (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* TMMINTRIN_H_ */