Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
  2.  *
  3.  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4.  * See https://llvm.org/LICENSE.txt for license information.
  5.  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6.  *
  7.  *===-----------------------------------------------------------------------===
  8.  */
  9.  
  10. #ifndef __MMINTRIN_H /* include guard: the header body is skipped when __MMINTRIN_H is already defined */
  11. #define __MMINTRIN_H
  12.  
  13. #if !defined(__i386__) && !defined(__x86_64__) /* neither x86 target macro is defined: stop with the #error below */
  14. #error "This header is only meant to be used on x86 and x64 architecture"
  15. #endif
  16.  
  17. typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); /* 8-byte, 8-byte-aligned vector; the parameter/result type of the intrinsics below */
  18.  
  19. typedef long long __v1di __attribute__((__vector_size__(8))); /* 8-byte vector with long long elements */
  20. typedef int __v2si __attribute__((__vector_size__(8))); /* 8-byte vector with int elements; cast target for the [2 x i32] operations */
  21. typedef short __v4hi __attribute__((__vector_size__(8))); /* 8-byte vector with short elements; cast target for the [4 x i16] operations */
  22. typedef char __v8qi __attribute__((__vector_size__(8))); /* 8-byte vector with char elements; cast target for the [8 x i8] operations */
  23.  
  24. /* Define the default attributes for the functions in this file: always inlined, no debug info, "mmx" target feature, minimum vector width of 64. */
  25. #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
  26.  
  27. /// Clears the MMX state by setting the state of the x87 stack registers
  28. ///    to empty.
  29. ///
  30. /// \headerfile <x86intrin.h>
  31. ///
  32. /// This intrinsic corresponds to the <c> EMMS </c> instruction.
  33. ///
  34. static __inline__ void  __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
  35. _mm_empty(void) /* attributes are spelled out above instead of using __DEFAULT_FN_ATTRS, so __min_vector_width__(64) is not applied here */
  36. {
  37.     __builtin_ia32_emms(); /* the whole body: invoke the EMMS builtin; no value is produced */
  38. }
  39.  
  40. /// Constructs a 64-bit integer vector, setting the lower 32 bits to the
  41. ///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
  42. ///
  43. /// \headerfile <x86intrin.h>
  44. ///
  45. /// This intrinsic corresponds to the <c> MOVD </c> instruction.
  46. ///
  47. /// \param __i
  48. ///    A 32-bit integer value.
  49. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
  50. ///    parameter. The upper 32 bits are set to 0.
  51. static __inline__ __m64 __DEFAULT_FN_ATTRS
  52. _mm_cvtsi32_si64(int __i)
  53. {   /* Build a two-int vector from (__i, 0) -- __i in the lower 32 bits, 0 in the upper -- and return it typed as __m64. */
  54.     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
  55. }
  56.  
  57. /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
  58. ///    signed integer.
  59. ///
  60. /// \headerfile <x86intrin.h>
  61. ///
  62. /// This intrinsic corresponds to the <c> MOVD </c> instruction.
  63. ///
  64. /// \param __m
  65. ///    A 64-bit integer vector.
  66. /// \returns A 32-bit signed integer value containing the lower 32 bits of the
  67. ///    parameter.
  68. static __inline__ int __DEFAULT_FN_ATTRS
  69. _mm_cvtsi64_si32(__m64 __m)
  70. {   /* View __m as a two-int vector and extract element 0, i.e. the lower 32 bits. */
  71.     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
  72. }
  73.  
  74. /// Casts a 64-bit signed integer value into a 64-bit integer vector.
  75. ///
  76. /// \headerfile <x86intrin.h>
  77. ///
  78. /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
  79. ///
  80. /// \param __i
  81. ///    A 64-bit signed integer.
  82. /// \returns A 64-bit integer vector containing the same bitwise pattern as the
  83. ///    parameter.
  84. static __inline__ __m64 __DEFAULT_FN_ATTRS
  85. _mm_cvtsi64_m64(long long __i)
  86. {   /* Cast only, no builtin call: the 64-bit value is returned with the same bit pattern, typed as __m64. */
  87.     return (__m64)__i;
  88. }
  89.  
  90. /// Casts a 64-bit integer vector into a 64-bit signed integer value.
  91. ///
  92. /// \headerfile <x86intrin.h>
  93. ///
  94. /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
  95. ///
  96. /// \param __m
  97. ///    A 64-bit integer vector.
  98. /// \returns A 64-bit signed integer containing the same bitwise pattern as the
  99. ///    parameter.
  100. static __inline__ long long __DEFAULT_FN_ATTRS
  101. _mm_cvtm64_si64(__m64 __m)
  102. {   /* Cast only, no builtin call: the vector is returned with the same bit pattern, typed as long long. */
  103.     return (long long)__m;
  104. }
  105.  
  106. /// Converts 16-bit signed integers from both 64-bit integer vector
  107. ///    parameters of [4 x i16] into 8-bit signed integer values, and constructs
  108. ///    a 64-bit integer vector of [8 x i8] as the result. Positive values
  109. ///    greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
  110. ///    are saturated to 0x80.
  111. ///
  112. /// \headerfile <x86intrin.h>
  113. ///
  114. /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
  115. ///
  116. /// \param __m1
  117. ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  118. ///    16-bit signed integer and is converted to an 8-bit signed integer with
  119. ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
  120. ///    Negative values less than 0x80 are saturated to 0x80. The converted
  121. ///    [4 x i8] values are written to the lower 32 bits of the result.
  122. /// \param __m2
  123. ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  124. ///    16-bit signed integer and is converted to an 8-bit signed integer with
  125. ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
  126. ///    Negative values less than 0x80 are saturated to 0x80. The converted
  127. ///    [4 x i8] values are written to the upper 32 bits of the result.
  128. /// \returns A 64-bit integer vector of [8 x i8] containing the converted
  129. ///    values.
  130. static __inline__ __m64 __DEFAULT_FN_ATTRS
  131. _mm_packs_pi16(__m64 __m1, __m64 __m2)
  132. {   /* Both operands are viewed as [4 x i16] for PACKSSWB; __m1 fills the lower 32 bits of the result, __m2 the upper 32 bits. */
  133.     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
  134. }
  135.  
  136. /// Converts 32-bit signed integers from both 64-bit integer vector
  137. ///    parameters of [2 x i32] into 16-bit signed integer values, and constructs
  138. ///    a 64-bit integer vector of [4 x i16] as the result. Positive values
  139. ///    greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
  140. ///    0x8000 are saturated to 0x8000.
  141. ///
  142. /// \headerfile <x86intrin.h>
  143. ///
  144. /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
  145. ///
  146. /// \param __m1
  147. ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
  148. ///    32-bit signed integer and is converted to a 16-bit signed integer with
  149. ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
  150. ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
  151. ///    [2 x i16] values are written to the lower 32 bits of the result.
  152. /// \param __m2
  153. ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
  154. ///    32-bit signed integer and is converted to a 16-bit signed integer with
  155. ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
  156. ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
  157. ///    [2 x i16] values are written to the upper 32 bits of the result.
  158. /// \returns A 64-bit integer vector of [4 x i16] containing the converted
  159. ///    values.
  160. static __inline__ __m64 __DEFAULT_FN_ATTRS
  161. _mm_packs_pi32(__m64 __m1, __m64 __m2)
  162. {   /* Both operands are viewed as [2 x i32] for PACKSSDW; __m1 fills the lower 32 bits of the result, __m2 the upper 32 bits. */
  163.     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
  164. }
  165.  
  166. /// Converts 16-bit signed integers from both 64-bit integer vector
  167. ///    parameters of [4 x i16] into 8-bit unsigned integer values, and
  168. ///    constructs a 64-bit integer vector of [8 x i8] as the result. Values
  169. ///    greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
  170. ///    to 0.
  171. ///
  172. /// \headerfile <x86intrin.h>
  173. ///
  174. /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
  175. ///
  176. /// \param __m1
  177. ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  178. ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
  179. ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
  180. ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
  181. ///    the lower 32 bits of the result.
  182. /// \param __m2
  183. ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  184. ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
  185. ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
  186. ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
  187. ///    the upper 32 bits of the result.
  188. /// \returns A 64-bit integer vector of [8 x i8] containing the converted
  189. ///    values.
  190. static __inline__ __m64 __DEFAULT_FN_ATTRS
  191. _mm_packs_pu16(__m64 __m1, __m64 __m2)
  192. {   /* Both operands are viewed as [4 x i16] for PACKUSWB (unsigned saturation); __m1 fills the lower 32 bits of the result, __m2 the upper 32 bits. */
  193.     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
  194. }
  195.  
  196. /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
  197. ///    and interleaves them into a 64-bit integer vector of [8 x i8].
  198. ///
  199. /// \headerfile <x86intrin.h>
  200. ///
  201. /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
  202. ///
  203. /// \param __m1
  204. ///    A 64-bit integer vector of [8 x i8]. \n
  205. ///    Bits [39:32] are written to bits [7:0] of the result. \n
  206. ///    Bits [47:40] are written to bits [23:16] of the result. \n
  207. ///    Bits [55:48] are written to bits [39:32] of the result. \n
  208. ///    Bits [63:56] are written to bits [55:48] of the result.
  209. /// \param __m2
  210. ///    A 64-bit integer vector of [8 x i8]. \n
  211. ///    Bits [39:32] are written to bits [15:8] of the result. \n
  212. ///    Bits [47:40] are written to bits [31:24] of the result. \n
  213. ///    Bits [55:48] are written to bits [47:40] of the result. \n
  214. ///    Bits [63:56] are written to bits [63:56] of the result.
  215. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
  216. ///    values.
  217. static __inline__ __m64 __DEFAULT_FN_ATTRS
  218. _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
  219. {   /* Operands are viewed as [8 x i8] for PUNPCKHBW; the upper four bytes of each are interleaved, starting with __m1's byte in bits [7:0]. */
  220.     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
  221. }
  222.  
  223. /// Unpacks the upper 32 bits from two 64-bit integer vectors of
  224. ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
  225. ///
  226. /// \headerfile <x86intrin.h>
  227. ///
  228. /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
  229. ///
  230. /// \param __m1
  231. ///    A 64-bit integer vector of [4 x i16]. \n
  232. ///    Bits [47:32] are written to bits [15:0] of the result. \n
  233. ///    Bits [63:48] are written to bits [47:32] of the result.
  234. /// \param __m2
  235. ///    A 64-bit integer vector of [4 x i16]. \n
  236. ///    Bits [47:32] are written to bits [31:16] of the result. \n
  237. ///    Bits [63:48] are written to bits [63:48] of the result.
  238. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
  239. ///    values.
  240. static __inline__ __m64 __DEFAULT_FN_ATTRS
  241. _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
  242. {   /* Operands are viewed as [4 x i16] for PUNPCKHWD; the upper two words of each are interleaved, starting with __m1's word in bits [15:0]. */
  243.     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
  244. }
  245.  
  246. /// Unpacks the upper 32 bits from two 64-bit integer vectors of
  247. ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
  248. ///
  249. /// \headerfile <x86intrin.h>
  250. ///
  251. /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
  252. ///
  253. /// \param __m1
  254. ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
  255. ///    the lower 32 bits of the result.
  256. /// \param __m2
  257. ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
  258. ///    the upper 32 bits of the result.
  259. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
  260. ///    values.
  261. static __inline__ __m64 __DEFAULT_FN_ATTRS
  262. _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
  263. {   /* Operands are viewed as [2 x i32] for PUNPCKHDQ; __m1's upper 32 bits become the lower half of the result, __m2's the upper half. */
  264.     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
  265. }
  266.  
  267. /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
  268. ///    and interleaves them into a 64-bit integer vector of [8 x i8].
  269. ///
  270. /// \headerfile <x86intrin.h>
  271. ///
  272. /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
  273. ///
  274. /// \param __m1
  275. ///    A 64-bit integer vector of [8 x i8]. \n
  276. ///    Bits [7:0] are written to bits [7:0] of the result. \n
  277. ///    Bits [15:8] are written to bits [23:16] of the result. \n
  278. ///    Bits [23:16] are written to bits [39:32] of the result. \n
  279. ///    Bits [31:24] are written to bits [55:48] of the result.
  280. /// \param __m2
  281. ///    A 64-bit integer vector of [8 x i8]. \n
  282. ///    Bits [7:0] are written to bits [15:8] of the result. \n
  283. ///    Bits [15:8] are written to bits [31:24] of the result. \n
  284. ///    Bits [23:16] are written to bits [47:40] of the result. \n
  285. ///    Bits [31:24] are written to bits [63:56] of the result.
  286. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
  287. ///    values.
  288. static __inline__ __m64 __DEFAULT_FN_ATTRS
  289. _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
  290. {   /* Operands are viewed as [8 x i8] for PUNPCKLBW; the lower four bytes of each are interleaved, starting with __m1's byte in bits [7:0]. */
  291.     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
  292. }
  293.  
  294. /// Unpacks the lower 32 bits from two 64-bit integer vectors of
  295. ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
  296. ///
  297. /// \headerfile <x86intrin.h>
  298. ///
  299. /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
  300. ///
  301. /// \param __m1
  302. ///    A 64-bit integer vector of [4 x i16]. \n
  303. ///    Bits [15:0] are written to bits [15:0] of the result. \n
  304. ///    Bits [31:16] are written to bits [47:32] of the result.
  305. /// \param __m2
  306. ///    A 64-bit integer vector of [4 x i16]. \n
  307. ///    Bits [15:0] are written to bits [31:16] of the result. \n
  308. ///    Bits [31:16] are written to bits [63:48] of the result.
  309. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
  310. ///    values.
  311. static __inline__ __m64 __DEFAULT_FN_ATTRS
  312. _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
  313. {   /* Operands are viewed as [4 x i16] for PUNPCKLWD; the lower two words of each are interleaved, starting with __m1's word in bits [15:0]. */
  314.     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
  315. }
  316.  
  317. /// Unpacks the lower 32 bits from two 64-bit integer vectors of
  318. ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
  319. ///
  320. /// \headerfile <x86intrin.h>
  321. ///
  322. /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
  323. ///
  324. /// \param __m1
  325. ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
  326. ///    the lower 32 bits of the result.
  327. /// \param __m2
  328. ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
  329. ///    the upper 32 bits of the result.
  330. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
  331. ///    values.
  332. static __inline__ __m64 __DEFAULT_FN_ATTRS
  333. _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
  334. {   /* Operands are viewed as [2 x i32] for PUNPCKLDQ; __m1's lower 32 bits become the lower half of the result, __m2's the upper half. */
  335.     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
  336. }
  337.  
  338. /// Adds each 8-bit integer element of the first 64-bit integer vector
  339. ///    of [8 x i8] to the corresponding 8-bit integer element of the second
  340. ///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
  341. ///    packed into a 64-bit integer vector of [8 x i8].
  342. ///
  343. /// \headerfile <x86intrin.h>
  344. ///
  345. /// This intrinsic corresponds to the <c> PADDB </c> instruction.
  346. ///
  347. /// \param __m1
  348. ///    A 64-bit integer vector of [8 x i8].
  349. /// \param __m2
  350. ///    A 64-bit integer vector of [8 x i8].
  351. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
  352. ///    parameters.
  353. static __inline__ __m64 __DEFAULT_FN_ATTRS
  354. _mm_add_pi8(__m64 __m1, __m64 __m2)
  355. {   /* Operands are viewed as [8 x i8] for PADDB: element-wise add that keeps the lower 8 bits of each sum. */
  356.     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
  357. }
  358.  
  359. /// Adds each 16-bit integer element of the first 64-bit integer vector
  360. ///    of [4 x i16] to the corresponding 16-bit integer element of the second
  361. ///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
  362. ///    packed into a 64-bit integer vector of [4 x i16].
  363. ///
  364. /// \headerfile <x86intrin.h>
  365. ///
  366. /// This intrinsic corresponds to the <c> PADDW </c> instruction.
  367. ///
  368. /// \param __m1
  369. ///    A 64-bit integer vector of [4 x i16].
  370. /// \param __m2
  371. ///    A 64-bit integer vector of [4 x i16].
  372. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
  373. ///    parameters.
  374. static __inline__ __m64 __DEFAULT_FN_ATTRS
  375. _mm_add_pi16(__m64 __m1, __m64 __m2)
  376. {   /* Operands are viewed as [4 x i16] for PADDW: element-wise add that keeps the lower 16 bits of each sum. */
  377.     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
  378. }
  379.  
  380. /// Adds each 32-bit integer element of the first 64-bit integer vector
  381. ///    of [2 x i32] to the corresponding 32-bit integer element of the second
  382. ///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
  383. ///    packed into a 64-bit integer vector of [2 x i32].
  384. ///
  385. /// \headerfile <x86intrin.h>
  386. ///
  387. /// This intrinsic corresponds to the <c> PADDD </c> instruction.
  388. ///
  389. /// \param __m1
  390. ///    A 64-bit integer vector of [2 x i32].
  391. /// \param __m2
  392. ///    A 64-bit integer vector of [2 x i32].
  393. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
  394. ///    parameters.
  395. static __inline__ __m64 __DEFAULT_FN_ATTRS
  396. _mm_add_pi32(__m64 __m1, __m64 __m2)
  397. {   /* Operands are viewed as [2 x i32] for PADDD: element-wise add that keeps the lower 32 bits of each sum. */
  398.     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
  399. }
  400.  
  401. /// Adds each 8-bit signed integer element of the first 64-bit integer
  402. ///    vector of [8 x i8] to the corresponding 8-bit signed integer element of
  403. ///    the second 64-bit integer vector of [8 x i8]. Positive sums greater than
  404. ///    0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
  405. ///    0x80. The results are packed into a 64-bit integer vector of [8 x i8].
  406. ///
  407. /// \headerfile <x86intrin.h>
  408. ///
  409. /// This intrinsic corresponds to the <c> PADDSB </c> instruction.
  410. ///
  411. /// \param __m1
  412. ///    A 64-bit integer vector of [8 x i8].
  413. /// \param __m2
  414. ///    A 64-bit integer vector of [8 x i8].
  415. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
  416. ///    of both parameters.
  417. static __inline__ __m64 __DEFAULT_FN_ATTRS
  418. _mm_adds_pi8(__m64 __m1, __m64 __m2)
  419. {   /* Operands are viewed as [8 x i8] for PADDSB: element-wise add with signed saturation. */
  420.     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
  421. }
  422.  
  423. /// Adds each 16-bit signed integer element of the first 64-bit integer
  424. ///    vector of [4 x i16] to the corresponding 16-bit signed integer element of
  425. ///    the second 64-bit integer vector of [4 x i16]. Positive sums greater than
  426. ///    0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
  427. ///    saturated to 0x8000. The results are packed into a 64-bit integer vector
  428. ///    of [4 x i16].
  429. ///
  430. /// \headerfile <x86intrin.h>
  431. ///
  432. /// This intrinsic corresponds to the <c> PADDSW </c> instruction.
  433. ///
  434. /// \param __m1
  435. ///    A 64-bit integer vector of [4 x i16].
  436. /// \param __m2
  437. ///    A 64-bit integer vector of [4 x i16].
  438. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
  439. ///    of both parameters.
  440. static __inline__ __m64 __DEFAULT_FN_ATTRS
  441. _mm_adds_pi16(__m64 __m1, __m64 __m2)
  442. {   /* Operands are viewed as [4 x i16] for PADDSW: element-wise add with signed saturation. */
  443.     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
  444. }
  445.  
  446. /// Adds each 8-bit unsigned integer element of the first 64-bit integer
  447. ///    vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
  448. ///    the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
  449. ///    saturated to 0xFF. The results are packed into a 64-bit integer vector of
  450. ///    [8 x i8].
  451. ///
  452. /// \headerfile <x86intrin.h>
  453. ///
  454. /// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
  455. ///
  456. /// \param __m1
  457. ///    A 64-bit integer vector of [8 x i8].
  458. /// \param __m2
  459. ///    A 64-bit integer vector of [8 x i8].
  460. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  461. ///    unsigned sums of both parameters.
  462. static __inline__ __m64 __DEFAULT_FN_ATTRS
  463. _mm_adds_pu8(__m64 __m1, __m64 __m2)
  464. {   /* Operands are viewed as [8 x i8] for PADDUSB: element-wise add with unsigned saturation (sums above 0xFF become 0xFF). */
  465.     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
  466. }
  467.  
  468. /// Adds each 16-bit unsigned integer element of the first 64-bit integer
  469. ///    vector of [4 x i16] to the corresponding 16-bit unsigned integer element
  470. ///    of the second 64-bit integer vector of [4 x i16]. Sums greater than
  471. ///    0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
  472. ///    integer vector of [4 x i16].
  473. ///
  474. /// \headerfile <x86intrin.h>
  475. ///
  476. /// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
  477. ///
  478. /// \param __m1
  479. ///    A 64-bit integer vector of [4 x i16].
  480. /// \param __m2
  481. ///    A 64-bit integer vector of [4 x i16].
  482. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  483. ///    unsigned sums of both parameters.
  484. static __inline__ __m64 __DEFAULT_FN_ATTRS
  485. _mm_adds_pu16(__m64 __m1, __m64 __m2)
  486. {   /* Operands are viewed as [4 x i16] for PADDUSW: element-wise add with unsigned saturation (sums above 0xFFFF become 0xFFFF). */
  487.     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
  488. }
  489.  
  490. /// Subtracts each 8-bit integer element of the second 64-bit integer
  491. ///    vector of [8 x i8] from the corresponding 8-bit integer element of the
  492. ///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
  493. ///    are packed into a 64-bit integer vector of [8 x i8].
  494. ///
  495. /// \headerfile <x86intrin.h>
  496. ///
  497. /// This intrinsic corresponds to the <c> PSUBB </c> instruction.
  498. ///
  499. /// \param __m1
  500. ///    A 64-bit integer vector of [8 x i8] containing the minuends.
  501. /// \param __m2
  502. ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
  503. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
  504. ///    both parameters.
  505. static __inline__ __m64 __DEFAULT_FN_ATTRS
  506. _mm_sub_pi8(__m64 __m1, __m64 __m2)
  507. {   /* Operands are viewed as [8 x i8] for PSUBB: __m1 holds the minuends, __m2 the subtrahends; the lower 8 bits of each difference are kept. */
  508.     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
  509. }
  510.  
  511. /// Subtracts each 16-bit integer element of the second 64-bit integer
  512. ///    vector of [4 x i16] from the corresponding 16-bit integer element of the
  513. ///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
  514. ///    results are packed into a 64-bit integer vector of [4 x i16].
  515. ///
  516. /// \headerfile <x86intrin.h>
  517. ///
  518. /// This intrinsic corresponds to the <c> PSUBW </c> instruction.
  519. ///
  520. /// \param __m1
  521. ///    A 64-bit integer vector of [4 x i16] containing the minuends.
  522. /// \param __m2
  523. ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
  524. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
  525. ///    both parameters.
  526. static __inline__ __m64 __DEFAULT_FN_ATTRS
  527. _mm_sub_pi16(__m64 __m1, __m64 __m2)
  528. {   /* Operands are viewed as [4 x i16] for PSUBW: __m1 holds the minuends, __m2 the subtrahends; the lower 16 bits of each difference are kept. */
  529.     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
  530. }
  531.  
  532. /// Subtracts each 32-bit integer element of the second 64-bit integer
  533. ///    vector of [2 x i32] from the corresponding 32-bit integer element of the
  534. ///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
  535. ///    results are packed into a 64-bit integer vector of [2 x i32].
  536. ///
  537. /// \headerfile <x86intrin.h>
  538. ///
  539. /// This intrinsic corresponds to the <c> PSUBD </c> instruction.
  540. ///
  541. /// \param __m1
  542. ///    A 64-bit integer vector of [2 x i32] containing the minuends.
  543. /// \param __m2
  544. ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
  545. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
  546. ///    both parameters.
  547. static __inline__ __m64 __DEFAULT_FN_ATTRS
  548. _mm_sub_pi32(__m64 __m1, __m64 __m2)
  549. {   /* Operands are viewed as [2 x i32] for PSUBD: __m1 holds the minuends, __m2 the subtrahends; the lower 32 bits of each difference are kept. */
  550.     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
  551. }
  552.  
  553. /// Subtracts each 8-bit signed integer element of the second 64-bit
  554. ///    integer vector of [8 x i8] from the corresponding 8-bit signed integer
  555. ///    element of the first 64-bit integer vector of [8 x i8]. Positive results
  556. ///    greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
  557. ///    are saturated to 0x80. The results are packed into a 64-bit integer
  558. ///    vector of [8 x i8].
  559. ///
  560. /// \headerfile <x86intrin.h>
  561. ///
  562. /// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
  563. ///
  564. /// \param __m1
  565. ///    A 64-bit integer vector of [8 x i8] containing the minuends.
  566. /// \param __m2
  567. ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
  568. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  569. ///    differences of both parameters.
  570. static __inline__ __m64 __DEFAULT_FN_ATTRS
  571. _mm_subs_pi8(__m64 __m1, __m64 __m2)
  572. {   /* Operands are viewed as [8 x i8] for PSUBSB: __m1 minus __m2 per element, with signed saturation. */
  573.     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
  574. }
  575.  
  576. /// Subtracts each 16-bit signed integer element of the second 64-bit
  577. ///    integer vector of [4 x i16] from the corresponding 16-bit signed integer
  578. ///    element of the first 64-bit integer vector of [4 x i16]. Positive results
  579. ///    greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
  580. ///    0x8000 are saturated to 0x8000. The results are packed into a 64-bit
  581. ///    integer vector of [4 x i16].
  582. ///
  583. /// \headerfile <x86intrin.h>
  584. ///
  585. /// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
  586. ///
  587. /// \param __m1
  588. ///    A 64-bit integer vector of [4 x i16] containing the minuends.
  589. /// \param __m2
  590. ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
  591. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  592. ///    differences of both parameters.
  593. static __inline__ __m64 __DEFAULT_FN_ATTRS
  594. _mm_subs_pi16(__m64 __m1, __m64 __m2)
  595. {   /* Operands are viewed as [4 x i16] for PSUBSW: __m1 minus __m2 per element, with signed saturation. */
  596.     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
  597. }
  598.  
  599. /// Subtracts each 8-bit unsigned integer element of the second 64-bit
  600. ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
  601. ///    element of the first 64-bit integer vector of [8 x i8].
  602. ///
  603. ///    If an element of the first vector is less than the corresponding element
  604. ///    of the second vector, the result is saturated to 0. The results are
  605. ///    packed into a 64-bit integer vector of [8 x i8].
  606. ///
  607. /// \headerfile <x86intrin.h>
  608. ///
  609. /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
  610. ///
  611. /// \param __m1
  612. ///    A 64-bit integer vector of [8 x i8] containing the minuends.
  613. /// \param __m2
  614. ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
  615. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  616. ///    differences of both parameters.
  617. static __inline__ __m64 __DEFAULT_FN_ATTRS
  618. _mm_subs_pu8(__m64 __m1, __m64 __m2)
  619. {   /* Operands are viewed as [8 x i8] for PSUBUSB: __m1 minus __m2 per element, with unsigned saturation (a would-be negative result becomes 0). */
  620.     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
  621. }
  622.  
  623. /// Subtracts each 16-bit unsigned integer element of the second 64-bit
  624. ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
  625. ///    integer element of the first 64-bit integer vector of [4 x i16].
  626. ///
  627. ///    If an element of the first vector is less than the corresponding element
  628. ///    of the second vector, the result is saturated to 0. The results are
  629. ///    packed into a 64-bit integer vector of [4 x i16].
  630. ///
  631. /// \headerfile <x86intrin.h>
  632. ///
  633. /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
  634. ///
  635. /// \param __m1
  636. ///    A 64-bit integer vector of [4 x i16] containing the minuends.
  637. /// \param __m2
  638. ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
  639. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  640. ///    differences of both parameters.
  641. static __inline__ __m64 __DEFAULT_FN_ATTRS
  642. _mm_subs_pu16(__m64 __m1, __m64 __m2)
  643. {   /* Operands are viewed as [4 x i16] for PSUBUSW: __m1 minus __m2 per element, with unsigned saturation (a would-be negative result becomes 0). */
  644.     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
  645. }
  646.  
  647. /// Multiplies each 16-bit signed integer element of the first 64-bit
  648. ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
  649. ///    element of the second 64-bit integer vector of [4 x i16] to get four
  650. ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
  651. ///    The lower 32 bits of these two sums are packed into a 64-bit integer
  652. ///    vector of [2 x i32].
  653. ///
  654. ///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
  655. ///    of both parameters are multiplied, and the sum of both results is written
  656. ///    to bits [31:0] of the result.
  657. ///
  658. /// \headerfile <x86intrin.h>
  659. ///
  660. /// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
  661. ///
  662. /// \param __m1
  663. ///    A 64-bit integer vector of [4 x i16].
  664. /// \param __m2
  665. ///    A 64-bit integer vector of [4 x i16].
  666. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
  667. ///    products of both parameters.
  668. static __inline__ __m64 __DEFAULT_FN_ATTRS
  669. _mm_madd_pi16(__m64 __m1, __m64 __m2)
  670. {   /* Operands are viewed as [4 x i16] for PMADDWD: element-wise products are summed in adjacent pairs, giving a [2 x i32] result. */
  671.     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
  672. }
  673.  
  674. /// Multiplies each 16-bit signed integer element of the first 64-bit
  675. ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
  676. ///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
  677. ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
  678. ///
  679. /// \headerfile <x86intrin.h>
  680. ///
  681. /// This intrinsic corresponds to the <c> PMULHW </c> instruction.
  682. ///
  683. /// \param __m1
  684. ///    A 64-bit integer vector of [4 x i16].
  685. /// \param __m2
  686. ///    A 64-bit integer vector of [4 x i16].
  687. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
  688. ///    of the products of both parameters.
  689. static __inline__ __m64 __DEFAULT_FN_ATTRS
  690. _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
  691. {
  692.     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
  693. }
  694.  
  695. /// Multiplies each 16-bit signed integer element of the first 64-bit
  696. ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
  697. ///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
  698. ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
  699. ///
  700. /// \headerfile <x86intrin.h>
  701. ///
  702. /// This intrinsic corresponds to the <c> PMULLW </c> instruction.
  703. ///
  704. /// \param __m1
  705. ///    A 64-bit integer vector of [4 x i16].
  706. /// \param __m2
  707. ///    A 64-bit integer vector of [4 x i16].
  708. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
  709. ///    of the products of both parameters.
  710. static __inline__ __m64 __DEFAULT_FN_ATTRS
  711. _mm_mullo_pi16(__m64 __m1, __m64 __m2)
  712. {
  713.     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
  714. }
  715.  
  716. /// Left-shifts each 16-bit signed integer element of the first
  717. ///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
  718. ///    of bits specified by the second parameter, which is a 64-bit integer. The
  719. ///    lower 16 bits of the results are packed into a 64-bit integer vector of
  720. ///    [4 x i16].
  721. ///
  722. /// \headerfile <x86intrin.h>
  723. ///
  724. /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
  725. ///
  726. /// \param __m
  727. ///    A 64-bit integer vector of [4 x i16].
  728. /// \param __count
  729. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  730. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
  731. ///    values. If \a __count is greater or equal to 16, the result is set to all
  732. ///    0.
  733. static __inline__ __m64 __DEFAULT_FN_ATTRS
  734. _mm_sll_pi16(__m64 __m, __m64 __count)
  735. {
  736.     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
  737. }
  738.  
  739. /// Left-shifts each 16-bit signed integer element of a 64-bit integer
  740. ///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
  741. ///    The lower 16 bits of the results are packed into a 64-bit integer vector
  742. ///    of [4 x i16].
  743. ///
  744. /// \headerfile <x86intrin.h>
  745. ///
  746. /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
  747. ///
  748. /// \param __m
  749. ///    A 64-bit integer vector of [4 x i16].
  750. /// \param __count
  751. ///    A 32-bit integer value.
  752. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
  753. ///    values. If \a __count is greater or equal to 16, the result is set to all
  754. ///    0.
  755. static __inline__ __m64 __DEFAULT_FN_ATTRS
  756. _mm_slli_pi16(__m64 __m, int __count)
  757. {
  758.     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
  759. }
  760.  
  761. /// Left-shifts each 32-bit signed integer element of the first
  762. ///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
  763. ///    of bits specified by the second parameter, which is a 64-bit integer. The
  764. ///    lower 32 bits of the results are packed into a 64-bit integer vector of
  765. ///    [2 x i32].
  766. ///
  767. /// \headerfile <x86intrin.h>
  768. ///
  769. /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
  770. ///
  771. /// \param __m
  772. ///    A 64-bit integer vector of [2 x i32].
  773. /// \param __count
  774. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  775. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
  776. ///    values. If \a __count is greater or equal to 32, the result is set to all
  777. ///    0.
  778. static __inline__ __m64 __DEFAULT_FN_ATTRS
  779. _mm_sll_pi32(__m64 __m, __m64 __count)
  780. {
  781.     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
  782. }
  783.  
  784. /// Left-shifts each 32-bit signed integer element of a 64-bit integer
  785. ///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
  786. ///    The lower 32 bits of the results are packed into a 64-bit integer vector
  787. ///    of [2 x i32].
  788. ///
  789. /// \headerfile <x86intrin.h>
  790. ///
  791. /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
  792. ///
  793. /// \param __m
  794. ///    A 64-bit integer vector of [2 x i32].
  795. /// \param __count
  796. ///    A 32-bit integer value.
  797. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
  798. ///    values. If \a __count is greater or equal to 32, the result is set to all
  799. ///    0.
  800. static __inline__ __m64 __DEFAULT_FN_ATTRS
  801. _mm_slli_pi32(__m64 __m, int __count)
  802. {
  803.     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
  804. }
  805.  
  806. /// Left-shifts the first 64-bit integer parameter by the number of bits
  807. ///    specified by the second 64-bit integer parameter. The lower 64 bits of
  808. ///    result are returned.
  809. ///
  810. /// \headerfile <x86intrin.h>
  811. ///
  812. /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
  813. ///
  814. /// \param __m
  815. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  816. /// \param __count
  817. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  818. /// \returns A 64-bit integer vector containing the left-shifted value. If
  819. ///     \a __count is greater or equal to 64, the result is set to 0.
  820. static __inline__ __m64 __DEFAULT_FN_ATTRS
  821. _mm_sll_si64(__m64 __m, __m64 __count)
  822. {
  823.     return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
  824. }
  825.  
  826. /// Left-shifts the first parameter, which is a 64-bit integer, by the
  827. ///    number of bits specified by the second parameter, which is a 32-bit
  828. ///    integer. The lower 64 bits of result are returned.
  829. ///
  830. /// \headerfile <x86intrin.h>
  831. ///
  832. /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
  833. ///
  834. /// \param __m
  835. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  836. /// \param __count
  837. ///    A 32-bit integer value.
  838. /// \returns A 64-bit integer vector containing the left-shifted value. If
  839. ///     \a __count is greater or equal to 64, the result is set to 0.
  840. static __inline__ __m64 __DEFAULT_FN_ATTRS
  841. _mm_slli_si64(__m64 __m, int __count)
  842. {
  843.     return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
  844. }
  845.  
  846. /// Right-shifts each 16-bit integer element of the first parameter,
  847. ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
  848. ///    specified by the second parameter, which is a 64-bit integer.
  849. ///
  850. ///    High-order bits are filled with the sign bit of the initial value of each
  851. ///    16-bit element. The 16-bit results are packed into a 64-bit integer
  852. ///    vector of [4 x i16].
  853. ///
  854. /// \headerfile <x86intrin.h>
  855. ///
  856. /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
  857. ///
  858. /// \param __m
  859. ///    A 64-bit integer vector of [4 x i16].
  860. /// \param __count
  861. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  862. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  863. ///    values.
  864. static __inline__ __m64 __DEFAULT_FN_ATTRS
  865. _mm_sra_pi16(__m64 __m, __m64 __count)
  866. {
  867.     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
  868. }
  869.  
  870. /// Right-shifts each 16-bit integer element of a 64-bit integer vector
  871. ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
  872. ///
  873. ///    High-order bits are filled with the sign bit of the initial value of each
  874. ///    16-bit element. The 16-bit results are packed into a 64-bit integer
  875. ///    vector of [4 x i16].
  876. ///
  877. /// \headerfile <x86intrin.h>
  878. ///
  879. /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
  880. ///
  881. /// \param __m
  882. ///    A 64-bit integer vector of [4 x i16].
  883. /// \param __count
  884. ///    A 32-bit integer value.
  885. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  886. ///    values.
  887. static __inline__ __m64 __DEFAULT_FN_ATTRS
  888. _mm_srai_pi16(__m64 __m, int __count)
  889. {
  890.     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
  891. }
  892.  
  893. /// Right-shifts each 32-bit integer element of the first parameter,
  894. ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
  895. ///    specified by the second parameter, which is a 64-bit integer.
  896. ///
  897. ///    High-order bits are filled with the sign bit of the initial value of each
  898. ///    32-bit element. The 32-bit results are packed into a 64-bit integer
  899. ///    vector of [2 x i32].
  900. ///
  901. /// \headerfile <x86intrin.h>
  902. ///
  903. /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
  904. ///
  905. /// \param __m
  906. ///    A 64-bit integer vector of [2 x i32].
  907. /// \param __count
  908. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  909. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  910. ///    values.
  911. static __inline__ __m64 __DEFAULT_FN_ATTRS
  912. _mm_sra_pi32(__m64 __m, __m64 __count)
  913. {
  914.     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
  915. }
  916.  
  917. /// Right-shifts each 32-bit integer element of a 64-bit integer vector
  918. ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
  919. ///
  920. ///    High-order bits are filled with the sign bit of the initial value of each
  921. ///    32-bit element. The 32-bit results are packed into a 64-bit integer
  922. ///    vector of [2 x i32].
  923. ///
  924. /// \headerfile <x86intrin.h>
  925. ///
  926. /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
  927. ///
  928. /// \param __m
  929. ///    A 64-bit integer vector of [2 x i32].
  930. /// \param __count
  931. ///    A 32-bit integer value.
  932. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  933. ///    values.
  934. static __inline__ __m64 __DEFAULT_FN_ATTRS
  935. _mm_srai_pi32(__m64 __m, int __count)
  936. {
  937.     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
  938. }
  939.  
  940. /// Right-shifts each 16-bit integer element of the first parameter,
  941. ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
  942. ///    specified by the second parameter, which is a 64-bit integer.
  943. ///
  944. ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
  945. ///    integer vector of [4 x i16].
  946. ///
  947. /// \headerfile <x86intrin.h>
  948. ///
  949. /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
  950. ///
  951. /// \param __m
  952. ///    A 64-bit integer vector of [4 x i16].
  953. /// \param __count
  954. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  955. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  956. ///    values.
  957. static __inline__ __m64 __DEFAULT_FN_ATTRS
  958. _mm_srl_pi16(__m64 __m, __m64 __count)
  959. {
  960.     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
  961. }
  962.  
  963. /// Right-shifts each 16-bit integer element of a 64-bit integer vector
  964. ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
  965. ///
  966. ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
  967. ///    integer vector of [4 x i16].
  968. ///
  969. /// \headerfile <x86intrin.h>
  970. ///
  971. /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
  972. ///
  973. /// \param __m
  974. ///    A 64-bit integer vector of [4 x i16].
  975. /// \param __count
  976. ///    A 32-bit integer value.
  977. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  978. ///    values.
  979. static __inline__ __m64 __DEFAULT_FN_ATTRS
  980. _mm_srli_pi16(__m64 __m, int __count)
  981. {
  982.     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
  983. }
  984.  
  985. /// Right-shifts each 32-bit integer element of the first parameter,
  986. ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
  987. ///    specified by the second parameter, which is a 64-bit integer.
  988. ///
  989. ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
  990. ///    integer vector of [2 x i32].
  991. ///
  992. /// \headerfile <x86intrin.h>
  993. ///
  994. /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
  995. ///
  996. /// \param __m
  997. ///    A 64-bit integer vector of [2 x i32].
  998. /// \param __count
  999. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  1000. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  1001. ///    values.
  1002. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1003. _mm_srl_pi32(__m64 __m, __m64 __count)
  1004. {
  1005.     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
  1006. }
  1007.  
  1008. /// Right-shifts each 32-bit integer element of a 64-bit integer vector
  1009. ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
  1010. ///
  1011. ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
  1012. ///    integer vector of [2 x i32].
  1013. ///
  1014. /// \headerfile <x86intrin.h>
  1015. ///
  1016. /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
  1017. ///
  1018. /// \param __m
  1019. ///    A 64-bit integer vector of [2 x i32].
  1020. /// \param __count
  1021. ///    A 32-bit integer value.
  1022. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  1023. ///    values.
  1024. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1025. _mm_srli_pi32(__m64 __m, int __count)
  1026. {
  1027.     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
  1028. }
  1029.  
  1030. /// Right-shifts the first 64-bit integer parameter by the number of bits
  1031. ///    specified by the second 64-bit integer parameter.
  1032. ///
  1033. ///    High-order bits are cleared.
  1034. ///
  1035. /// \headerfile <x86intrin.h>
  1036. ///
  1037. /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
  1038. ///
  1039. /// \param __m
  1040. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  1041. /// \param __count
  1042. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  1043. /// \returns A 64-bit integer vector containing the right-shifted value.
  1044. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1045. _mm_srl_si64(__m64 __m, __m64 __count)
  1046. {
  1047.     return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
  1048. }
  1049.  
  1050. /// Right-shifts the first parameter, which is a 64-bit integer, by the
  1051. ///    number of bits specified by the second parameter, which is a 32-bit
  1052. ///    integer.
  1053. ///
  1054. ///    High-order bits are cleared.
  1055. ///
  1056. /// \headerfile <x86intrin.h>
  1057. ///
  1058. /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
  1059. ///
  1060. /// \param __m
  1061. ///    A 64-bit integer vector interpreted as a single 64-bit integer.
  1062. /// \param __count
  1063. ///    A 32-bit integer value.
  1064. /// \returns A 64-bit integer vector containing the right-shifted value.
  1065. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1066. _mm_srli_si64(__m64 __m, int __count)
  1067. {
  1068.     return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
  1069. }
  1070.  
  1071. /// Performs a bitwise AND of two 64-bit integer vectors.
  1072. ///
  1073. /// \headerfile <x86intrin.h>
  1074. ///
  1075. /// This intrinsic corresponds to the <c> PAND </c> instruction.
  1076. ///
  1077. /// \param __m1
  1078. ///    A 64-bit integer vector.
  1079. /// \param __m2
  1080. ///    A 64-bit integer vector.
  1081. /// \returns A 64-bit integer vector containing the bitwise AND of both
  1082. ///    parameters.
  1083. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1084. _mm_and_si64(__m64 __m1, __m64 __m2)
  1085. {
  1086.     return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
  1087. }
  1088.  
  1089. /// Performs a bitwise NOT of the first 64-bit integer vector, and then
  1090. ///    performs a bitwise AND of the intermediate result and the second 64-bit
  1091. ///    integer vector.
  1092. ///
  1093. /// \headerfile <x86intrin.h>
  1094. ///
  1095. /// This intrinsic corresponds to the <c> PANDN </c> instruction.
  1096. ///
  1097. /// \param __m1
  1098. ///    A 64-bit integer vector. The one's complement of this parameter is used
  1099. ///    in the bitwise AND.
  1100. /// \param __m2
  1101. ///    A 64-bit integer vector.
  1102. /// \returns A 64-bit integer vector containing the bitwise AND of the second
  1103. ///    parameter and the one's complement of the first parameter.
  1104. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1105. _mm_andnot_si64(__m64 __m1, __m64 __m2)
  1106. {
  1107.     return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
  1108. }
  1109.  
  1110. /// Performs a bitwise OR of two 64-bit integer vectors.
  1111. ///
  1112. /// \headerfile <x86intrin.h>
  1113. ///
  1114. /// This intrinsic corresponds to the <c> POR </c> instruction.
  1115. ///
  1116. /// \param __m1
  1117. ///    A 64-bit integer vector.
  1118. /// \param __m2
  1119. ///    A 64-bit integer vector.
  1120. /// \returns A 64-bit integer vector containing the bitwise OR of both
  1121. ///    parameters.
  1122. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1123. _mm_or_si64(__m64 __m1, __m64 __m2)
  1124. {
  1125.     return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
  1126. }
  1127.  
  1128. /// Performs a bitwise exclusive OR of two 64-bit integer vectors.
  1129. ///
  1130. /// \headerfile <x86intrin.h>
  1131. ///
  1132. /// This intrinsic corresponds to the <c> PXOR </c> instruction.
  1133. ///
  1134. /// \param __m1
  1135. ///    A 64-bit integer vector.
  1136. /// \param __m2
  1137. ///    A 64-bit integer vector.
  1138. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
  1139. ///    parameters.
  1140. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1141. _mm_xor_si64(__m64 __m1, __m64 __m2)
  1142. {
  1143.     return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
  1144. }
  1145.  
  1146. /// Compares the 8-bit integer elements of two 64-bit integer vectors of
  1147. ///    [8 x i8] to determine if the element of the first vector is equal to the
  1148. ///    corresponding element of the second vector.
  1149. ///
  1150. ///    The comparison yields 0 for false, 0xFF for true.
  1151. ///
  1152. /// \headerfile <x86intrin.h>
  1153. ///
  1154. /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
  1155. ///
  1156. /// \param __m1
  1157. ///    A 64-bit integer vector of [8 x i8].
  1158. /// \param __m2
  1159. ///    A 64-bit integer vector of [8 x i8].
  1160. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
  1161. ///    results.
  1162. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1163. _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
  1164. {
  1165.     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
  1166. }
  1167.  
  1168. /// Compares the 16-bit integer elements of two 64-bit integer vectors of
  1169. ///    [4 x i16] to determine if the element of the first vector is equal to the
  1170. ///    corresponding element of the second vector.
  1171. ///
  1172. ///    The comparison yields 0 for false, 0xFFFF for true.
  1173. ///
  1174. /// \headerfile <x86intrin.h>
  1175. ///
  1176. /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
  1177. ///
  1178. /// \param __m1
  1179. ///    A 64-bit integer vector of [4 x i16].
  1180. /// \param __m2
  1181. ///    A 64-bit integer vector of [4 x i16].
  1182. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
  1183. ///    results.
  1184. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1185. _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
  1186. {
  1187.     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
  1188. }
  1189.  
  1190. /// Compares the 32-bit integer elements of two 64-bit integer vectors of
  1191. ///    [2 x i32] to determine if the element of the first vector is equal to the
  1192. ///    corresponding element of the second vector.
  1193. ///
  1194. ///    The comparison yields 0 for false, 0xFFFFFFFF for true.
  1195. ///
  1196. /// \headerfile <x86intrin.h>
  1197. ///
  1198. /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
  1199. ///
  1200. /// \param __m1
  1201. ///    A 64-bit integer vector of [2 x i32].
  1202. /// \param __m2
  1203. ///    A 64-bit integer vector of [2 x i32].
  1204. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
  1205. ///    results.
  1206. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1207. _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
  1208. {
  1209.     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
  1210. }
  1211.  
  1212. /// Compares the 8-bit integer elements of two 64-bit integer vectors of
  1213. ///    [8 x i8] to determine if the element of the first vector is greater than
  1214. ///    the corresponding element of the second vector.
  1215. ///
  1216. ///    The comparison yields 0 for false, 0xFF for true.
  1217. ///
  1218. /// \headerfile <x86intrin.h>
  1219. ///
  1220. /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
  1221. ///
  1222. /// \param __m1
  1223. ///    A 64-bit integer vector of [8 x i8].
  1224. /// \param __m2
  1225. ///    A 64-bit integer vector of [8 x i8].
  1226. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
  1227. ///    results.
  1228. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1229. _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
  1230. {
  1231.     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
  1232. }
  1233.  
  1234. /// Compares the 16-bit integer elements of two 64-bit integer vectors of
  1235. ///    [4 x i16] to determine if the element of the first vector is greater than
  1236. ///    the corresponding element of the second vector.
  1237. ///
  1238. ///    The comparison yields 0 for false, 0xFFFF for true.
  1239. ///
  1240. /// \headerfile <x86intrin.h>
  1241. ///
  1242. /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
  1243. ///
  1244. /// \param __m1
  1245. ///    A 64-bit integer vector of [4 x i16].
  1246. /// \param __m2
  1247. ///    A 64-bit integer vector of [4 x i16].
  1248. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
  1249. ///    results.
  1250. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1251. _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
  1252. {
  1253.     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
  1254. }
  1255.  
  1256. /// Compares the 32-bit integer elements of two 64-bit integer vectors of
  1257. ///    [2 x i32] to determine if the element of the first vector is greater than
  1258. ///    the corresponding element of the second vector.
  1259. ///
  1260. ///    The comparison yields 0 for false, 0xFFFFFFFF for true.
  1261. ///
  1262. /// \headerfile <x86intrin.h>
  1263. ///
  1264. /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
  1265. ///
  1266. /// \param __m1
  1267. ///    A 64-bit integer vector of [2 x i32].
  1268. /// \param __m2
  1269. ///    A 64-bit integer vector of [2 x i32].
  1270. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
  1271. ///    results.
  1272. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1273. _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
  1274. {
  1275.     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
  1276. }
  1277.  
  1278. /// Constructs a 64-bit integer vector initialized to zero.
  1279. ///
  1280. /// \headerfile <x86intrin.h>
  1281. ///
  1282. /// This intrinsic corresponds to the <c> PXOR </c> instruction.
  1283. ///
  1284. /// \returns An initialized 64-bit integer vector with all elements set to zero.
  1285. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1286. _mm_setzero_si64(void)
  1287. {
  1288.     return __extension__ (__m64){ 0LL };
  1289. }
  1290.  
  1291. /// Constructs a 64-bit integer vector initialized with the specified
  1292. ///    32-bit integer values.
  1293. ///
  1294. /// \headerfile <x86intrin.h>
  1295. ///
  1296. /// This intrinsic is a utility function and does not correspond to a specific
  1297. ///    instruction.
  1298. ///
  1299. /// \param __i1
  1300. ///    A 32-bit integer value used to initialize the upper 32 bits of the
  1301. ///    result.
  1302. /// \param __i0
  1303. ///    A 32-bit integer value used to initialize the lower 32 bits of the
  1304. ///    result.
  1305. /// \returns An initialized 64-bit integer vector.
  1306. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1307. _mm_set_pi32(int __i1, int __i0)
  1308. {
  1309.     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
  1310. }
  1311.  
  1312. /// Constructs a 64-bit integer vector initialized with the specified
  1313. ///    16-bit integer values.
  1314. ///
  1315. /// \headerfile <x86intrin.h>
  1316. ///
  1317. /// This intrinsic is a utility function and does not correspond to a specific
  1318. ///    instruction.
  1319. ///
  1320. /// \param __s3
  1321. ///    A 16-bit integer value used to initialize bits [63:48] of the result.
  1322. /// \param __s2
  1323. ///    A 16-bit integer value used to initialize bits [47:32] of the result.
  1324. /// \param __s1
  1325. ///    A 16-bit integer value used to initialize bits [31:16] of the result.
  1326. /// \param __s0
  1327. ///    A 16-bit integer value used to initialize bits [15:0] of the result.
  1328. /// \returns An initialized 64-bit integer vector.
  1329. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1330. _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
  1331. {
  1332.     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
  1333. }
  1334.  
  1335. /// Constructs a 64-bit integer vector initialized with the specified
  1336. ///    8-bit integer values.
  1337. ///
  1338. /// \headerfile <x86intrin.h>
  1339. ///
  1340. /// This intrinsic is a utility function and does not correspond to a specific
  1341. ///    instruction.
  1342. ///
  1343. /// \param __b7
  1344. ///    An 8-bit integer value used to initialize bits [63:56] of the result.
  1345. /// \param __b6
  1346. ///    An 8-bit integer value used to initialize bits [55:48] of the result.
  1347. /// \param __b5
  1348. ///    An 8-bit integer value used to initialize bits [47:40] of the result.
  1349. /// \param __b4
  1350. ///    An 8-bit integer value used to initialize bits [39:32] of the result.
  1351. /// \param __b3
  1352. ///    An 8-bit integer value used to initialize bits [31:24] of the result.
  1353. /// \param __b2
  1354. ///    An 8-bit integer value used to initialize bits [23:16] of the result.
  1355. /// \param __b1
  1356. ///    An 8-bit integer value used to initialize bits [15:8] of the result.
  1357. /// \param __b0
  1358. ///    An 8-bit integer value used to initialize bits [7:0] of the result.
  1359. /// \returns An initialized 64-bit integer vector.
  1360. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1361. _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
  1362.             char __b1, char __b0)
  1363. {
  1364.     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
  1365.                                                __b4, __b5, __b6, __b7);
  1366. }
  1367.  
  1368. /// Constructs a 64-bit integer vector of [2 x i32], with each of the
  1369. ///    32-bit integer vector elements set to the specified 32-bit integer
  1370. ///    value.
  1371. ///
  1372. /// \headerfile <x86intrin.h>
  1373. ///
  1374. /// This intrinsic is a utility function and does not correspond to a specific
  1375. ///    instruction.
  1376. ///
  1377. /// \param __i
  1378. ///    A 32-bit integer value used to initialize each vector element of the
  1379. ///    result.
  1380. /// \returns An initialized 64-bit integer vector of [2 x i32].
  1381. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1382. _mm_set1_pi32(int __i)
  1383. {
  1384.     return _mm_set_pi32(__i, __i);
  1385. }
  1386.  
  1387. /// Constructs a 64-bit integer vector of [4 x i16], with each of the
  1388. ///    16-bit integer vector elements set to the specified 16-bit integer
  1389. ///    value.
  1390. ///
  1391. /// \headerfile <x86intrin.h>
  1392. ///
  1393. /// This intrinsic is a utility function and does not correspond to a specific
  1394. ///    instruction.
  1395. ///
  1396. /// \param __w
  1397. ///    A 16-bit integer value used to initialize each vector element of the
  1398. ///    result.
  1399. /// \returns An initialized 64-bit integer vector of [4 x i16].
  1400. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1401. _mm_set1_pi16(short __w)
  1402. {
  1403.     return _mm_set_pi16(__w, __w, __w, __w);
  1404. }
  1405.  
  1406. /// Constructs a 64-bit integer vector of [8 x i8], with each of the
  1407. ///    8-bit integer vector elements set to the specified 8-bit integer value.
  1408. ///
  1409. /// \headerfile <x86intrin.h>
  1410. ///
  1411. /// This intrinsic is a utility function and does not correspond to a specific
  1412. ///    instruction.
  1413. ///
  1414. /// \param __b
  1415. ///    An 8-bit integer value used to initialize each vector element of the
  1416. ///    result.
  1417. /// \returns An initialized 64-bit integer vector of [8 x i8].
  1418. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1419. _mm_set1_pi8(char __b)
  1420. {
  1421.     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
  1422. }
  1423.  
  1424. /// Constructs a 64-bit integer vector, initialized in reverse order with
  1425. ///    the specified 32-bit integer values.
  1426. ///
  1427. /// \headerfile <x86intrin.h>
  1428. ///
  1429. /// This intrinsic is a utility function and does not correspond to a specific
  1430. ///    instruction.
  1431. ///
  1432. /// \param __i0
  1433. ///    A 32-bit integer value used to initialize the lower 32 bits of the
  1434. ///    result.
  1435. /// \param __i1
  1436. ///    A 32-bit integer value used to initialize the upper 32 bits of the
  1437. ///    result.
  1438. /// \returns An initialized 64-bit integer vector.
  1439. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1440. _mm_setr_pi32(int __i0, int __i1)
  1441. {
  1442.     return _mm_set_pi32(__i1, __i0);
  1443. }
  1444.  
  1445. /// Builds a 64-bit integer vector from four 16-bit integer values that are
  1446. ///    given in reverse order, i.e. with the lowest element first.
  1447. ///
  1448. /// \headerfile <x86intrin.h>
  1449. ///
  1450. /// This is a utility intrinsic with no single corresponding instruction.
  1451. ///
  1452. /// \param __w0
  1453. ///    The 16-bit integer value stored in bits [15:0] of the result.
  1454. /// \param __w1
  1455. ///    The 16-bit integer value stored in bits [31:16] of the result.
  1456. /// \param __w2
  1457. ///    The 16-bit integer value stored in bits [47:32] of the result.
  1458. /// \param __w3
  1459. ///    The 16-bit integer value stored in bits [63:48] of the result.
  1460. /// \returns The initialized 64-bit integer vector.
  1461. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1462. _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
  1463. {
  1464.     __m64 __packed = _mm_set_pi16(__w3, __w2, __w1, __w0);
  1465.     return __packed;
  1466. }
  1467.  
  1468. /// Builds a 64-bit integer vector from eight 8-bit integer values that are
  1469. ///    given in reverse order, i.e. with the lowest byte first.
  1470. ///
  1471. /// \headerfile <x86intrin.h>
  1472. ///
  1473. /// This is a utility intrinsic with no single corresponding instruction.
  1474. ///
  1475. /// \param __b0
  1476. ///    The 8-bit integer value stored in bits [7:0] of the result.
  1477. /// \param __b1
  1478. ///    The 8-bit integer value stored in bits [15:8] of the result.
  1479. /// \param __b2
  1480. ///    The 8-bit integer value stored in bits [23:16] of the result.
  1481. /// \param __b3
  1482. ///    The 8-bit integer value stored in bits [31:24] of the result.
  1483. /// \param __b4
  1484. ///    The 8-bit integer value stored in bits [39:32] of the result.
  1485. /// \param __b5
  1486. ///    The 8-bit integer value stored in bits [47:40] of the result.
  1487. /// \param __b6
  1488. ///    The 8-bit integer value stored in bits [55:48] of the result.
  1489. /// \param __b7
  1490. ///    The 8-bit integer value stored in bits [63:56] of the result.
  1491. /// \returns The initialized 64-bit integer vector.
  1492. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1493. _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3,
  1494.              char __b4, char __b5, char __b6, char __b7)
  1495. {
  1496.     __m64 __v = _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
  1497.     return __v;
  1498. }
  1499.  
  1500. #undef __DEFAULT_FN_ATTRS /* attribute helper macro is local to this header */
  1501.  
  1502. /* Aliases for compatibility: each _m_* name expands to an _mm_* intrinsic. */
  1503. #define _m_empty _mm_empty
  1504. #define _m_from_int _mm_cvtsi32_si64
  1505. #define _m_from_int64 _mm_cvtsi64_m64
  1506. #define _m_to_int _mm_cvtsi64_si32
  1507. #define _m_to_int64 _mm_cvtm64_si64
  1508. #define _m_packsswb _mm_packs_pi16
  1509. #define _m_packssdw _mm_packs_pi32
  1510. #define _m_packuswb _mm_packs_pu16
  1511. #define _m_punpckhbw _mm_unpackhi_pi8
  1512. #define _m_punpckhwd _mm_unpackhi_pi16
  1513. #define _m_punpckhdq _mm_unpackhi_pi32
  1514. #define _m_punpcklbw _mm_unpacklo_pi8
  1515. #define _m_punpcklwd _mm_unpacklo_pi16
  1516. #define _m_punpckldq _mm_unpacklo_pi32
  1517. #define _m_paddb _mm_add_pi8
  1518. #define _m_paddw _mm_add_pi16
  1519. #define _m_paddd _mm_add_pi32
  1520. #define _m_paddsb _mm_adds_pi8
  1521. #define _m_paddsw _mm_adds_pi16
  1522. #define _m_paddusb _mm_adds_pu8
  1523. #define _m_paddusw _mm_adds_pu16
  1524. #define _m_psubb _mm_sub_pi8
  1525. #define _m_psubw _mm_sub_pi16
  1526. #define _m_psubd _mm_sub_pi32
  1527. #define _m_psubsb _mm_subs_pi8
  1528. #define _m_psubsw _mm_subs_pi16
  1529. #define _m_psubusb _mm_subs_pu8
  1530. #define _m_psubusw _mm_subs_pu16
  1531. #define _m_pmaddwd _mm_madd_pi16
  1532. #define _m_pmulhw _mm_mulhi_pi16
  1533. #define _m_pmullw _mm_mullo_pi16
  1534. #define _m_psllw _mm_sll_pi16
  1535. #define _m_psllwi _mm_slli_pi16
  1536. #define _m_pslld _mm_sll_pi32
  1537. #define _m_pslldi _mm_slli_pi32
  1538. #define _m_psllq _mm_sll_si64
  1539. #define _m_psllqi _mm_slli_si64
  1540. #define _m_psraw _mm_sra_pi16
  1541. #define _m_psrawi _mm_srai_pi16
  1542. #define _m_psrad _mm_sra_pi32
  1543. #define _m_psradi _mm_srai_pi32
  1544. #define _m_psrlw _mm_srl_pi16
  1545. #define _m_psrlwi _mm_srli_pi16
  1546. #define _m_psrld _mm_srl_pi32
  1547. #define _m_psrldi _mm_srli_pi32
  1548. #define _m_psrlq _mm_srl_si64
  1549. #define _m_psrlqi _mm_srli_si64
  1550. #define _m_pand _mm_and_si64
  1551. #define _m_pandn _mm_andnot_si64
  1552. #define _m_por _mm_or_si64
  1553. #define _m_pxor _mm_xor_si64
  1554. #define _m_pcmpeqb _mm_cmpeq_pi8
  1555. #define _m_pcmpeqw _mm_cmpeq_pi16
  1556. #define _m_pcmpeqd _mm_cmpeq_pi32
  1557. #define _m_pcmpgtb _mm_cmpgt_pi8
  1558. #define _m_pcmpgtw _mm_cmpgt_pi16
  1559. #define _m_pcmpgtd _mm_cmpgt_pi32
  1560.  
  1561. #endif /* __MMINTRIN_H */
  1562.  
  1563.