/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
#if !__has_builtin(__dmb)
#define __dmb(i) __builtin_arm_dmb(i)
#endif
#if !__has_builtin(__dsb)
#define __dsb(i) __builtin_arm_dsb(i)
#endif
#if !__has_builtin(__isb)
#define __isb(i) __builtin_arm_isb(i)
#endif

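/*
 * Illustrative (non-normative) example: the barrier intrinsics take a 4-bit
 * option value; 0xF ("SY", full system) is the most common choice. The
 * publish sketch below and its names are assumptions, not part of ACLE.
 *
 *   static uint32_t __example_data, __example_flag;
 *
 *   static void __example_publish(uint32_t __v) {
 *     __example_data = __v;
 *     __dmb(0xF);            // make the data store visible before the flag
 *     __example_flag = 1;
 *   }
 */
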
/* 8.4 Hints */

#if !__has_builtin(__wfi)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
  __builtin_arm_wfi();
}
#endif

#if !__has_builtin(__wfe)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
  __builtin_arm_wfe();
}
#endif

#if !__has_builtin(__sev)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
  __builtin_arm_sev();
}
#endif

#if !__has_builtin(__sevl)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
  __builtin_arm_sevl();
}
#endif

#if !__has_builtin(__yield)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
  __builtin_arm_yield();
}
#endif

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

/* 8.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t v;
  do
    v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return v;
}

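/*
 * Illustrative (non-normative) example: __swp atomically stores the new value
 * and returns the previous contents, so a minimal test-and-set spinlock can
 * be sketched as below (the lock variable and names are assumptions).
 *
 *   static void __example_spin_lock(volatile uint32_t *__lock) {
 *     while (__swp(1, __lock) != 0)
 *       __yield();           // hint to the core while spinning
 *   }
 *
 *   static void __example_spin_unlock(volatile uint32_t *__lock) {
 *     __dmb(0xF);            // order the critical section before the release
 *     *__lock = 0;
 *   }
 */
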
/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif

/* 8.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif

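/*
 * Illustrative (non-normative) example: __pld(addr) prefetches for a read;
 * __pldx lets the access kind (0 = read, 1 = write), cache level and
 * retention policy be stated explicitly. The 16-element look-ahead distance
 * below is an arbitrary assumption for this sketch.
 *
 *   static uint32_t __example_sum(const uint32_t *__a, int __n) {
 *     uint32_t __s = 0;
 *     for (int __i = 0; __i < __n; ++__i) {
 *       if (__i + 16 < __n)
 *         __pld(&__a[__i + 16]);   // prefetch well ahead of the current use
 *       __s += __a[__i];
 *     }
 *     return __s;
 *   }
 */
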
/* 8.7 NOP */
#if !defined(_MSC_VER) || !defined(__aarch64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}
#endif

/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}


/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return (uint32_t)__builtin_clz(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return (unsigned long)__builtin_clzl(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return (uint64_t)__builtin_clzll(__t);
}

/* CLS */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__cls(uint32_t __t) {
  return __builtin_arm_cls(__t);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_arm_cls(__t);
#else
  return __builtin_arm_cls64(__t);
#endif
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clsll(uint64_t __t) {
  return __builtin_arm_cls64(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}

/* REV16 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | (uint64_t)__rev16((uint32_t)__t);
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
    return __rev16(__t);
#else
    return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return (int16_t)__builtin_bswap16((uint16_t)__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}

static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}

static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}

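/*
 * Illustrative (non-normative) values for the bit-manipulation intrinsics
 * above; all of these follow directly from the definitions:
 *
 *   __ror(0x80000001u, 1)  == 0xC0000000u   // rotate right by one bit
 *   __clz(0x00010000u)     == 15            // 15 leading zero bits
 *   __rev(0x11223344u)     == 0x44332211u   // byte-reverse a word
 *   __rev16(0x11223344u)   == 0x22114433u   // byte-reverse each halfword
 *   __rbit(0x00000001u)    == 0x80000000u   // bit-reverse a word
 *
 * Note that __clz here forwards to __builtin_clz, whose result for a zero
 * argument is undefined, so callers should not rely on __clz(0).
 */
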
/*
 * 9.3 16-bit multiplications
 */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif

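/*
 * Illustrative (non-normative) example: the b/t suffixes select the bottom or
 * top signed halfword of each operand, so with two 16-bit values packed into
 * each 32-bit argument (a and c in the bottom halfwords, b and d in the top)
 * a two-element dot product can be sketched as:
 *
 *   static int32_t __example_dot2(int32_t __ab, int32_t __cd) {
 *     return __smulbb(__ab, __cd) + __smultt(__ab, __cd);   // a*c + b*d
 *   }
 */
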
/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change the guard to the corresponding __ARM_FEATURE flag once the
 * Q-flag intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
#if defined(__ARM_FEATURE_SAT) && __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}

static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif

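/*
 * Illustrative (non-normative) example: the Q-suffixed operations clamp to
 * the representable range instead of wrapping, e.g.
 *
 *   __qadd(INT32_MAX, 1)   yields INT32_MAX   (saturated, Q flag set)
 *   __qsub(INT32_MIN, 1)   yields INT32_MIN
 *
 * whereas the corresponding plain C additions would overflow.
 */
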
/* 9.4.3 Accumulating multiplications */
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif


/* 9.5.4 Parallel 16-bit saturation */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;

static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 9.5.6 Parallel selection */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

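/*
 * Illustrative (non-normative) example: __sel picks each result byte from its
 * first or second argument according to the GE flags left by a preceding
 * parallel (SIMD32) operation. A common idiom is a per-byte unsigned maximum,
 * sketched here under the assumption that __usub8 (defined below) sets GE[i]
 * when byte i of __a is greater than or equal to byte i of __b:
 *
 *   static uint8x4_t __example_max_u8x4(uint8x4_t __a, uint8x4_t __b) {
 *     (void)__usub8(__a, __b);   // wanted only for its GE-flag side effect
 *     return __sel(__a, __b);    // GE[i] set -> take __a's byte, else __b's
 *   }
 */
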
/* 9.5.7 Parallel 8-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif

/* 9.5.8 Sum of 8-bit absolute differences */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif

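/*
 * Illustrative (non-normative) example: __smlad multiplies the two pairs of
 * corresponding signed halfwords and adds both products to the accumulator,
 * so a packed 16-bit dot product can be sketched as below (the memcpy-based
 * repacking of two int16_t values is an assumption about data layout).
 *
 *   static int32_t __example_dot(const int16_t *__x, const int16_t *__y,
 *                                int __n) {
 *     int32_t __acc = 0;
 *     for (int __i = 0; __i + 1 < __n; __i += 2) {
 *       int16x2_t __xv, __yv;
 *       __builtin_memcpy(&__xv, &__x[__i], sizeof(__xv));
 *       __builtin_memcpy(&__yv, &__y[__i], sizeof(__yv));
 *       __acc = __smlad(__xv, __yv, __acc);
 *     }
 *     return __acc;
 *   }
 */
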
/* 9.7 CRC32 intrinsics */
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) ||                   \
    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif

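/*
 * Illustrative (non-normative) example: a byte-at-a-time CRC-32 over a
 * buffer. The first argument is the accumulated CRC and the second the new
 * data; the 0xFFFFFFFF initial value and final inversion follow the common
 * zlib convention and are an assumption of this sketch, not part of ACLE.
 *
 *   static uint32_t __example_crc32(const uint8_t *__p, uint32_t __len) {
 *     uint32_t __crc = 0xFFFFFFFFu;
 *     for (uint32_t __i = 0; __i < __len; ++__i)
 *       __crc = __crc32b(__crc, __p[__i]);
 *     return ~__crc;
 *   }
 */
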
/* Armv8.3-A Javascript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
__jcvt(double __a) {
  return __builtin_arm_jcvt(__a);
}
#endif

/* Armv8.5-A FP rounding intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32zf(float __a) {
  return __builtin_arm_rint32zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32z(double __a) {
  return __builtin_arm_rint32z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64zf(float __a) {
  return __builtin_arm_rint64zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64z(double __a) {
  return __builtin_arm_rint64z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32xf(float __a) {
  return __builtin_arm_rint32xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32x(double __a) {
  return __builtin_arm_rint32x(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64xf(float __a) {
  return __builtin_arm_rint64xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64x(double __a) {
  return __builtin_arm_rint64x(__a);
}
#endif

/* Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
    uint64_t val[8];
} data512_t;

static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_ld64b(const void *__addr) {
  data512_t __value;
  __builtin_arm_ld64b(__addr, __value.val);
  return __value;
}
static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64b(void *__addr, data512_t __value) {
  __builtin_arm_st64b(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv0(void *__addr, data512_t __value) {
  return __builtin_arm_st64bv0(__addr, __value.val);
}
#endif

/* 10.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsr128(sysreg) __builtin_arm_rsr128(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_rsrf(sysreg) __builtin_bit_cast(float, __arm_rsr(sysreg))
#define __arm_rsrf64(sysreg) __builtin_bit_cast(double, __arm_rsr64(sysreg))
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsr128(sysreg, v) __builtin_arm_wsr128(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))

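/*
 * Illustrative (non-normative) example: the special-register accessors take
 * the register name as a string literal, and the accessor width (__arm_rsr,
 * __arm_rsr64, __arm_rsrp, ...) must match the register being accessed. For
 * instance, reading the AArch64 virtual counter (register name assumed valid
 * for the target):
 *
 *   uint64_t __example_ticks = __arm_rsr64("CNTVCT_EL0");
 */
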
/* Memory Tagging Extensions (MTE) Intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
#define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
#define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
#define __arm_mte_exclude_tag(__ptr, __excluded)  __builtin_arm_gmi(__ptr, __excluded)
#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr)
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)

/* Memory Operations Intrinsics */
#define __arm_mops_memset_tag(__tagged_address, __value, __size)    \
  __builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
#endif

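/*
 * Illustrative (non-normative) MTE sketch: derive a randomly tagged copy of a
 * pointer and apply that tag to the underlying memory before use. The 16-byte
 * tag granule and the requirement that __size be granule-aligned are
 * assumptions of this sketch about the architecture, not part of this header.
 *
 *   static void *__example_tag_region(void *__p, uint64_t __size) {
 *     void *__tagged = __arm_mte_create_random_tag(__p, 0);
 *     for (uint64_t __off = 0; __off < __size; __off += 16)
 *       __arm_mte_set_tag((char *)__tagged + __off);
 *     return __tagged;
 *   }
 */
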
/* Transactional Memory Extension (TME) Intrinsics */
#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME

#define _TMFAILURE_REASON  0x00007fffu
#define _TMFAILURE_RTRY    0x00008000u
#define _TMFAILURE_CNCL    0x00010000u
#define _TMFAILURE_MEM     0x00020000u
#define _TMFAILURE_IMP     0x00040000u
#define _TMFAILURE_ERR     0x00080000u
#define _TMFAILURE_SIZE    0x00100000u
#define _TMFAILURE_NEST    0x00200000u
#define _TMFAILURE_DBG     0x00400000u
#define _TMFAILURE_INT     0x00800000u
#define _TMFAILURE_TRIVIAL 0x01000000u

#define __tstart()        __builtin_arm_tstart()
#define __tcommit()       __builtin_arm_tcommit()
#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
#define __ttest()         __builtin_arm_ttest()

#endif /* __ARM_FEATURE_TME */

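/*
 * Illustrative (non-normative) TME sketch: __tstart() returns 0 when a
 * transaction has started, and otherwise returns a failure status that can be
 * tested against the _TMFAILURE_* masks; whether to retry or fall back to a
 * lock is a policy choice left to the caller.
 *
 *   uint64_t __example_status = __tstart();
 *   if (__example_status == 0) {
 *     // ... transactional work ...
 *     __tcommit();
 *   } else if (__example_status & _TMFAILURE_RTRY) {
 *     // transient failure: the caller may retry or take a fallback path
 *   }
 */
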
/* Armv8.5-A Random number generation intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
  return __builtin_arm_rndr(__p);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndrrs(uint64_t *__p) {
  return __builtin_arm_rndrrs(__p);
}
#endif
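
/*
 * Illustrative (non-normative) example: both generators return 0 on success
 * and a nonzero value when no random number could be produced, so the status
 * must be checked before the output is used.
 *
 *   uint64_t __example_value;
 *   if (__rndr(&__example_value) == 0) {
 *     // __example_value now holds 64 random bits
 *   }
 */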

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */