/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __ARM_ACLE_H
#define __ARM_ACLE_H

#ifndef __ARM_ACLE
#error "ACLE intrinsics support not enabled."
#endif

#include <stdint.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
/* 8.3 Memory barriers */
#if !defined(_MSC_VER)
#define __dmb(i) __builtin_arm_dmb(i)
#define __dsb(i) __builtin_arm_dsb(i)
#define __isb(i) __builtin_arm_isb(i)
#endif

/* 8.4 Hints */
#if !defined(_MSC_VER)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
  __builtin_arm_wfi();
}
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
  __builtin_arm_wfe();
}
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
  __builtin_arm_sev();
}
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
  __builtin_arm_sevl();
}
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
  __builtin_arm_yield();
}
#endif

#if __ARM_32BIT_STATE
#define __dbg(t) __builtin_arm_dbg(t)
#endif

/* 8.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t __x, volatile uint32_t *__p) {
  uint32_t v;
  do
    v = __builtin_arm_ldrex(__p);
  while (__builtin_arm_strex(__x, __p));
  return v;
}
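
/* Illustrative use of __swp (the variable below is hypothetical): the
 * exclusive load/store loop above retries until the store succeeds, so the
 * swap is atomic with respect to other exclusive accessors.
 *
 *   volatile uint32_t lock;
 *   uint32_t previous = __swp(1, &lock);   // store 1, return the old value
 */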

/* 8.6 Memory prefetch intrinsics */
/* 8.6.1 Data prefetch */
#define __pld(addr) __pldx(0, 0, 0, addr)

#if __ARM_32BIT_STATE
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, 1)
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
#endif
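
/* Illustrative use (the pointer is hypothetical): __pld requests a data
 * prefetch for reading, while __pldx additionally lets the caller name the
 * access kind (0 = read, 1 = write), target cache level and retention policy.
 *
 *   extern const char *buf;
 *   __pld(buf);            // prefetch for read
 *   __pldx(1, 0, 0, buf);  // prefetch for write
 */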

/* 8.6.2 Instruction prefetch */
#define __pli(addr) __plix(0, 0, addr)

#if __ARM_32BIT_STATE
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, 0)
#else
#define __plix(cache_level, retention_policy, addr) \
  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
#endif

/* 8.7 NOP */
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
  __builtin_arm_nop();
}

/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t __x, uint32_t __y) {
  __y %= 32;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (32 - __y));
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t __x, uint32_t __y) {
  __y %= 64;
  if (__y == 0)
    return __x;
  return (__x >> __y) | (__x << (64 - __y));
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
  return __ror(__x, __y);
#else
  return __rorll(__x, __y);
#endif
}
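
/* Example values (illustrative): a rotate right by 8 moves the low byte to
 * the top, so __ror(0x12345678, 8) == 0x78123456, and a rotation by 0 or by
 * a multiple of the width returns the input unchanged.
 */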

/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t __t) {
  return __builtin_clz(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long __t) {
  return __builtin_clzl(__t);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t __t) {
  return __builtin_clzll(__t);
}

/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t __t) {
  return __builtin_bswap32(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __builtin_bswap32(__t);
#else
  return __builtin_bswap64(__t);
#endif
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t __t) {
  return __builtin_bswap64(__t);
}
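
/* Example values (illustrative): __rev reverses the byte order of a word,
 * so __rev(0x11223344) == 0x44332211; __revl and __revll do the same for
 * unsigned long and 64-bit quantities respectively.
 */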

/* REV16 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t __t) {
  return __ror(__rev(__t), 16);
}
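
/* The composition above works because reversing all four bytes and then
 * rotating by 16 bits is equivalent to swapping the bytes within each
 * halfword, e.g. 0xAABBCCDD becomes 0xBBAADDCC.
 */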
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t __t) {
  return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rev16(__t);
#else
  return __rev16ll(__t);
#endif
}

/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t __t) {
  return __builtin_bswap16(__t);
}

/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t __t) {
  return __builtin_arm_rbit(__t);
}
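
/* AArch32 has no 64-bit RBIT instruction, so the 64-bit variant below is
 * composed from two 32-bit bit-reversals with the halves exchanged; AArch64
 * maps it directly onto the 64-bit builtin.
 */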
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
  return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
         __builtin_arm_rbit(__t >> 32);
#else
  return __builtin_arm_rbit64(__t);
#endif
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
  return __rbit(__t);
#else
  return __rbitll(__t);
#endif
}

/*
 * 9.3 16-bit multiplications
 */
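/* Naming convention: the suffix letters select which half of each operand
 * feeds the multiply ('b' = bottom 16 bits, 't' = top 16 bits), while 'w'
 * means the full 32-bit first operand is multiplied by the chosen half of
 * the second, keeping the upper 32 bits of the 48-bit product.
 */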
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulbt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulbt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultb(int32_t __a, int32_t __b) {
  return __builtin_arm_smultb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smultt(int32_t __a, int32_t __b) {
  return __builtin_arm_smultt(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwb(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwb(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))
__smulwt(int32_t __a, int32_t __b) {
  return __builtin_arm_smulwt(__a, __b);
}
#endif

/*
 * 9.4 Saturating intrinsics
 *
 * FIXME: Change the guards to their corresponding __ARM_FEATURE flags when
 * the Q-flag intrinsics are implemented and the flag is enabled.
 */
/* 9.4.1 Width-specified saturation intrinsics */
#if __ARM_FEATURE_SAT
#define __ssat(x, y) __builtin_arm_ssat(x, y)
#define __usat(x, y) __builtin_arm_usat(x, y)
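/* Illustrative use (x is a hypothetical int32_t; the width must be a
 * constant expression):
 *
 *   int32_t s = __ssat(x, 8);   // clamp to the signed 8-bit range [-128, 127]
 *   uint32_t u = __usat(x, 8);  // clamp to the unsigned 8-bit range [0, 255]
 */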
#endif

/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_FEATURE_DSP
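/* Note: __qadd, __qsub and __qdbl perform signed saturating 32-bit
 * arithmetic; on overflow the result is clamped to INT32_MIN/INT32_MAX and
 * the sticky Q flag is set.
 */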
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t __t, int32_t __v) {
  return __builtin_arm_qadd(__t, __v);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t __t, int32_t __v) {
  return __builtin_arm_qsub(__t, __v);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t __t) {
  return __builtin_arm_qadd(__t, __t);
}
#endif

/* 9.4.3 Accumulating multiplications */
#if __ARM_FEATURE_DSP
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlabt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlabt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlatt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlatt(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawb(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawb(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlawt(int32_t __a, int32_t __b, int32_t __c) {
  return __builtin_arm_smlawt(__a, __b, __c);
}
#endif

/* 9.5.4 Parallel 16-bit saturation */
#if __ARM_FEATURE_SIMD32
#define __ssat16(x, y) __builtin_arm_ssat16(x, y)
#define __usat16(x, y) __builtin_arm_usat16(x, y)
#endif

/* 9.5.5 Packing and unpacking */
#if __ARM_FEATURE_SIMD32
typedef int32_t int8x4_t;
typedef int32_t int16x2_t;
typedef uint32_t uint8x4_t;
typedef uint32_t uint16x2_t;
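
/* Each of these is a single 32-bit container interpreted as packed lanes:
 * the 8x4 types hold four 8-bit elements and the 16x2 types hold two 16-bit
 * elements, matching the registers the parallel instructions operate on.
 */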
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_sxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sxtb16(int8x4_t __a) {
  return __builtin_arm_sxtb16(__a);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtab16(int16x2_t __a, int8x4_t __b) {
  return __builtin_arm_uxtab16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__uxtb16(int8x4_t __a) {
  return __builtin_arm_uxtb16(__a);
}
#endif

/* 9.5.6 Parallel selection */
#if __ARM_FEATURE_SIMD32
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__sel(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_sel(__a, __b);
}
#endif

/* 9.5.7 Parallel 8-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__qsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_qsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__sadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_sadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shadd8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shadd8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__shsub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_shsub8(__a, __b);
}
static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
__ssub8(int8x4_t __a, int8x4_t __b) {
  return __builtin_arm_ssub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uhsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uhsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqadd8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqadd8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__uqsub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_uqsub8(__a, __b);
}
static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
__usub8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usub8(__a, __b);
}
#endif

/* 9.5.8 Sum of 8-bit absolute differences */
#if __ARM_FEATURE_SIMD32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usad8(uint8x4_t __a, uint8x4_t __b) {
  return __builtin_arm_usad8(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {
  return __builtin_arm_usada8(__a, __b, __c);
}
#endif

/* 9.5.9 Parallel 16-bit addition and subtraction */
#if __ARM_FEATURE_SIMD32
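/* Naming convention: a leading 's'/'u' is plain signed/unsigned (modulo)
 * arithmetic, 'q'/'uq' saturate, and 'sh'/'uh' halve the result; 'asx' and
 * 'sax' exchange the halfwords of the second operand before adding one pair
 * of halfwords and subtracting the other.
 */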
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__qsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_qsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__sasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_sasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shadd16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shadd16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shasx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shasx(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__shsub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_shsub16(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssax(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssax(__a, __b);
}
static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
__ssub16(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_ssub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uhsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uhsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqadd16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqadd16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqasx(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqasx(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__uqsub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_uqsub16(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usax(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usax(__a, __b);
}
static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
__usub16(uint16x2_t __a, uint16x2_t __b) {
  return __builtin_arm_usub16(__a, __b);
}
#endif

/* 9.5.10 Parallel 16-bit multiplications */
#if __ARM_FEATURE_SIMD32
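/* These are dual 16 x 16 multiplies: both halfword products are formed and
 * then added ('smuad'/'smlad') or subtracted ('smusd'/'smlsd'), optionally
 * accumulating into a 32-bit or, with the 'l' forms, a 64-bit accumulator;
 * a trailing 'x' exchanges the halfwords of the second operand first.
 */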
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlad(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smladx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlald(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlaldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsd(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {
  return __builtin_arm_smlsdx(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsld(__a, __b, __c);
}
static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {
  return __builtin_arm_smlsldx(__a, __b, __c);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuad(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuad(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smuadx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smuadx(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusd(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusd(__a, __b);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__smusdx(int16x2_t __a, int16x2_t __b) {
  return __builtin_arm_smusdx(__a, __b);
}
#endif

/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32b(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32h(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32w(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32d(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t __a, uint8_t __b) {
  return __builtin_arm_crc32cb(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t __a, uint16_t __b) {
  return __builtin_arm_crc32ch(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t __a, uint32_t __b) {
  return __builtin_arm_crc32cw(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t __a, uint64_t __b) {
  return __builtin_arm_crc32cd(__a, __b);
}
#endif
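
/* Illustrative use of the byte variant (the buffer and length are
 * hypothetical, and the initial value and final inversion depend on the
 * protocol being matched):
 *
 *   uint32_t crc = 0xFFFFFFFFU;
 *   for (size_t i = 0; i != len; ++i)
 *     crc = __crc32b(crc, data[i]);
 *   crc = ~crc;
 */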

/* 10.1 Special register intrinsics */
#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
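
/* Illustrative use: the sysreg argument is a string literal naming a
 * register that the target actually provides, e.g. (assuming an AArch64
 * target with the generic timer)
 *
 *   uint64_t ticks = __arm_rsr64("cntvct_el0");
 */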

#if defined(__cplusplus)
}
#endif

#endif /* __ARM_ACLE_H */