mmintrin.h 57 KB


  1. /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to deal
  5. * in the Software without restriction, including without limitation the rights
  6. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. * copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. * THE SOFTWARE.
  20. *
  21. *===-----------------------------------------------------------------------===
  22. */
  23. #ifndef __MMINTRIN_H
  24. #define __MMINTRIN_H
  25. typedef long long __m64 __attribute__((__vector_size__(8)));
  26. typedef long long __v1di __attribute__((__vector_size__(8)));
  27. typedef int __v2si __attribute__((__vector_size__(8)));
  28. typedef short __v4hi __attribute__((__vector_size__(8)));
  29. typedef char __v8qi __attribute__((__vector_size__(8)));
  30. /* Define the default attributes for the functions in this file. */
  31. #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
  32. /// \brief Clears the MMX state by setting the state of the x87 stack registers
  33. /// to empty.
  34. ///
  35. /// \headerfile <x86intrin.h>
  36. ///
  37. /// This intrinsic corresponds to the <c> EMMS </c> instruction.
  38. ///
  39. static __inline__ void __DEFAULT_FN_ATTRS
  40. _mm_empty(void)
  41. {
  42. __builtin_ia32_emms();
  43. }
  44. /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
  45. /// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
  46. ///
  47. /// \headerfile <x86intrin.h>
  48. ///
  49. /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
  50. ///
  51. /// \param __i
  52. /// A 32-bit integer value.
  53. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
  54. /// parameter. The upper 32 bits are set to 0.
  55. static __inline__ __m64 __DEFAULT_FN_ATTRS
  56. _mm_cvtsi32_si64(int __i)
  57. {
  58. return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
  59. }
  60. /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
  61. /// signed integer.
  62. ///
  63. /// \headerfile <x86intrin.h>
  64. ///
  65. /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
  66. ///
  67. /// \param __m
  68. /// A 64-bit integer vector.
  69. /// \returns A 32-bit signed integer value containing the lower 32 bits of the
  70. /// parameter.
  71. static __inline__ int __DEFAULT_FN_ATTRS
  72. _mm_cvtsi64_si32(__m64 __m)
  73. {
  74. return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
  75. }
  76. /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
  77. ///
  78. /// \headerfile <x86intrin.h>
  79. ///
  80. /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
  81. ///
  82. /// \param __i
  83. /// A 64-bit signed integer.
  84. /// \returns A 64-bit integer vector containing the same bitwise pattern as the
  85. /// parameter.
  86. static __inline__ __m64 __DEFAULT_FN_ATTRS
  87. _mm_cvtsi64_m64(long long __i)
  88. {
  89. return (__m64)__i;
  90. }
  91. /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
  92. ///
  93. /// \headerfile <x86intrin.h>
  94. ///
  95. /// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
  96. ///
  97. /// \param __m
  98. /// A 64-bit integer vector.
  99. /// \returns A 64-bit signed integer containing the same bitwise pattern as the
  100. /// parameter.
  101. static __inline__ long long __DEFAULT_FN_ATTRS
  102. _mm_cvtm64_si64(__m64 __m)
  103. {
  104. return (long long)__m;
  105. }
  106. /// \brief Converts 16-bit signed integers from both 64-bit integer vector
  107. /// parameters of [4 x i16] into 8-bit signed integer values, and constructs
  108. /// a 64-bit integer vector of [8 x i8] as the result. Positive values
  109. /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
  110. /// are saturated to 0x80.
  111. ///
  112. /// \headerfile <x86intrin.h>
  113. ///
  114. /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
  115. ///
  116. /// \param __m1
  117. /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  118. /// 16-bit signed integer and is converted to an 8-bit signed integer with
  119. /// saturation. Positive values greater than 0x7F are saturated to 0x7F.
  120. /// Negative values less than 0x80 are saturated to 0x80. The converted
  121. /// [4 x i8] values are written to the lower 32 bits of the result.
  122. /// \param __m2
  123. /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  124. /// 16-bit signed integer and is converted to an 8-bit signed integer with
  125. /// saturation. Positive values greater than 0x7F are saturated to 0x7F.
  126. /// Negative values less than 0x80 are saturated to 0x80. The converted
  127. /// [4 x i8] values are written to the upper 32 bits of the result.
  128. /// \returns A 64-bit integer vector of [8 x i8] containing the converted
  129. /// values.
  130. static __inline__ __m64 __DEFAULT_FN_ATTRS
  131. _mm_packs_pi16(__m64 __m1, __m64 __m2)
  132. {
  133. return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
  134. }
  135. /// \brief Converts 32-bit signed integers from both 64-bit integer vector
  136. /// parameters of [2 x i32] into 16-bit signed integer values, and constructs
  137. /// a 64-bit integer vector of [4 x i16] as the result. Positive values
  138. /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
  139. /// 0x8000 are saturated to 0x8000.
  140. ///
  141. /// \headerfile <x86intrin.h>
  142. ///
  143. /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
  144. ///
  145. /// \param __m1
  146. /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
  147. /// 32-bit signed integer and is converted to a 16-bit signed integer with
  148. /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
  149. /// Negative values less than 0x8000 are saturated to 0x8000. The converted
  150. /// [2 x i16] values are written to the lower 32 bits of the result.
  151. /// \param __m2
  152. /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
  153. /// 32-bit signed integer and is converted to a 16-bit signed integer with
  154. /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
  155. /// Negative values less than 0x8000 are saturated to 0x8000. The converted
  156. /// [2 x i16] values are written to the upper 32 bits of the result.
  157. /// \returns A 64-bit integer vector of [4 x i16] containing the converted
  158. /// values.
  159. static __inline__ __m64 __DEFAULT_FN_ATTRS
  160. _mm_packs_pi32(__m64 __m1, __m64 __m2)
  161. {
  162. return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
  163. }
  164. /// \brief Converts 16-bit signed integers from both 64-bit integer vector
  165. /// parameters of [4 x i16] into 8-bit unsigned integer values, and
  166. /// constructs a 64-bit integer vector of [8 x i8] as the result. Values
  167. /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
  168. /// to 0.
  169. ///
  170. /// \headerfile <x86intrin.h>
  171. ///
  172. /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
  173. ///
  174. /// \param __m1
  175. /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  176. /// 16-bit signed integer and is converted to an 8-bit unsigned integer with
  177. /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
  178. /// than 0 are saturated to 0. The converted [4 x i8] values are written to
  179. /// the lower 32 bits of the result.
  180. /// \param __m2
  181. /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
  182. /// 16-bit signed integer and is converted to an 8-bit unsigned integer with
  183. /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
  184. /// than 0 are saturated to 0. The converted [4 x i8] values are written to
  185. /// the upper 32 bits of the result.
  186. /// \returns A 64-bit integer vector of [8 x i8] containing the converted
  187. /// values.
  188. static __inline__ __m64 __DEFAULT_FN_ATTRS
  189. _mm_packs_pu16(__m64 __m1, __m64 __m2)
  190. {
  191. return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
  192. }
  193. /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
  194. /// and interleaves them into a 64-bit integer vector of [8 x i8].
  195. ///
  196. /// \headerfile <x86intrin.h>
  197. ///
  198. /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
  199. ///
  200. /// \param __m1
  201. /// A 64-bit integer vector of [8 x i8]. \n
  202. /// Bits [39:32] are written to bits [7:0] of the result. \n
  203. /// Bits [47:40] are written to bits [23:16] of the result. \n
  204. /// Bits [55:48] are written to bits [39:32] of the result. \n
  205. /// Bits [63:56] are written to bits [55:48] of the result.
  206. /// \param __m2
  207. /// A 64-bit integer vector of [8 x i8].
  208. /// Bits [39:32] are written to bits [15:8] of the result. \n
  209. /// Bits [47:40] are written to bits [31:24] of the result. \n
  210. /// Bits [55:48] are written to bits [47:40] of the result. \n
  211. /// Bits [63:56] are written to bits [63:56] of the result.
  212. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
  213. /// values.
  214. static __inline__ __m64 __DEFAULT_FN_ATTRS
  215. _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
  216. {
  217. return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
  218. }
  219. /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
  220. /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
  221. ///
  222. /// \headerfile <x86intrin.h>
  223. ///
  224. /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
  225. ///
  226. /// \param __m1
  227. /// A 64-bit integer vector of [4 x i16].
  228. /// Bits [47:32] are written to bits [15:0] of the result. \n
  229. /// Bits [63:48] are written to bits [47:32] of the result.
  230. /// \param __m2
  231. /// A 64-bit integer vector of [4 x i16].
  232. /// Bits [47:32] are written to bits [31:16] of the result. \n
  233. /// Bits [63:48] are written to bits [63:48] of the result.
  234. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
  235. /// values.
  236. static __inline__ __m64 __DEFAULT_FN_ATTRS
  237. _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
  238. {
  239. return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
  240. }
  241. /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
  242. /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
  243. ///
  244. /// \headerfile <x86intrin.h>
  245. ///
  246. /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
  247. ///
  248. /// \param __m1
  249. /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
  250. /// the lower 32 bits of the result.
  251. /// \param __m2
  252. /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
  253. /// the upper 32 bits of the result.
  254. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
  255. /// values.
  256. static __inline__ __m64 __DEFAULT_FN_ATTRS
  257. _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
  258. {
  259. return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
  260. }
  261. /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
  262. /// and interleaves them into a 64-bit integer vector of [8 x i8].
  263. ///
  264. /// \headerfile <x86intrin.h>
  265. ///
  266. /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
  267. ///
  268. /// \param __m1
  269. /// A 64-bit integer vector of [8 x i8].
  270. /// Bits [7:0] are written to bits [7:0] of the result. \n
  271. /// Bits [15:8] are written to bits [23:16] of the result. \n
  272. /// Bits [23:16] are written to bits [39:32] of the result. \n
  273. /// Bits [31:24] are written to bits [55:48] of the result.
  274. /// \param __m2
  275. /// A 64-bit integer vector of [8 x i8].
  276. /// Bits [7:0] are written to bits [15:8] of the result. \n
  277. /// Bits [15:8] are written to bits [31:24] of the result. \n
  278. /// Bits [23:16] are written to bits [47:40] of the result. \n
  279. /// Bits [31:24] are written to bits [63:56] of the result.
  280. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
  281. /// values.
  282. static __inline__ __m64 __DEFAULT_FN_ATTRS
  283. _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
  284. {
  285. return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
  286. }
  287. /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
  288. /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
  289. ///
  290. /// \headerfile <x86intrin.h>
  291. ///
  292. /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
  293. ///
  294. /// \param __m1
  295. /// A 64-bit integer vector of [4 x i16].
  296. /// Bits [15:0] are written to bits [15:0] of the result. \n
  297. /// Bits [31:16] are written to bits [47:32] of the result.
  298. /// \param __m2
  299. /// A 64-bit integer vector of [4 x i16].
  300. /// Bits [15:0] are written to bits [31:16] of the result. \n
  301. /// Bits [31:16] are written to bits [63:48] of the result.
  302. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
  303. /// values.
  304. static __inline__ __m64 __DEFAULT_FN_ATTRS
  305. _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
  306. {
  307. return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
  308. }
  309. /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
  310. /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
  311. ///
  312. /// \headerfile <x86intrin.h>
  313. ///
  314. /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
  315. ///
  316. /// \param __m1
  317. /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
  318. /// the lower 32 bits of the result.
  319. /// \param __m2
  320. /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
  321. /// the upper 32 bits of the result.
  322. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
  323. /// values.
  324. static __inline__ __m64 __DEFAULT_FN_ATTRS
  325. _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
  326. {
  327. return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
  328. }
  329. /// \brief Adds each 8-bit integer element of the first 64-bit integer vector
  330. /// of [8 x i8] to the corresponding 8-bit integer element of the second
  331. /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
  332. /// packed into a 64-bit integer vector of [8 x i8].
  333. ///
  334. /// \headerfile <x86intrin.h>
  335. ///
  336. /// This intrinsic corresponds to the <c> PADDB </c> instruction.
  337. ///
  338. /// \param __m1
  339. /// A 64-bit integer vector of [8 x i8].
  340. /// \param __m2
  341. /// A 64-bit integer vector of [8 x i8].
  342. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
  343. /// parameters.
  344. static __inline__ __m64 __DEFAULT_FN_ATTRS
  345. _mm_add_pi8(__m64 __m1, __m64 __m2)
  346. {
  347. return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
  348. }
  349. /// \brief Adds each 16-bit integer element of the first 64-bit integer vector
  350. /// of [4 x i16] to the corresponding 16-bit integer element of the second
  351. /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
  352. /// packed into a 64-bit integer vector of [4 x i16].
  353. ///
  354. /// \headerfile <x86intrin.h>
  355. ///
  356. /// This intrinsic corresponds to the <c> PADDW </c> instruction.
  357. ///
  358. /// \param __m1
  359. /// A 64-bit integer vector of [4 x i16].
  360. /// \param __m2
  361. /// A 64-bit integer vector of [4 x i16].
  362. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
  363. /// parameters.
  364. static __inline__ __m64 __DEFAULT_FN_ATTRS
  365. _mm_add_pi16(__m64 __m1, __m64 __m2)
  366. {
  367. return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
  368. }
  369. /// \brief Adds each 32-bit integer element of the first 64-bit integer vector
  370. /// of [2 x i32] to the corresponding 32-bit integer element of the second
  371. /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
  372. /// packed into a 64-bit integer vector of [2 x i32].
  373. ///
  374. /// \headerfile <x86intrin.h>
  375. ///
  376. /// This intrinsic corresponds to the <c> PADDD </c> instruction.
  377. ///
  378. /// \param __m1
  379. /// A 64-bit integer vector of [2 x i32].
  380. /// \param __m2
  381. /// A 64-bit integer vector of [2 x i32].
  382. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
  383. /// parameters.
  384. static __inline__ __m64 __DEFAULT_FN_ATTRS
  385. _mm_add_pi32(__m64 __m1, __m64 __m2)
  386. {
  387. return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
  388. }
  389. /// \brief Adds each 8-bit signed integer element of the first 64-bit integer
  390. /// vector of [8 x i8] to the corresponding 8-bit signed integer element of
  391. /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
  392. /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
  393. /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
  394. ///
  395. /// \headerfile <x86intrin.h>
  396. ///
  397. /// This intrinsic corresponds to the <c> PADDSB </c> instruction.
  398. ///
  399. /// \param __m1
  400. /// A 64-bit integer vector of [8 x i8].
  401. /// \param __m2
  402. /// A 64-bit integer vector of [8 x i8].
  403. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
  404. /// of both parameters.
  405. static __inline__ __m64 __DEFAULT_FN_ATTRS
  406. _mm_adds_pi8(__m64 __m1, __m64 __m2)
  407. {
  408. return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
  409. }
  410. /// \brief Adds each 16-bit signed integer element of the first 64-bit integer
  411. /// vector of [4 x i16] to the corresponding 16-bit signed integer element of
  412. /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
  413. /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
  414. /// saturated to 0x8000. The results are packed into a 64-bit integer vector
  415. /// of [4 x i16].
  416. ///
  417. /// \headerfile <x86intrin.h>
  418. ///
  419. /// This intrinsic corresponds to the <c> PADDSW </c> instruction.
  420. ///
  421. /// \param __m1
  422. /// A 64-bit integer vector of [4 x i16].
  423. /// \param __m2
  424. /// A 64-bit integer vector of [4 x i16].
  425. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
  426. /// of both parameters.
  427. static __inline__ __m64 __DEFAULT_FN_ATTRS
  428. _mm_adds_pi16(__m64 __m1, __m64 __m2)
  429. {
  430. return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
  431. }
  432. /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
  433. /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
  434. /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
  435. /// saturated to 0xFF. The results are packed into a 64-bit integer vector of
  436. /// [8 x i8].
  437. ///
  438. /// \headerfile <x86intrin.h>
  439. ///
  440. /// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
  441. ///
  442. /// \param __m1
  443. /// A 64-bit integer vector of [8 x i8].
  444. /// \param __m2
  445. /// A 64-bit integer vector of [8 x i8].
  446. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  447. /// unsigned sums of both parameters.
  448. static __inline__ __m64 __DEFAULT_FN_ATTRS
  449. _mm_adds_pu8(__m64 __m1, __m64 __m2)
  450. {
  451. return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
  452. }
  453. /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
  454. /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
  455. /// of the second 64-bit integer vector of [4 x i16]. Sums greater than
  456. /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
  457. /// integer vector of [4 x i16].
  458. ///
  459. /// \headerfile <x86intrin.h>
  460. ///
  461. /// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
  462. ///
  463. /// \param __m1
  464. /// A 64-bit integer vector of [4 x i16].
  465. /// \param __m2
  466. /// A 64-bit integer vector of [4 x i16].
  467. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  468. /// unsigned sums of both parameters.
  469. static __inline__ __m64 __DEFAULT_FN_ATTRS
  470. _mm_adds_pu16(__m64 __m1, __m64 __m2)
  471. {
  472. return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
  473. }
  474. /// \brief Subtracts each 8-bit integer element of the second 64-bit integer
  475. /// vector of [8 x i8] from the corresponding 8-bit integer element of the
  476. /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
  477. /// are packed into a 64-bit integer vector of [8 x i8].
  478. ///
  479. /// \headerfile <x86intrin.h>
  480. ///
  481. /// This intrinsic corresponds to the <c> PSUBB </c> instruction.
  482. ///
  483. /// \param __m1
  484. /// A 64-bit integer vector of [8 x i8] containing the minuends.
  485. /// \param __m2
  486. /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
  487. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
  488. /// both parameters.
  489. static __inline__ __m64 __DEFAULT_FN_ATTRS
  490. _mm_sub_pi8(__m64 __m1, __m64 __m2)
  491. {
  492. return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
  493. }
  494. /// \brief Subtracts each 16-bit integer element of the second 64-bit integer
  495. /// vector of [4 x i16] from the corresponding 16-bit integer element of the
  496. /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
  497. /// results are packed into a 64-bit integer vector of [4 x i16].
  498. ///
  499. /// \headerfile <x86intrin.h>
  500. ///
  501. /// This intrinsic corresponds to the <c> PSUBW </c> instruction.
  502. ///
  503. /// \param __m1
  504. /// A 64-bit integer vector of [4 x i16] containing the minuends.
  505. /// \param __m2
  506. /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
  507. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
  508. /// both parameters.
  509. static __inline__ __m64 __DEFAULT_FN_ATTRS
  510. _mm_sub_pi16(__m64 __m1, __m64 __m2)
  511. {
  512. return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
  513. }
  514. /// \brief Subtracts each 32-bit integer element of the second 64-bit integer
  515. /// vector of [2 x i32] from the corresponding 32-bit integer element of the
  516. /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
  517. /// results are packed into a 64-bit integer vector of [2 x i32].
  518. ///
  519. /// \headerfile <x86intrin.h>
  520. ///
  521. /// This intrinsic corresponds to the <c> PSUBD </c> instruction.
  522. ///
  523. /// \param __m1
  524. /// A 64-bit integer vector of [2 x i32] containing the minuends.
  525. /// \param __m2
  526. /// A 64-bit integer vector of [2 x i32] containing the subtrahends.
  527. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
  528. /// both parameters.
  529. static __inline__ __m64 __DEFAULT_FN_ATTRS
  530. _mm_sub_pi32(__m64 __m1, __m64 __m2)
  531. {
  532. return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
  533. }
  534. /// \brief Subtracts each 8-bit signed integer element of the second 64-bit
  535. /// integer vector of [8 x i8] from the corresponding 8-bit signed integer
  536. /// element of the first 64-bit integer vector of [8 x i8]. Positive results
  537. /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
  538. /// are saturated to 0x80. The results are packed into a 64-bit integer
  539. /// vector of [8 x i8].
  540. ///
  541. /// \headerfile <x86intrin.h>
  542. ///
  543. /// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
  544. ///
  545. /// \param __m1
  546. /// A 64-bit integer vector of [8 x i8] containing the minuends.
  547. /// \param __m2
  548. /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
  549. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  550. /// differences of both parameters.
  551. static __inline__ __m64 __DEFAULT_FN_ATTRS
  552. _mm_subs_pi8(__m64 __m1, __m64 __m2)
  553. {
  554. return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
  555. }
  556. /// \brief Subtracts each 16-bit signed integer element of the second 64-bit
  557. /// integer vector of [4 x i16] from the corresponding 16-bit signed integer
  558. /// element of the first 64-bit integer vector of [4 x i16]. Positive results
  559. /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
  560. /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
  561. /// integer vector of [4 x i16].
  562. ///
  563. /// \headerfile <x86intrin.h>
  564. ///
  565. /// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
  566. ///
  567. /// \param __m1
  568. /// A 64-bit integer vector of [4 x i16] containing the minuends.
  569. /// \param __m2
  570. /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
  571. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  572. /// differences of both parameters.
  573. static __inline__ __m64 __DEFAULT_FN_ATTRS
  574. _mm_subs_pi16(__m64 __m1, __m64 __m2)
  575. {
  576. return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
  577. }
  578. /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
  579. /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
  580. /// element of the first 64-bit integer vector of [8 x i8].
  581. ///
  582. /// If an element of the first vector is less than the corresponding element
  583. /// of the second vector, the result is saturated to 0. The results are
  584. /// packed into a 64-bit integer vector of [8 x i8].
  585. ///
  586. /// \headerfile <x86intrin.h>
  587. ///
  588. /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
  589. ///
  590. /// \param __m1
  591. /// A 64-bit integer vector of [8 x i8] containing the minuends.
  592. /// \param __m2
  593. /// A 64-bit integer vector of [8 x i8] containing the subtrahends.
  594. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
  595. /// differences of both parameters.
  596. static __inline__ __m64 __DEFAULT_FN_ATTRS
  597. _mm_subs_pu8(__m64 __m1, __m64 __m2)
  598. {
  599. return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
  600. }
  601. /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
  602. /// integer vector of [4 x i16] from the corresponding 16-bit unsigned
  603. /// integer element of the first 64-bit integer vector of [4 x i16].
  604. ///
  605. /// If an element of the first vector is less than the corresponding element
  606. /// of the second vector, the result is saturated to 0. The results are
  607. /// packed into a 64-bit integer vector of [4 x i16].
  608. ///
  609. /// \headerfile <x86intrin.h>
  610. ///
  611. /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
  612. ///
  613. /// \param __m1
  614. /// A 64-bit integer vector of [4 x i16] containing the minuends.
  615. /// \param __m2
  616. /// A 64-bit integer vector of [4 x i16] containing the subtrahends.
  617. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
  618. /// differences of both parameters.
  619. static __inline__ __m64 __DEFAULT_FN_ATTRS
  620. _mm_subs_pu16(__m64 __m1, __m64 __m2)
  621. {
  622. return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
  623. }
  624. /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
  625. /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
  626. /// element of the second 64-bit integer vector of [4 x i16] and get four
  627. /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
  628. /// The lower 32 bits of these two sums are packed into a 64-bit integer
  629. /// vector of [2 x i32].
  630. ///
  631. /// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
  632. /// of both parameters are multiplied, and the sum of both results is written
  633. /// to bits [31:0] of the result.
  634. ///
  635. /// \headerfile <x86intrin.h>
  636. ///
  637. /// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
  638. ///
  639. /// \param __m1
  640. /// A 64-bit integer vector of [4 x i16].
  641. /// \param __m2
  642. /// A 64-bit integer vector of [4 x i16].
  643. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
  644. /// products of both parameters.
  645. static __inline__ __m64 __DEFAULT_FN_ATTRS
  646. _mm_madd_pi16(__m64 __m1, __m64 __m2)
  647. {
  648. return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
  649. }
  650. /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
  651. /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
  652. /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
  653. /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
  654. ///
  655. /// \headerfile <x86intrin.h>
  656. ///
  657. /// This intrinsic corresponds to the <c> PMULHW </c> instruction.
  658. ///
  659. /// \param __m1
  660. /// A 64-bit integer vector of [4 x i16].
  661. /// \param __m2
  662. /// A 64-bit integer vector of [4 x i16].
  663. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
  664. /// of the products of both parameters.
  665. static __inline__ __m64 __DEFAULT_FN_ATTRS
  666. _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
  667. {
  668. return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
  669. }
  670. /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
  671. /// integer vector of [4 x i16] by the corresponding 16-bit signed integer
  672. /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
  673. /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
  674. ///
  675. /// \headerfile <x86intrin.h>
  676. ///
  677. /// This intrinsic corresponds to the <c> PMULLW </c> instruction.
  678. ///
  679. /// \param __m1
  680. /// A 64-bit integer vector of [4 x i16].
  681. /// \param __m2
  682. /// A 64-bit integer vector of [4 x i16].
  683. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
  684. /// of the products of both parameters.
  685. static __inline__ __m64 __DEFAULT_FN_ATTRS
  686. _mm_mullo_pi16(__m64 __m1, __m64 __m2)
  687. {
  688. return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
  689. }
  690. /// \brief Left-shifts each 16-bit signed integer element of the first
  691. /// parameter, which is a 64-bit integer vector of [4 x i16], by the number
  692. /// of bits specified by the second parameter, which is a 64-bit integer. The
  693. /// lower 16 bits of the results are packed into a 64-bit integer vector of
  694. /// [4 x i16].
  695. ///
  696. /// \headerfile <x86intrin.h>
  697. ///
  698. /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
  699. ///
  700. /// \param __m
  701. /// A 64-bit integer vector of [4 x i16].
  702. /// \param __count
  703. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  704. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
  705. /// values. If \a __count is greater or equal to 16, the result is set to all
  706. /// 0.
  707. static __inline__ __m64 __DEFAULT_FN_ATTRS
  708. _mm_sll_pi16(__m64 __m, __m64 __count)
  709. {
  710. return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
  711. }
  712. /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
  713. /// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
  714. /// The lower 16 bits of the results are packed into a 64-bit integer vector
  715. /// of [4 x i16].
  716. ///
  717. /// \headerfile <x86intrin.h>
  718. ///
  719. /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
  720. ///
  721. /// \param __m
  722. /// A 64-bit integer vector of [4 x i16].
  723. /// \param __count
  724. /// A 32-bit integer value.
  725. /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
  726. /// values. If \a __count is greater or equal to 16, the result is set to all
  727. /// 0.
  728. static __inline__ __m64 __DEFAULT_FN_ATTRS
  729. _mm_slli_pi16(__m64 __m, int __count)
  730. {
  731. return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
  732. }
  733. /// \brief Left-shifts each 32-bit signed integer element of the first
  734. /// parameter, which is a 64-bit integer vector of [2 x i32], by the number
  735. /// of bits specified by the second parameter, which is a 64-bit integer. The
  736. /// lower 32 bits of the results are packed into a 64-bit integer vector of
  737. /// [2 x i32].
  738. ///
  739. /// \headerfile <x86intrin.h>
  740. ///
  741. /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
  742. ///
  743. /// \param __m
  744. /// A 64-bit integer vector of [2 x i32].
  745. /// \param __count
  746. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  747. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
  748. /// values. If \a __count is greater or equal to 32, the result is set to all
  749. /// 0.
  750. static __inline__ __m64 __DEFAULT_FN_ATTRS
  751. _mm_sll_pi32(__m64 __m, __m64 __count)
  752. {
  753. return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
  754. }
  755. /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
  756. /// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
  757. /// The lower 32 bits of the results are packed into a 64-bit integer vector
  758. /// of [2 x i32].
  759. ///
  760. /// \headerfile <x86intrin.h>
  761. ///
  762. /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
  763. ///
  764. /// \param __m
  765. /// A 64-bit integer vector of [2 x i32].
  766. /// \param __count
  767. /// A 32-bit integer value.
  768. /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
  769. /// values. If \a __count is greater or equal to 32, the result is set to all
  770. /// 0.
  771. static __inline__ __m64 __DEFAULT_FN_ATTRS
  772. _mm_slli_pi32(__m64 __m, int __count)
  773. {
  774. return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
  775. }
  776. /// \brief Left-shifts the first 64-bit integer parameter by the number of bits
  777. /// specified by the second 64-bit integer parameter. The lower 64 bits of
  778. /// result are returned.
  779. ///
  780. /// \headerfile <x86intrin.h>
  781. ///
  782. /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
  783. ///
  784. /// \param __m
  785. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  786. /// \param __count
  787. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  788. /// \returns A 64-bit integer vector containing the left-shifted value. If
  789. /// \a __count is greater or equal to 64, the result is set to 0.
  790. static __inline__ __m64 __DEFAULT_FN_ATTRS
  791. _mm_sll_si64(__m64 __m, __m64 __count)
  792. {
  793. return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
  794. }
  795. /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
  796. /// number of bits specified by the second parameter, which is a 32-bit
  797. /// integer. The lower 64 bits of result are returned.
  798. ///
  799. /// \headerfile <x86intrin.h>
  800. ///
  801. /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
  802. ///
  803. /// \param __m
  804. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  805. /// \param __count
  806. /// A 32-bit integer value.
  807. /// \returns A 64-bit integer vector containing the left-shifted value. If
  808. /// \a __count is greater or equal to 64, the result is set to 0.
  809. static __inline__ __m64 __DEFAULT_FN_ATTRS
  810. _mm_slli_si64(__m64 __m, int __count)
  811. {
  812. return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
  813. }
  814. /// \brief Right-shifts each 16-bit integer element of the first parameter,
  815. /// which is a 64-bit integer vector of [4 x i16], by the number of bits
  816. /// specified by the second parameter, which is a 64-bit integer.
  817. ///
  818. /// High-order bits are filled with the sign bit of the initial value of each
  819. /// 16-bit element. The 16-bit results are packed into a 64-bit integer
  820. /// vector of [4 x i16].
  821. ///
  822. /// \headerfile <x86intrin.h>
  823. ///
  824. /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
  825. ///
  826. /// \param __m
  827. /// A 64-bit integer vector of [4 x i16].
  828. /// \param __count
  829. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  830. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  831. /// values.
  832. static __inline__ __m64 __DEFAULT_FN_ATTRS
  833. _mm_sra_pi16(__m64 __m, __m64 __count)
  834. {
  835. return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
  836. }
  837. /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
  838. /// of [4 x i16] by the number of bits specified by a 32-bit integer.
  839. ///
  840. /// High-order bits are filled with the sign bit of the initial value of each
  841. /// 16-bit element. The 16-bit results are packed into a 64-bit integer
  842. /// vector of [4 x i16].
  843. ///
  844. /// \headerfile <x86intrin.h>
  845. ///
  846. /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
  847. ///
  848. /// \param __m
  849. /// A 64-bit integer vector of [4 x i16].
  850. /// \param __count
  851. /// A 32-bit integer value.
  852. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  853. /// values.
  854. static __inline__ __m64 __DEFAULT_FN_ATTRS
  855. _mm_srai_pi16(__m64 __m, int __count)
  856. {
  857. return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
  858. }
  859. /// \brief Right-shifts each 32-bit integer element of the first parameter,
  860. /// which is a 64-bit integer vector of [2 x i32], by the number of bits
  861. /// specified by the second parameter, which is a 64-bit integer.
  862. ///
  863. /// High-order bits are filled with the sign bit of the initial value of each
  864. /// 32-bit element. The 32-bit results are packed into a 64-bit integer
  865. /// vector of [2 x i32].
  866. ///
  867. /// \headerfile <x86intrin.h>
  868. ///
  869. /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
  870. ///
  871. /// \param __m
  872. /// A 64-bit integer vector of [2 x i32].
  873. /// \param __count
  874. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  875. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  876. /// values.
  877. static __inline__ __m64 __DEFAULT_FN_ATTRS
  878. _mm_sra_pi32(__m64 __m, __m64 __count)
  879. {
  880. return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
  881. }
  882. /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
  883. /// of [2 x i32] by the number of bits specified by a 32-bit integer.
  884. ///
  885. /// High-order bits are filled with the sign bit of the initial value of each
  886. /// 32-bit element. The 32-bit results are packed into a 64-bit integer
  887. /// vector of [2 x i32].
  888. ///
  889. /// \headerfile <x86intrin.h>
  890. ///
  891. /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
  892. ///
  893. /// \param __m
  894. /// A 64-bit integer vector of [2 x i32].
  895. /// \param __count
  896. /// A 32-bit integer value.
  897. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  898. /// values.
  899. static __inline__ __m64 __DEFAULT_FN_ATTRS
  900. _mm_srai_pi32(__m64 __m, int __count)
  901. {
  902. return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
  903. }
  904. /// \brief Right-shifts each 16-bit integer element of the first parameter,
  905. /// which is a 64-bit integer vector of [4 x i16], by the number of bits
  906. /// specified by the second parameter, which is a 64-bit integer.
  907. ///
  908. /// High-order bits are cleared. The 16-bit results are packed into a 64-bit
  909. /// integer vector of [4 x i16].
  910. ///
  911. /// \headerfile <x86intrin.h>
  912. ///
  913. /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
  914. ///
  915. /// \param __m
  916. /// A 64-bit integer vector of [4 x i16].
  917. /// \param __count
  918. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  919. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  920. /// values.
  921. static __inline__ __m64 __DEFAULT_FN_ATTRS
  922. _mm_srl_pi16(__m64 __m, __m64 __count)
  923. {
  924. return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
  925. }
  926. /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
  927. /// of [4 x i16] by the number of bits specified by a 32-bit integer.
  928. ///
  929. /// High-order bits are cleared. The 16-bit results are packed into a 64-bit
  930. /// integer vector of [4 x i16].
  931. ///
  932. /// \headerfile <x86intrin.h>
  933. ///
  934. /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
  935. ///
  936. /// \param __m
  937. /// A 64-bit integer vector of [4 x i16].
  938. /// \param __count
  939. /// A 32-bit integer value.
  940. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
  941. /// values.
  942. static __inline__ __m64 __DEFAULT_FN_ATTRS
  943. _mm_srli_pi16(__m64 __m, int __count)
  944. {
  945. return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
  946. }
  947. /// \brief Right-shifts each 32-bit integer element of the first parameter,
  948. /// which is a 64-bit integer vector of [2 x i32], by the number of bits
  949. /// specified by the second parameter, which is a 64-bit integer.
  950. ///
  951. /// High-order bits are cleared. The 32-bit results are packed into a 64-bit
  952. /// integer vector of [2 x i32].
  953. ///
  954. /// \headerfile <x86intrin.h>
  955. ///
  956. /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
  957. ///
  958. /// \param __m
  959. /// A 64-bit integer vector of [2 x i32].
  960. /// \param __count
  961. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  962. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  963. /// values.
  964. static __inline__ __m64 __DEFAULT_FN_ATTRS
  965. _mm_srl_pi32(__m64 __m, __m64 __count)
  966. {
  967. return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
  968. }
  969. /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
  970. /// of [2 x i32] by the number of bits specified by a 32-bit integer.
  971. ///
  972. /// High-order bits are cleared. The 32-bit results are packed into a 64-bit
  973. /// integer vector of [2 x i32].
  974. ///
  975. /// \headerfile <x86intrin.h>
  976. ///
  977. /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
  978. ///
  979. /// \param __m
  980. /// A 64-bit integer vector of [2 x i32].
  981. /// \param __count
  982. /// A 32-bit integer value.
  983. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
  984. /// values.
  985. static __inline__ __m64 __DEFAULT_FN_ATTRS
  986. _mm_srli_pi32(__m64 __m, int __count)
  987. {
  988. return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
  989. }
  990. /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
  991. /// specified by the second 64-bit integer parameter.
  992. ///
  993. /// High-order bits are cleared.
  994. ///
  995. /// \headerfile <x86intrin.h>
  996. ///
  997. /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
  998. ///
  999. /// \param __m
  1000. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  1001. /// \param __count
  1002. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  1003. /// \returns A 64-bit integer vector containing the right-shifted value.
  1004. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1005. _mm_srl_si64(__m64 __m, __m64 __count)
  1006. {
  1007. return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
  1008. }
  1009. /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
  1010. /// number of bits specified by the second parameter, which is a 32-bit
  1011. /// integer.
  1012. ///
  1013. /// High-order bits are cleared.
  1014. ///
  1015. /// \headerfile <x86intrin.h>
  1016. ///
  1017. /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
  1018. ///
  1019. /// \param __m
  1020. /// A 64-bit integer vector interpreted as a single 64-bit integer.
  1021. /// \param __count
  1022. /// A 32-bit integer value.
  1023. /// \returns A 64-bit integer vector containing the right-shifted value.
  1024. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1025. _mm_srli_si64(__m64 __m, int __count)
  1026. {
  1027. return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
  1028. }
  1029. /// \brief Performs a bitwise AND of two 64-bit integer vectors.
  1030. ///
  1031. /// \headerfile <x86intrin.h>
  1032. ///
  1033. /// This intrinsic corresponds to the <c> PAND </c> instruction.
  1034. ///
  1035. /// \param __m1
  1036. /// A 64-bit integer vector.
  1037. /// \param __m2
  1038. /// A 64-bit integer vector.
  1039. /// \returns A 64-bit integer vector containing the bitwise AND of both
  1040. /// parameters.
  1041. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1042. _mm_and_si64(__m64 __m1, __m64 __m2)
  1043. {
  1044. return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
  1045. }
  1046. /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
  1047. /// performs a bitwise AND of the intermediate result and the second 64-bit
  1048. /// integer vector.
  1049. ///
  1050. /// \headerfile <x86intrin.h>
  1051. ///
  1052. /// This intrinsic corresponds to the <c> PANDN </c> instruction.
  1053. ///
  1054. /// \param __m1
  1055. /// A 64-bit integer vector. The one's complement of this parameter is used
  1056. /// in the bitwise AND.
  1057. /// \param __m2
  1058. /// A 64-bit integer vector.
  1059. /// \returns A 64-bit integer vector containing the bitwise AND of the second
  1060. /// parameter and the one's complement of the first parameter.
  1061. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1062. _mm_andnot_si64(__m64 __m1, __m64 __m2)
  1063. {
  1064. return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
  1065. }
  1066. /// \brief Performs a bitwise OR of two 64-bit integer vectors.
  1067. ///
  1068. /// \headerfile <x86intrin.h>
  1069. ///
  1070. /// This intrinsic corresponds to the <c> POR </c> instruction.
  1071. ///
  1072. /// \param __m1
  1073. /// A 64-bit integer vector.
  1074. /// \param __m2
  1075. /// A 64-bit integer vector.
  1076. /// \returns A 64-bit integer vector containing the bitwise OR of both
  1077. /// parameters.
  1078. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1079. _mm_or_si64(__m64 __m1, __m64 __m2)
  1080. {
  1081. return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
  1082. }
  1083. /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
  1084. ///
  1085. /// \headerfile <x86intrin.h>
  1086. ///
  1087. /// This intrinsic corresponds to the <c> PXOR </c> instruction.
  1088. ///
  1089. /// \param __m1
  1090. /// A 64-bit integer vector.
  1091. /// \param __m2
  1092. /// A 64-bit integer vector.
  1093. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
  1094. /// parameters.
  1095. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1096. _mm_xor_si64(__m64 __m1, __m64 __m2)
  1097. {
  1098. return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
  1099. }
  1100. /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
  1101. /// [8 x i8] to determine if the element of the first vector is equal to the
  1102. /// corresponding element of the second vector.
  1103. ///
  1104. /// The comparison yields 0 for false, 0xFF for true.
  1105. ///
  1106. /// \headerfile <x86intrin.h>
  1107. ///
  1108. /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
  1109. ///
  1110. /// \param __m1
  1111. /// A 64-bit integer vector of [8 x i8].
  1112. /// \param __m2
  1113. /// A 64-bit integer vector of [8 x i8].
  1114. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
  1115. /// results.
  1116. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1117. _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
  1118. {
  1119. return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
  1120. }
  1121. /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
  1122. /// [4 x i16] to determine if the element of the first vector is equal to the
  1123. /// corresponding element of the second vector.
  1124. ///
  1125. /// The comparison yields 0 for false, 0xFFFF for true.
  1126. ///
  1127. /// \headerfile <x86intrin.h>
  1128. ///
  1129. /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
  1130. ///
  1131. /// \param __m1
  1132. /// A 64-bit integer vector of [4 x i16].
  1133. /// \param __m2
  1134. /// A 64-bit integer vector of [4 x i16].
  1135. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
  1136. /// results.
  1137. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1138. _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
  1139. {
  1140. return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
  1141. }
  1142. /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
  1143. /// [2 x i32] to determine if the element of the first vector is equal to the
  1144. /// corresponding element of the second vector.
  1145. ///
  1146. /// The comparison yields 0 for false, 0xFFFFFFFF for true.
  1147. ///
  1148. /// \headerfile <x86intrin.h>
  1149. ///
  1150. /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
  1151. ///
  1152. /// \param __m1
  1153. /// A 64-bit integer vector of [2 x i32].
  1154. /// \param __m2
  1155. /// A 64-bit integer vector of [2 x i32].
  1156. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
  1157. /// results.
  1158. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1159. _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
  1160. {
  1161. return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
  1162. }
  1163. /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
  1164. /// [8 x i8] to determine if the element of the first vector is greater than
  1165. /// the corresponding element of the second vector.
  1166. ///
  1167. /// The comparison yields 0 for false, 0xFF for true.
  1168. ///
  1169. /// \headerfile <x86intrin.h>
  1170. ///
  1171. /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
  1172. ///
  1173. /// \param __m1
  1174. /// A 64-bit integer vector of [8 x i8].
  1175. /// \param __m2
  1176. /// A 64-bit integer vector of [8 x i8].
  1177. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
  1178. /// results.
  1179. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1180. _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
  1181. {
  1182. return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
  1183. }
  1184. /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
  1185. /// [4 x i16] to determine if the element of the first vector is greater than
  1186. /// the corresponding element of the second vector.
  1187. ///
  1188. /// The comparison yields 0 for false, 0xFFFF for true.
  1189. ///
  1190. /// \headerfile <x86intrin.h>
  1191. ///
  1192. /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
  1193. ///
  1194. /// \param __m1
  1195. /// A 64-bit integer vector of [4 x i16].
  1196. /// \param __m2
  1197. /// A 64-bit integer vector of [4 x i16].
  1198. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
  1199. /// results.
  1200. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1201. _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
  1202. {
  1203. return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
  1204. }
  1205. /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
  1206. /// [2 x i32] to determine if the element of the first vector is greater than
  1207. /// the corresponding element of the second vector.
  1208. ///
  1209. /// The comparison yields 0 for false, 0xFFFFFFFF for true.
  1210. ///
  1211. /// \headerfile <x86intrin.h>
  1212. ///
  1213. /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
  1214. ///
  1215. /// \param __m1
  1216. /// A 64-bit integer vector of [2 x i32].
  1217. /// \param __m2
  1218. /// A 64-bit integer vector of [2 x i32].
  1219. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
  1220. /// results.
  1221. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1222. _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
  1223. {
  1224. return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
  1225. }
  1226. /// \brief Constructs a 64-bit integer vector initialized to zero.
  1227. ///
  1228. /// \headerfile <x86intrin.h>
  1229. ///
  1230. /// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
  1231. ///
  1232. /// \returns An initialized 64-bit integer vector with all elements set to zero.
  1233. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1234. _mm_setzero_si64(void)
  1235. {
  1236. return (__m64){ 0LL };
  1237. }
  1238. /// \brief Constructs a 64-bit integer vector initialized with the specified
  1239. /// 32-bit integer values.
  1240. ///
  1241. /// \headerfile <x86intrin.h>
  1242. ///
  1243. /// This intrinsic is a utility function and does not correspond to a specific
  1244. /// instruction.
  1245. ///
  1246. /// \param __i1
  1247. /// A 32-bit integer value used to initialize the upper 32 bits of the
  1248. /// result.
  1249. /// \param __i0
  1250. /// A 32-bit integer value used to initialize the lower 32 bits of the
  1251. /// result.
  1252. /// \returns An initialized 64-bit integer vector.
  1253. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1254. _mm_set_pi32(int __i1, int __i0)
  1255. {
  1256. return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
  1257. }
  1258. /// \brief Constructs a 64-bit integer vector initialized with the specified
  1259. /// 16-bit integer values.
  1260. ///
  1261. /// \headerfile <x86intrin.h>
  1262. ///
  1263. /// This intrinsic is a utility function and does not correspond to a specific
  1264. /// instruction.
  1265. ///
  1266. /// \param __s3
  1267. /// A 16-bit integer value used to initialize bits [63:48] of the result.
  1268. /// \param __s2
  1269. /// A 16-bit integer value used to initialize bits [47:32] of the result.
  1270. /// \param __s1
  1271. /// A 16-bit integer value used to initialize bits [31:16] of the result.
  1272. /// \param __s0
  1273. /// A 16-bit integer value used to initialize bits [15:0] of the result.
  1274. /// \returns An initialized 64-bit integer vector.
  1275. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1276. _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
  1277. {
  1278. return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
  1279. }
  1280. /// \brief Constructs a 64-bit integer vector initialized with the specified
  1281. /// 8-bit integer values.
  1282. ///
  1283. /// \headerfile <x86intrin.h>
  1284. ///
  1285. /// This intrinsic is a utility function and does not correspond to a specific
  1286. /// instruction.
  1287. ///
  1288. /// \param __b7
  1289. /// An 8-bit integer value used to initialize bits [63:56] of the result.
  1290. /// \param __b6
  1291. /// An 8-bit integer value used to initialize bits [55:48] of the result.
  1292. /// \param __b5
  1293. /// An 8-bit integer value used to initialize bits [47:40] of the result.
  1294. /// \param __b4
  1295. /// An 8-bit integer value used to initialize bits [39:32] of the result.
  1296. /// \param __b3
  1297. /// An 8-bit integer value used to initialize bits [31:24] of the result.
  1298. /// \param __b2
  1299. /// An 8-bit integer value used to initialize bits [23:16] of the result.
  1300. /// \param __b1
  1301. /// An 8-bit integer value used to initialize bits [15:8] of the result.
  1302. /// \param __b0
  1303. /// An 8-bit integer value used to initialize bits [7:0] of the result.
  1304. /// \returns An initialized 64-bit integer vector.
  1305. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1306. _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
  1307. char __b1, char __b0)
  1308. {
  1309. return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
  1310. __b4, __b5, __b6, __b7);
  1311. }
  1312. /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
  1313. /// 32-bit integer vector elements set to the specified 32-bit integer
  1314. /// value.
  1315. ///
  1316. /// \headerfile <x86intrin.h>
  1317. ///
  1318. /// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
  1319. ///
  1320. /// \param __i
  1321. /// A 32-bit integer value used to initialize each vector element of the
  1322. /// result.
  1323. /// \returns An initialized 64-bit integer vector of [2 x i32].
  1324. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1325. _mm_set1_pi32(int __i)
  1326. {
  1327. return _mm_set_pi32(__i, __i);
  1328. }
  1329. /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
  1330. /// 16-bit integer vector elements set to the specified 16-bit integer
  1331. /// value.
  1332. ///
  1333. /// \headerfile <x86intrin.h>
  1334. ///
  1335. /// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
  1336. ///
  1337. /// \param __w
  1338. /// A 16-bit integer value used to initialize each vector element of the
  1339. /// result.
  1340. /// \returns An initialized 64-bit integer vector of [4 x i16].
  1341. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1342. _mm_set1_pi16(short __w)
  1343. {
  1344. return _mm_set_pi16(__w, __w, __w, __w);
  1345. }
  1346. /// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
  1347. /// 8-bit integer vector elements set to the specified 8-bit integer value.
  1348. ///
  1349. /// \headerfile <x86intrin.h>
  1350. ///
  1351. /// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW +
  1352. /// PSHUFLW </c> instruction.
  1353. ///
  1354. /// \param __b
  1355. /// An 8-bit integer value used to initialize each vector element of the
  1356. /// result.
  1357. /// \returns An initialized 64-bit integer vector of [8 x i8].
  1358. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1359. _mm_set1_pi8(char __b)
  1360. {
  1361. return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
  1362. }
  1363. /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
  1364. /// the specified 32-bit integer values.
  1365. ///
  1366. /// \headerfile <x86intrin.h>
  1367. ///
  1368. /// This intrinsic is a utility function and does not correspond to a specific
  1369. /// instruction.
  1370. ///
  1371. /// \param __i0
  1372. /// A 32-bit integer value used to initialize the lower 32 bits of the
  1373. /// result.
  1374. /// \param __i1
  1375. /// A 32-bit integer value used to initialize the upper 32 bits of the
  1376. /// result.
  1377. /// \returns An initialized 64-bit integer vector.
  1378. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1379. _mm_setr_pi32(int __i0, int __i1)
  1380. {
  1381. return _mm_set_pi32(__i1, __i0);
  1382. }
  1383. /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
  1384. /// the specified 16-bit integer values.
  1385. ///
  1386. /// \headerfile <x86intrin.h>
  1387. ///
  1388. /// This intrinsic is a utility function and does not correspond to a specific
  1389. /// instruction.
  1390. ///
  1391. /// \param __w0
  1392. /// A 16-bit integer value used to initialize bits [15:0] of the result.
  1393. /// \param __w1
  1394. /// A 16-bit integer value used to initialize bits [31:16] of the result.
  1395. /// \param __w2
  1396. /// A 16-bit integer value used to initialize bits [47:32] of the result.
  1397. /// \param __w3
  1398. /// A 16-bit integer value used to initialize bits [63:48] of the result.
  1399. /// \returns An initialized 64-bit integer vector.
  1400. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1401. _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
  1402. {
  1403. return _mm_set_pi16(__w3, __w2, __w1, __w0);
  1404. }
  1405. /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
  1406. /// the specified 8-bit integer values.
  1407. ///
  1408. /// \headerfile <x86intrin.h>
  1409. ///
  1410. /// This intrinsic is a utility function and does not correspond to a specific
  1411. /// instruction.
  1412. ///
  1413. /// \param __b0
  1414. /// An 8-bit integer value used to initialize bits [7:0] of the result.
  1415. /// \param __b1
  1416. /// An 8-bit integer value used to initialize bits [15:8] of the result.
  1417. /// \param __b2
  1418. /// An 8-bit integer value used to initialize bits [23:16] of the result.
  1419. /// \param __b3
  1420. /// An 8-bit integer value used to initialize bits [31:24] of the result.
  1421. /// \param __b4
  1422. /// An 8-bit integer value used to initialize bits [39:32] of the result.
  1423. /// \param __b5
  1424. /// An 8-bit integer value used to initialize bits [47:40] of the result.
  1425. /// \param __b6
  1426. /// An 8-bit integer value used to initialize bits [55:48] of the result.
  1427. /// \param __b7
  1428. /// An 8-bit integer value used to initialize bits [63:56] of the result.
  1429. /// \returns An initialized 64-bit integer vector.
  1430. static __inline__ __m64 __DEFAULT_FN_ATTRS
  1431. _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
  1432. char __b6, char __b7)
  1433. {
  1434. return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
  1435. }
  /* The function-attribute helper macro is only meant for the definitions in
     this header, so drop it before the header ends. */
  #undef __DEFAULT_FN_ATTRS

  /* Aliases for compatibility.
   *
   * Each _m_* name below is an object-like macro that expands to the matching
   * _mm_* intrinsic name, so either spelling refers to the same intrinsic.
   */

  /* _m_empty, _m_from_int*, _m_to_int* */
  #define _m_empty      _mm_empty
  #define _m_from_int   _mm_cvtsi32_si64
  #define _m_from_int64 _mm_cvtsi64_m64
  #define _m_to_int     _mm_cvtsi64_si32
  #define _m_to_int64   _mm_cvtm64_si64

  /* _m_pack* -> _mm_packs_* */
  #define _m_packsswb   _mm_packs_pi16
  #define _m_packssdw   _mm_packs_pi32
  #define _m_packuswb   _mm_packs_pu16

  /* _m_punpckh* / _m_punpckl* -> _mm_unpackhi_* / _mm_unpacklo_* */
  #define _m_punpckhbw  _mm_unpackhi_pi8
  #define _m_punpckhwd  _mm_unpackhi_pi16
  #define _m_punpckhdq  _mm_unpackhi_pi32
  #define _m_punpcklbw  _mm_unpacklo_pi8
  #define _m_punpcklwd  _mm_unpacklo_pi16
  #define _m_punpckldq  _mm_unpacklo_pi32

  /* _m_padd* -> _mm_add_* / _mm_adds_* */
  #define _m_paddb      _mm_add_pi8
  #define _m_paddw      _mm_add_pi16
  #define _m_paddd      _mm_add_pi32
  #define _m_paddsb     _mm_adds_pi8
  #define _m_paddsw     _mm_adds_pi16
  #define _m_paddusb    _mm_adds_pu8
  #define _m_paddusw    _mm_adds_pu16

  /* _m_psub* -> _mm_sub_* / _mm_subs_* */
  #define _m_psubb      _mm_sub_pi8
  #define _m_psubw      _mm_sub_pi16
  #define _m_psubd      _mm_sub_pi32
  #define _m_psubsb     _mm_subs_pi8
  #define _m_psubsw     _mm_subs_pi16
  #define _m_psubusb    _mm_subs_pu8
  #define _m_psubusw    _mm_subs_pu16

  /* _m_pmadd* / _m_pmul* -> _mm_madd_* / _mm_mulhi_* / _mm_mullo_* */
  #define _m_pmaddwd    _mm_madd_pi16
  #define _m_pmulhw     _mm_mulhi_pi16
  #define _m_pmullw     _mm_mullo_pi16

  /* _m_psll* -> _mm_sll_* / _mm_slli_* */
  #define _m_psllw      _mm_sll_pi16
  #define _m_psllwi     _mm_slli_pi16
  #define _m_pslld      _mm_sll_pi32
  #define _m_pslldi     _mm_slli_pi32
  #define _m_psllq      _mm_sll_si64
  #define _m_psllqi     _mm_slli_si64

  /* _m_psra* -> _mm_sra_* / _mm_srai_* */
  #define _m_psraw      _mm_sra_pi16
  #define _m_psrawi     _mm_srai_pi16
  #define _m_psrad      _mm_sra_pi32
  #define _m_psradi     _mm_srai_pi32

  /* _m_psrl* -> _mm_srl_* / _mm_srli_* */
  #define _m_psrlw      _mm_srl_pi16
  #define _m_psrlwi     _mm_srli_pi16
  #define _m_psrld      _mm_srl_pi32
  #define _m_psrldi     _mm_srli_pi32
  #define _m_psrlq      _mm_srl_si64
  #define _m_psrlqi     _mm_srli_si64

  /* _m_pand / _m_pandn / _m_por / _m_pxor -> _mm_*_si64 */
  #define _m_pand       _mm_and_si64
  #define _m_pandn      _mm_andnot_si64
  #define _m_por        _mm_or_si64
  #define _m_pxor       _mm_xor_si64

  /* _m_pcmpeq* / _m_pcmpgt* -> _mm_cmpeq_* / _mm_cmpgt_* */
  #define _m_pcmpeqb    _mm_cmpeq_pi8
  #define _m_pcmpeqw    _mm_cmpeq_pi16
  #define _m_pcmpeqd    _mm_cmpeq_pi32
  #define _m_pcmpgtb    _mm_cmpgt_pi8
  #define _m_pcmpgtw    _mm_cmpgt_pi16
  #define _m_pcmpgtd    _mm_cmpgt_pi32
  1495. #endif /* __MMINTRIN_H */